diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs
index 3b4f1f0d70c..1064daa4060 100644
--- a/lib/compiler-singlepass/src/codegen.rs
+++ b/lib/compiler-singlepass/src/codegen.rs
@@ -470,7 +470,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
     fn init_locals(
         &mut self,
         n: usize,
-        n_params: usize,
+        sig: FunctionType,
         calling_convention: CallingConvention,
     ) -> Vec<Location<M::GPR, M::SIMD>> {
         // How many machine stack slots will all the locals use?
@@ -560,15 +560,29 @@ impl<'a, M: Machine> FuncGen<'a, M> {
         // Load in-register parameters into the allocated locations.
         // Locals are allocated on the stack from higher address to lower address,
         // so we won't skip the stack guard page here.
-        for i in 0..n_params {
-            let loc = self.machine.get_param_location(i + 1, calling_convention);
-            self.machine.move_location(Size::S64, loc, locations[i]);
+        let mut stack_offset: usize = 0;
+        for (i, param) in sig.params().iter().enumerate() {
+            let sz = match *param {
+                Type::I32 | Type::F32 => Size::S32,
+                Type::I64 | Type::F64 => Size::S64,
+                Type::ExternRef | Type::FuncRef => Size::S64,
+                _ => unimplemented!(),
+            };
+            let loc = self.machine.get_call_param_location(
+                i + 1,
+                sz,
+                &mut stack_offset,
+                calling_convention,
+            );
+            self.machine
+                .move_location_extend(sz, false, loc, Size::S64, locations[i]);
         }

         // Load vmctx into R15.
         self.machine.move_location(
             Size::S64,
-            self.machine.get_param_location(0, calling_convention),
+            self.machine
+                .get_simple_param_location(0, calling_convention),
             Location::GPR(self.machine.get_vmctx_reg()),
         );

@@ -576,14 +590,17 @@ impl<'a, M: Machine> FuncGen<'a, M> {
         //
         // `rep stosq` writes data from low address to high address and may skip the stack guard page.
         // so here we probe it explicitly when needed.
-        for i in (n_params..n).step_by(NATIVE_PAGE_SIZE / 8).skip(1) {
+        for i in (sig.params().len()..n)
+            .step_by(NATIVE_PAGE_SIZE / 8)
+            .skip(1)
+        {
             self.machine.zero_location(Size::S64, locations[i]);
         }

         // Initialize all normal locals to zero.
         let mut init_stack_loc_cnt = 0;
         let mut last_stack_loc = Location::Memory(self.machine.local_pointer(), i32::MAX);
-        for i in n_params..n {
+        for i in sig.params().len()..n {
             match locations[i] {
                 Location::Memory(_, _) => {
                     init_stack_loc_cnt += 1;
@@ -699,15 +716,27 @@ impl<'a, M: Machine> FuncGen<'a, M> {
     ///
     /// The caller MUST NOT hold any temporary registers allocated by `acquire_temp_gpr` when calling
     /// this function.
-    fn emit_call_native<I: Iterator<Item = Location<M::GPR, M::SIMD>>, F: FnOnce(&mut Self)>(
+    fn emit_call_native<
+        I: Iterator<Item = Location<M::GPR, M::SIMD>>,
+        J: Iterator<Item = WpType>,
+        F: FnOnce(&mut Self),
+    >(
         &mut self,
         cb: F,
         params: I,
+        params_type: J,
     ) -> Result<(), CodegenError> {
         // Values pushed in this function are above the shadow region.
         self.state.stack_values.push(MachineValue::ExplicitShadow);

         let params: Vec<_> = params.collect();
+        let params_size: Vec<_> = params_type
+            .map(|x| match x {
+                WpType::F32 | WpType::I32 => Size::S32,
+                WpType::V128 => unimplemented!(),
+                _ => Size::S64,
+            })
+            .collect();

         // Save used GPRs. Preserve correct stack alignment
         let mut used_stack = self.machine.push_used_gpr();
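The new `params_size` vector decides, per wasm parameter, how wide the native argument slot will be. A minimal standalone sketch of that mapping, using simplified stand-in enums rather than wasmer's actual `WpType`/`Size` definitions:

```rust
// Simplified stand-ins for wasmparser's value type and singlepass' operand size.
#[derive(Clone, Copy, Debug, PartialEq)]
enum WpType { I32, I64, F32, F64, V128, ExternRef, FuncRef }

#[derive(Clone, Copy, Debug, PartialEq)]
enum Size { S32, S64 }

// 32-bit values get a 4-byte slot (which matters under Apple's AArch64 ABI);
// everything else, including references, stays a full 8-byte slot. V128 is
// rejected, mirroring the unimplemented!() arm in the diff.
fn param_size(ty: WpType) -> Size {
    match ty {
        WpType::F32 | WpType::I32 => Size::S32,
        WpType::V128 => unimplemented!("SIMD params are not supported here"),
        _ => Size::S64,
    }
}

fn main() {
    assert_eq!(param_size(WpType::I32), Size::S32);
    assert_eq!(param_size(WpType::FuncRef), Size::S64);
}
```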
@@ -746,39 +775,37 @@ impl<'a, M: Machine> FuncGen<'a, M> {
         };

         let mut stack_offset: usize = 0;
-
+        let mut args: Vec<Location<M::GPR, M::SIMD>> = vec![];
+        let mut pushed_args: usize = 0;
         // Calculate stack offset.
         for (i, _param) in params.iter().enumerate() {
-            if let Location::Memory(_, _) =
-                self.machine.get_param_location(1 + i, calling_convention)
-            {
-                stack_offset += 8;
-            }
+            args.push(self.machine.get_param_location(
+                1 + i,
+                params_size[i],
+                &mut stack_offset,
+                calling_convention,
+            ));
         }

         // Align stack to 16 bytes.
-        if (self.machine.round_stack_adjust(self.get_stack_offset()) + used_stack + stack_offset)
-            % 16
-            != 0
-        {
-            if self.machine.round_stack_adjust(8) == 8 {
-                self.machine.adjust_stack(8);
-            } else {
-                self.machine.emit_push(Size::S64, Location::Imm32(0));
-            }
-            stack_offset += 8;
-            self.state.stack_values.push(MachineValue::Undefined);
+        let stack_unaligned =
+            (self.machine.round_stack_adjust(self.get_stack_offset()) + used_stack + stack_offset)
+                % 16;
+        if stack_unaligned != 0 {
+            stack_offset += 16 - stack_unaligned;
         }
+        self.machine.adjust_stack(stack_offset as u32);

         let mut call_movs: Vec<(Location<M::GPR, M::SIMD>, M::GPR)> = vec![];
         // Prepare register & stack parameters.
         for (i, param) in params.iter().enumerate().rev() {
-            let loc = self.machine.get_param_location(1 + i, calling_convention);
+            let loc = args[i];
             match loc {
                 Location::GPR(x) => {
                     call_movs.push((*param, x));
                 }
                 Location::Memory(_, _) => {
+                    pushed_args += 1;
                     match *param {
                         Location::GPR(x) => {
                             let content = self.state.register_values
@@ -813,7 +840,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                             self.state.stack_values.push(MachineValue::Undefined);
                         }
                     }
-                    self.machine.push_location_for_native(*param);
+                    self.machine.move_location(params_size[i], *param, loc);
                 }
                 _ => {
                     return Err(CodegenError {
@@ -838,17 +865,10 @@ impl<'a, M: Machine> FuncGen<'a, M> {
         self.machine.move_location(
             Size::S64,
             Location::GPR(self.machine.get_vmctx_reg()),
-            self.machine.get_param_location(0, calling_convention),
+            self.machine
+                .get_simple_param_location(0, calling_convention),
         ); // vmctx

-        if self.machine.round_stack_adjust(8) == 8 {
-            if (self.state.stack_values.len() % 2) != 1 {
-                return Err(CodegenError {
-                    message: "emit_call_native: explicit shadow takes one slot".to_string(),
-                });
-            }
-        }
-
         if stack_padding > 0 {
             self.machine.adjust_stack(stack_padding as u32);
         }
@@ -884,7 +904,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                 message: "emit_call_native: Bad restoring stack alignement".to_string(),
             });
         }
-        for _ in 0..stack_offset / 8 {
+        for _ in 0..pushed_args {
             self.state.stack_values.pop().unwrap();
         }
     }
@@ -912,12 +932,20 @@ impl<'a, M: Machine> FuncGen<'a, M> {
     }

     /// Emits a System V call sequence, specialized for labels as the call target.
-    fn _emit_call_native_label<I: Iterator<Item = Location<M::GPR, M::SIMD>>>(
+    fn _emit_call_native_label<
+        I: Iterator<Item = Location<M::GPR, M::SIMD>>,
+        J: Iterator<Item = WpType>,
+    >(
         &mut self,
         label: Label,
         params: I,
+        params_type: J,
     ) -> Result<(), CodegenError> {
-        self.emit_call_native(|this| this.machine.emit_call_label(label), params)?;
+        self.emit_call_native(
+            |this| this.machine.emit_call_label(label),
+            params,
+            params_type,
+        )?;
         Ok(())
     }
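The rewritten alignment step pads the outgoing argument area once, up front, instead of pushing a dummy 8-byte slot: whatever remainder `(current + saved + args) % 16` leaves gets added to `stack_offset` before a single `adjust_stack`. A self-contained sketch of the arithmetic, with illustrative names:

```rust
/// Pad `stack_offset` so the total stack depth after the adjustment is a
/// multiple of 16, mirroring the emit_call_native change above.
fn pad_to_16(current_stack: usize, used_stack: usize, mut stack_offset: usize) -> usize {
    let unaligned = (current_stack + used_stack + stack_offset) % 16;
    if unaligned != 0 {
        stack_offset += 16 - unaligned;
    }
    stack_offset
}

fn main() {
    // 40 + 16 + 12 = 68 bytes: pad the 12 bytes of args by 12 more, to 24,
    // so the total becomes 80, a multiple of 16.
    assert_eq!(pad_to_16(40, 16, 12), 24);
}
```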
@@ -964,7 +992,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
         // Initialize locals.
         self.locals = self.init_locals(
             self.local_types.len(),
-            self.signature.params().len(),
+            self.signature.clone(),
             self.calling_convention,
         );
@@ -2583,6 +2611,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                         this.machine.mark_instruction_address_end(offset);
                     },
                     params.iter().copied(),
+                    param_types.iter().copied(),
                 )?;

                 self.release_locations_only_stack(&params);
@@ -2794,7 +2823,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                                 gpr_for_call,
                                 vmcaller_checked_anyfunc_vmctx as i32,
                             ),
-                            this.machine.get_param_location(0, calling_convention),
+                            this.machine
+                                .get_simple_param_location(0, calling_convention),
                         );

                         this.machine.emit_call_location(Location::Memory(
@@ -2805,6 +2835,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                         }
                     },
                     params.iter().copied(),
+                    param_types.iter().copied(),
                 )?;

                 self.release_locations_only_stack(&params);
@@ -3038,6 +3069,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     },
                     // [vmctx, memory_index]
                     iter::once(Location::Imm32(memory_index.index() as u32)),
+                    iter::once(WpType::I64),
                 )?;
                 let ret = self.acquire_locations(
                     &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))],
@@ -3085,6 +3117,15 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     ]
                     .iter()
                     .cloned(),
+                    [
+                        WpType::I64,
+                        WpType::I64,
+                        WpType::I64,
+                        WpType::I64,
+                        WpType::I64,
+                    ]
+                    .iter()
+                    .cloned(),
                 )?;
                 self.release_locations_only_stack(&[dst, src, len]);
             }
@@ -3107,6 +3148,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     },
                     // [vmctx, segment_index]
                     iter::once(Location::Imm32(segment)),
+                    iter::once(WpType::I64),
                 )?;
             }
             Operator::MemoryCopy { src, dst } => {
@@ -3157,6 +3199,9 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     ]
                     .iter()
                     .cloned(),
+                    [WpType::I32, WpType::I64, WpType::I64, WpType::I64]
+                        .iter()
+                        .cloned(),
                 )?;
                 self.release_locations_only_stack(&[dst_pos, src_pos, len]);
             }
@@ -3201,6 +3246,9 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     [Location::Imm32(memory_index.index() as u32), dst, val, len]
                         .iter()
                         .cloned(),
+                    [WpType::I32, WpType::I64, WpType::I64, WpType::I64]
+                        .iter()
+                        .cloned(),
                 )?;
                 self.release_locations_only_stack(&[dst, val, len]);
             }
@@ -3235,6 +3283,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     // [vmctx, val, memory_index]
                     iter::once(param_pages)
                         .chain(iter::once(Location::Imm32(memory_index.index() as u32))),
+                    [WpType::I64, WpType::I64].iter().cloned(),
                 )?;

                 self.release_locations_only_stack(&[param_pages]);
@@ -5432,6 +5481,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     },
                     // [vmctx, func_index] -> funcref
                     iter::once(Location::Imm32(function_index as u32)),
+                    iter::once(WpType::I64),
                 )?;

                 let ret = self.acquire_locations(
@@ -5490,6 +5540,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     [Location::Imm32(table_index.index() as u32), index, value]
                         .iter()
                         .cloned(),
+                    [WpType::I32, WpType::I64, WpType::I64].iter().cloned(),
                 )?;

                 self.release_locations_only_stack(&[index, value]);
@@ -5524,6 +5575,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     [Location::Imm32(table_index.index() as u32), index]
                         .iter()
                         .cloned(),
+                    [WpType::I32, WpType::I64].iter().cloned(),
                 )?;

                 self.release_locations_only_stack(&[index]);
@@ -5567,6 +5619,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     },
                     // [vmctx, table_index] -> i32
                     iter::once(Location::Imm32(table_index.index() as u32)),
+                    iter::once(WpType::I32),
                 )?;

                 let ret = self.acquire_locations(
@@ -5616,6 +5669,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     ]
                     .iter()
                     .cloned(),
+                    [WpType::I64, WpType::I64, WpType::I64].iter().cloned(),
                 )?;

                 self.release_locations_only_stack(&[init_value, delta]);
@@ -5668,6 +5722,15 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     ]
                     .iter()
                     .cloned(),
+                    [
+                        WpType::I32,
+                        WpType::I32,
+                        WpType::I64,
+                        WpType::I64,
+                        WpType::I64,
+                    ]
+                    .iter()
+                    .cloned(),
                 )?;

                 self.release_locations_only_stack(&[dest, src, len]);
@@ -5699,6 +5762,9 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     },
                     // [vmctx, table_index, start_idx, item, len]
                     [Location::Imm32(table), dest, val, len].iter().cloned(),
+                    [WpType::I32, WpType::I64, WpType::I64, WpType::I64]
+                        .iter()
+                        .cloned(),
                 )?;

                 self.release_locations_only_stack(&[dest, val, len]);
@@ -5737,6 +5803,15 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     ]
                     .iter()
                     .cloned(),
+                    [
+                        WpType::I32,
+                        WpType::I32,
+                        WpType::I64,
+                        WpType::I64,
+                        WpType::I64,
+                    ]
+                    .iter()
+                    .cloned(),
                 )?;

                 self.release_locations_only_stack(&[dest, src, len]);
@@ -5762,6 +5837,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                     },
                     // [vmctx, elem_index]
                     [Location::Imm32(segment)].iter().cloned(),
+                    [WpType::I32].iter().cloned(),
                 )?;
             }
             _ => {
diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 0667c29e934..7f1b5b1818b 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -2445,7 +2445,7 @@ impl EmitterARM64 for Assembler {

 pub fn gen_std_trampoline_arm64(
     sig: &FunctionType,
-    _calling_convention: CallingConvention,
+    calling_convention: CallingConvention,
 ) -> FunctionBody {
     let mut a = Assembler::new(0);

@@ -2494,6 +2494,29 @@ pub fn gen_std_trampoline_arm64(
                 );
             }
             _ => {
+                match calling_convention {
+                    CallingConvention::AppleAarch64 => {
+                        match sz {
+                            Size::S8 => (),
+                            Size::S16 => {
+                                if caller_stack_offset & 1 != 0 {
+                                    caller_stack_offset = (caller_stack_offset + 1) & !1;
+                                }
+                            }
+                            Size::S32 => {
+                                if caller_stack_offset & 3 != 0 {
+                                    caller_stack_offset = (caller_stack_offset + 3) & !3;
+                                }
+                            }
+                            Size::S64 => {
+                                if caller_stack_offset & 7 != 0 {
+                                    caller_stack_offset = (caller_stack_offset + 7) & !7;
+                                }
+                            }
+                        };
+                    }
+                    _ => (),
+                };
                 // using X16 as scratch reg
                 a.emit_ldr(
                     sz,
@@ -2505,7 +2528,19 @@ pub fn gen_std_trampoline_arm64(
                     Location::GPR(GPR::X16),
                     Location::Memory(GPR::XzrSp, caller_stack_offset),
                 );
-                caller_stack_offset += 8;
+                match calling_convention {
+                    CallingConvention::AppleAarch64 => {
+                        caller_stack_offset += match sz {
+                            Size::S8 => 1,
+                            Size::S16 => 2,
+                            Size::S32 => 4,
+                            Size::S64 => 8,
+                        };
+                    }
+                    _ => {
+                        caller_stack_offset += 8;
+                    }
+                }
             }
         }
     }
@@ -2579,12 +2614,28 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
             Some(ARM64Register::GPR(gpr)) => Location::GPR(gpr),
             Some(ARM64Register::NEON(neon)) => Location::SIMD(neon),
             None => {
+                let sz = match calling_convention {
+                    CallingConvention::AppleAarch64 => match *ty {
+                        Type::I32 | Type::F32 => Size::S32,
+                        _ => {
+                            if stack_param_count & 7 != 0 {
+                                stack_param_count = (stack_param_count + 7) & !7;
+                            };
+                            Size::S64
+                        }
+                    },
+                    _ => Size::S64,
+                };
                 a.emit_ldr(
-                    Size::S64,
+                    sz,
                     Location::GPR(GPR::X26),
                     Location::Memory(GPR::XzrSp, (stack_offset + 16 + stack_param_count) as _),
                 );
-                stack_param_count += 8;
+                stack_param_count += match sz {
+                    Size::S32 => 4,
+                    Size::S64 => 8,
+                    _ => unreachable!(),
+                };
                 Location::GPR(GPR::X26)
             }
         };
diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs
index e61e03e4184..a68daa91ec5 100644
--- a/lib/compiler-singlepass/src/machine.rs
+++ b/lib/compiler-singlepass/src/machine.rs
@@ -157,8 +157,24 @@ pub trait Machine {
         &self,
         calling_convention: CallingConvention,
     ) -> Vec<Location<Self::GPR, Self::SIMD>>;
-    /// Get param location
+    /// Get param location (to build a call, using SP for stack args)
     fn get_param_location(
+        &self,
+        idx: usize,
+        sz: Size,
+        stack_offset: &mut usize,
+        calling_convention: CallingConvention,
+    ) -> Location<Self::GPR, Self::SIMD>;
+    /// Get call param location (from a call, using FP for stack args)
+    fn get_call_param_location(
+        &self,
+        idx: usize,
+        sz: Size,
+        stack_offset: &mut usize,
+        calling_convention: CallingConvention,
+    ) -> Location<Self::GPR, Self::SIMD>;
+    /// Get simple param location
+    fn get_simple_param_location(
         &self,
         idx: usize,
         calling_convention: CallingConvention,
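The trait now distinguishes three lookups: `get_param_location` (outgoing arguments, SP-relative), `get_call_param_location` (incoming arguments, FP-relative), and `get_simple_param_location` (the old register-centric behavior). The first two are stateful on AArch64 because Apple's ABI packs stack arguments to their natural size instead of fixed 8-byte slots, so each call must first round the running byte offset up to the argument's alignment. A hedged sketch of that rounding, using the same log2-size encoding as the implementations below and the conventional `offset & mask` misalignment test:

```rust
/// Reserve a stack slot for an argument of size `1 << log2_size` bytes,
/// aligning the running offset to the argument's natural alignment first.
/// Returns the slot's byte offset and advances `offset` past it.
fn alloc_stack_arg(offset: &mut usize, log2_size: u32) -> usize {
    let mask = (1usize << log2_size) - 1;
    if *offset & mask != 0 {
        // Round up to the next multiple of the argument size.
        *offset = (*offset + mask) & !mask;
    }
    let slot = *offset;
    *offset += 1 << log2_size;
    slot
}

fn main() {
    let mut off = 0;
    assert_eq!(alloc_stack_arg(&mut off, 2), 0); // i32 at 0, offset -> 4
    assert_eq!(alloc_stack_arg(&mut off, 3), 8); // i64 rounds 4 up to 8
    assert_eq!(alloc_stack_arg(&mut off, 0), 16); // i8 packs right after
}
```

This is also why the stateful variants carry a "MUST be called in order!" warning in the ARM64 implementation: the byte offsets only come out right if parameters are visited left to right.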
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index 032c9da7333..96fcf0e2a48 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -1472,8 +1472,118 @@ impl Machine for MachineARM64 {
         vec![]
     }

-    // Get param location
-    fn get_param_location(&self, idx: usize, calling_convention: CallingConvention) -> Location {
+    // Get param location, MUST be called in order!
+    fn get_param_location(
+        &self,
+        idx: usize,
+        sz: Size,
+        stack_args: &mut usize,
+        calling_convention: CallingConvention,
+    ) -> Location {
+        match calling_convention {
+            CallingConvention::AppleAarch64 => match idx {
+                0 => Location::GPR(GPR::X0),
+                1 => Location::GPR(GPR::X1),
+                2 => Location::GPR(GPR::X2),
+                3 => Location::GPR(GPR::X3),
+                4 => Location::GPR(GPR::X4),
+                5 => Location::GPR(GPR::X5),
+                6 => Location::GPR(GPR::X6),
+                7 => Location::GPR(GPR::X7),
+                _ => {
+                    let sz = match sz {
+                        Size::S8 => 0,
+                        Size::S16 => 1,
+                        Size::S32 => 2,
+                        Size::S64 => 3,
+                    };
+                    // align first
+                    if sz > 1 {
+                        if *stack_args & !((1 << sz) - 1) != 0 {
+                            *stack_args = (*stack_args + ((1 << sz) - 1)) & !((1 << sz) - 1);
+                        }
+                    }
+                    let loc = Location::Memory(GPR::XzrSp, *stack_args as i32);
+                    *stack_args += 1 << sz;
+                    loc
+                }
+            },
+            _ => match idx {
+                0 => Location::GPR(GPR::X0),
+                1 => Location::GPR(GPR::X1),
+                2 => Location::GPR(GPR::X2),
+                3 => Location::GPR(GPR::X3),
+                4 => Location::GPR(GPR::X4),
+                5 => Location::GPR(GPR::X5),
+                6 => Location::GPR(GPR::X6),
+                7 => Location::GPR(GPR::X7),
+                _ => {
+                    let loc = Location::Memory(GPR::XzrSp, *stack_args as i32);
+                    *stack_args += 8;
+                    loc
+                }
+            },
+        }
+    }
+    // Get call param location, MUST be called in order!
+    fn get_call_param_location(
+        &self,
+        idx: usize,
+        sz: Size,
+        stack_args: &mut usize,
+        calling_convention: CallingConvention,
+    ) -> Location {
+        match calling_convention {
+            CallingConvention::AppleAarch64 => match idx {
+                0 => Location::GPR(GPR::X0),
+                1 => Location::GPR(GPR::X1),
+                2 => Location::GPR(GPR::X2),
+                3 => Location::GPR(GPR::X3),
+                4 => Location::GPR(GPR::X4),
+                5 => Location::GPR(GPR::X5),
+                6 => Location::GPR(GPR::X6),
+                7 => Location::GPR(GPR::X7),
+                _ => {
+                    let sz = match sz {
+                        Size::S8 => 0,
+                        Size::S16 => 1,
+                        Size::S32 => 2,
+                        Size::S64 => 3,
+                    };
+                    // align first
+                    if sz > 1 {
+                        if *stack_args & !((1 << sz) - 1) != 0 {
+                            *stack_args = (*stack_args + ((1 << sz) - 1)) & !((1 << sz) - 1);
+                        }
+                    }
+                    let loc = Location::Memory(GPR::X29, 16 * 2 + *stack_args as i32);
+                    *stack_args += 1 << sz;
+                    loc
+                }
+            },
+            _ => match idx {
+                0 => Location::GPR(GPR::X0),
+                1 => Location::GPR(GPR::X1),
+                2 => Location::GPR(GPR::X2),
+                3 => Location::GPR(GPR::X3),
+                4 => Location::GPR(GPR::X4),
+                5 => Location::GPR(GPR::X5),
+                6 => Location::GPR(GPR::X6),
+                7 => Location::GPR(GPR::X7),
+                _ => {
+                    let loc = Location::Memory(GPR::X29, 16 * 2 + *stack_args as i32);
+                    *stack_args += 8;
+                    loc
+                }
+            },
+        }
+    }
+    // Get simple param location, Will not be accurate for Apple calling convention on "stack" arguments
+    fn get_simple_param_location(
+        &self,
+        idx: usize,
+        calling_convention: CallingConvention,
+    ) -> Location {
         match calling_convention {
             _ => match idx {
                 0 => Location::GPR(GPR::X0),
@@ -1529,6 +1639,12 @@ impl Machine for MachineARM64 {
             },
             Location::Imm8(_) => match dest {
                 Location::GPR(_) => self.assembler.emit_mov(size, source, dest),
+                Location::Memory(_, _) => match size {
+                    Size::S64 => self.emit_relaxed_str64(source, dest),
+                    Size::S32 => self.emit_relaxed_str32(source, dest),
+                    Size::S16 => self.emit_relaxed_str16(source, dest),
+                    Size::S8 => self.emit_relaxed_str8(source, dest),
+                },
                 _ => panic!(
                     "singlepass can't emit move_location {:?} {:?} => {:?}",
                     size, source, dest
@@ -1536,6 +1652,12 @@ impl Machine for MachineARM64 {
             },
             Location::Imm32(val) => match dest {
                 Location::GPR(_) => self.assembler.emit_mov_imm(dest, val as u64),
+                Location::Memory(_, _) => match size {
+                    Size::S64 => self.emit_relaxed_str64(source, dest),
+                    Size::S32 => self.emit_relaxed_str32(source, dest),
+                    Size::S16 => self.emit_relaxed_str16(source, dest),
+                    Size::S8 => self.emit_relaxed_str8(source, dest),
+                },
                 _ => panic!(
                     "singlepass can't emit move_location {:?} {:?} => {:?}",
                     size, source, dest
@@ -1543,6 +1665,12 @@ impl Machine for MachineARM64 {
             },
             Location::Imm64(val) => match dest {
                 Location::GPR(_) => self.assembler.emit_mov_imm(dest, val),
+                Location::Memory(_, _) => match size {
+                    Size::S64 => self.emit_relaxed_str64(source, dest),
+                    Size::S32 => self.emit_relaxed_str32(source, dest),
+                    Size::S16 => self.emit_relaxed_str16(source, dest),
+                    Size::S8 => self.emit_relaxed_str8(source, dest),
+                },
                 _ => panic!(
                     "singlepass can't emit move_location {:?} {:?} => {:?}",
                     size, source, dest
@@ -1597,13 +1725,49 @@ impl Machine for MachineARM64 {
     // move a location to another
     fn move_location_extend(
         &mut self,
-        _size_val: Size,
-        _signed: bool,
-        _source: Location,
-        _size_op: Size,
-        _dest: Location,
+        size_val: Size,
+        signed: bool,
+        source: Location,
+        size_op: Size,
+        dest: Location,
     ) {
-        unimplemented!();
+        if size_op != Size::S64 {
+            unreachable!();
+        }
+        let mut temps = vec![];
+        let dst = self.location_to_reg(size_op, dest, &mut temps, ImmType::None, false, None);
+        let src = match (size_val, signed, source) {
+            (Size::S64, _, _) => source,
+            (Size::S32, false, Location::GPR(_)) => {
+                self.assembler.emit_mov(size_val, source, dst);
+                dst
+            }
+            (Size::S32, true, Location::GPR(_)) => {
+                self.assembler.emit_sxtw(size_val, source, dst);
+                dst
+            }
+            (Size::S32, false, Location::Memory(_, _)) => {
+                self.emit_relaxed_ldr32(size_op, dst, source);
+                dst
+            }
+            (Size::S32, true, Location::Memory(_, _)) => {
+                self.emit_relaxed_ldr32s(size_op, dst, source);
+                dst
+            }
+            _ => panic!(
+                "singlepass can't emit move_location_extend {:?} {:?} {:?} => {:?} {:?}",
+                size_val, signed, source, size_op, dest
+            ),
+        };
+        if src != dst {
+            self.move_location(size_op, src, dst);
+        }
+        if dst != dest {
+            self.move_location(size_op, dst, dest);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
     }
     fn load_address(&mut self, _size: Size, _reg: Location, _mem: Location) {
         unimplemented!();
diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs
index 627fb1b670f..8678d72d272 100644
--- a/lib/compiler-singlepass/src/machine_x64.rs
+++ b/lib/compiler-singlepass/src/machine_x64.rs
@@ -1877,14 +1877,80 @@ impl Machine for MachineX86_64 {
     }

     // Get param location
-    fn get_param_location(&self, idx: usize, calling_convention: CallingConvention) -> Location {
+    fn get_param_location(
+        &self,
+        idx: usize,
+        _sz: Size,
+        stack_location: &mut usize,
+        calling_convention: CallingConvention,
+    ) -> Location {
+        match calling_convention {
+            CallingConvention::WindowsFastcall => match idx {
+                0 => Location::GPR(GPR::RCX),
+                1 => Location::GPR(GPR::RDX),
+                2 => Location::GPR(GPR::R8),
+                3 => Location::GPR(GPR::R9),
+                _ => {
+                    let loc = Location::Memory(GPR::RSP, *stack_location as i32);
+                    *stack_location += 8;
+                    loc
+                }
+            },
+            _ => match idx {
+                0 => Location::GPR(GPR::RDI),
+                1 => Location::GPR(GPR::RSI),
+                2 => Location::GPR(GPR::RDX),
+                3 => Location::GPR(GPR::RCX),
+                4 => Location::GPR(GPR::R8),
+                5 => Location::GPR(GPR::R9),
+                _ => {
+                    let loc = Location::Memory(GPR::RSP, *stack_location as i32);
+                    *stack_location += 8;
+                    loc
+                }
+            },
+        }
+    }
+    // Get call param location
+    fn get_call_param_location(
+        &self,
+        idx: usize,
+        _sz: Size,
+        _stack_location: &mut usize,
+        calling_convention: CallingConvention,
+    ) -> Location {
         match calling_convention {
             CallingConvention::WindowsFastcall => match idx {
                 0 => Location::GPR(GPR::RCX),
                 1 => Location::GPR(GPR::RDX),
                 2 => Location::GPR(GPR::R8),
                 3 => Location::GPR(GPR::R9),
-                _ => Location::Memory(GPR::RBP, (16 + 32 + (idx - 4) * 8) as i32),
+                _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32),
+            },
+            _ => match idx {
+                0 => Location::GPR(GPR::RDI),
+                1 => Location::GPR(GPR::RSI),
+                2 => Location::GPR(GPR::RDX),
+                3 => Location::GPR(GPR::RCX),
+                4 => Location::GPR(GPR::R8),
+                5 => Location::GPR(GPR::R9),
+                _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32),
+            },
+        }
+    }
+    // Get simple param location
+    fn get_simple_param_location(
+        &self,
+        idx: usize,
+        calling_convention: CallingConvention,
+    ) -> Location {
+        match calling_convention {
+            CallingConvention::WindowsFastcall => match idx {
+                0 => Location::GPR(GPR::RCX),
+                1 => Location::GPR(GPR::RDX),
+                2 => Location::GPR(GPR::R8),
+                3 => Location::GPR(GPR::R9),
+                _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32),
             },
             _ => match idx {
                 0 => Location::GPR(GPR::RDI),
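On x86-64 the three variants hand out the same registers and differ only in how stack arguments are addressed: SP-relative slots when building an outgoing call, FP-relative slots (above the saved RBP and return address) when reading a function's own incoming arguments. A self-contained sketch of the System V variant, with simplified stand-in types:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Reg { Rdi, Rsi, Rdx, Rcx, R8, R9 }

#[derive(Clone, Copy, Debug, PartialEq)]
enum Loc { Gpr(Reg), SpRelative(i32), FpRelative(i32) }

/// System V: the first six integer args ride in registers; the rest live on
/// the stack. Outgoing args are addressed from RSP; incoming args sit above
/// the 16 bytes of saved RBP + return address, hence the FP-relative offset.
fn sysv_param(idx: usize, outgoing: bool) -> Loc {
    const REGS: [Reg; 6] = [Reg::Rdi, Reg::Rsi, Reg::Rdx, Reg::Rcx, Reg::R8, Reg::R9];
    if idx < REGS.len() {
        Loc::Gpr(REGS[idx])
    } else if outgoing {
        Loc::SpRelative(((idx - 6) * 8) as i32)
    } else {
        Loc::FpRelative((16 + (idx - 6) * 8) as i32)
    }
}

fn main() {
    assert_eq!(sysv_param(0, true), Loc::Gpr(Reg::Rdi)); // vmctx
    assert_eq!(sysv_param(7, false), Loc::FpRelative(24));
}
```

Windows fastcall follows the same pattern with RCX/RDX/R8/R9 plus a 32-byte shadow space, which is where the `32 + 16 + (idx - 4) * 8` offset in the hunk above comes from.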
@@ -6542,7 +6608,9 @@ impl Machine for MachineX86_64 {
         // Calculate stack offset.
         let mut stack_offset: u32 = 0;
         for (i, _param) in sig.params().iter().enumerate() {
-            if let Location::Memory(_, _) = self.get_param_location(1 + i, calling_convention) {
+            if let Location::Memory(_, _) =
+                self.get_simple_param_location(1 + i, calling_convention)
+            {
                 stack_offset += 8;
             }
         }
@@ -6570,12 +6638,12 @@ impl Machine for MachineX86_64 {
         // Arguments
         a.emit_mov(
             Size::S64,
-            self.get_param_location(1, calling_convention),
+            self.get_simple_param_location(1, calling_convention),
             Location::GPR(GPR::R15),
         ); // func_ptr
         a.emit_mov(
             Size::S64,
-            self.get_param_location(2, calling_convention),
+            self.get_simple_param_location(2, calling_convention),
             Location::GPR(GPR::R14),
         ); // args_rets
@@ -6585,7 +6653,7 @@ impl Machine for MachineX86_64 {
         let mut n_stack_args: usize = 0;
         for (i, _param) in sig.params().iter().enumerate() {
             let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i]
-            let dst_loc = self.get_param_location(1 + i, calling_convention);
+            let dst_loc = self.get_simple_param_location(1 + i, calling_convention);
             match dst_loc {
                 Location::GPR(_) => {
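For context on these trampoline hunks: the standard trampoline reads every wasm value out of a packed `args_rets` buffer in which each value, regardless of its type, occupies one 16-byte slot, then forwards it to the location `get_simple_param_location` reports, with native slot 0 reserved for the vmctx. A minimal model of that walk under System V, with illustrative names:

```rust
/// Where the i-th wasm parameter of a trampolined call comes from and goes to.
/// Each args_rets slot is 16 bytes; native slot i + 1 accounts for the vmctx
/// passed as the hidden first argument. Constants are illustrative for SysV.
const SYSV_INT_REGS: usize = 6;

fn forward(i: usize) -> (usize, &'static str) {
    let src_offset = i * 16; // byte offset of args_rets[i]
    if i + 1 < SYSV_INT_REGS {
        (src_offset, "register")
    } else {
        (src_offset, "outgoing stack")
    }
}

fn main() {
    assert_eq!(forward(0), (0, "register")); // first wasm param -> RSI
    assert_eq!(forward(6), (96, "outgoing stack")); // args_rets[6] spills
}
```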