diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 9ade3ebea363..1c5eb7b96b70 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1149,6 +1149,7 @@ impl ABIMachineSpec for AArch64MachineDeps { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -1188,10 +1189,17 @@ impl ABIMachineSpec for AArch64MachineDeps { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x9 as a temp if needed: clobbered, not a + // retval. + regs::writable_xreg(9) + } } impl AArch64MachineDeps { diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index adc7396c4787..0d6e11e32f80 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2951,6 +2951,17 @@ impl MachInstEmit for Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + start_off = sink.cur_offset(); } &Inst::CallInd { ref info } => { let user_stack_map = state.take_stack_map(); @@ -2970,6 +2981,17 @@ impl MachInstEmit for Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. 
+ info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + start_off = sink.cur_offset(); } &Inst::ReturnCall { ref info } => { emit_return_call_common_sequence(sink, emit_info, state, info); diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index ba7103a32a8c..2dae98b929f3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -839,8 +839,11 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -852,8 +855,11 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) 
=> collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index a29c1146cadd..dcf28b62ab53 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -544,6 +544,7 @@ where is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -578,6 +579,7 @@ where setup_area_size: setup_area_size.into(), clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } @@ -592,6 +594,12 @@ where // Pulley doesn't need inline probestacks because it always checks stack // decrements. } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x15 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(15)) + } } /// Different styles of management of fp/lr and clobbered registers. diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index e68d68dfe22c..bdca3c8d978f 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -89,6 +89,9 @@ ;; emit this instruction and auto-generated methods for other various ;; bits and pieces of boilerplate in the backend. (Raw (raw RawInst)) + + ;; Island generation prior to variable-length instructions. + (EmitIsland (space_needed u32)) ) ) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 7adec74bcddc..1a5095e6fe21 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -188,6 +188,17 @@ fn pulley_emit

( for i in PulleyMachineDeps::

::gen_sp_reg_adjust(adjust) { >::from(i).emit(sink, emit_info, state); } + + // Load any stack-carried return values. + info.emit_retval_loads::, _, _>( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + *start_offset = sink.cur_offset(); } Inst::IndirectCall { info } => { @@ -204,6 +215,17 @@ fn pulley_emit

( for i in PulleyMachineDeps::

::gen_sp_reg_adjust(adjust) { >::from(i).emit(sink, emit_info, state); } + + // Load any stack-carried return values. + info.emit_retval_loads::, _, _>( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + *start_offset = sink.cur_offset(); } Inst::ReturnCall { info } => { @@ -517,6 +539,13 @@ fn pulley_emit

( } super::generated::emit(raw, sink) } + + Inst::EmitIsland { space_needed } => { + let label = sink.get_label(); + >::from(Inst::Jump { label }).emit(sink, emit_info, state); + sink.emit_island(space_needed + 8, &mut state.ctrl_plane); + sink.bind_label(label, &mut state.ctrl_plane); + } } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 78b1ea0eb882..bbaa10b25532 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -169,8 +169,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -179,8 +182,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -190,8 +196,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) 
=> collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -298,6 +307,8 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { } Inst::Raw { raw } => generated::get_operands(raw, collector), + + Inst::EmitIsland { .. } => {} } } @@ -745,6 +756,8 @@ impl Inst { format!("br_table {idx} {default:?} {targets:?}") } Inst::Raw { raw } => generated::print(raw), + + Inst::EmitIsland { space_needed } => format!("emit_island {space_needed}"), } } } diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 3bde5ea9bb17..5364b78dd0d2 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -649,6 +649,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -684,6 +685,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } @@ -719,6 +721,12 @@ impl ABIMachineSpec for Riscv64MachineDeps { }); } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x12 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(12)) + } } impl Riscv64ABICallSite { diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index acdfa35f5da8..7d6b5c96adac 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -345,6 +345,10 @@ (flags MemFlags) (mask VecOpMasking) (vstate VState)) + + (EmitIsland + ;; The needed space before the next deadline. 
+ (needed_space u32)) )) (type AtomicOP (enum diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 2951e2a8f6db..36e72db5c2e8 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -210,6 +210,7 @@ impl Inst { // some cases. Inst::VecLoad { vstate, .. } | Inst::VecStore { vstate, .. } => Some(vstate), + Inst::EmitIsland { .. } => None, } } } @@ -1133,6 +1134,15 @@ impl Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + *start_off = sink.cur_offset(); } &Inst::CallInd { ref info } => { Inst::Jalr { @@ -1155,6 +1165,15 @@ impl Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + *start_off = sink.cur_offset(); } &Inst::ReturnCall { ref info } => { @@ -2577,7 +2596,14 @@ impl Inst { to.nf(), )); } - }; + + Inst::EmitIsland { needed_space } => { + let jump_around_label = sink.get_label(); + Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); + sink.emit_island(needed_space + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 6e593f6901c6..090585b3eb9e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -335,8 +335,11 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for 
CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -348,8 +351,11 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -680,6 +686,7 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(from); vec_mask_operands(mask, collector); } + Inst::EmitIsland { .. } => {} } } @@ -1612,6 +1619,9 @@ impl Inst { format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") } + Inst::EmitIsland { needed_space } => { + format!("emit_island {needed_space}") + } } } } diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 845599cb04a6..50acf177d004 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -914,6 +914,7 @@ impl ABIMachineSpec for S390xMachineDeps { _is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, mut outgoing_args_size: u32, ) -> FrameLayout { @@ -985,10 +986,15 @@ impl ABIMachineSpec for S390xMachineDeps { setup_area_size: 0, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + panic!("Should not be called"); + } } impl S390xMachineDeps { diff --git 
a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 139255c2657c..09245bf056a9 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -879,9 +879,14 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + collector.reg_fixed_def(vreg, *preg); + } + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(clobbers); } @@ -899,9 +904,14 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + collector.reg_fixed_def(vreg, *preg); + } + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(clobbers); } diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index c65aa9c2184a..c3e241f859fa 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -11,7 +11,7 @@ use crate::isa::s390x::inst::{ ReturnCallInfo, SymbolReloc, UImm12, UImm16Shifted, UImm32Shifted, WritableRegPair, }; use crate::isa::s390x::S390xBackend; -use crate::machinst::isle::*; +use crate::machinst::{isle::*, RetLocation}; use crate::machinst::{CallInfo, MachLabel, Reg}; use crate::{ ir::{ @@ -135,7 +135,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { let value_regs = self.lower_ctx.alloc_tmp(ty); defs.push(CallRetPair { vreg: value_regs.only_reg().unwrap(), - preg: reg.into(), + location: RetLocation::Reg(reg.into()), }); } _ => {} @@ -149,8 +149,10 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { fn defs_lookup(&mut self, defs: &CallRetList, reg: RealReg) -> Reg { let reg = Reg::from(reg); for def in defs { - if def.preg == reg { - return def.vreg.to_reg(); + if let RetLocation::Reg(preg) = def.location { + if preg == reg { 
+ return def.vreg.to_reg(); + } } } unreachable!() diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index c911c97014be..55a6821aa137 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -929,6 +929,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -968,10 +969,18 @@ impl ABIMachineSpec for X64ABIMachineSpec { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use r11 as a temp: clobbered anyway, and + // not otherwise used as a return value in any of our + // supported calling conventions. + Writable::from_reg(regs::r11()) + } } impl X64CallSite { @@ -1126,7 +1135,8 @@ fn get_intreg_for_retval( 5 => Some(regs::r8()), 6 => Some(regs::r9()), 7 => Some(regs::r10()), - 8 => Some(regs::r11()), + // NB: `r11` is reserved as a scratch register that is + // also part of the clobber set. // NB: `r15` is reserved as a scratch register. _ => None, }, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index b812fab48b13..01f8e281e065 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1624,6 +1624,13 @@ pub(crate) fn emit( ) .emit(sink, info, state); } + + // Load any stack-carried return values. + call_info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, info, state), + |_space_needed| None, + ); } Inst::ReturnCallKnown { info: call_info } => { @@ -1706,6 +1713,13 @@ pub(crate) fn emit( ) .emit(sink, info, state); } + + // Load any stack-carried return values. 
+ call_info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, info, state), + |_space_needed| None, + ); } Inst::Args { .. } => {} diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 033e96c8f96a..3a8f873c75bc 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -2445,8 +2445,11 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(*clobbers); } @@ -2472,8 +2475,11 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) 
=> collector.any_def(vreg), + } } collector.reg_clobbers(*clobbers); } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index eea24464a443..d8b0d4ce9eaa 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -179,20 +179,14 @@ fn emit_vm_call( abi.gen_arg(ctx, i, ValueRegs::one(*input)); } - let mut retval_insts: SmallInstVec<_> = smallvec![]; let mut outputs: SmallVec<[_; 1]> = smallvec![]; for i in 0..ctx.sigs().num_rets(ctx.sigs().abi_sig_for_signature(&sig)) { - let (retval_inst, retval_regs) = abi.gen_retval(ctx, i); - retval_insts.extend(retval_inst.into_iter()); + let retval_regs = abi.gen_retval(ctx, i); outputs.push(retval_regs.only_reg().unwrap()); } abi.emit_call(ctx); - for inst in retval_insts { - ctx.emit(inst); - } - Ok(outputs) } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index fbe14d6c2cb1..d661847bfb21 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -282,6 +282,20 @@ pub enum StackAMode { OutgoingArg(i64), } +impl StackAMode { + fn offset_by(&self, offset: u32) -> Self { + match self { + StackAMode::IncomingArg(off, size) => { + StackAMode::IncomingArg(off.checked_add(i64::from(offset)).unwrap(), *size) + } + StackAMode::Slot(off) => StackAMode::Slot(off.checked_add(i64::from(offset)).unwrap()), + StackAMode::OutgoingArg(off) => { + StackAMode::OutgoingArg(off.checked_add(i64::from(offset)).unwrap()) + } + } + } +} + /// Trait implemented by machine-specific backend to represent ISA flags. pub trait IsaFlags: Clone { /// Get a flag indicating whether forward-edge CFI is enabled. 
@@ -479,6 +493,7 @@ pub trait ABIMachineSpec { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout; @@ -578,6 +593,12 @@ pub trait ABIMachineSpec { call_conv: isa::CallConv, specified: ir::ArgumentExtension, ) -> ir::ArgumentExtension; + + /// Get a temporary register that is available to use after a call + /// completes and that does not interfere with register-carried + /// return values. This is used to move stack-carried return + /// values directly into spillslots if needed. + fn retval_temp_reg(call_conv_of_callee: isa::CallConv) -> Writable; } /// Out-of-line data for calls, to keep the size of `Inst` down. @@ -1018,6 +1039,9 @@ pub struct FrameLayout { /// This contains stack slots and spill slots. pub fixed_frame_storage_size: u32, + /// The size of all stackslots. + pub stackslots_size: u32, + /// Stack size to be reserved for outgoing arguments, if used by /// the current ABI, or 0 otherwise. After gen_clobber_save and /// before gen_clobber_restore, the stack pointer points to the @@ -1760,6 +1784,7 @@ impl Callee { self.is_leaf, self.stack_args_size(sigs), self.tail_args_size, + self.stackslots_size, total_stacksize, self.outgoing_args_size, )); @@ -1962,13 +1987,23 @@ pub struct CallArgPair { } /// An output return value from a call instruction: the vreg that is -/// defined, and the preg it is constrained to (per the ABI). +/// defined, and the preg or stack location it is constrained to (per +/// the ABI). #[derive(Clone, Debug)] pub struct CallRetPair { /// The virtual register to define from this return value. pub vreg: Writable, /// The real register from which the return value is read. - pub preg: Reg, + pub location: RetLocation, +} + +/// A location to load a return-value from after a call completes. +#[derive(Clone, Debug)] +pub enum RetLocation { + /// A physical register. + Reg(Reg), + /// A stack location, identified by a `StackAMode`. 
+ Stack(StackAMode, Type), } pub type CallArgList = SmallVec<[CallArgPair; 8]>; @@ -2297,12 +2332,7 @@ impl CallSite { } /// Define a return value after the call returns. - pub fn gen_retval( - &mut self, - ctx: &mut Lower, - idx: usize, - ) -> (SmallInstVec, ValueRegs) { - let mut insts = smallvec![]; + pub fn gen_retval(&mut self, ctx: &mut Lower, idx: usize) -> ValueRegs { let mut into_regs: SmallVec<[Reg; 2]> = smallvec![]; let ret = ctx.sigs().rets(self.sig)[idx].clone(); match ret { @@ -2315,7 +2345,7 @@ impl CallSite { let into_reg = ctx.alloc_tmp(ty).only_reg().unwrap(); self.defs.push(CallRetPair { vreg: into_reg, - preg: reg.into(), + location: RetLocation::Reg(reg.into()), }); into_regs.push(into_reg.to_reg()); } @@ -2326,11 +2356,11 @@ impl CallSite { // ensuring that the return values will be in a consistent place after // any call. let ret_area_base = sig_data.sized_stack_arg_space(); - insts.push(M::gen_load_stack( - StackAMode::OutgoingArg(offset + ret_area_base), - into_reg, - ty, - )); + let amode = StackAMode::OutgoingArg(offset + ret_area_base); + self.defs.push(CallRetPair { + vreg: into_reg, + location: RetLocation::Stack(amode, ty), + }); into_regs.push(into_reg.to_reg()); } } @@ -2349,7 +2379,7 @@ impl CallSite { [a, b] => ValueRegs::two(a, b), _ => panic!("Expected to see one or two slots only from {ret:?}"), }; - (insts, value_regs) + value_regs } /// Emit the call itself. @@ -2386,7 +2416,9 @@ impl CallSite { // Remove retval regs from clobbers. for def in &defs { - clobbers.remove(PReg::from(def.preg.to_real_reg().unwrap())); + if let RetLocation::Reg(preg) = def.location { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + } } clobbers @@ -2438,6 +2470,87 @@ impl CallSite { } } +impl CallInfo { + /// Emit loads for any stack-carried return values using the call + /// info and allocations. 
+ pub fn emit_retval_loads< + M: ABIMachineSpec, + EmitFn: FnMut(M::I), + IslandFn: Fn(u32) -> Option, + >( + &self, + stackslots_size: u32, + mut emit: EmitFn, + emit_island: IslandFn, + ) { + // Count stack-ret locations and emit an island to account for + // this space usage. + let mut space_needed = 0; + for CallRetPair { location, .. } in &self.defs { + if let RetLocation::Stack(..) = location { + // Assume up to ten instructions, semi-arbitrarily: + // load from stack, store to spillslot, codegen of + // large offsets on RISC ISAs. + space_needed += 10 * M::I::worst_case_size(); + } + } + if space_needed > 0 { + if let Some(island_inst) = emit_island(space_needed) { + emit(island_inst); + } + } + + let temp = M::retval_temp_reg(self.callee_conv); + // The temporary must be noted as clobbered. + debug_assert!(M::get_regs_clobbered_by_call(self.callee_conv) + .contains(PReg::from(temp.to_reg().to_real_reg().unwrap()))); + + for CallRetPair { vreg, location } in &self.defs { + match location { + RetLocation::Reg(preg) => { + // The temporary must not also be an actual return + // value register. + debug_assert!(*preg != temp.to_reg()); + } + RetLocation::Stack(amode, ty) => { + if let Some(spillslot) = vreg.to_reg().to_spillslot() { + // `temp` is an integer register of machine word + // width, but `ty` may be floating-point/vector, + // which (i) may not be loadable directly into an + // int reg, and (ii) may be wider than a machine + // word. For simplicity, and because there are not + // always easy choices for volatile float/vec regs + // (see e.g. x86-64, where fastcall clobbers only + // xmm0-xmm5, but tail uses xmm0-xmm7 for + // returns), we use the integer temp register in + // steps. 
+ let parts = (ty.bytes() + M::word_bytes() - 1) / M::word_bytes(); + for part in 0..parts { + emit(M::gen_load_stack( + amode.offset_by(part * M::word_bytes()), + temp, + M::word_type(), + )); + emit(M::gen_store_stack( + StackAMode::Slot( + i64::from(stackslots_size) + + i64::from(M::word_bytes()) + * ((spillslot.index() as i64) + (part as i64)), + ), + temp.to_reg(), + M::word_type(), + )); + } + } else { + assert_ne!(*vreg, temp); + emit(M::gen_load_stack(*amode, *vreg, *ty)); + } + } + } + } + } +} + #[cfg(test)] mod tests { use super::SigData; diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 8875097f1c51..8066d89d7748 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -889,26 +889,19 @@ pub fn gen_call_common( gen_call_common_args(ctx, &mut caller, args); // Handle retvals prior to emitting call, so the - // constraints are on the call instruction; but buffer the - // instructions till after the call. + // constraints are on the call instruction. let mut outputs = InstOutput::new(); - let mut retval_insts = crate::machinst::abi::SmallInstVec::new(); // We take the *last* `num_rets` returns of the sig: // this skips a StructReturn, if any, that is present. 
let sigdata_num_rets = caller.num_rets(ctx.sigs()); debug_assert!(num_rets <= sigdata_num_rets); for i in (sigdata_num_rets - num_rets)..sigdata_num_rets { - let (retval_inst, retval_regs) = caller.gen_retval(ctx, i); - retval_insts.extend(retval_inst.into_iter()); + let retval_regs = caller.gen_retval(ctx, i); outputs.push(retval_regs); } caller.emit_call(ctx); - for inst in retval_insts { - ctx.emit(inst); - } - outputs } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 8ffc68b4e81f..fce309471ada 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -112,7 +112,8 @@ pub trait MachInst: Clone + Debug { /// Is this an "args" pseudoinst? fn is_args(&self) -> bool; - /// Should this instruction be included in the clobber-set? + /// Should this instruction's clobber-list be included in the + /// clobber-set? fn is_included_in_clobbers(&self) -> bool; /// Does this instruction access memory? diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs index 2670e3ad12c3..5a6ea1f9b326 100644 --- a/cranelift/codegen/src/machinst/reg.rs +++ b/cranelift/codegen/src/machinst/reg.rs @@ -38,27 +38,41 @@ pub fn first_user_vreg_index() -> usize { PINNED_VREGS } -/// A register named in an instruction. This register can be either a -/// virtual register or a fixed physical register. It does not have -/// any constraints applied to it: those can be added later in -/// `MachInst::get_operands()` when the `Reg`s are converted to -/// `Operand`s. +/// A register named in an instruction. This register can be a virtual +/// register, a fixed physical register, or a named spillslot (after +/// regalloc). It does not have any constraints applied to it: those +/// can be added later in `MachInst::get_operands()` when the `Reg`s +/// are converted to `Operand`s. 
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct Reg(VReg); +pub struct Reg(u32); + +const REG_SPILLSLOT_BIT: u32 = 0x8000_0000; +const REG_SPILLSLOT_MASK: u32 = !REG_SPILLSLOT_BIT; impl Reg { /// Get the physical register (`RealReg`), if this register is /// one. pub fn to_real_reg(self) -> Option<RealReg> { - pinned_vreg_to_preg(self.0).map(RealReg) + pinned_vreg_to_preg(self.0.into()).map(RealReg) } /// Get the virtual (non-physical) register, if this register is /// one. pub fn to_virtual_reg(self) -> Option<VirtualReg> { - if pinned_vreg_to_preg(self.0).is_none() { - Some(VirtualReg(self.0)) + if self.to_spillslot().is_some() { + None + } else if pinned_vreg_to_preg(self.0.into()).is_none() { + Some(VirtualReg(self.0.into())) + } else { + None + } + } + + /// Get the spillslot, if this register is one. + pub fn to_spillslot(self) -> Option<SpillSlot> { + if (self.0 & REG_SPILLSLOT_BIT) != 0 { + Some(SpillSlot::new((self.0 & REG_SPILLSLOT_MASK) as usize)) } else { None } @@ -66,7 +80,8 @@ impl Reg { /// Get the class of this register. pub fn class(self) -> RegClass { - self.0.class() + assert!(!self.to_spillslot().is_some()); + VReg::from(self.0).class() } /// Is this a real (physical) reg? @@ -78,12 +93,19 @@ impl Reg { pub fn is_virtual(self) -> bool { self.to_virtual_reg().is_some() } + + /// Is this a spillslot?
+ pub fn is_spillslot(self) -> bool { + self.to_spillslot().is_some() + } } impl std::fmt::Debug for Reg { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if self.0 == VReg::invalid() { + if VReg::from(self.0) == VReg::invalid() { write!(f, "<invalid>") + } else if let Some(spillslot) = self.to_spillslot() { + write!(f, "{spillslot}") } else if let Some(rreg) = self.to_real_reg() { let preg: PReg = rreg.into(); write!(f, "{preg}") @@ -197,7 +219,7 @@ impl Writable { impl std::convert::From<regalloc2::VReg> for Reg { fn from(vreg: regalloc2::VReg) -> Reg { - Reg(vreg) + Reg(vreg.bits() as u32) } } @@ -213,12 +235,12 @@ impl std::convert::From for regalloc2::VReg { /// registers also map to particular (special) VRegs, so this /// method can be used either on virtual or physical `Reg`s. fn from(reg: Reg) -> regalloc2::VReg { - reg.0 + reg.0.into() } } impl std::convert::From<&Reg> for regalloc2::VReg { fn from(reg: &Reg) -> regalloc2::VReg { - reg.0 + reg.0.into() } } @@ -256,19 +278,25 @@ impl std::convert::From for Reg { impl std::convert::From<RealReg> for Reg { fn from(reg: RealReg) -> Reg { - Reg(reg.into()) + Reg(VReg::from(reg).bits() as u32) } } impl std::convert::From<VirtualReg> for Reg { fn from(reg: VirtualReg) -> Reg { - Reg(reg.0) + Reg(reg.0.bits() as u32) } } /// A spill slot. pub type SpillSlot = regalloc2::SpillSlot; +impl std::convert::From<regalloc2::SpillSlot> for Reg { + fn from(spillslot: regalloc2::SpillSlot) -> Reg { + Reg(REG_SPILLSLOT_BIT | spillslot.index() as u32) + } +} + /// A register class. Each register in the ISA has one class, and the /// classes are disjoint. Most modern ISAs will have just two classes: /// the integer/general-purpose registers (GPRs), and the float/vector @@ -429,6 +457,19 @@ pub trait OperandVisitorImpl: OperandVisitor { self.add_operand(reg, constraint, OperandKind::Def, OperandPos::Late); } } + + /// Add a def that can be allocated to either a register or a + /// spillslot, at the end of the instruction (`After` + /// position).
Use only when this def will be written after all + /// uses are read. + fn any_def(&mut self, reg: &mut Writable<impl AsMut<Reg>>) { + self.add_operand( + reg.reg.as_mut(), + OperandConstraint::Any, + OperandKind::Def, + OperandPos::Late, + ); + } } impl<T: OperandVisitor> OperandVisitorImpl for T {} @@ -441,9 +482,10 @@ impl<'a, F: Fn(VReg) -> VReg> OperandVisitor for OperandCollector<'a, F> { kind: OperandKind, pos: OperandPos, ) { - reg.0 = (self.renamer)(reg.0); + debug_assert!(!reg.is_spillslot()); + reg.0 = (self.renamer)(VReg::from(reg.0)).bits() as u32; self.operands - .push(Operand::new(reg.0, constraint, kind, pos)); + .push(Operand::new(VReg::from(reg.0), constraint, kind, pos)); } fn debug_assert_is_allocatable_preg(&self, reg: PReg, expected: bool) { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 45c58a78ae83..dd6f48668b75 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -672,16 +672,6 @@ impl VCode { } for (i, range) in self.operand_ranges.iter() { - // Skip this instruction if not "included in clobbers" as - // per the MachInst. (Some backends use this to implement - // ABI specifics; e.g., excluding calls of the same ABI as - // the current function from clobbers, because by - // definition everything clobbered by the call can be - // clobbered by this function without saving as well.) - if !self.insts[i].is_included_in_clobbers() { - continue; - } - let operands = &self.operands[range.clone()]; let allocs = &regalloc.allocs[range]; for (operand, alloc) in operands.iter().zip(allocs.iter()) { @@ -693,8 +683,28 @@ impl VCode { } // Also add explicitly-clobbered registers. - if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { - clobbered.union_from(inst_clobbered); + // + // Skip merging this instruction's clobber list if not + // "included in clobbers" as per the MachInst.
(Some + // backends use this to implement ABI specifics; e.g., + // excluding calls of the same ABI as the current function + // from clobbers, because by definition everything + // clobbered by the call can be clobbered by this function + // without saving as well.) + // + // This is important for a particular optimization: when + // some registers are "half-clobbered", e.g. vector/float + // registers on aarch64, we want them to be seen as + // clobbered by regalloc so it avoids carrying values + // across calls in these registers but not seen as + // clobbered by prologue generation here (because the + // actual half-clobber implied by the clobber list fits + // within the clobbers that we allow without + // clobber-saves). + if self.insts[i].is_included_in_clobbers() { + if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { + clobbered.union_from(inst_clobbered); + } } } @@ -933,17 +943,19 @@ impl VCode { let mut allocs = regalloc.inst_allocs(iix).iter(); self.insts[iix.index()].get_operands( &mut |reg: &mut Reg, constraint, _kind, _pos| { - let alloc = allocs - .next() - .expect("enough allocations for all operands") - .as_reg() - .expect("only register allocations, not stack allocations") - .into(); - - if let OperandConstraint::FixedReg(rreg) = constraint { - debug_assert_eq!(Reg::from(rreg), alloc); + let alloc = + allocs.next().expect("enough allocations for all operands"); + + if let Some(alloc) = alloc.as_reg() { + let alloc: Reg = alloc.into(); + if let OperandConstraint::FixedReg(rreg) = constraint { + debug_assert_eq!(Reg::from(rreg), alloc); + } + *reg = alloc; + } else if let Some(alloc) = alloc.as_stack() { + let alloc: Reg = alloc.into(); + *reg = alloc; } - *reg = alloc; }, ); debug_assert!(allocs.next().is_none()); diff --git a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif index 41045f0acb7b..9ba81c4ec643 100644 ---
a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif @@ -380,32 +380,23 @@ block0: ; VCode: ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; sub sp, sp, #160 +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; sub sp, sp, #240 ; block0: ; mov x8, sp ; load_ext_name x12, TestCase(%tail_callee_stack_rets)+0 ; blr x12 -; ldr x9, [sp] -; ldr x11, [sp, #8] -; ldr x13, [sp, #16] -; ldr x15, [sp, #24] -; ldr x1, [sp, #32] -; ldr x3, [sp, #40] -; ldr x5, [sp, #48] -; ldr x7, [sp, #56] -; ldr x9, [sp, #64] -; ldr x11, [sp, #72] -; ldr x13, [sp, #80] -; ldr x15, [sp, #88] -; ldr x1, [sp, #96] -; ldr x3, [sp, #104] -; ldr x5, [sp, #112] -; ldr x7, [sp, #120] -; ldr x9, [sp, #128] -; ldr x11, [sp, #136] -; ldr x13, [sp, #144] -; ldr x2, [sp, #152] -; add sp, sp, #160 +; ldr x2, [sp, #232] +; add sp, sp, #240 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret ; @@ -413,35 +404,56 @@ block0: ; block0: ; offset 0x0 ; stp x29, x30, [sp, #-0x10]! ; mov x29, sp -; sub sp, sp, #0xa0 -; block1: ; offset 0xc +; stp x27, x28, [sp, #-0x10]! +; stp x25, x26, [sp, #-0x10]! +; stp x23, x24, [sp, #-0x10]! +; stp x21, x22, [sp, #-0x10]! +; stp x19, x20, [sp, #-0x10]! 
+; sub sp, sp, #0xf0 +; block1: ; offset 0x20 ; mov x8, sp -; ldr x12, #0x18 -; b #0x20 +; ldr x12, #0x2c +; b #0x34 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x12 ; ldur x9, [sp] -; ldur x11, [sp, #8] -; ldur x13, [sp, #0x10] -; ldur x15, [sp, #0x18] -; ldur x1, [sp, #0x20] -; ldur x3, [sp, #0x28] -; ldur x5, [sp, #0x30] -; ldur x7, [sp, #0x38] +; stur x9, [sp, #0xa0] +; ldur x9, [sp, #8] +; stur x9, [sp, #0xa8] +; ldur x9, [sp, #0x10] +; stur x9, [sp, #0xb0] +; ldur x9, [sp, #0x18] +; stur x9, [sp, #0xb8] +; ldur x9, [sp, #0x20] +; stur x9, [sp, #0xc0] +; ldur x9, [sp, #0x28] +; stur x9, [sp, #0xc8] +; ldur x9, [sp, #0x30] +; stur x9, [sp, #0xd0] +; ldur x9, [sp, #0x38] +; stur x9, [sp, #0xd8] ; ldur x9, [sp, #0x40] -; ldur x11, [sp, #0x48] -; ldur x13, [sp, #0x50] -; ldur x15, [sp, #0x58] -; ldur x1, [sp, #0x60] -; ldur x3, [sp, #0x68] -; ldur x5, [sp, #0x70] -; ldur x7, [sp, #0x78] -; ldur x9, [sp, #0x80] -; ldur x11, [sp, #0x88] -; ldur x13, [sp, #0x90] -; ldur x2, [sp, #0x98] -; add sp, sp, #0xa0 +; stur x9, [sp, #0xe0] +; ldur x25, [sp, #0x48] +; ldur x26, [sp, #0x50] +; ldur x27, [sp, #0x58] +; ldur x28, [sp, #0x60] +; ldur x21, [sp, #0x68] +; ldur x19, [sp, #0x70] +; ldur x20, [sp, #0x78] +; ldur x22, [sp, #0x80] +; ldur x23, [sp, #0x88] +; ldur x24, [sp, #0x90] +; ldur x9, [sp, #0x98] +; stur x9, [sp, #0xe8] +; ldur x2, [sp, #0xe8] +; add sp, sp, #0xf0 +; ldp x19, x20, [sp], #0x10 +; ldp x21, x22, [sp], #0x10 +; ldp x23, x24, [sp], #0x10 +; ldp x25, x26, [sp], #0x10 +; ldp x27, x28, [sp], #0x10 ; ldp x29, x30, [sp], #0x10 ; ret @@ -620,7 +632,7 @@ block0: ; stp x23, x24, [sp, #-16]! ; stp x21, x22, [sp, #-16]! ; stp x19, x20, [sp, #-16]! 
-; sub sp, sp, #320 +; sub sp, sp, #400 ; block0: ; movz x2, #10 ; movz x3, #15 @@ -671,27 +683,8 @@ block0: ; add x8, sp, #160 ; load_ext_name x10, TestCase(%tail_callee_stack_args_and_rets)+0 ; blr x10 -; ldr x7, [sp, #160] -; ldr x9, [sp, #168] -; ldr x11, [sp, #176] -; ldr x13, [sp, #184] -; ldr x15, [sp, #192] -; ldr x1, [sp, #200] -; ldr x3, [sp, #208] -; ldr x5, [sp, #216] -; ldr x7, [sp, #224] -; ldr x9, [sp, #232] -; ldr x11, [sp, #240] -; ldr x13, [sp, #248] -; ldr x15, [sp, #256] -; ldr x1, [sp, #264] -; ldr x3, [sp, #272] -; ldr x5, [sp, #280] -; ldr x7, [sp, #288] -; ldr x9, [sp, #296] -; ldr x11, [sp, #304] -; ldr x2, [sp, #312] -; add sp, sp, #320 +; ldr x2, [sp, #392] +; add sp, sp, #400 ; ldp x19, x20, [sp], #16 ; ldp x21, x22, [sp], #16 ; ldp x23, x24, [sp], #16 @@ -709,7 +702,7 @@ block0: ; stp x23, x24, [sp, #-0x10]! ; stp x21, x22, [sp, #-0x10]! ; stp x19, x20, [sp, #-0x10]! -; sub sp, sp, #0x140 +; sub sp, sp, #0x190 ; block1: ; offset 0x20 ; mov x2, #0xa ; mov x3, #0xf @@ -764,27 +757,38 @@ block0: ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x10 ; sub sp, sp, #0xa0 -; ldur x7, [sp, #0xa0] +; ldur x9, [sp, #0xa0] +; str x9, [sp, #0x140] ; ldur x9, [sp, #0xa8] -; ldur x11, [sp, #0xb0] -; ldur x13, [sp, #0xb8] -; ldur x15, [sp, #0xc0] -; ldur x1, [sp, #0xc8] -; ldur x3, [sp, #0xd0] -; ldur x5, [sp, #0xd8] -; ldur x7, [sp, #0xe0] -; ldur x9, [sp, #0xe8] -; ldur x11, [sp, #0xf0] -; ldur x13, [sp, #0xf8] -; ldr x15, [sp, #0x100] -; ldr x1, [sp, #0x108] -; ldr x3, [sp, #0x110] -; ldr x5, [sp, #0x118] -; ldr x7, [sp, #0x120] -; ldr x9, [sp, #0x128] -; ldr x11, [sp, #0x130] -; ldr x2, [sp, #0x138] -; add sp, sp, #0x140 +; str x9, [sp, #0x148] +; ldur x9, [sp, #0xb0] +; str x9, [sp, #0x150] +; ldur x9, [sp, #0xb8] +; str x9, [sp, #0x158] +; ldur x9, [sp, #0xc0] +; str x9, [sp, #0x160] +; ldur x9, [sp, #0xc8] +; str x9, [sp, #0x168] +; ldur x9, [sp, #0xd0] +; str x9, [sp, #0x170] +; ldur x9, [sp, #0xd8] +; str x9, [sp, #0x178] +; ldur x9, [sp, #0xe0] +; str 
x9, [sp, #0x180] +; ldur x25, [sp, #0xe8] +; ldur x26, [sp, #0xf0] +; ldur x27, [sp, #0xf8] +; ldr x28, [sp, #0x100] +; ldr x21, [sp, #0x108] +; ldr x19, [sp, #0x110] +; ldr x20, [sp, #0x118] +; ldr x22, [sp, #0x120] +; ldr x23, [sp, #0x128] +; ldr x24, [sp, #0x130] +; ldr x9, [sp, #0x138] +; str x9, [sp, #0x188] +; ldr x2, [sp, #0x188] +; add sp, sp, #0x190 ; ldp x19, x20, [sp], #0x10 ; ldp x21, x22, [sp], #0x10 ; ldp x23, x24, [sp], #0x10 diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 6231d59c0dc0..943300b2f3df 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; 
ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -214,80 +214,71 @@ block0: } ; VCode: -; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: 
p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; xmov x21, x12 -; x28 = xload64 OutgoingArg(0) // flags = notrap aligned -; x16 = xload64 OutgoingArg(8) // flags = notrap aligned -; x12 = xload64 OutgoingArg(16) // flags = notrap aligned -; x15 = xload64 OutgoingArg(24) // flags = notrap aligned -; x22 = xload64 OutgoingArg(32) // flags = notrap aligned -; x24 = xload64 OutgoingArg(40) // flags = notrap aligned -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, 
location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; ret ; ; Disassembled: -; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28 +; push_frame_save 112, x16, x17, x18, x19, x26, x27, x28, x29 ; xmov x12, sp ; call1 x12, 0x0 // target = 0x8 -; xmov x21, x12 -; xload64le_o32 x28, sp, 0 -; xload64le_o32 x16, sp, 8 -; xload64le_o32 x12, sp, 16 -; xload64le_o32 x15, sp, 24 -; xload64le_o32 x22, sp, 32 -; xload64le_o32 x24, sp, 40 
-; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28 +; jump 0x5 // target = 0x13 +; xload64le_o32 x27, sp, 0 +; xload64le_o32 x19, sp, 8 +; xload64le_o32 x29, sp, 16 +; xload64le_o32 x16, sp, 24 +; xload64le_o32 x17, sp, 32 +; xload64le_o32 x18, sp, 40 +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, x16, x17, x18, x19, x26, x27, x28, x29 ; ret function %call_indirect(i32) -> i64 { @@ -301,7 +292,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame 
; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index bde96cabe9a0..fb2c0e4c4cc7 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, 
callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -214,80 +214,71 @@ block0: } ; VCode: -; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; xmov x21, x12 -; x28 = xload64 OutgoingArg(0) // flags = notrap aligned -; x16 = xload64 OutgoingArg(8) // flags = notrap aligned 
-; x12 = xload64 OutgoingArg(16) // flags = notrap aligned -; x15 = xload64 OutgoingArg(24) // flags = notrap aligned -; x22 = xload64 OutgoingArg(32) // flags = notrap aligned -; x24 = xload64 OutgoingArg(40) // flags = notrap aligned -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), 
types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; ret ; ; Disassembled: -; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28 +; push_frame_save 112, x16, x17, x18, x19, x26, x27, x28, x29 ; xmov x12, sp ; call1 x12, 0x0 // target = 0x8 -; xmov x21, x12 -; xload64le_o32 x28, sp, 0 -; xload64le_o32 x16, sp, 8 -; xload64le_o32 x12, sp, 16 -; xload64le_o32 x15, sp, 24 -; xload64le_o32 x22, sp, 32 -; xload64le_o32 x24, sp, 40 -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, 
x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28 +; jump 0x5 // target = 0x13 +; xload64le_o32 x27, sp, 0 +; xload64le_o32 x19, sp, 8 +; xload64le_o32 x29, sp, 16 +; xload64le_o32 x16, sp, 24 +; xload64le_o32 x17, sp, 32 +; xload64le_o32 x18, sp, 40 +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, x16, x17, x18, x19, x26, x27, x28, x29 ; ret function %call_indirect(i64) -> i64 { @@ -301,7 +292,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -397,7 +388,7 @@ block0(v0: i32): ; xstore64 sp+1000008, x20 // flags = notrap aligned ; block0: ; xmov x20, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: 
TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x5, x20 ; xadd32 x0, x5, x0 ; x20 = xload64 sp+1000008 // flags = notrap aligned diff --git a/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif index b276f8865781..219c0e01849d 100644 --- a/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif @@ -475,36 +475,35 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; addi sp,sp,-192 +; addi sp,sp,-400 +; sd s1,392(sp) +; sd s2,384(sp) +; sd s3,376(sp) +; sd s4,368(sp) +; sd s5,360(sp) +; sd s6,352(sp) +; sd s7,344(sp) +; sd s8,336(sp) +; sd s9,328(sp) +; sd s10,320(sp) +; sd s11,312(sp) ; block0: ; load_addr a0,0(sp) ; load_sym a4,%tail_callee_stack_rets+0 ; callind a4 -; ld a5,0(sp) -; ld a1,8(sp) -; ld a3,16(sp) -; ld a5,24(sp) -; ld a1,32(sp) -; ld a3,40(sp) -; ld a5,48(sp) -; ld a1,56(sp) -; ld a3,64(sp) -; ld a5,72(sp) -; ld a1,80(sp) -; ld a3,88(sp) -; ld a5,96(sp) -; ld a1,104(sp) -; ld a3,112(sp) -; ld a5,120(sp) -; ld a1,128(sp) -; ld a3,136(sp) -; ld a5,144(sp) -; ld a1,152(sp) -; ld a3,160(sp) -; ld a5,168(sp) -; ld a1,176(sp) -; ld a0,184(sp) -; addi sp,sp,192 +; ld a0,96(slot) +; ld s1,392(sp) +; ld s2,384(sp) +; ld s3,376(sp) +; ld s4,368(sp) +; ld s5,360(sp) +; ld s6,352(sp) +; ld s7,344(sp) +; ld s8,336(sp) +; ld s9,328(sp) +; ld s10,320(sp) +; ld s11,312(sp) +; addi sp,sp,400 ; ld ra,8(sp) ; ld fp,0(sp) ; addi sp,sp,16 @@ -516,8 +515,19 @@ block0: ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; addi sp, sp, -0xc0 -; block1: ; offset 0x14 +; addi sp, sp, -0x190 +; sd s1, 0x188(sp) +; sd s2, 0x180(sp) +; sd s3, 0x178(sp) +; sd s4, 0x170(sp) +; sd s5, 0x168(sp) +; sd s6, 0x160(sp) +; sd s7, 0x158(sp) +; sd s8, 0x150(sp) +; sd 
s9, 0x148(sp) +; sd s10, 0x140(sp) +; sd s11, 0x138(sp) +; block1: ; offset 0x40 ; mv a0, sp ; auipc a4, 0 ; ld a4, 0xc(a4) @@ -525,31 +535,57 @@ block0: ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; jalr a4 -; ld a5, 0(sp) -; ld a1, 8(sp) -; ld a3, 0x10(sp) -; ld a5, 0x18(sp) -; ld a1, 0x20(sp) -; ld a3, 0x28(sp) -; ld a5, 0x30(sp) -; ld a1, 0x38(sp) -; ld a3, 0x40(sp) -; ld a5, 0x48(sp) -; ld a1, 0x50(sp) -; ld a3, 0x58(sp) -; ld a5, 0x60(sp) -; ld a1, 0x68(sp) -; ld a3, 0x70(sp) -; ld a5, 0x78(sp) -; ld a1, 0x80(sp) -; ld a3, 0x88(sp) -; ld a5, 0x90(sp) -; ld a1, 0x98(sp) -; ld a3, 0xa0(sp) -; ld a5, 0xa8(sp) -; ld a1, 0xb0(sp) -; ld a0, 0xb8(sp) -; addi sp, sp, 0xc0 +; j 4 +; ld a2, 0(sp) +; sd a2, 0xc0(sp) +; ld a2, 8(sp) +; sd a2, 0xc8(sp) +; ld a2, 0x10(sp) +; sd a2, 0xd0(sp) +; ld a2, 0x18(sp) +; sd a2, 0xd8(sp) +; ld a2, 0x20(sp) +; sd a2, 0xe0(sp) +; ld a2, 0x28(sp) +; sd a2, 0xe8(sp) +; ld a2, 0x30(sp) +; sd a2, 0xf0(sp) +; ld a2, 0x38(sp) +; sd a2, 0xf8(sp) +; ld a2, 0x40(sp) +; sd a2, 0x100(sp) +; ld a2, 0x48(sp) +; sd a2, 0x108(sp) +; ld a2, 0x50(sp) +; sd a2, 0x110(sp) +; ld a2, 0x58(sp) +; sd a2, 0x118(sp) +; ld s8, 0x60(sp) +; ld s9, 0x68(sp) +; ld s10, 0x70(sp) +; ld s11, 0x78(sp) +; ld s7, 0x80(sp) +; ld s6, 0x88(sp) +; ld s5, 0x90(sp) +; ld s4, 0x98(sp) +; ld s3, 0xa0(sp) +; ld s2, 0xa8(sp) +; ld s1, 0xb0(sp) +; ld a2, 0xb8(sp) +; sd a2, 0x120(sp) +; ld a0, 0x120(sp) +; ld s1, 0x188(sp) +; ld s2, 0x180(sp) +; ld s3, 0x178(sp) +; ld s4, 0x170(sp) +; ld s5, 0x168(sp) +; ld s6, 0x160(sp) +; ld s7, 0x158(sp) +; ld s8, 0x150(sp) +; ld s9, 0x148(sp) +; ld s10, 0x140(sp) +; ld s11, 0x138(sp) +; addi sp, sp, 0x190 ; ld ra, 8(sp) ; ld s0, 0(sp) ; addi sp, sp, 0x10 @@ -773,23 +809,23 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; addi sp,sp,-464 -; sd s1,456(sp) -; sd s2,448(sp) -; sd s3,440(sp) -; sd s4,432(sp) -; sd s5,424(sp) -; sd s6,416(sp) -; sd s7,408(sp) -; sd s8,400(sp) -; sd 
s9,392(sp) -; sd s10,384(sp) -; sd s11,376(sp) +; addi sp,sp,-560 +; sd s1,552(sp) +; sd s2,544(sp) +; sd s3,536(sp) +; sd s4,528(sp) +; sd s5,520(sp) +; sd s6,512(sp) +; sd s7,504(sp) +; sd s8,496(sp) +; sd s9,488(sp) +; sd s10,480(sp) +; sd s11,472(sp) ; block0: ; li a2,10 -; sd a2,8(slot) +; sd a2,0(slot) ; li a3,15 -; sd a3,0(slot) +; sd a3,96(slot) ; li a3,20 ; li a4,25 ; li a5,30 @@ -835,45 +871,22 @@ block0: ; sd a2,144(sp) ; load_addr a0,160(sp) ; load_sym t1,%tail_callee_stack_args_and_rets+0 -; ld a1,8(slot) -; ld a2,0(slot) +; ld a1,0(slot) +; ld a2,96(slot) ; callind t1 -; ld a2,160(sp) -; ld a4,168(sp) -; ld a0,176(sp) -; ld a2,184(sp) -; ld a4,192(sp) -; ld a0,200(sp) -; ld a2,208(sp) -; ld a4,216(sp) -; ld a0,224(sp) -; ld a2,232(sp) -; ld a4,240(sp) -; ld a0,248(sp) -; ld a2,256(sp) -; ld a4,264(sp) -; ld a0,272(sp) -; ld a2,280(sp) -; ld a4,288(sp) -; ld a0,296(sp) -; ld a2,304(sp) -; ld a4,312(sp) -; ld a0,320(sp) -; ld a2,328(sp) -; ld a4,336(sp) -; ld a0,344(sp) -; ld s1,456(sp) -; ld s2,448(sp) -; ld s3,440(sp) -; ld s4,432(sp) -; ld s5,424(sp) -; ld s6,416(sp) -; ld s7,408(sp) -; ld s8,400(sp) -; ld s9,392(sp) -; ld s10,384(sp) -; ld s11,376(sp) -; addi sp,sp,464 +; ld a0,96(slot) +; ld s1,552(sp) +; ld s2,544(sp) +; ld s3,536(sp) +; ld s4,528(sp) +; ld s5,520(sp) +; ld s6,512(sp) +; ld s7,504(sp) +; ld s8,496(sp) +; ld s9,488(sp) +; ld s10,480(sp) +; ld s11,472(sp) +; addi sp,sp,560 ; ld ra,8(sp) ; ld fp,0(sp) ; addi sp,sp,16 @@ -885,23 +898,23 @@ block0: ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; addi sp, sp, -0x1d0 -; sd s1, 0x1c8(sp) -; sd s2, 0x1c0(sp) -; sd s3, 0x1b8(sp) -; sd s4, 0x1b0(sp) -; sd s5, 0x1a8(sp) -; sd s6, 0x1a0(sp) -; sd s7, 0x198(sp) -; sd s8, 0x190(sp) -; sd s9, 0x188(sp) -; sd s10, 0x180(sp) -; sd s11, 0x178(sp) +; addi sp, sp, -0x230 +; sd s1, 0x228(sp) +; sd s2, 0x220(sp) +; sd s3, 0x218(sp) +; sd s4, 0x210(sp) +; sd s5, 0x208(sp) +; sd s6, 0x200(sp) +; sd s7, 0x1f8(sp) +; sd s8, 0x1f0(sp) +; sd s9, 0x1e8(sp) +; sd 
s10, 0x1e0(sp) +; sd s11, 0x1d8(sp) ; block1: ; offset 0x40 ; addi a2, zero, 0xa -; sd a2, 0x168(sp) +; sd a2, 0x160(sp) ; addi a3, zero, 0xf -; sd a3, 0x160(sp) +; sd a3, 0x1c0(sp) ; addi a3, zero, 0x14 ; addi a4, zero, 0x19 ; addi a5, zero, 0x1e @@ -951,46 +964,61 @@ block0: ; j 0xc ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 -; ld a1, 0x168(sp) -; ld a2, 0x160(sp) +; ld a1, 0x160(sp) +; ld a2, 0x1c0(sp) ; jalr t1 ; addi sp, sp, -0xa0 +; j 4 ; ld a2, 0xa0(sp) -; ld a4, 0xa8(sp) -; ld a0, 0xb0(sp) +; sd a2, 0x160(sp) +; ld a2, 0xa8(sp) +; sd a2, 0x168(sp) +; ld a2, 0xb0(sp) +; sd a2, 0x170(sp) ; ld a2, 0xb8(sp) -; ld a4, 0xc0(sp) -; ld a0, 0xc8(sp) +; sd a2, 0x178(sp) +; ld a2, 0xc0(sp) +; sd a2, 0x180(sp) +; ld a2, 0xc8(sp) +; sd a2, 0x188(sp) ; ld a2, 0xd0(sp) -; ld a4, 0xd8(sp) -; ld a0, 0xe0(sp) +; sd a2, 0x190(sp) +; ld a2, 0xd8(sp) +; sd a2, 0x198(sp) +; ld a2, 0xe0(sp) +; sd a2, 0x1a0(sp) ; ld a2, 0xe8(sp) -; ld a4, 0xf0(sp) -; ld a0, 0xf8(sp) -; ld a2, 0x100(sp) -; ld a4, 0x108(sp) -; ld a0, 0x110(sp) -; ld a2, 0x118(sp) -; ld a4, 0x120(sp) -; ld a0, 0x128(sp) -; ld a2, 0x130(sp) -; ld a4, 0x138(sp) -; ld a0, 0x140(sp) -; ld a2, 0x148(sp) -; ld a4, 0x150(sp) -; ld a0, 0x158(sp) -; ld s1, 0x1c8(sp) -; ld s2, 0x1c0(sp) -; ld s3, 0x1b8(sp) -; ld s4, 0x1b0(sp) -; ld s5, 0x1a8(sp) -; ld s6, 0x1a0(sp) -; ld s7, 0x198(sp) -; ld s8, 0x190(sp) -; ld s9, 0x188(sp) -; ld s10, 0x180(sp) -; ld s11, 0x178(sp) -; addi sp, sp, 0x1d0 +; sd a2, 0x1a8(sp) +; ld a2, 0xf0(sp) +; sd a2, 0x1b0(sp) +; ld a2, 0xf8(sp) +; sd a2, 0x1b8(sp) +; ld s8, 0x100(sp) +; ld s9, 0x108(sp) +; ld s10, 0x110(sp) +; ld s11, 0x118(sp) +; ld s7, 0x120(sp) +; ld s6, 0x128(sp) +; ld s5, 0x130(sp) +; ld s4, 0x138(sp) +; ld s3, 0x140(sp) +; ld s2, 0x148(sp) +; ld s1, 0x150(sp) +; ld a2, 0x158(sp) +; sd a2, 0x1c0(sp) +; ld a0, 0x1c0(sp) +; ld s1, 0x228(sp) +; ld s2, 0x220(sp) +; ld s3, 0x218(sp) +; ld s4, 0x210(sp) +; ld s5, 
0x208(sp) +; ld s6, 0x200(sp) +; ld s7, 0x1f8(sp) +; ld s8, 0x1f0(sp) +; ld s9, 0x1e8(sp) +; ld s10, 0x1e0(sp) +; ld s11, 0x1d8(sp) +; addi sp, sp, 0x230 ; ld ra, 8(sp) ; ld s0, 0(sp) ; addi sp, sp, 0x10 diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif index 13d6ae00f19a..92270306919e 100644 --- a/cranelift/filetests/filetests/isa/s390x/call.clif +++ b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -314,6 +314,7 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: } ; VCode: +; stmg %r6, %r15, 48(%r15) ; block0: ; vl %v1, 0(%r3) ; vl %v3, 0(%r4) @@ -335,10 +336,13 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: ; vaq %v5, %v6, %v7 ; vaq %v4, %v4, %v5 ; vst %v4, 0(%r2) +; lmg %r6, %r15, 48(%r15) ; br %r14 ; ; Disassembled: ; block0: ; offset 0x0 +; stmg %r6, %r15, 0x30(%r15) +; block1: ; offset 0x6 ; vl %v1, 0(%r3) ; vl %v3, 0(%r4) ; vl %v5, 0(%r5) @@ -359,6 +363,7 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: ; vaq %v5, %v6, %v7 ; vaq %v4, %v4, %v5 ; vst %v4, 0(%r2) +; lmg %r6, %r15, 0x30(%r15) ; br %r14 function %call_sret() -> i64 { diff --git a/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif b/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif new file mode 100644 index 000000000000..eecbda69d2b2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif @@ -0,0 +1,179 @@ +test compile precise-output +set enable_multi_ret_implicit_sret +target x86_64 + +function %f(i32) -> i64 { + fn0 = %ext(i32) -> i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 + +block0(v0: i32): + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20 = call fn0(v0) + + v21 = iadd v1, v2 + v22 = iadd v3, v4 + v23 = iadd v5, v6 + v24 = iadd v7, v8 + v25 = iadd v9, v10 + v26 = 
iadd v11, v12 + v27 = iadd v13, v14 + v28 = iadd v15, v16 + v29 = iadd v17, v18 + v30 = iadd v19, v20 + + v31 = iadd v21, v22 + v32 = iadd v23, v24 + v33 = iadd v25, v26 + v34 = iadd v27, v28 + v35 = iadd v29, v30 + + v36 = iadd v31, v32 + v37 = iadd v33, v34 + v38 = iadd v35, v36 + v39 = iadd v37, v38 + + return v39 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $304, %rsp +; movq %rbx, 256(%rsp) +; movq %r12, 264(%rsp) +; movq %r13, 272(%rsp) +; movq %r14, 280(%rsp) +; movq %r15, 288(%rsp) +; block0: +; movq %rdi, %rsi +; lea 0(%rsp), %rdi +; load_ext_name %ext+0, %r10 +; call *%r10 +; lea 0(%rax,%rdx,1), %r8 +; lea 0(%rbx,%r15,1), %r9 +; lea 0(%r13,%r12,1), %r10 +; movq rsp(0 + virtual offset), %rcx +; lea 0(%rcx,%r14,1), %r11 +; movq rsp(8 + virtual offset), %rcx +; movq rsp(16 + virtual offset), %rdi +; lea 0(%rcx,%rdi,1), %rsi +; movq rsp(32 + virtual offset), %rdx +; movq rsp(24 + virtual offset), %rdi +; lea 0(%rdi,%rdx,1), %rdi +; movq rsp(40 + virtual offset), %rax +; movq rsp(48 + virtual offset), %rcx +; lea 0(%rax,%rcx,1), %rax +; movq rsp(64 + virtual offset), %rcx +; movq rsp(56 + virtual offset), %rdx +; lea 0(%rdx,%rcx,1), %rcx +; movq rsp(80 + virtual offset), %rdx +; movq rsp(72 + virtual offset), %r14 +; lea 0(%r14,%rdx,1), %rdx +; movq rsp(96 + virtual offset), %rbx +; movq rsp(88 + virtual offset), %r13 +; lea 0(%r13,%rbx,1), %r14 +; lea 0(%r8,%r9,1), %r8 +; lea 0(%r10,%r11,1), %r9 +; lea 0(%rsi,%rdi,1), %r10 +; lea 0(%rax,%rcx,1), %r11 +; lea 0(%rdx,%r14,1), %rsi +; lea 0(%r8,%r9,1), %r8 +; lea 0(%r10,%r11,1), %r9 +; lea 0(%rsi,%r8,1), %r8 +; lea 0(%r9,%r8,1), %rax +; movq 256(%rsp), %rbx +; movq 264(%rsp), %r12 +; movq 272(%rsp), %r13 +; movq 280(%rsp), %r14 +; movq 288(%rsp), %r15 +; addq %rsp, $304, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x130, %rsp +; movq %rbx, 0x100(%rsp) +; movq %r12, 0x108(%rsp) +; movq %r13, 0x110(%rsp) +; movq 
%r14, 0x118(%rsp) +; movq %r15, 0x120(%rsp) +; block1: ; offset 0x33 +; movq %rdi, %rsi +; leaq (%rsp), %rdi +; movabsq $0, %r10 ; reloc_external Abs8 %ext 0 +; callq *%r10 +; movq (%rsp), %rbx +; movq 8(%rsp), %r15 +; movq 0x10(%rsp), %r13 +; movq 0x18(%rsp), %r12 +; movq 0x20(%rsp), %r11 +; movq %r11, 0x90(%rsp) +; movq 0x28(%rsp), %r14 +; movq 0x30(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x38(%rsp), %r11 +; movq %r11, 0xa0(%rsp) +; movq 0x40(%rsp), %r11 +; movq %r11, 0xa8(%rsp) +; movq 0x48(%rsp), %r11 +; movq %r11, 0xb0(%rsp) +; movq 0x50(%rsp), %r11 +; movq %r11, 0xb8(%rsp) +; movq 0x58(%rsp), %r11 +; movq %r11, 0xc0(%rsp) +; movq 0x60(%rsp), %r11 +; movq %r11, 0xc8(%rsp) +; movq 0x68(%rsp), %r11 +; movq %r11, 0xd0(%rsp) +; movq 0x70(%rsp), %r11 +; movq %r11, 0xd8(%rsp) +; movq 0x78(%rsp), %r11 +; movq %r11, 0xe0(%rsp) +; movq 0x80(%rsp), %r11 +; movq %r11, 0xe8(%rsp) +; movq 0x88(%rsp), %r11 +; movq %r11, 0xf0(%rsp) +; leaq (%rax, %rdx), %r8 +; leaq (%rbx, %r15), %r9 +; leaq (%r13, %r12), %r10 +; movq 0x90(%rsp), %rcx +; leaq (%rcx, %r14), %r11 +; movq 0x98(%rsp), %rcx +; movq 0xa0(%rsp), %rdi +; leaq (%rcx, %rdi), %rsi +; movq 0xb0(%rsp), %rdx +; movq 0xa8(%rsp), %rdi +; addq %rdx, %rdi +; movq 0xb8(%rsp), %rax +; movq 0xc0(%rsp), %rcx +; addq %rcx, %rax +; movq 0xd0(%rsp), %rcx +; movq 0xc8(%rsp), %rdx +; addq %rdx, %rcx +; movq 0xe0(%rsp), %rdx +; movq 0xd8(%rsp), %r14 +; addq %r14, %rdx +; movq 0xf0(%rsp), %rbx +; movq 0xe8(%rsp), %r13 +; leaq (%r13, %rbx), %r14 +; addq %r9, %r8 +; leaq (%r10, %r11), %r9 +; leaq (%rsi, %rdi), %r10 +; leaq (%rax, %rcx), %r11 +; leaq (%rdx, %r14), %rsi +; addq %r9, %r8 +; leaq (%r10, %r11), %r9 +; addq %rsi, %r8 +; leaq (%r9, %r8), %rax +; movq 0x100(%rsp), %rbx +; movq 0x108(%rsp), %r12 +; movq 0x110(%rsp), %r13 +; movq 0x118(%rsp), %r14 +; movq 0x120(%rsp), %r15 +; addq $0x130, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif 
b/cranelift/filetests/filetests/isa/x64/i128.clif index 1a03ba7aa5ea..6b16c75d3fcb 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1287,16 +1287,17 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $32, %rsp -; movq %r13, 16(%rsp) +; movq %r12, 16(%rsp) +; movq %r13, 24(%rsp) ; block0: ; movq %rdi, %r13 ; lea 0(%rsp), %rdi ; load_ext_name %g+0, %r9 ; call *%r9 -; movq 0(%rsp), %r8 ; movq %r13, %rdi -; movq %r8, 0(%rdi) -; movq 16(%rsp), %r13 +; movq %r12, 0(%rdi) +; movq 16(%rsp), %r12 +; movq 24(%rsp), %r13 ; addq %rsp, $32, %rsp ; movq %rbp, %rsp ; popq %rbp @@ -1307,16 +1308,18 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; subq $0x20, %rsp -; movq %r13, 0x10(%rsp) -; block1: ; offset 0xd +; movq %r12, 0x10(%rsp) +; movq %r13, 0x18(%rsp) +; block1: ; offset 0x12 ; movq %rdi, %r13 ; leaq (%rsp), %rdi ; movabsq $0, %r9 ; reloc_external Abs8 %g 0 ; callq *%r9 -; movq (%rsp), %r8 +; movq (%rsp), %r12 ; movq %r13, %rdi -; movq %r8, (%rdi) -; movq 0x10(%rsp), %r13 +; movq %r12, (%rdi) +; movq 0x10(%rsp), %r12 +; movq 0x18(%rsp), %r13 ; addq $0x20, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif index 828be1ef89d3..44e26d71a9d0 100644 --- a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif @@ -208,24 +208,24 @@ block0: ; movq %r9, rsp(64 + virtual offset) ; movl $40, %r10d ; movq %r10, rsp(56 + virtual offset) -; movl $45, %r11d -; movq %r11, rsp(48 + virtual offset) -; movl $50, %r11d -; movq %r11, rsp(40 + virtual offset) +; movl $45, %r10d +; movq %r10, rsp(48 + virtual offset) +; movl $50, %r13d ; movl $55, %r14d ; movl $60, %r15d ; movl $65, %ebx ; movl $70, %r12d -; movl $75, %r13d +; movl $75, %r11d ; movl $80, %eax ; movl $85, %ecx ; movl $90, %edx ; movl $95, 
%esi -; movq %rsi, rsp(32 + virtual offset) +; movq %rsi, rsp(40 + virtual offset) ; movl $100, %r8d ; movl $105, %r9d ; movl $110, %r10d -; movl $115, %r11d +; movl $115, %esi +; movq %rsi, rsp(32 + virtual offset) ; movl $120, %esi ; movq %rsi, rsp(24 + virtual offset) ; movl $125, %esi @@ -234,28 +234,30 @@ block0: ; movq %rsi, rsp(8 + virtual offset) ; movl $135, %esi ; movq %rsi, rsp(0 + virtual offset) -; movq %r14, 0(%rdi) -; movq %r15, 8(%rdi) -; movq %rbx, 16(%rdi) -; movq %r12, 24(%rdi) -; movq %r13, 32(%rdi) -; movq %rax, 40(%rdi) -; movq %rcx, 48(%rdi) -; movq %rdx, 56(%rdi) -; movq rsp(32 + virtual offset), %rax -; movq %rax, 64(%rdi) -; movq %r8, 72(%rdi) -; movq %r9, 80(%rdi) -; movq %r10, 88(%rdi) -; movq %r11, 96(%rdi) -; movq rsp(24 + virtual offset), %rsi +; movq %r13, 0(%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 16(%rdi) +; movq %rbx, 24(%rdi) +; movq %r12, 32(%rdi) +; movq %r11, 40(%rdi) +; movq %rax, 48(%rdi) +; movq %rcx, 56(%rdi) +; movq %rdx, 64(%rdi) +; movq rsp(40 + virtual offset), %rax +; movq %rax, 72(%rdi) +; movq %r8, 80(%rdi) +; movq %r9, 88(%rdi) +; movq %r10, 96(%rdi) +; movq rsp(32 + virtual offset), %rsi ; movq %rsi, 104(%rdi) -; movq rsp(16 + virtual offset), %rsi +; movq rsp(24 + virtual offset), %rsi ; movq %rsi, 112(%rdi) -; movq rsp(8 + virtual offset), %rsi +; movq rsp(16 + virtual offset), %rsi ; movq %rsi, 120(%rdi) -; movq rsp(0 + virtual offset), %rsi +; movq rsp(8 + virtual offset), %rsi ; movq %rsi, 128(%rdi) +; movq rsp(0 + virtual offset), %rsi +; movq %rsi, 136(%rdi) ; movq rsp(104 + virtual offset), %rax ; movq rsp(96 + virtual offset), %rcx ; movq rsp(88 + virtual offset), %rdx @@ -264,7 +266,6 @@ block0: ; movq rsp(64 + virtual offset), %r8 ; movq rsp(56 + virtual offset), %r9 ; movq rsp(48 + virtual offset), %r10 -; movq rsp(40 + virtual offset), %r11 ; movq 112(%rsp), %rbx ; movq 120(%rsp), %r12 ; movq 128(%rsp), %r13 @@ -300,24 +301,24 @@ block0: ; movq %r9, 0x40(%rsp) ; movl $0x28, %r10d ; movq %r10, 
0x38(%rsp) -; movl $0x2d, %r11d -; movq %r11, 0x30(%rsp) -; movl $0x32, %r11d -; movq %r11, 0x28(%rsp) +; movl $0x2d, %r10d +; movq %r10, 0x30(%rsp) +; movl $0x32, %r13d ; movl $0x37, %r14d ; movl $0x3c, %r15d ; movl $0x41, %ebx ; movl $0x46, %r12d -; movl $0x4b, %r13d +; movl $0x4b, %r11d ; movl $0x50, %eax ; movl $0x55, %ecx ; movl $0x5a, %edx ; movl $0x5f, %esi -; movq %rsi, 0x20(%rsp) +; movq %rsi, 0x28(%rsp) ; movl $0x64, %r8d ; movl $0x69, %r9d ; movl $0x6e, %r10d -; movl $0x73, %r11d +; movl $0x73, %esi +; movq %rsi, 0x20(%rsp) ; movl $0x78, %esi ; movq %rsi, 0x18(%rsp) ; movl $0x7d, %esi @@ -326,28 +327,30 @@ block0: ; movq %rsi, 8(%rsp) ; movl $0x87, %esi ; movq %rsi, (%rsp) -; movq %r14, (%rdi) -; movq %r15, 8(%rdi) -; movq %rbx, 0x10(%rdi) -; movq %r12, 0x18(%rdi) -; movq %r13, 0x20(%rdi) -; movq %rax, 0x28(%rdi) -; movq %rcx, 0x30(%rdi) -; movq %rdx, 0x38(%rdi) -; movq 0x20(%rsp), %rax -; movq %rax, 0x40(%rdi) -; movq %r8, 0x48(%rdi) -; movq %r9, 0x50(%rdi) -; movq %r10, 0x58(%rdi) -; movq %r11, 0x60(%rdi) -; movq 0x18(%rsp), %rsi +; movq %r13, (%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 0x10(%rdi) +; movq %rbx, 0x18(%rdi) +; movq %r12, 0x20(%rdi) +; movq %r11, 0x28(%rdi) +; movq %rax, 0x30(%rdi) +; movq %rcx, 0x38(%rdi) +; movq %rdx, 0x40(%rdi) +; movq 0x28(%rsp), %rax +; movq %rax, 0x48(%rdi) +; movq %r8, 0x50(%rdi) +; movq %r9, 0x58(%rdi) +; movq %r10, 0x60(%rdi) +; movq 0x20(%rsp), %rsi ; movq %rsi, 0x68(%rdi) -; movq 0x10(%rsp), %rsi +; movq 0x18(%rsp), %rsi ; movq %rsi, 0x70(%rdi) -; movq 8(%rsp), %rsi +; movq 0x10(%rsp), %rsi ; movq %rsi, 0x78(%rdi) -; movq (%rsp), %rsi +; movq 8(%rsp), %rsi ; movq %rsi, 0x80(%rdi) +; movq (%rsp), %rsi +; movq %rsi, 0x88(%rdi) ; movq 0x68(%rsp), %rax ; movq 0x60(%rsp), %rcx ; movq 0x58(%rsp), %rdx @@ -356,7 +359,6 @@ block0: ; movq 0x40(%rsp), %r8 ; movq 0x38(%rsp), %r9 ; movq 0x30(%rsp), %r10 -; movq 0x28(%rsp), %r11 ; movq 0x70(%rsp), %rbx ; movq 0x78(%rsp), %r12 ; movq 0x80(%rsp), %r13 @@ -378,28 +380,22 @@ 
block0: ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $144, %rsp +; subq %rsp, $304, %rsp +; movq %rbx, 256(%rsp) +; movq %r12, 264(%rsp) +; movq %r13, 272(%rsp) +; movq %r14, 280(%rsp) +; movq %r15, 288(%rsp) ; block0: ; lea 0(%rsp), %rdi ; call TestCase(%tail_callee_stack_rets) -; movq 0(%rsp), %rax -; movq 8(%rsp), %rdx -; movq 16(%rsp), %r9 -; movq 24(%rsp), %r11 -; movq 32(%rsp), %rdi -; movq 40(%rsp), %rcx -; movq 48(%rsp), %r8 -; movq 56(%rsp), %r10 -; movq 64(%rsp), %rsi -; movq 72(%rsp), %rax -; movq 80(%rsp), %rdx -; movq 88(%rsp), %r9 -; movq 96(%rsp), %r11 -; movq 104(%rsp), %rdi -; movq 112(%rsp), %rcx -; movq 120(%rsp), %r8 -; movq 128(%rsp), %rax -; addq %rsp, $144, %rsp +; movq rsp(96 + virtual offset), %rax +; movq 256(%rsp), %rbx +; movq 264(%rsp), %r12 +; movq 272(%rsp), %r13 +; movq 280(%rsp), %r14 +; movq 288(%rsp), %r15 +; addq %rsp, $304, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -408,28 +404,53 @@ block0: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x90, %rsp -; block1: ; offset 0xb +; subq $0x130, %rsp +; movq %rbx, 0x100(%rsp) +; movq %r12, 0x108(%rsp) +; movq %r13, 0x110(%rsp) +; movq %r14, 0x118(%rsp) +; movq %r15, 0x120(%rsp) +; block1: ; offset 0x33 ; leaq (%rsp), %rdi -; callq 0x14 ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 -; movq (%rsp), %rax -; movq 8(%rsp), %rdx -; movq 0x10(%rsp), %r9 +; callq 0x3c ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 +; movq (%rsp), %r11 +; movq %r11, 0x90(%rsp) +; movq 8(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x10(%rsp), %r11 +; movq %r11, 0xa0(%rsp) ; movq 0x18(%rsp), %r11 -; movq 0x20(%rsp), %rdi -; movq 0x28(%rsp), %rcx -; movq 0x30(%rsp), %r8 -; movq 0x38(%rsp), %r10 -; movq 0x40(%rsp), %rsi -; movq 0x48(%rsp), %rax -; movq 0x50(%rsp), %rdx -; movq 0x58(%rsp), %r9 -; movq 0x60(%rsp), %r11 -; movq 0x68(%rsp), %rdi -; movq 0x70(%rsp), %rcx -; movq 0x78(%rsp), %r8 -; movq 0x80(%rsp), %rax -; addq $0x90, %rsp +; movq %r11, 0xa8(%rsp) +; movq 
0x20(%rsp), %r11 +; movq %r11, 0xb0(%rsp) +; movq 0x28(%rsp), %r11 +; movq %r11, 0xb8(%rsp) +; movq 0x30(%rsp), %r11 +; movq %r11, 0xc0(%rsp) +; movq 0x38(%rsp), %r11 +; movq %r11, 0xc8(%rsp) +; movq 0x40(%rsp), %r11 +; movq %r11, 0xd0(%rsp) +; movq 0x48(%rsp), %r11 +; movq %r11, 0xd8(%rsp) +; movq 0x50(%rsp), %r11 +; movq %r11, 0xe0(%rsp) +; movq 0x58(%rsp), %r11 +; movq %r11, 0xe8(%rsp) +; movq 0x60(%rsp), %rbx +; movq 0x68(%rsp), %r12 +; movq 0x70(%rsp), %r13 +; movq 0x78(%rsp), %r14 +; movq 0x80(%rsp), %r15 +; movq 0x88(%rsp), %r11 +; movq %r11, 0xf0(%rsp) +; movq 0xf0(%rsp), %rax +; movq 0x100(%rsp), %rbx +; movq 0x108(%rsp), %r12 +; movq 0x110(%rsp), %r13 +; movq 0x118(%rsp), %r14 +; movq 0x120(%rsp), %r15 +; addq $0x130, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq @@ -461,52 +482,56 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq %r9, rsp(40 + virtual offset) ; movq rbp(stack args max - 168), %r10 ; movq %r10, rsp(48 + virtual offset) -; movq rbp(stack args max - 160), %r11 -; movq %r11, rsp(56 + virtual offset) -; movq rbp(stack args max - 152), %r11 -; movq %r11, rsp(64 + virtual offset) +; movq rbp(stack args max - 160), %r10 +; movq %r10, rsp(56 + virtual offset) +; movq rbp(stack args max - 152), %rcx +; movq %rcx, rsp(64 + virtual offset) ; movq rbp(stack args max - 144), %r8 +; movq %r8, rsp(72 + virtual offset) ; movq rbp(stack args max - 136), %r10 -; movq %r10, rsp(72 + virtual offset) +; movq %r10, rsp(80 + virtual offset) ; movq rbp(stack args max - 128), %rsi -; movq %rsi, rsp(80 + virtual offset) +; movq %rsi, rsp(88 + virtual offset) ; movq rbp(stack args max - 120), %rax -; movq %rax, rsp(88 + virtual offset) -; movq rbp(stack args max - 112), %r11 -; movq rbp(stack args max - 104), %r10 -; movq rbp(stack args max - 96), %r9 -; movq rbp(stack args max - 88), %rax ; movq %rax, rsp(96 + virtual offset) -; movq rbp(stack args max - 80), %rdx -; movq rbp(stack args max - 72), %rcx -; movq rbp(stack args 
max - 64), %rsi +; movq rbp(stack args max - 112), %r10 +; movq rbp(stack args max - 104), %r9 +; movq rbp(stack args max - 96), %r8 +; movq rbp(stack args max - 88), %rdx +; movq rbp(stack args max - 80), %rcx +; movq %rcx, rsp(104 + virtual offset) +; movq rbp(stack args max - 72), %rsi +; movq rbp(stack args max - 64), %r15 ; movq rbp(stack args max - 56), %r12 ; movq rbp(stack args max - 48), %r14 ; movq rbp(stack args max - 40), %rbx ; movq rbp(stack args max - 32), %r13 -; movq rbp(stack args max - 24), %r15 +; movq rbp(stack args max - 24), %r11 ; movq rbp(stack args max - 16), %rax -; movq %r8, 0(%rdi) -; movq rsp(72 + virtual offset), %r8 -; movq %r8, 8(%rdi) -; movq rsp(80 + virtual offset), %r8 -; movq %r8, 16(%rdi) -; movq rsp(88 + virtual offset), %r8 -; movq %r8, 24(%rdi) -; movq %r11, 32(%rdi) +; movq rsp(64 + virtual offset), %rcx +; movq %rcx, 0(%rdi) +; movq rsp(72 + virtual offset), %rcx +; movq %rcx, 8(%rdi) +; movq rsp(80 + virtual offset), %rcx +; movq %rcx, 16(%rdi) +; movq rsp(88 + virtual offset), %rcx +; movq %rcx, 24(%rdi) +; movq rsp(96 + virtual offset), %rcx +; movq %rcx, 32(%rdi) ; movq %r10, 40(%rdi) ; movq %r9, 48(%rdi) -; movq rsp(96 + virtual offset), %r8 ; movq %r8, 56(%rdi) ; movq %rdx, 64(%rdi) -; movq %rcx, 72(%rdi) +; movq rsp(104 + virtual offset), %rdx +; movq %rdx, 72(%rdi) ; movq %rsi, 80(%rdi) -; movq %r12, 88(%rdi) -; movq %r14, 96(%rdi) -; movq %rbx, 104(%rdi) -; movq %r13, 112(%rdi) -; movq %r15, 120(%rdi) -; movq %rax, 128(%rdi) +; movq %r15, 88(%rdi) +; movq %r12, 96(%rdi) +; movq %r14, 104(%rdi) +; movq %rbx, 112(%rdi) +; movq %r13, 120(%rdi) +; movq %r11, 128(%rdi) +; movq %rax, 136(%rdi) ; movq rsp(0 + virtual offset), %rax ; movq rsp(8 + virtual offset), %rcx ; movq rsp(16 + virtual offset), %rdx @@ -515,7 +540,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq rsp(40 + virtual offset), %r8 ; movq rsp(48 + virtual offset), %r9 ; movq rsp(56 + virtual offset), %r10 -; 
movq rsp(64 + virtual offset), %r11 ; movq 112(%rsp), %rbx ; movq 120(%rsp), %r12 ; movq 128(%rsp), %r13 @@ -546,52 +570,56 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq %r9, 0x28(%rsp) ; movq 0x18(%rbp), %r10 ; movq %r10, 0x30(%rsp) -; movq 0x20(%rbp), %r11 -; movq %r11, 0x38(%rsp) -; movq 0x28(%rbp), %r11 -; movq %r11, 0x40(%rsp) +; movq 0x20(%rbp), %r10 +; movq %r10, 0x38(%rsp) +; movq 0x28(%rbp), %rcx +; movq %rcx, 0x40(%rsp) ; movq 0x30(%rbp), %r8 +; movq %r8, 0x48(%rsp) ; movq 0x38(%rbp), %r10 -; movq %r10, 0x48(%rsp) +; movq %r10, 0x50(%rsp) ; movq 0x40(%rbp), %rsi -; movq %rsi, 0x50(%rsp) +; movq %rsi, 0x58(%rsp) ; movq 0x48(%rbp), %rax -; movq %rax, 0x58(%rsp) -; movq 0x50(%rbp), %r11 -; movq 0x58(%rbp), %r10 -; movq 0x60(%rbp), %r9 -; movq 0x68(%rbp), %rax ; movq %rax, 0x60(%rsp) -; movq 0x70(%rbp), %rdx -; movq 0x78(%rbp), %rcx -; movq 0x80(%rbp), %rsi +; movq 0x50(%rbp), %r10 +; movq 0x58(%rbp), %r9 +; movq 0x60(%rbp), %r8 +; movq 0x68(%rbp), %rdx +; movq 0x70(%rbp), %rcx +; movq %rcx, 0x68(%rsp) +; movq 0x78(%rbp), %rsi +; movq 0x80(%rbp), %r15 ; movq 0x88(%rbp), %r12 ; movq 0x90(%rbp), %r14 ; movq 0x98(%rbp), %rbx ; movq 0xa0(%rbp), %r13 -; movq 0xa8(%rbp), %r15 +; movq 0xa8(%rbp), %r11 ; movq 0xb0(%rbp), %rax -; movq %r8, (%rdi) -; movq 0x48(%rsp), %r8 -; movq %r8, 8(%rdi) -; movq 0x50(%rsp), %r8 -; movq %r8, 0x10(%rdi) -; movq 0x58(%rsp), %r8 -; movq %r8, 0x18(%rdi) -; movq %r11, 0x20(%rdi) +; movq 0x40(%rsp), %rcx +; movq %rcx, (%rdi) +; movq 0x48(%rsp), %rcx +; movq %rcx, 8(%rdi) +; movq 0x50(%rsp), %rcx +; movq %rcx, 0x10(%rdi) +; movq 0x58(%rsp), %rcx +; movq %rcx, 0x18(%rdi) +; movq 0x60(%rsp), %rcx +; movq %rcx, 0x20(%rdi) ; movq %r10, 0x28(%rdi) ; movq %r9, 0x30(%rdi) -; movq 0x60(%rsp), %r8 ; movq %r8, 0x38(%rdi) ; movq %rdx, 0x40(%rdi) -; movq %rcx, 0x48(%rdi) +; movq 0x68(%rsp), %rdx +; movq %rdx, 0x48(%rdi) ; movq %rsi, 0x50(%rdi) -; movq %r12, 0x58(%rdi) -; movq %r14, 0x60(%rdi) -; movq %rbx, 
0x68(%rdi) -; movq %r13, 0x70(%rdi) -; movq %r15, 0x78(%rdi) -; movq %rax, 0x80(%rdi) +; movq %r15, 0x58(%rdi) +; movq %r12, 0x60(%rdi) +; movq %r14, 0x68(%rdi) +; movq %rbx, 0x70(%rdi) +; movq %r13, 0x78(%rdi) +; movq %r11, 0x80(%rdi) +; movq %rax, 0x88(%rdi) ; movq (%rsp), %rax ; movq 8(%rsp), %rcx ; movq 0x10(%rsp), %rdx @@ -600,7 +628,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq 0x28(%rsp), %r8 ; movq 0x30(%rsp), %r9 ; movq 0x38(%rsp), %r10 -; movq 0x40(%rsp), %r11 ; movq 0x70(%rsp), %rbx ; movq 0x78(%rsp), %r12 ; movq 0x80(%rsp), %r13 @@ -656,17 +683,17 @@ block0: ; movq %r15, 464(%rsp) ; block0: ; movl $10, %edx -; movq %rdx, rsp(96 + virtual offset) +; movq %rdx, rsp(88 + virtual offset) ; movl $15, %ecx -; movq %rcx, rsp(88 + virtual offset) +; movq %rcx, rsp(80 + virtual offset) ; movl $20, %r8d -; movq %r8, rsp(80 + virtual offset) +; movq %r8, rsp(72 + virtual offset) ; movl $25, %r9d -; movq %r9, rsp(72 + virtual offset) -; movl $30, %r9d ; movq %r9, rsp(64 + virtual offset) +; movl $30, %r9d +; movq %r9, rsp(56 + virtual offset) ; movl $35, %esi -; movq %rsi, rsp(56 + virtual offset) +; movq %rsi, rsp(48 + virtual offset) ; movl $40, %edi ; movl $45, %eax ; movl $50, %r10d @@ -681,20 +708,20 @@ block0: ; movl $95, %r8d ; movl $100, %r9d ; movl $105, %r11d -; movq %r11, rsp(48 + virtual offset) -; movl $110, %r11d ; movq %r11, rsp(40 + virtual offset) -; movl $115, %r11d +; movl $110, %r11d ; movq %r11, rsp(32 + virtual offset) -; movl $120, %r11d +; movl $115, %r11d ; movq %r11, rsp(24 + virtual offset) -; movl $125, %r11d +; movl $120, %r11d ; movq %r11, rsp(16 + virtual offset) -; movl $130, %r11d +; movl $125, %r11d ; movq %r11, rsp(8 + virtual offset) -; movl $135, %r11d +; movl $130, %r11d ; movq %r11, rsp(0 + virtual offset) -; movq rsp(56 + virtual offset), %r11 +; movl $135, %r11d +; movq %r11, rsp(96 + virtual offset) +; movq rsp(48 + virtual offset), %r11 ; movq %r11, 0(%rsp) ; movq %rdi, 
8(%rsp) ; movq %rax, 16(%rsp) @@ -709,45 +736,29 @@ block0: ; movq %rcx, 88(%rsp) ; movq %r8, 96(%rsp) ; movq %r9, 104(%rsp) -; movq rsp(48 + virtual offset), %r11 -; movq %r11, 112(%rsp) ; movq rsp(40 + virtual offset), %r11 -; movq %r11, 120(%rsp) +; movq %r11, 112(%rsp) ; movq rsp(32 + virtual offset), %r11 -; movq %r11, 128(%rsp) +; movq %r11, 120(%rsp) ; movq rsp(24 + virtual offset), %r11 -; movq %r11, 136(%rsp) +; movq %r11, 128(%rsp) ; movq rsp(16 + virtual offset), %r11 -; movq %r11, 144(%rsp) +; movq %r11, 136(%rsp) ; movq rsp(8 + virtual offset), %r11 -; movq %r11, 152(%rsp) +; movq %r11, 144(%rsp) ; movq rsp(0 + virtual offset), %r11 +; movq %r11, 152(%rsp) +; movq rsp(96 + virtual offset), %r11 ; movq %r11, 160(%rsp) ; lea 176(%rsp), %rdi ; load_ext_name %tail_callee_stack_args_and_rets+0, %r10 -; movq rsp(80 + virtual offset), %rcx -; movq rsp(88 + virtual offset), %rdx -; movq rsp(96 + virtual offset), %rsi -; movq rsp(72 + virtual offset), %r8 -; movq rsp(64 + virtual offset), %r9 +; movq rsp(72 + virtual offset), %rcx +; movq rsp(80 + virtual offset), %rdx +; movq rsp(88 + virtual offset), %rsi +; movq rsp(64 + virtual offset), %r8 +; movq rsp(56 + virtual offset), %r9 ; call *%r10 -; movq 176(%rsp), %r8 -; movq 184(%rsp), %r10 -; movq 192(%rsp), %rsi -; movq 200(%rsp), %rax -; movq 208(%rsp), %rdx -; movq 216(%rsp), %r9 -; movq 224(%rsp), %r11 -; movq 232(%rsp), %rdi -; movq 240(%rsp), %rcx -; movq 248(%rsp), %r8 -; movq 256(%rsp), %r10 -; movq 264(%rsp), %rsi -; movq 272(%rsp), %rax -; movq 280(%rsp), %rdx -; movq 288(%rsp), %r9 -; movq 296(%rsp), %r11 -; movq 304(%rsp), %rax +; movq rsp(96 + virtual offset), %rax ; movq 432(%rsp), %rbx ; movq 440(%rsp), %r12 ; movq 448(%rsp), %r13 @@ -770,17 +781,17 @@ block0: ; movq %r15, 0x1d0(%rsp) ; block1: ; offset 0x33 ; movl $0xa, %edx -; movq %rdx, 0x1a0(%rsp) +; movq %rdx, 0x198(%rsp) ; movl $0xf, %ecx -; movq %rcx, 0x198(%rsp) +; movq %rcx, 0x190(%rsp) ; movl $0x14, %r8d -; movq %r8, 0x190(%rsp) +; 
movq %r8, 0x188(%rsp) ; movl $0x19, %r9d -; movq %r9, 0x188(%rsp) -; movl $0x1e, %r9d ; movq %r9, 0x180(%rsp) +; movl $0x1e, %r9d +; movq %r9, 0x178(%rsp) ; movl $0x23, %esi -; movq %rsi, 0x178(%rsp) +; movq %rsi, 0x170(%rsp) ; movl $0x28, %edi ; movl $0x2d, %eax ; movl $0x32, %r10d @@ -795,20 +806,20 @@ block0: ; movl $0x5f, %r8d ; movl $0x64, %r9d ; movl $0x69, %r11d -; movq %r11, 0x170(%rsp) -; movl $0x6e, %r11d ; movq %r11, 0x168(%rsp) -; movl $0x73, %r11d +; movl $0x6e, %r11d ; movq %r11, 0x160(%rsp) -; movl $0x78, %r11d +; movl $0x73, %r11d ; movq %r11, 0x158(%rsp) -; movl $0x7d, %r11d +; movl $0x78, %r11d ; movq %r11, 0x150(%rsp) -; movl $0x82, %r11d +; movl $0x7d, %r11d ; movq %r11, 0x148(%rsp) -; movl $0x87, %r11d +; movl $0x82, %r11d ; movq %r11, 0x140(%rsp) -; movq 0x178(%rsp), %r11 +; movl $0x87, %r11d +; movq %r11, 0x1a0(%rsp) +; movq 0x170(%rsp), %r11 ; movq %r11, (%rsp) ; movq %rdi, 8(%rsp) ; movq %rax, 0x10(%rsp) @@ -823,46 +834,61 @@ block0: ; movq %rcx, 0x58(%rsp) ; movq %r8, 0x60(%rsp) ; movq %r9, 0x68(%rsp) -; movq 0x170(%rsp), %r11 -; movq %r11, 0x70(%rsp) ; movq 0x168(%rsp), %r11 -; movq %r11, 0x78(%rsp) +; movq %r11, 0x70(%rsp) ; movq 0x160(%rsp), %r11 -; movq %r11, 0x80(%rsp) +; movq %r11, 0x78(%rsp) ; movq 0x158(%rsp), %r11 -; movq %r11, 0x88(%rsp) +; movq %r11, 0x80(%rsp) ; movq 0x150(%rsp), %r11 -; movq %r11, 0x90(%rsp) +; movq %r11, 0x88(%rsp) ; movq 0x148(%rsp), %r11 -; movq %r11, 0x98(%rsp) +; movq %r11, 0x90(%rsp) ; movq 0x140(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x1a0(%rsp), %r11 ; movq %r11, 0xa0(%rsp) ; leaq 0xb0(%rsp), %rdi ; movabsq $0, %r10 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 -; movq 0x190(%rsp), %rcx -; movq 0x198(%rsp), %rdx -; movq 0x1a0(%rsp), %rsi -; movq 0x188(%rsp), %r8 -; movq 0x180(%rsp), %r9 +; movq 0x188(%rsp), %rcx +; movq 0x190(%rsp), %rdx +; movq 0x198(%rsp), %rsi +; movq 0x180(%rsp), %r8 +; movq 0x178(%rsp), %r9 ; callq *%r10 ; subq $0xb0, %rsp -; movq 0xb0(%rsp), %r8 -; movq 
0xb8(%rsp), %r10 -; movq 0xc0(%rsp), %rsi -; movq 0xc8(%rsp), %rax -; movq 0xd0(%rsp), %rdx -; movq 0xd8(%rsp), %r9 +; movq 0xb0(%rsp), %r11 +; movq %r11, 0x140(%rsp) +; movq 0xb8(%rsp), %r11 +; movq %r11, 0x148(%rsp) +; movq 0xc0(%rsp), %r11 +; movq %r11, 0x150(%rsp) +; movq 0xc8(%rsp), %r11 +; movq %r11, 0x158(%rsp) +; movq 0xd0(%rsp), %r11 +; movq %r11, 0x160(%rsp) +; movq 0xd8(%rsp), %r11 +; movq %r11, 0x168(%rsp) ; movq 0xe0(%rsp), %r11 -; movq 0xe8(%rsp), %rdi -; movq 0xf0(%rsp), %rcx -; movq 0xf8(%rsp), %r8 -; movq 0x100(%rsp), %r10 -; movq 0x108(%rsp), %rsi -; movq 0x110(%rsp), %rax -; movq 0x118(%rsp), %rdx -; movq 0x120(%rsp), %r9 -; movq 0x128(%rsp), %r11 -; movq 0x130(%rsp), %rax +; movq %r11, 0x170(%rsp) +; movq 0xe8(%rsp), %r11 +; movq %r11, 0x178(%rsp) +; movq 0xf0(%rsp), %r11 +; movq %r11, 0x180(%rsp) +; movq 0xf8(%rsp), %r11 +; movq %r11, 0x188(%rsp) +; movq 0x100(%rsp), %r11 +; movq %r11, 0x190(%rsp) +; movq 0x108(%rsp), %r11 +; movq %r11, 0x198(%rsp) +; movq 0x110(%rsp), %rbx +; movq 0x118(%rsp), %r12 +; movq 0x120(%rsp), %r13 +; movq 0x128(%rsp), %r14 +; movq 0x130(%rsp), %r15 +; movq 0x138(%rsp), %r11 +; movq %r11, 0x1a0(%rsp) +; movq 0x1a0(%rsp), %rax ; movq 0x1b0(%rsp), %rbx ; movq 0x1b8(%rsp), %r12 ; movq 0x1c0(%rsp), %r13 diff --git a/cranelift/filetests/filetests/isa/x64/winch.clif b/cranelift/filetests/filetests/isa/x64/winch.clif index 2bda2fd16b7d..4b227d0ac7ab 100644 --- a/cranelift/filetests/filetests/isa/x64/winch.clif +++ b/cranelift/filetests/filetests/isa/x64/winch.clif @@ -291,25 +291,25 @@ block0(v0:i64): ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $64, %rsp -; movq %rbx, 16(%rsp) -; movq %r12, 24(%rsp) -; movq %r13, 32(%rsp) -; movq %r14, 40(%rsp) -; movq %r15, 48(%rsp) +; subq %rsp, $80, %rsp +; movq %rbx, 32(%rsp) +; movq %r12, 40(%rsp) +; movq %r13, 48(%rsp) +; movq %r14, 56(%rsp) +; movq %r15, 64(%rsp) ; block0: ; lea 0(%rsp), %rdi ; load_ext_name %g+0, %r10 ; call *%r10 -; movq 4(%rsp), %rax -; movq 0(%rsp), %r9 
-; andl %r9d, %eax -; movq 16(%rsp), %rbx -; movq 24(%rsp), %r12 -; movq 32(%rsp), %r13 -; movq 40(%rsp), %r14 -; movq 48(%rsp), %r15 -; addq %rsp, $64, %rsp +; movq rsp(0 + virtual offset), %rax +; movq rsp(8 + virtual offset), %rdx +; andl %edx, %eax +; movq 32(%rsp), %rbx +; movq 40(%rsp), %r12 +; movq 48(%rsp), %r13 +; movq 56(%rsp), %r14 +; movq 64(%rsp), %r15 +; addq %rsp, $80, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -318,25 +318,29 @@ block0(v0:i64): ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x40, %rsp -; movq %rbx, 0x10(%rsp) -; movq %r12, 0x18(%rsp) -; movq %r13, 0x20(%rsp) -; movq %r14, 0x28(%rsp) -; movq %r15, 0x30(%rsp) +; subq $0x50, %rsp +; movq %rbx, 0x20(%rsp) +; movq %r12, 0x28(%rsp) +; movq %r13, 0x30(%rsp) +; movq %r14, 0x38(%rsp) +; movq %r15, 0x40(%rsp) ; block1: ; offset 0x21 ; leaq (%rsp), %rdi ; movabsq $0, %r10 ; reloc_external Abs8 %g 0 ; callq *%r10 -; movq 4(%rsp), %rax -; movq (%rsp), %r9 -; andl %r9d, %eax -; movq 0x10(%rsp), %rbx -; movq 0x18(%rsp), %r12 -; movq 0x20(%rsp), %r13 -; movq 0x28(%rsp), %r14 -; movq 0x30(%rsp), %r15 -; addq $0x40, %rsp +; movq 4(%rsp), %r11 +; movq %r11, 0x10(%rsp) +; movq (%rsp), %r11 +; movq %r11, 0x18(%rsp) +; movq 0x10(%rsp), %rax +; movq 0x18(%rsp), %rdx +; andl %edx, %eax +; movq 0x20(%rsp), %rbx +; movq 0x28(%rsp), %r12 +; movq 0x30(%rsp), %r13 +; movq 0x38(%rsp), %r14 +; movq 0x40(%rsp), %r15 +; addq $0x50, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/tests/disas/winch/x64/load/grow_load.wat b/tests/disas/winch/x64/load/grow_load.wat index 4b0bada9f6cf..dde1b16b81d3 100644 --- a/tests/disas/winch/x64/load/grow_load.wat +++ b/tests/disas/winch/x64/load/grow_load.wat @@ -65,7 +65,7 @@ ;; movq %r14, %rdi ;; movl 0xc(%rsp), %esi ;; movl $0, %edx -;; callq 0x2d6 +;; callq 0x2ed ;; addq $0xc, %rsp ;; addq $4, %rsp ;; movq 0x58(%rsp), %r14