From a4dd0a5e9361eb75d34536c85c6effb51f2502ec Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Apr 2025 09:25:47 -0700 Subject: [PATCH 1/5] Cranelift: remove return-value instructions after calls at callsites. This PR addresses the issues described in #10488 in a more head-on way: it removes the use of separate "return-value instructions" that load return values from the stack, instead folding these loads into the semantics of the call VCode instruction. This is a prerequisite for exception-handling: we need calls to be workable as terminators, meaning that we cannot require any other (VCode) instructions after the call to define the return values. In principle, this PR starts simply enough: the return-locations list on the `CallInfo` that each backend uses to provide regalloc metadata is updated to support a notion of "register or stack address" as the source of each return value, and this list is now used for both kinds of returns, not just returns in registers. Shared code, defined in `machinst::abi`, is used by all backends to perform the requisite loads. In order to make this work with more defined values than fit in registers, however, this PR also had to add support for "any"-constrained registers to Cranelift, and to handle allocations that may be spillslots. Such constraints have always been supported by RA2, but this is the first time that Cranelift uses them directly (previously they were used only internally in RA2 as lowerings from other kinds of constraints like safepoints). This requires encoding a spillslot index in our `Reg` type. There is a little bit of complexity around handling the loads/defs as well: if we have a return value on-stack, and we need to put it in a spillslot, we cannot do a memory-to-memory move directly, so we need a temporary register. Earlier versions of this PR allocated another temp as a vreg on the call, but this doesn't work with all calling conventions (too many clobbers). 
For simplicity I picked a particular register that is (i) clobbered by calls and (ii) not used for return values for each architecture (x86-64's tailcall needed to lose one return-in-register slot to make this work). This removes retval insts from the shared ABI infra completely. s390x is different, still, because it handles callsite lowering from ISLE; we will need to address that separately for exception support there. --- cranelift/codegen/src/isa/aarch64/abi.rs | 2 + .../codegen/src/isa/aarch64/inst/emit.rs | 28 + cranelift/codegen/src/isa/aarch64/inst/mod.rs | 14 +- .../codegen/src/isa/pulley_shared/abi.rs | 2 + .../codegen/src/isa/pulley_shared/inst.isle | 3 + .../src/isa/pulley_shared/inst/emit.rs | 35 ++ .../codegen/src/isa/pulley_shared/inst/mod.rs | 25 +- cranelift/codegen/src/isa/riscv64/abi.rs | 2 + cranelift/codegen/src/isa/riscv64/inst.isle | 4 + .../codegen/src/isa/riscv64/inst/emit.rs | 34 +- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 18 +- cranelift/codegen/src/isa/s390x/abi.rs | 2 + cranelift/codegen/src/isa/s390x/inst/mod.rs | 22 +- cranelift/codegen/src/isa/s390x/lower/isle.rs | 10 +- cranelift/codegen/src/isa/x64/abi.rs | 5 +- cranelift/codegen/src/isa/x64/inst/emit.rs | 22 + cranelift/codegen/src/isa/x64/inst/mod.rs | 14 +- cranelift/codegen/src/isa/x64/lower.rs | 8 +- cranelift/codegen/src/machinst/abi.rs | 127 ++++- cranelift/codegen/src/machinst/isle.rs | 11 +- cranelift/codegen/src/machinst/reg.rs | 78 ++- cranelift/codegen/src/machinst/vcode.rs | 22 +- .../filetests/isa/aarch64/tail-call-conv.clif | 154 +++--- .../filetests/isa/pulley32/call.clif | 133 +++-- .../filetests/isa/pulley64/call.clif | 135 +++-- .../filetests/isa/riscv64/tail-call-conv.clif | 334 ++++++------ .../isa/x64/call-with-retval-insts.clif | 179 +++++++ .../filetests/filetests/isa/x64/i128.clif | 21 +- .../filetests/isa/x64/tail-call-conv.clif | 506 +++++++++--------- .../filetests/filetests/isa/x64/winch.clif | 64 +-- tests/disas/winch/x64/load/grow_load.wat | 2 
+- 31 files changed, 1265 insertions(+), 751 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 9ade3ebea363..8cd36c76c68d 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1149,6 +1149,7 @@ impl ABIMachineSpec for AArch64MachineDeps { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -1188,6 +1189,7 @@ impl ABIMachineSpec for AArch64MachineDeps { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index adc7396c4787..cd4cdb95f343 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2951,6 +2951,20 @@ impl MachInstEmit for Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + // Use x9 as a temp if needed: clobbered, not a + // retval. + regs::writable_xreg(9), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + start_off = sink.cur_offset(); } &Inst::CallInd { ref info } => { let user_stack_map = state.take_stack_map(); @@ -2970,6 +2984,20 @@ impl MachInstEmit for Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + // Use x9 as a temp if needed: clobbered, not a + // retval. 
+ regs::writable_xreg(9), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + start_off = sink.cur_offset(); } &Inst::ReturnCall { ref info } => { emit_return_call_common_sequence(sink, emit_info, state, info); diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index ba7103a32a8c..2dae98b929f3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -839,8 +839,11 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -852,8 +855,11 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) 
=> collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index a29c1146cadd..b25988e36f56 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -544,6 +544,7 @@ where is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -578,6 +579,7 @@ where setup_area_size: setup_area_size.into(), clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index e68d68dfe22c..bdca3c8d978f 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -89,6 +89,9 @@ ;; emit this instruction and auto-generated methods for other various ;; bits and pieces of boilerplate in the backend. (Raw (raw RawInst)) + + ;; Island generation prior to variable-length instructions. + (EmitIsland (space_needed u32)) ) ) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 7adec74bcddc..5788141b1a6d 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -188,6 +188,20 @@ fn pulley_emit

( for i in PulleyMachineDeps::

::gen_sp_reg_adjust(adjust) { >::from(i).emit(sink, emit_info, state); } + + // Load any stack-carried return values. + info.emit_retval_loads::, _, _>( + // Use x15 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(15)), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + *start_offset = sink.cur_offset(); } Inst::IndirectCall { info } => { @@ -204,6 +218,20 @@ fn pulley_emit

( for i in PulleyMachineDeps::

::gen_sp_reg_adjust(adjust) { >::from(i).emit(sink, emit_info, state); } + + // Load any stack-carried return values. + info.emit_retval_loads::, _, _>( + // Use x15 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(15)), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + *start_offset = sink.cur_offset(); } Inst::ReturnCall { info } => { @@ -517,6 +545,13 @@ fn pulley_emit

( } super::generated::emit(raw, sink) } + + Inst::EmitIsland { space_needed } => { + let label = sink.get_label(); + >::from(Inst::Jump { label }).emit(sink, emit_info, state); + sink.emit_island(space_needed + 8, &mut state.ctrl_plane); + sink.bind_label(label, &mut state.ctrl_plane); + } } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 78b1ea0eb882..bbaa10b25532 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -169,8 +169,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -179,8 +182,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -190,8 +196,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) 
=> collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -298,6 +307,8 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { } Inst::Raw { raw } => generated::get_operands(raw, collector), + + Inst::EmitIsland { .. } => {} } } @@ -745,6 +756,8 @@ impl Inst { format!("br_table {idx} {default:?} {targets:?}") } Inst::Raw { raw } => generated::print(raw), + + Inst::EmitIsland { space_needed } => format!("emit_island {space_needed}"), } } } diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 3bde5ea9bb17..bacb4e7b2a38 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -649,6 +649,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -684,6 +685,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index acdfa35f5da8..7d6b5c96adac 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -345,6 +345,10 @@ (flags MemFlags) (mask VecOpMasking) (vstate VState)) + + (EmitIsland + ;; The needed space before the next deadline. + (needed_space u32)) )) (type AtomicOP (enum diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 2951e2a8f6db..17b4c2c165ed 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -210,6 +210,7 @@ impl Inst { // some cases. Inst::VecLoad { vstate, .. } | Inst::VecStore { vstate, .. } => Some(vstate), + Inst::EmitIsland { .. 
} => None, } } } @@ -1133,6 +1134,18 @@ impl Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + // Use x12 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(12)), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + *start_off = sink.cur_offset(); } &Inst::CallInd { ref info } => { Inst::Jalr { @@ -1155,6 +1168,18 @@ impl Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + // Use x12 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(12)), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + *start_off = sink.cur_offset(); } &Inst::ReturnCall { ref info } => { @@ -2577,7 +2602,14 @@ impl Inst { to.nf(), )); } - }; + + Inst::EmitIsland { needed_space } => { + let jump_around_label = sink.get_label(); + Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); + sink.emit_island(needed_space + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 6e593f6901c6..090585b3eb9e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -335,8 +335,11 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) 
=> collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -348,8 +351,11 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -680,6 +686,7 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(from); vec_mask_operands(mask, collector); } + Inst::EmitIsland { .. } => {} } } @@ -1612,6 +1619,9 @@ impl Inst { format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") } + Inst::EmitIsland { needed_space } => { + format!("emit_island {needed_space}") + } } } } diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 845599cb04a6..62818e6fdfb8 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -914,6 +914,7 @@ impl ABIMachineSpec for S390xMachineDeps { _is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, mut outgoing_args_size: u32, ) -> FrameLayout { @@ -985,6 +986,7 @@ impl ABIMachineSpec for S390xMachineDeps { setup_area_size: 0, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 139255c2657c..09245bf056a9 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -879,9 +879,14 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + 
collector.reg_fixed_def(vreg, *preg); + } + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(clobbers); } @@ -899,9 +904,14 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + collector.reg_fixed_def(vreg, *preg); + } + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(clobbers); } diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index c65aa9c2184a..c3e241f859fa 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -11,7 +11,7 @@ use crate::isa::s390x::inst::{ ReturnCallInfo, SymbolReloc, UImm12, UImm16Shifted, UImm32Shifted, WritableRegPair, }; use crate::isa::s390x::S390xBackend; -use crate::machinst::isle::*; +use crate::machinst::{isle::*, RetLocation}; use crate::machinst::{CallInfo, MachLabel, Reg}; use crate::{ ir::{ @@ -135,7 +135,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { let value_regs = self.lower_ctx.alloc_tmp(ty); defs.push(CallRetPair { vreg: value_regs.only_reg().unwrap(), - preg: reg.into(), + location: RetLocation::Reg(reg.into()), }); } _ => {} @@ -149,8 +149,10 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { fn defs_lookup(&mut self, defs: &CallRetList, reg: RealReg) -> Reg { let reg = Reg::from(reg); for def in defs { - if def.preg == reg { - return def.vreg.to_reg(); + if let RetLocation::Reg(preg) = def.location { + if preg == reg { + return def.vreg.to_reg(); + } } } unreachable!() diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index c911c97014be..dda711ce535d 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -929,6 +929,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _is_leaf: bool, incoming_args_size: u32, tail_args_size: 
u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -968,6 +969,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } @@ -1126,7 +1128,8 @@ fn get_intreg_for_retval( 5 => Some(regs::r8()), 6 => Some(regs::r9()), 7 => Some(regs::r10()), - 8 => Some(regs::r11()), + // NB: `r11` is reserved as a scratch register that is + // also part of the clobber set. // NB: `r15` is reserved as a scratch register. _ => None, }, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index b812fab48b13..f105f793f1e7 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1624,6 +1624,17 @@ pub(crate) fn emit( ) .emit(sink, info, state); } + + // Load any stack-carried return values. + call_info.emit_retval_loads::( + // Use r11 as a temp if needed: clobbered anyway, and + // not otherwise used as a return value in any of our + // supported calling conventions. + Writable::from_reg(regs::r11()), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, info, state), + |_space_needed| None, + ); } Inst::ReturnCallKnown { info: call_info } => { @@ -1706,6 +1717,17 @@ pub(crate) fn emit( ) .emit(sink, info, state); } + + // Load any stack-carried return values. + call_info.emit_retval_loads::( + // Use r11 as a temp if needed: clobbered anyway, and + // not otherwise used as a return value in any of our + // supported calling conventions. + Writable::from_reg(regs::r11()), + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, info, state), + |_space_needed| None, + ); } Inst::Args { .. 
} => {} diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 033e96c8f96a..3a8f873c75bc 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -2445,8 +2445,11 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(*clobbers); } @@ -2472,8 +2475,11 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) 
=> collector.any_def(vreg), + } } collector.reg_clobbers(*clobbers); } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index eea24464a443..d8b0d4ce9eaa 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -179,20 +179,14 @@ fn emit_vm_call( abi.gen_arg(ctx, i, ValueRegs::one(*input)); } - let mut retval_insts: SmallInstVec<_> = smallvec![]; let mut outputs: SmallVec<[_; 1]> = smallvec![]; for i in 0..ctx.sigs().num_rets(ctx.sigs().abi_sig_for_signature(&sig)) { - let (retval_inst, retval_regs) = abi.gen_retval(ctx, i); - retval_insts.extend(retval_inst.into_iter()); + let retval_regs = abi.gen_retval(ctx, i); outputs.push(retval_regs.only_reg().unwrap()); } abi.emit_call(ctx); - for inst in retval_insts { - ctx.emit(inst); - } - Ok(outputs) } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index fbe14d6c2cb1..dfa9d515212d 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -282,6 +282,20 @@ pub enum StackAMode { OutgoingArg(i64), } +impl StackAMode { + fn offset_by(&self, offset: u32) -> Self { + match self { + StackAMode::IncomingArg(off, size) => { + StackAMode::IncomingArg(off.checked_add(i64::from(offset)).unwrap(), *size) + } + StackAMode::Slot(off) => StackAMode::Slot(off.checked_add(i64::from(offset)).unwrap()), + StackAMode::OutgoingArg(off) => { + StackAMode::OutgoingArg(off.checked_add(i64::from(offset)).unwrap()) + } + } + } +} + /// Trait implemented by machine-specific backend to represent ISA flags. pub trait IsaFlags: Clone { /// Get a flag indicating whether forward-edge CFI is enabled. 
@@ -479,6 +493,7 @@ pub trait ABIMachineSpec { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout; @@ -1018,6 +1033,9 @@ pub struct FrameLayout { /// This contains stack slots and spill slots. pub fixed_frame_storage_size: u32, + /// The size of all stackslots. + pub stackslots_size: u32, + /// Stack size to be reserved for outgoing arguments, if used by /// the current ABI, or 0 otherwise. After gen_clobber_save and /// before gen_clobber_restore, the stack pointer points to the @@ -1760,6 +1778,7 @@ impl Callee { self.is_leaf, self.stack_args_size(sigs), self.tail_args_size, + self.stackslots_size, total_stacksize, self.outgoing_args_size, )); @@ -1962,13 +1981,23 @@ pub struct CallArgPair { } /// An output return value from a call instruction: the vreg that is -/// defined, and the preg it is constrained to (per the ABI). +/// defined, and the preg or stack location it is constrained to (per +/// the ABI). #[derive(Clone, Debug)] pub struct CallRetPair { /// The virtual register to define from this return value. pub vreg: Writable, /// The real register from which the return value is read. - pub preg: Reg, + pub location: RetLocation, +} + +/// A location to load a return-value from after a call completes. +#[derive(Clone, Debug)] +pub enum RetLocation { + /// A physical register. + Reg(Reg), + /// A stack location, identified by a `StackAMode`. + Stack(StackAMode, Type), } pub type CallArgList = SmallVec<[CallArgPair; 8]>; @@ -2297,12 +2326,7 @@ impl CallSite { } /// Define a return value after the call returns. 
- pub fn gen_retval( - &mut self, - ctx: &mut Lower, - idx: usize, - ) -> (SmallInstVec, ValueRegs) { - let mut insts = smallvec![]; + pub fn gen_retval(&mut self, ctx: &mut Lower, idx: usize) -> ValueRegs { let mut into_regs: SmallVec<[Reg; 2]> = smallvec![]; let ret = ctx.sigs().rets(self.sig)[idx].clone(); match ret { @@ -2315,7 +2339,7 @@ impl CallSite { let into_reg = ctx.alloc_tmp(ty).only_reg().unwrap(); self.defs.push(CallRetPair { vreg: into_reg, - preg: reg.into(), + location: RetLocation::Reg(reg.into()), }); into_regs.push(into_reg.to_reg()); } @@ -2326,11 +2350,11 @@ impl CallSite { // ensuring that the return values will be in a consistent place after // any call. let ret_area_base = sig_data.sized_stack_arg_space(); - insts.push(M::gen_load_stack( - StackAMode::OutgoingArg(offset + ret_area_base), - into_reg, - ty, - )); + let amode = StackAMode::OutgoingArg(offset + ret_area_base); + self.defs.push(CallRetPair { + vreg: into_reg, + location: RetLocation::Stack(amode, ty), + }); into_regs.push(into_reg.to_reg()); } } @@ -2349,7 +2373,7 @@ impl CallSite { [a, b] => ValueRegs::two(a, b), _ => panic!("Expected to see one or two slots only from {ret:?}"), }; - (insts, value_regs) + value_regs } /// Emit the call itself. @@ -2386,7 +2410,9 @@ impl CallSite { // Remove retval regs from clobbers. for def in &defs { - clobbers.remove(PReg::from(def.preg.to_real_reg().unwrap())); + if let RetLocation::Reg(preg) = def.location { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + } } clobbers @@ -2438,6 +2464,75 @@ impl CallSite { } } +impl CallInfo { + /// Emit loads for any stack-carried return values using the call + /// info and allocations. + pub fn emit_retval_loads< + M: ABIMachineSpec, + EmitFn: FnMut(M::I), + IslandFn: Fn(u32) -> Option, + >( + &self, + temp: Writable, + stackslots_size: u32, + mut emit: EmitFn, + emit_island: IslandFn, + ) { + // Count stack-ret locations and emit an island to account for + // this space usage. 
+ let mut space_needed = 0; + for CallRetPair { location, .. } in &self.defs { + if let RetLocation::Stack(..) = location { + // Assume up to ten instructions, semi-arbitrarily: + // load from stack, store to spillslot, codegen of + // large offsets on RISC ISAs. + space_needed += 10 * M::I::worst_case_size(); + } + } + if space_needed > 0 { + if let Some(island_inst) = emit_island(space_needed) { + emit(island_inst); + } + } + + for CallRetPair { vreg, location } in &self.defs { + if let RetLocation::Stack(amode, ty) = location { + if let Some(spillslot) = vreg.to_reg().to_spillslot() { + // `temp` is an integer register of machine word + // width, but `ty` may be floating-point/vector, + // which (i) may not be loadable directly into an + // int reg, and (ii) may be wider than a machine + // word. For simplicity, and because there are not + // always easy choices for volatile float/vec regs + // (see e.g. x86-64, where fastcall clobbers only + // xmm0-xmm5, but tail uses xmm0-xmm7 for + // returns), we use the integer temp register in + // steps. 
+ let parts = (ty.bytes() + M::word_bytes() - 1) / M::word_bytes(); + for part in 0..parts { + emit(M::gen_load_stack( + amode.offset_by(part * M::word_bytes()), + temp, + M::word_type(), + )); + emit(M::gen_store_stack( + StackAMode::Slot( + i64::from(stackslots_size) + + i64::from(M::word_bytes()) + * ((spillslot.index() as i64) + (part as i64)), + ), + temp.to_reg(), + M::word_type(), + )); + } + } else { + emit(M::gen_load_stack(*amode, *vreg, *ty)); + } + } + } + } +} + #[cfg(test)] mod tests { use super::SigData; diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 8875097f1c51..8066d89d7748 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -889,26 +889,19 @@ pub fn gen_call_common( gen_call_common_args(ctx, &mut caller, args); // Handle retvals prior to emitting call, so the - // constraints are on the call instruction; but buffer the - // instructions till after the call. + // constraints are on the call instruction. let mut outputs = InstOutput::new(); - let mut retval_insts = crate::machinst::abi::SmallInstVec::new(); // We take the *last* `num_rets` returns of the sig: // this skips a StructReturn, if any, that is present. 
let sigdata_num_rets = caller.num_rets(ctx.sigs()); debug_assert!(num_rets <= sigdata_num_rets); for i in (sigdata_num_rets - num_rets)..sigdata_num_rets { - let (retval_inst, retval_regs) = caller.gen_retval(ctx, i); - retval_insts.extend(retval_inst.into_iter()); + let retval_regs = caller.gen_retval(ctx, i); outputs.push(retval_regs); } caller.emit_call(ctx); - for inst in retval_insts { - ctx.emit(inst); - } - outputs } diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs index 2670e3ad12c3..5a6ea1f9b326 100644 --- a/cranelift/codegen/src/machinst/reg.rs +++ b/cranelift/codegen/src/machinst/reg.rs @@ -38,27 +38,41 @@ pub fn first_user_vreg_index() -> usize { PINNED_VREGS } -/// A register named in an instruction. This register can be either a -/// virtual register or a fixed physical register. It does not have -/// any constraints applied to it: those can be added later in -/// `MachInst::get_operands()` when the `Reg`s are converted to -/// `Operand`s. +/// A register named in an instruction. This register can be a virtual +/// register, a fixed physical register, or a named spillslot (after +/// regalloc). It does not have any constraints applied to it: those +/// can be added later in `MachInst::get_operands()` when the `Reg`s +/// are converted to `Operand`s. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct Reg(VReg); +pub struct Reg(u32); + +const REG_SPILLSLOT_BIT: u32 = 0x8000_0000; +const REG_SPILLSLOT_MASK: u32 = !REG_SPILLSLOT_BIT; impl Reg { /// Get the physical register (`RealReg`), if this register is /// one. pub fn to_real_reg(self) -> Option { - pinned_vreg_to_preg(self.0).map(RealReg) + pinned_vreg_to_preg(self.0.into()).map(RealReg) } /// Get the virtual (non-physical) register, if this register is /// one. 
pub fn to_virtual_reg(self) -> Option { - if pinned_vreg_to_preg(self.0).is_none() { - Some(VirtualReg(self.0)) + if self.to_spillslot().is_some() { + None + } else if pinned_vreg_to_preg(self.0.into()).is_none() { + Some(VirtualReg(self.0.into())) + } else { + None + } + } + + /// Get the spillslot, if this register is one. + pub fn to_spillslot(self) -> Option { + if (self.0 & REG_SPILLSLOT_BIT) != 0 { + Some(SpillSlot::new((self.0 & REG_SPILLSLOT_MASK) as usize)) } else { None } @@ -66,7 +80,8 @@ impl Reg { /// Get the class of this register. pub fn class(self) -> RegClass { - self.0.class() + assert!(!self.to_spillslot().is_some()); + VReg::from(self.0).class() } /// Is this a real (physical) reg? @@ -78,12 +93,19 @@ impl Reg { pub fn is_virtual(self) -> bool { self.to_virtual_reg().is_some() } + + /// Is this a spillslot? + pub fn is_spillslot(self) -> bool { + self.to_spillslot().is_some() + } } impl std::fmt::Debug for Reg { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if self.0 == VReg::invalid() { + if VReg::from(self.0) == VReg::invalid() { write!(f, "") + } else if let Some(spillslot) = self.to_spillslot() { + write!(f, "{spillslot}") } else if let Some(rreg) = self.to_real_reg() { let preg: PReg = rreg.into(); write!(f, "{preg}") @@ -197,7 +219,7 @@ impl Writable { impl std::convert::From for Reg { fn from(vreg: regalloc2::VReg) -> Reg { - Reg(vreg) + Reg(vreg.bits() as u32) } } @@ -213,12 +235,12 @@ impl std::convert::From for regalloc2::VReg { /// registers also map to particular (special) VRegs, so this /// method can be used either on virtual or physical `Reg`s. 
fn from(reg: Reg) -> regalloc2::VReg { - reg.0 + reg.0.into() } } impl std::convert::From<&Reg> for regalloc2::VReg { fn from(reg: &Reg) -> regalloc2::VReg { - reg.0 + reg.0.into() } } @@ -256,19 +278,25 @@ impl std::convert::From for Reg { impl std::convert::From for Reg { fn from(reg: RealReg) -> Reg { - Reg(reg.into()) + Reg(VReg::from(reg).bits() as u32) } } impl std::convert::From for Reg { fn from(reg: VirtualReg) -> Reg { - Reg(reg.0) + Reg(reg.0.bits() as u32) } } /// A spill slot. pub type SpillSlot = regalloc2::SpillSlot; +impl std::convert::From for Reg { + fn from(spillslot: regalloc2::SpillSlot) -> Reg { + Reg(REG_SPILLSLOT_BIT | spillslot.index() as u32) + } +} + /// A register class. Each register in the ISA has one class, and the /// classes are disjoint. Most modern ISAs will have just two classes: /// the integer/general-purpose registers (GPRs), and the float/vector @@ -429,6 +457,19 @@ pub trait OperandVisitorImpl: OperandVisitor { self.add_operand(reg, constraint, OperandKind::Def, OperandPos::Late); } } + + /// Add a def that can be allocated to either a register or a + /// spillslot, at the end of the instruction (`After` + /// position). Use only when this def will be written after all + /// uses are read. 
+ fn any_def(&mut self, reg: &mut Writable>) { + self.add_operand( + reg.reg.as_mut(), + OperandConstraint::Any, + OperandKind::Def, + OperandPos::Late, + ); + } } impl OperandVisitorImpl for T {} @@ -441,9 +482,10 @@ impl<'a, F: Fn(VReg) -> VReg> OperandVisitor for OperandCollector<'a, F> { kind: OperandKind, pos: OperandPos, ) { - reg.0 = (self.renamer)(reg.0); + debug_assert!(!reg.is_spillslot()); + reg.0 = (self.renamer)(VReg::from(reg.0)).bits() as u32; self.operands - .push(Operand::new(reg.0, constraint, kind, pos)); + .push(Operand::new(VReg::from(reg.0), constraint, kind, pos)); } fn debug_assert_is_allocatable_preg(&self, reg: PReg, expected: bool) { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 45c58a78ae83..cee1d0009e1d 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -933,17 +933,19 @@ impl VCode { let mut allocs = regalloc.inst_allocs(iix).iter(); self.insts[iix.index()].get_operands( &mut |reg: &mut Reg, constraint, _kind, _pos| { - let alloc = allocs - .next() - .expect("enough allocations for all operands") - .as_reg() - .expect("only register allocations, not stack allocations") - .into(); - - if let OperandConstraint::FixedReg(rreg) = constraint { - debug_assert_eq!(Reg::from(rreg), alloc); + let alloc = + allocs.next().expect("enough allocations for all operands"); + + if let Some(alloc) = alloc.as_reg() { + let alloc: Reg = alloc.into(); + if let OperandConstraint::FixedReg(rreg) = constraint { + debug_assert_eq!(Reg::from(rreg), alloc); + } + *reg = alloc; + } else if let Some(alloc) = alloc.as_stack() { + let alloc: Reg = alloc.into(); + *reg = alloc; } - *reg = alloc; }, ); debug_assert!(allocs.next().is_none()); diff --git a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif index 41045f0acb7b..b650b9285764 100644 --- 
a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif @@ -380,32 +380,13 @@ block0: ; VCode: ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; sub sp, sp, #160 +; sub sp, sp, #240 ; block0: ; mov x8, sp ; load_ext_name x12, TestCase(%tail_callee_stack_rets)+0 ; blr x12 -; ldr x9, [sp] -; ldr x11, [sp, #8] -; ldr x13, [sp, #16] -; ldr x15, [sp, #24] -; ldr x1, [sp, #32] -; ldr x3, [sp, #40] -; ldr x5, [sp, #48] -; ldr x7, [sp, #56] -; ldr x9, [sp, #64] -; ldr x11, [sp, #72] -; ldr x13, [sp, #80] -; ldr x15, [sp, #88] -; ldr x1, [sp, #96] -; ldr x3, [sp, #104] -; ldr x5, [sp, #112] -; ldr x7, [sp, #120] -; ldr x9, [sp, #128] -; ldr x11, [sp, #136] -; ldr x13, [sp, #144] -; ldr x2, [sp, #152] -; add sp, sp, #160 +; ldr x2, [sp, #232] +; add sp, sp, #240 ; ldp fp, lr, [sp], #16 ; ret ; @@ -413,7 +394,7 @@ block0: ; block0: ; offset 0x0 ; stp x29, x30, [sp, #-0x10]! ; mov x29, sp -; sub sp, sp, #0xa0 +; sub sp, sp, #0xf0 ; block1: ; offset 0xc ; mov x8, sp ; ldr x12, #0x18 @@ -422,26 +403,37 @@ block0: ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x12 ; ldur x9, [sp] -; ldur x11, [sp, #8] -; ldur x13, [sp, #0x10] -; ldur x15, [sp, #0x18] -; ldur x1, [sp, #0x20] -; ldur x3, [sp, #0x28] -; ldur x5, [sp, #0x30] -; ldur x7, [sp, #0x38] +; stur x9, [sp, #0xa0] +; ldur x9, [sp, #8] +; stur x9, [sp, #0xa8] +; ldur x9, [sp, #0x10] +; stur x9, [sp, #0xb0] +; ldur x9, [sp, #0x18] +; stur x9, [sp, #0xb8] +; ldur x9, [sp, #0x20] +; stur x9, [sp, #0xc0] +; ldur x9, [sp, #0x28] +; stur x9, [sp, #0xc8] +; ldur x9, [sp, #0x30] +; stur x9, [sp, #0xd0] +; ldur x9, [sp, #0x38] +; stur x9, [sp, #0xd8] ; ldur x9, [sp, #0x40] -; ldur x11, [sp, #0x48] -; ldur x13, [sp, #0x50] -; ldur x15, [sp, #0x58] -; ldur x1, [sp, #0x60] -; ldur x3, [sp, #0x68] -; ldur x5, [sp, #0x70] -; ldur x7, [sp, #0x78] -; ldur x9, [sp, #0x80] -; ldur x11, [sp, #0x88] -; ldur x13, [sp, #0x90] -; ldur x2, [sp, #0x98] -; add sp, sp, #0xa0 +; stur x9, [sp, 
#0xe0] +; ldur x25, [sp, #0x48] +; ldur x26, [sp, #0x50] +; ldur x27, [sp, #0x58] +; ldur x28, [sp, #0x60] +; ldur x21, [sp, #0x68] +; ldur x19, [sp, #0x70] +; ldur x20, [sp, #0x78] +; ldur x22, [sp, #0x80] +; ldur x23, [sp, #0x88] +; ldur x24, [sp, #0x90] +; ldur x9, [sp, #0x98] +; stur x9, [sp, #0xe8] +; ldur x2, [sp, #0xe8] +; add sp, sp, #0xf0 ; ldp x29, x30, [sp], #0x10 ; ret @@ -620,7 +612,7 @@ block0: ; stp x23, x24, [sp, #-16]! ; stp x21, x22, [sp, #-16]! ; stp x19, x20, [sp, #-16]! -; sub sp, sp, #320 +; sub sp, sp, #400 ; block0: ; movz x2, #10 ; movz x3, #15 @@ -671,27 +663,8 @@ block0: ; add x8, sp, #160 ; load_ext_name x10, TestCase(%tail_callee_stack_args_and_rets)+0 ; blr x10 -; ldr x7, [sp, #160] -; ldr x9, [sp, #168] -; ldr x11, [sp, #176] -; ldr x13, [sp, #184] -; ldr x15, [sp, #192] -; ldr x1, [sp, #200] -; ldr x3, [sp, #208] -; ldr x5, [sp, #216] -; ldr x7, [sp, #224] -; ldr x9, [sp, #232] -; ldr x11, [sp, #240] -; ldr x13, [sp, #248] -; ldr x15, [sp, #256] -; ldr x1, [sp, #264] -; ldr x3, [sp, #272] -; ldr x5, [sp, #280] -; ldr x7, [sp, #288] -; ldr x9, [sp, #296] -; ldr x11, [sp, #304] -; ldr x2, [sp, #312] -; add sp, sp, #320 +; ldr x2, [sp, #392] +; add sp, sp, #400 ; ldp x19, x20, [sp], #16 ; ldp x21, x22, [sp], #16 ; ldp x23, x24, [sp], #16 @@ -709,7 +682,7 @@ block0: ; stp x23, x24, [sp, #-0x10]! ; stp x21, x22, [sp, #-0x10]! ; stp x19, x20, [sp, #-0x10]! 
-; sub sp, sp, #0x140 +; sub sp, sp, #0x190 ; block1: ; offset 0x20 ; mov x2, #0xa ; mov x3, #0xf @@ -764,27 +737,38 @@ block0: ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x10 ; sub sp, sp, #0xa0 -; ldur x7, [sp, #0xa0] +; ldur x9, [sp, #0xa0] +; str x9, [sp, #0x140] ; ldur x9, [sp, #0xa8] -; ldur x11, [sp, #0xb0] -; ldur x13, [sp, #0xb8] -; ldur x15, [sp, #0xc0] -; ldur x1, [sp, #0xc8] -; ldur x3, [sp, #0xd0] -; ldur x5, [sp, #0xd8] -; ldur x7, [sp, #0xe0] -; ldur x9, [sp, #0xe8] -; ldur x11, [sp, #0xf0] -; ldur x13, [sp, #0xf8] -; ldr x15, [sp, #0x100] -; ldr x1, [sp, #0x108] -; ldr x3, [sp, #0x110] -; ldr x5, [sp, #0x118] -; ldr x7, [sp, #0x120] -; ldr x9, [sp, #0x128] -; ldr x11, [sp, #0x130] -; ldr x2, [sp, #0x138] -; add sp, sp, #0x140 +; str x9, [sp, #0x148] +; ldur x9, [sp, #0xb0] +; str x9, [sp, #0x150] +; ldur x9, [sp, #0xb8] +; str x9, [sp, #0x158] +; ldur x9, [sp, #0xc0] +; str x9, [sp, #0x160] +; ldur x9, [sp, #0xc8] +; str x9, [sp, #0x168] +; ldur x9, [sp, #0xd0] +; str x9, [sp, #0x170] +; ldur x9, [sp, #0xd8] +; str x9, [sp, #0x178] +; ldur x9, [sp, #0xe0] +; str x9, [sp, #0x180] +; ldur x25, [sp, #0xe8] +; ldur x26, [sp, #0xf0] +; ldur x27, [sp, #0xf8] +; ldr x28, [sp, #0x100] +; ldr x21, [sp, #0x108] +; ldr x19, [sp, #0x110] +; ldr x20, [sp, #0x118] +; ldr x22, [sp, #0x120] +; ldr x23, [sp, #0x128] +; ldr x24, [sp, #0x130] +; ldr x9, [sp, #0x138] +; str x9, [sp, #0x188] +; ldr x2, [sp, #0x188] +; add sp, sp, #0x190 ; ldp x19, x20, [sp], #0x10 ; ldp x21, x22, [sp], #0x10 ; ldp x23, x24, [sp], #0x10 diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 6231d59c0dc0..943300b2f3df 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { 
reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, 
callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -214,80 +214,71 @@ block0: } ; VCode: -; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; xmov x21, x12 -; x28 = xload64 OutgoingArg(0) // flags = notrap aligned -; x16 = xload64 OutgoingArg(8) // flags = notrap aligned -; x12 = xload64 OutgoingArg(16) // flags = notrap aligned -; x15 = xload64 OutgoingArg(24) // flags = notrap aligned -; x22 = xload64 OutgoingArg(32) // flags = notrap aligned -; x24 = xload64 OutgoingArg(40) // flags = notrap aligned -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 
x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), 
types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; ret ; ; Disassembled: -; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28 +; push_frame_save 112, x16, x17, x18, x19, x26, x27, x28, x29 ; xmov x12, sp ; call1 x12, 0x0 // target = 0x8 -; xmov x21, x12 -; xload64le_o32 x28, sp, 0 -; xload64le_o32 x16, sp, 8 -; xload64le_o32 x12, sp, 16 -; xload64le_o32 x15, sp, 24 -; xload64le_o32 x22, sp, 32 -; xload64le_o32 x24, sp, 40 -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28 +; jump 0x5 // target = 0x13 +; xload64le_o32 x27, sp, 0 +; xload64le_o32 x19, sp, 8 +; xload64le_o32 x29, sp, 16 +; xload64le_o32 x16, sp, 24 +; xload64le_o32 x17, sp, 32 +; xload64le_o32 x18, sp, 40 +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; 
xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, x16, x17, x18, x19, x26, x27, x28, x29 ; ret function %call_indirect(i32) -> i64 { @@ -301,7 +292,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index bde96cabe9a0..fb2c0e4c4cc7 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, 
callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -214,80 +214,71 @@ block0: } ; VCode: -; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, 
uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; xmov x21, x12 -; x28 = xload64 OutgoingArg(0) // flags = notrap aligned -; x16 = xload64 OutgoingArg(8) // flags = notrap aligned -; x12 = xload64 OutgoingArg(16) // flags = notrap aligned -; x15 = xload64 OutgoingArg(24) // flags = notrap aligned -; x22 = xload64 OutgoingArg(32) // flags = notrap aligned -; x24 = xload64 OutgoingArg(40) // flags = notrap aligned -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; call CallInfo { dest: PulleyCall { 
name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, 
x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; ret ; ; Disassembled: -; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28 +; push_frame_save 112, x16, x17, x18, x19, x26, x27, x28, x29 ; xmov x12, sp ; call1 x12, 0x0 // target = 0x8 -; xmov x21, x12 -; xload64le_o32 x28, sp, 0 -; xload64le_o32 x16, sp, 8 -; xload64le_o32 x12, sp, 16 -; xload64le_o32 x15, sp, 24 -; xload64le_o32 x22, sp, 32 -; xload64le_o32 x24, sp, 40 -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28 +; jump 0x5 // target = 0x13 +; xload64le_o32 x27, sp, 0 +; xload64le_o32 x19, sp, 8 +; xload64le_o32 x29, sp, 16 +; xload64le_o32 x16, sp, 24 +; xload64le_o32 x17, sp, 32 +; xload64le_o32 x18, sp, 40 +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, 
x8, x9 +; pop_frame_restore 112, x16, x17, x18, x19, x26, x27, x28, x29 ; ret function %call_indirect(i64) -> i64 { @@ -301,7 +292,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -397,7 +388,7 @@ block0(v0: i32): ; xstore64 sp+1000008, x20 // flags = notrap aligned ; block0: ; xmov x20, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x5, x20 ; xadd32 x0, x5, x0 ; x20 = xload64 sp+1000008 // flags = notrap aligned diff --git a/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif index b276f8865781..219c0e01849d 100644 --- a/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif @@ -475,36 +475,35 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; addi sp,sp,-192 +; addi sp,sp,-400 +; sd s1,392(sp) +; sd s2,384(sp) +; sd s3,376(sp) +; sd s4,368(sp) +; sd s5,360(sp) +; sd s6,352(sp) +; sd s7,344(sp) +; sd s8,336(sp) +; sd 
s9,328(sp) +; sd s10,320(sp) +; sd s11,312(sp) ; block0: ; load_addr a0,0(sp) ; load_sym a4,%tail_callee_stack_rets+0 ; callind a4 -; ld a5,0(sp) -; ld a1,8(sp) -; ld a3,16(sp) -; ld a5,24(sp) -; ld a1,32(sp) -; ld a3,40(sp) -; ld a5,48(sp) -; ld a1,56(sp) -; ld a3,64(sp) -; ld a5,72(sp) -; ld a1,80(sp) -; ld a3,88(sp) -; ld a5,96(sp) -; ld a1,104(sp) -; ld a3,112(sp) -; ld a5,120(sp) -; ld a1,128(sp) -; ld a3,136(sp) -; ld a5,144(sp) -; ld a1,152(sp) -; ld a3,160(sp) -; ld a5,168(sp) -; ld a1,176(sp) -; ld a0,184(sp) -; addi sp,sp,192 +; ld a0,96(slot) +; ld s1,392(sp) +; ld s2,384(sp) +; ld s3,376(sp) +; ld s4,368(sp) +; ld s5,360(sp) +; ld s6,352(sp) +; ld s7,344(sp) +; ld s8,336(sp) +; ld s9,328(sp) +; ld s10,320(sp) +; ld s11,312(sp) +; addi sp,sp,400 ; ld ra,8(sp) ; ld fp,0(sp) ; addi sp,sp,16 @@ -516,8 +515,19 @@ block0: ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; addi sp, sp, -0xc0 -; block1: ; offset 0x14 +; addi sp, sp, -0x190 +; sd s1, 0x188(sp) +; sd s2, 0x180(sp) +; sd s3, 0x178(sp) +; sd s4, 0x170(sp) +; sd s5, 0x168(sp) +; sd s6, 0x160(sp) +; sd s7, 0x158(sp) +; sd s8, 0x150(sp) +; sd s9, 0x148(sp) +; sd s10, 0x140(sp) +; sd s11, 0x138(sp) +; block1: ; offset 0x40 ; mv a0, sp ; auipc a4, 0 ; ld a4, 0xc(a4) @@ -525,31 +535,57 @@ block0: ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; jalr a4 -; ld a5, 0(sp) -; ld a1, 8(sp) -; ld a3, 0x10(sp) -; ld a5, 0x18(sp) -; ld a1, 0x20(sp) -; ld a3, 0x28(sp) -; ld a5, 0x30(sp) -; ld a1, 0x38(sp) -; ld a3, 0x40(sp) -; ld a5, 0x48(sp) -; ld a1, 0x50(sp) -; ld a3, 0x58(sp) -; ld a5, 0x60(sp) -; ld a1, 0x68(sp) -; ld a3, 0x70(sp) -; ld a5, 0x78(sp) -; ld a1, 0x80(sp) -; ld a3, 0x88(sp) -; ld a5, 0x90(sp) -; ld a1, 0x98(sp) -; ld a3, 0xa0(sp) -; ld a5, 0xa8(sp) -; ld a1, 0xb0(sp) -; ld a0, 0xb8(sp) -; addi sp, sp, 0xc0 +; j 4 +; ld a2, 0(sp) +; sd a2, 0xc0(sp) +; ld a2, 8(sp) +; sd a2, 0xc8(sp) +; ld a2, 0x10(sp) +; sd a2, 0xd0(sp) +; ld a2, 0x18(sp) +; 
sd a2, 0xd8(sp) +; ld a2, 0x20(sp) +; sd a2, 0xe0(sp) +; ld a2, 0x28(sp) +; sd a2, 0xe8(sp) +; ld a2, 0x30(sp) +; sd a2, 0xf0(sp) +; ld a2, 0x38(sp) +; sd a2, 0xf8(sp) +; ld a2, 0x40(sp) +; sd a2, 0x100(sp) +; ld a2, 0x48(sp) +; sd a2, 0x108(sp) +; ld a2, 0x50(sp) +; sd a2, 0x110(sp) +; ld a2, 0x58(sp) +; sd a2, 0x118(sp) +; ld s8, 0x60(sp) +; ld s9, 0x68(sp) +; ld s10, 0x70(sp) +; ld s11, 0x78(sp) +; ld s7, 0x80(sp) +; ld s6, 0x88(sp) +; ld s5, 0x90(sp) +; ld s4, 0x98(sp) +; ld s3, 0xa0(sp) +; ld s2, 0xa8(sp) +; ld s1, 0xb0(sp) +; ld a2, 0xb8(sp) +; sd a2, 0x120(sp) +; ld a0, 0x120(sp) +; ld s1, 0x188(sp) +; ld s2, 0x180(sp) +; ld s3, 0x178(sp) +; ld s4, 0x170(sp) +; ld s5, 0x168(sp) +; ld s6, 0x160(sp) +; ld s7, 0x158(sp) +; ld s8, 0x150(sp) +; ld s9, 0x148(sp) +; ld s10, 0x140(sp) +; ld s11, 0x138(sp) +; addi sp, sp, 0x190 ; ld ra, 8(sp) ; ld s0, 0(sp) ; addi sp, sp, 0x10 @@ -773,23 +809,23 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; addi sp,sp,-464 -; sd s1,456(sp) -; sd s2,448(sp) -; sd s3,440(sp) -; sd s4,432(sp) -; sd s5,424(sp) -; sd s6,416(sp) -; sd s7,408(sp) -; sd s8,400(sp) -; sd s9,392(sp) -; sd s10,384(sp) -; sd s11,376(sp) +; addi sp,sp,-560 +; sd s1,552(sp) +; sd s2,544(sp) +; sd s3,536(sp) +; sd s4,528(sp) +; sd s5,520(sp) +; sd s6,512(sp) +; sd s7,504(sp) +; sd s8,496(sp) +; sd s9,488(sp) +; sd s10,480(sp) +; sd s11,472(sp) ; block0: ; li a2,10 -; sd a2,8(slot) +; sd a2,0(slot) ; li a3,15 -; sd a3,0(slot) +; sd a3,96(slot) ; li a3,20 ; li a4,25 ; li a5,30 @@ -835,45 +871,22 @@ block0: ; sd a2,144(sp) ; load_addr a0,160(sp) ; load_sym t1,%tail_callee_stack_args_and_rets+0 -; ld a1,8(slot) -; ld a2,0(slot) +; ld a1,0(slot) +; ld a2,96(slot) ; callind t1 -; ld a2,160(sp) -; ld a4,168(sp) -; ld a0,176(sp) -; ld a2,184(sp) -; ld a4,192(sp) -; ld a0,200(sp) -; ld a2,208(sp) -; ld a4,216(sp) -; ld a0,224(sp) -; ld a2,232(sp) -; ld a4,240(sp) -; ld a0,248(sp) -; ld a2,256(sp) -; ld a4,264(sp) -; ld a0,272(sp) -; ld a2,280(sp) -; ld a4,288(sp) -; 
ld a0,296(sp) -; ld a2,304(sp) -; ld a4,312(sp) -; ld a0,320(sp) -; ld a2,328(sp) -; ld a4,336(sp) -; ld a0,344(sp) -; ld s1,456(sp) -; ld s2,448(sp) -; ld s3,440(sp) -; ld s4,432(sp) -; ld s5,424(sp) -; ld s6,416(sp) -; ld s7,408(sp) -; ld s8,400(sp) -; ld s9,392(sp) -; ld s10,384(sp) -; ld s11,376(sp) -; addi sp,sp,464 +; ld a0,96(slot) +; ld s1,552(sp) +; ld s2,544(sp) +; ld s3,536(sp) +; ld s4,528(sp) +; ld s5,520(sp) +; ld s6,512(sp) +; ld s7,504(sp) +; ld s8,496(sp) +; ld s9,488(sp) +; ld s10,480(sp) +; ld s11,472(sp) +; addi sp,sp,560 ; ld ra,8(sp) ; ld fp,0(sp) ; addi sp,sp,16 @@ -885,23 +898,23 @@ block0: ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; addi sp, sp, -0x1d0 -; sd s1, 0x1c8(sp) -; sd s2, 0x1c0(sp) -; sd s3, 0x1b8(sp) -; sd s4, 0x1b0(sp) -; sd s5, 0x1a8(sp) -; sd s6, 0x1a0(sp) -; sd s7, 0x198(sp) -; sd s8, 0x190(sp) -; sd s9, 0x188(sp) -; sd s10, 0x180(sp) -; sd s11, 0x178(sp) +; addi sp, sp, -0x230 +; sd s1, 0x228(sp) +; sd s2, 0x220(sp) +; sd s3, 0x218(sp) +; sd s4, 0x210(sp) +; sd s5, 0x208(sp) +; sd s6, 0x200(sp) +; sd s7, 0x1f8(sp) +; sd s8, 0x1f0(sp) +; sd s9, 0x1e8(sp) +; sd s10, 0x1e0(sp) +; sd s11, 0x1d8(sp) ; block1: ; offset 0x40 ; addi a2, zero, 0xa -; sd a2, 0x168(sp) +; sd a2, 0x160(sp) ; addi a3, zero, 0xf -; sd a3, 0x160(sp) +; sd a3, 0x1c0(sp) ; addi a3, zero, 0x14 ; addi a4, zero, 0x19 ; addi a5, zero, 0x1e @@ -951,46 +964,61 @@ block0: ; j 0xc ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 -; ld a1, 0x168(sp) -; ld a2, 0x160(sp) +; ld a1, 0x160(sp) +; ld a2, 0x1c0(sp) ; jalr t1 ; addi sp, sp, -0xa0 +; j 4 ; ld a2, 0xa0(sp) -; ld a4, 0xa8(sp) -; ld a0, 0xb0(sp) +; sd a2, 0x160(sp) +; ld a2, 0xa8(sp) +; sd a2, 0x168(sp) +; ld a2, 0xb0(sp) +; sd a2, 0x170(sp) ; ld a2, 0xb8(sp) -; ld a4, 0xc0(sp) -; ld a0, 0xc8(sp) +; sd a2, 0x178(sp) +; ld a2, 0xc0(sp) +; sd a2, 0x180(sp) +; ld a2, 0xc8(sp) +; sd a2, 0x188(sp) ; ld a2, 0xd0(sp) -; ld a4, 0xd8(sp) -; ld a0, 
0xe0(sp) +; sd a2, 0x190(sp) +; ld a2, 0xd8(sp) +; sd a2, 0x198(sp) +; ld a2, 0xe0(sp) +; sd a2, 0x1a0(sp) ; ld a2, 0xe8(sp) -; ld a4, 0xf0(sp) -; ld a0, 0xf8(sp) -; ld a2, 0x100(sp) -; ld a4, 0x108(sp) -; ld a0, 0x110(sp) -; ld a2, 0x118(sp) -; ld a4, 0x120(sp) -; ld a0, 0x128(sp) -; ld a2, 0x130(sp) -; ld a4, 0x138(sp) -; ld a0, 0x140(sp) -; ld a2, 0x148(sp) -; ld a4, 0x150(sp) -; ld a0, 0x158(sp) -; ld s1, 0x1c8(sp) -; ld s2, 0x1c0(sp) -; ld s3, 0x1b8(sp) -; ld s4, 0x1b0(sp) -; ld s5, 0x1a8(sp) -; ld s6, 0x1a0(sp) -; ld s7, 0x198(sp) -; ld s8, 0x190(sp) -; ld s9, 0x188(sp) -; ld s10, 0x180(sp) -; ld s11, 0x178(sp) -; addi sp, sp, 0x1d0 +; sd a2, 0x1a8(sp) +; ld a2, 0xf0(sp) +; sd a2, 0x1b0(sp) +; ld a2, 0xf8(sp) +; sd a2, 0x1b8(sp) +; ld s8, 0x100(sp) +; ld s9, 0x108(sp) +; ld s10, 0x110(sp) +; ld s11, 0x118(sp) +; ld s7, 0x120(sp) +; ld s6, 0x128(sp) +; ld s5, 0x130(sp) +; ld s4, 0x138(sp) +; ld s3, 0x140(sp) +; ld s2, 0x148(sp) +; ld s1, 0x150(sp) +; ld a2, 0x158(sp) +; sd a2, 0x1c0(sp) +; ld a0, 0x1c0(sp) +; ld s1, 0x228(sp) +; ld s2, 0x220(sp) +; ld s3, 0x218(sp) +; ld s4, 0x210(sp) +; ld s5, 0x208(sp) +; ld s6, 0x200(sp) +; ld s7, 0x1f8(sp) +; ld s8, 0x1f0(sp) +; ld s9, 0x1e8(sp) +; ld s10, 0x1e0(sp) +; ld s11, 0x1d8(sp) +; addi sp, sp, 0x230 ; ld ra, 8(sp) ; ld s0, 0(sp) ; addi sp, sp, 0x10 diff --git a/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif b/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif new file mode 100644 index 000000000000..eecbda69d2b2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif @@ -0,0 +1,179 @@ +test compile precise-output +set enable_multi_ret_implicit_sret +target x86_64 + +function %f(i32) -> i64 { + fn0 = %ext(i32) -> i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 + +block0(v0: i32): + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20 = call fn0(v0) + + v21 = iadd 
v1, v2 + v22 = iadd v3, v4 + v23 = iadd v5, v6 + v24 = iadd v7, v8 + v25 = iadd v9, v10 + v26 = iadd v11, v12 + v27 = iadd v13, v14 + v28 = iadd v15, v16 + v29 = iadd v17, v18 + v30 = iadd v19, v20 + + v31 = iadd v21, v22 + v32 = iadd v23, v24 + v33 = iadd v25, v26 + v34 = iadd v27, v28 + v35 = iadd v29, v30 + + v36 = iadd v31, v32 + v37 = iadd v33, v34 + v38 = iadd v35, v36 + v39 = iadd v37, v38 + + return v39 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $304, %rsp +; movq %rbx, 256(%rsp) +; movq %r12, 264(%rsp) +; movq %r13, 272(%rsp) +; movq %r14, 280(%rsp) +; movq %r15, 288(%rsp) +; block0: +; movq %rdi, %rsi +; lea 0(%rsp), %rdi +; load_ext_name %ext+0, %r10 +; call *%r10 +; lea 0(%rax,%rdx,1), %r8 +; lea 0(%rbx,%r15,1), %r9 +; lea 0(%r13,%r12,1), %r10 +; movq rsp(0 + virtual offset), %rcx +; lea 0(%rcx,%r14,1), %r11 +; movq rsp(8 + virtual offset), %rcx +; movq rsp(16 + virtual offset), %rdi +; lea 0(%rcx,%rdi,1), %rsi +; movq rsp(32 + virtual offset), %rdx +; movq rsp(24 + virtual offset), %rdi +; lea 0(%rdi,%rdx,1), %rdi +; movq rsp(40 + virtual offset), %rax +; movq rsp(48 + virtual offset), %rcx +; lea 0(%rax,%rcx,1), %rax +; movq rsp(64 + virtual offset), %rcx +; movq rsp(56 + virtual offset), %rdx +; lea 0(%rdx,%rcx,1), %rcx +; movq rsp(80 + virtual offset), %rdx +; movq rsp(72 + virtual offset), %r14 +; lea 0(%r14,%rdx,1), %rdx +; movq rsp(96 + virtual offset), %rbx +; movq rsp(88 + virtual offset), %r13 +; lea 0(%r13,%rbx,1), %r14 +; lea 0(%r8,%r9,1), %r8 +; lea 0(%r10,%r11,1), %r9 +; lea 0(%rsi,%rdi,1), %r10 +; lea 0(%rax,%rcx,1), %r11 +; lea 0(%rdx,%r14,1), %rsi +; lea 0(%r8,%r9,1), %r8 +; lea 0(%r10,%r11,1), %r9 +; lea 0(%rsi,%r8,1), %r8 +; lea 0(%r9,%r8,1), %rax +; movq 256(%rsp), %rbx +; movq 264(%rsp), %r12 +; movq 272(%rsp), %r13 +; movq 280(%rsp), %r14 +; movq 288(%rsp), %r15 +; addq %rsp, $304, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq 
$0x130, %rsp +; movq %rbx, 0x100(%rsp) +; movq %r12, 0x108(%rsp) +; movq %r13, 0x110(%rsp) +; movq %r14, 0x118(%rsp) +; movq %r15, 0x120(%rsp) +; block1: ; offset 0x33 +; movq %rdi, %rsi +; leaq (%rsp), %rdi +; movabsq $0, %r10 ; reloc_external Abs8 %ext 0 +; callq *%r10 +; movq (%rsp), %rbx +; movq 8(%rsp), %r15 +; movq 0x10(%rsp), %r13 +; movq 0x18(%rsp), %r12 +; movq 0x20(%rsp), %r11 +; movq %r11, 0x90(%rsp) +; movq 0x28(%rsp), %r14 +; movq 0x30(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x38(%rsp), %r11 +; movq %r11, 0xa0(%rsp) +; movq 0x40(%rsp), %r11 +; movq %r11, 0xa8(%rsp) +; movq 0x48(%rsp), %r11 +; movq %r11, 0xb0(%rsp) +; movq 0x50(%rsp), %r11 +; movq %r11, 0xb8(%rsp) +; movq 0x58(%rsp), %r11 +; movq %r11, 0xc0(%rsp) +; movq 0x60(%rsp), %r11 +; movq %r11, 0xc8(%rsp) +; movq 0x68(%rsp), %r11 +; movq %r11, 0xd0(%rsp) +; movq 0x70(%rsp), %r11 +; movq %r11, 0xd8(%rsp) +; movq 0x78(%rsp), %r11 +; movq %r11, 0xe0(%rsp) +; movq 0x80(%rsp), %r11 +; movq %r11, 0xe8(%rsp) +; movq 0x88(%rsp), %r11 +; movq %r11, 0xf0(%rsp) +; leaq (%rax, %rdx), %r8 +; leaq (%rbx, %r15), %r9 +; leaq (%r13, %r12), %r10 +; movq 0x90(%rsp), %rcx +; leaq (%rcx, %r14), %r11 +; movq 0x98(%rsp), %rcx +; movq 0xa0(%rsp), %rdi +; leaq (%rcx, %rdi), %rsi +; movq 0xb0(%rsp), %rdx +; movq 0xa8(%rsp), %rdi +; addq %rdx, %rdi +; movq 0xb8(%rsp), %rax +; movq 0xc0(%rsp), %rcx +; addq %rcx, %rax +; movq 0xd0(%rsp), %rcx +; movq 0xc8(%rsp), %rdx +; addq %rdx, %rcx +; movq 0xe0(%rsp), %rdx +; movq 0xd8(%rsp), %r14 +; addq %r14, %rdx +; movq 0xf0(%rsp), %rbx +; movq 0xe8(%rsp), %r13 +; leaq (%r13, %rbx), %r14 +; addq %r9, %r8 +; leaq (%r10, %r11), %r9 +; leaq (%rsi, %rdi), %r10 +; leaq (%rax, %rcx), %r11 +; leaq (%rdx, %r14), %rsi +; addq %r9, %r8 +; leaq (%r10, %r11), %r9 +; addq %rsi, %r8 +; leaq (%r9, %r8), %rax +; movq 0x100(%rsp), %rbx +; movq 0x108(%rsp), %r12 +; movq 0x110(%rsp), %r13 +; movq 0x118(%rsp), %r14 +; movq 0x120(%rsp), %r15 +; addq $0x130, %rsp +; movq %rbp, %rsp +; popq %rbp +; 
retq + diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 1a03ba7aa5ea..6b16c75d3fcb 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1287,16 +1287,17 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $32, %rsp -; movq %r13, 16(%rsp) +; movq %r12, 16(%rsp) +; movq %r13, 24(%rsp) ; block0: ; movq %rdi, %r13 ; lea 0(%rsp), %rdi ; load_ext_name %g+0, %r9 ; call *%r9 -; movq 0(%rsp), %r8 ; movq %r13, %rdi -; movq %r8, 0(%rdi) -; movq 16(%rsp), %r13 +; movq %r12, 0(%rdi) +; movq 16(%rsp), %r12 +; movq 24(%rsp), %r13 ; addq %rsp, $32, %rsp ; movq %rbp, %rsp ; popq %rbp @@ -1307,16 +1308,18 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; subq $0x20, %rsp -; movq %r13, 0x10(%rsp) -; block1: ; offset 0xd +; movq %r12, 0x10(%rsp) +; movq %r13, 0x18(%rsp) +; block1: ; offset 0x12 ; movq %rdi, %r13 ; leaq (%rsp), %rdi ; movabsq $0, %r9 ; reloc_external Abs8 %g 0 ; callq *%r9 -; movq (%rsp), %r8 +; movq (%rsp), %r12 ; movq %r13, %rdi -; movq %r8, (%rdi) -; movq 0x10(%rsp), %r13 +; movq %r12, (%rdi) +; movq 0x10(%rsp), %r12 +; movq 0x18(%rsp), %r13 ; addq $0x20, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif index 828be1ef89d3..44e26d71a9d0 100644 --- a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif @@ -208,24 +208,24 @@ block0: ; movq %r9, rsp(64 + virtual offset) ; movl $40, %r10d ; movq %r10, rsp(56 + virtual offset) -; movl $45, %r11d -; movq %r11, rsp(48 + virtual offset) -; movl $50, %r11d -; movq %r11, rsp(40 + virtual offset) +; movl $45, %r10d +; movq %r10, rsp(48 + virtual offset) +; movl $50, %r13d ; movl $55, %r14d ; movl $60, %r15d ; movl $65, %ebx ; movl $70, %r12d -; movl $75, %r13d +; movl $75, 
%r11d ; movl $80, %eax ; movl $85, %ecx ; movl $90, %edx ; movl $95, %esi -; movq %rsi, rsp(32 + virtual offset) +; movq %rsi, rsp(40 + virtual offset) ; movl $100, %r8d ; movl $105, %r9d ; movl $110, %r10d -; movl $115, %r11d +; movl $115, %esi +; movq %rsi, rsp(32 + virtual offset) ; movl $120, %esi ; movq %rsi, rsp(24 + virtual offset) ; movl $125, %esi @@ -234,28 +234,30 @@ block0: ; movq %rsi, rsp(8 + virtual offset) ; movl $135, %esi ; movq %rsi, rsp(0 + virtual offset) -; movq %r14, 0(%rdi) -; movq %r15, 8(%rdi) -; movq %rbx, 16(%rdi) -; movq %r12, 24(%rdi) -; movq %r13, 32(%rdi) -; movq %rax, 40(%rdi) -; movq %rcx, 48(%rdi) -; movq %rdx, 56(%rdi) -; movq rsp(32 + virtual offset), %rax -; movq %rax, 64(%rdi) -; movq %r8, 72(%rdi) -; movq %r9, 80(%rdi) -; movq %r10, 88(%rdi) -; movq %r11, 96(%rdi) -; movq rsp(24 + virtual offset), %rsi +; movq %r13, 0(%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 16(%rdi) +; movq %rbx, 24(%rdi) +; movq %r12, 32(%rdi) +; movq %r11, 40(%rdi) +; movq %rax, 48(%rdi) +; movq %rcx, 56(%rdi) +; movq %rdx, 64(%rdi) +; movq rsp(40 + virtual offset), %rax +; movq %rax, 72(%rdi) +; movq %r8, 80(%rdi) +; movq %r9, 88(%rdi) +; movq %r10, 96(%rdi) +; movq rsp(32 + virtual offset), %rsi ; movq %rsi, 104(%rdi) -; movq rsp(16 + virtual offset), %rsi +; movq rsp(24 + virtual offset), %rsi ; movq %rsi, 112(%rdi) -; movq rsp(8 + virtual offset), %rsi +; movq rsp(16 + virtual offset), %rsi ; movq %rsi, 120(%rdi) -; movq rsp(0 + virtual offset), %rsi +; movq rsp(8 + virtual offset), %rsi ; movq %rsi, 128(%rdi) +; movq rsp(0 + virtual offset), %rsi +; movq %rsi, 136(%rdi) ; movq rsp(104 + virtual offset), %rax ; movq rsp(96 + virtual offset), %rcx ; movq rsp(88 + virtual offset), %rdx @@ -264,7 +266,6 @@ block0: ; movq rsp(64 + virtual offset), %r8 ; movq rsp(56 + virtual offset), %r9 ; movq rsp(48 + virtual offset), %r10 -; movq rsp(40 + virtual offset), %r11 ; movq 112(%rsp), %rbx ; movq 120(%rsp), %r12 ; movq 128(%rsp), %r13 @@ -300,24 +301,24 @@ 
block0: ; movq %r9, 0x40(%rsp) ; movl $0x28, %r10d ; movq %r10, 0x38(%rsp) -; movl $0x2d, %r11d -; movq %r11, 0x30(%rsp) -; movl $0x32, %r11d -; movq %r11, 0x28(%rsp) +; movl $0x2d, %r10d +; movq %r10, 0x30(%rsp) +; movl $0x32, %r13d ; movl $0x37, %r14d ; movl $0x3c, %r15d ; movl $0x41, %ebx ; movl $0x46, %r12d -; movl $0x4b, %r13d +; movl $0x4b, %r11d ; movl $0x50, %eax ; movl $0x55, %ecx ; movl $0x5a, %edx ; movl $0x5f, %esi -; movq %rsi, 0x20(%rsp) +; movq %rsi, 0x28(%rsp) ; movl $0x64, %r8d ; movl $0x69, %r9d ; movl $0x6e, %r10d -; movl $0x73, %r11d +; movl $0x73, %esi +; movq %rsi, 0x20(%rsp) ; movl $0x78, %esi ; movq %rsi, 0x18(%rsp) ; movl $0x7d, %esi @@ -326,28 +327,30 @@ block0: ; movq %rsi, 8(%rsp) ; movl $0x87, %esi ; movq %rsi, (%rsp) -; movq %r14, (%rdi) -; movq %r15, 8(%rdi) -; movq %rbx, 0x10(%rdi) -; movq %r12, 0x18(%rdi) -; movq %r13, 0x20(%rdi) -; movq %rax, 0x28(%rdi) -; movq %rcx, 0x30(%rdi) -; movq %rdx, 0x38(%rdi) -; movq 0x20(%rsp), %rax -; movq %rax, 0x40(%rdi) -; movq %r8, 0x48(%rdi) -; movq %r9, 0x50(%rdi) -; movq %r10, 0x58(%rdi) -; movq %r11, 0x60(%rdi) -; movq 0x18(%rsp), %rsi +; movq %r13, (%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 0x10(%rdi) +; movq %rbx, 0x18(%rdi) +; movq %r12, 0x20(%rdi) +; movq %r11, 0x28(%rdi) +; movq %rax, 0x30(%rdi) +; movq %rcx, 0x38(%rdi) +; movq %rdx, 0x40(%rdi) +; movq 0x28(%rsp), %rax +; movq %rax, 0x48(%rdi) +; movq %r8, 0x50(%rdi) +; movq %r9, 0x58(%rdi) +; movq %r10, 0x60(%rdi) +; movq 0x20(%rsp), %rsi ; movq %rsi, 0x68(%rdi) -; movq 0x10(%rsp), %rsi +; movq 0x18(%rsp), %rsi ; movq %rsi, 0x70(%rdi) -; movq 8(%rsp), %rsi +; movq 0x10(%rsp), %rsi ; movq %rsi, 0x78(%rdi) -; movq (%rsp), %rsi +; movq 8(%rsp), %rsi ; movq %rsi, 0x80(%rdi) +; movq (%rsp), %rsi +; movq %rsi, 0x88(%rdi) ; movq 0x68(%rsp), %rax ; movq 0x60(%rsp), %rcx ; movq 0x58(%rsp), %rdx @@ -356,7 +359,6 @@ block0: ; movq 0x40(%rsp), %r8 ; movq 0x38(%rsp), %r9 ; movq 0x30(%rsp), %r10 -; movq 0x28(%rsp), %r11 ; movq 0x70(%rsp), %rbx ; movq 
0x78(%rsp), %r12 ; movq 0x80(%rsp), %r13 @@ -378,28 +380,22 @@ block0: ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $144, %rsp +; subq %rsp, $304, %rsp +; movq %rbx, 256(%rsp) +; movq %r12, 264(%rsp) +; movq %r13, 272(%rsp) +; movq %r14, 280(%rsp) +; movq %r15, 288(%rsp) ; block0: ; lea 0(%rsp), %rdi ; call TestCase(%tail_callee_stack_rets) -; movq 0(%rsp), %rax -; movq 8(%rsp), %rdx -; movq 16(%rsp), %r9 -; movq 24(%rsp), %r11 -; movq 32(%rsp), %rdi -; movq 40(%rsp), %rcx -; movq 48(%rsp), %r8 -; movq 56(%rsp), %r10 -; movq 64(%rsp), %rsi -; movq 72(%rsp), %rax -; movq 80(%rsp), %rdx -; movq 88(%rsp), %r9 -; movq 96(%rsp), %r11 -; movq 104(%rsp), %rdi -; movq 112(%rsp), %rcx -; movq 120(%rsp), %r8 -; movq 128(%rsp), %rax -; addq %rsp, $144, %rsp +; movq rsp(96 + virtual offset), %rax +; movq 256(%rsp), %rbx +; movq 264(%rsp), %r12 +; movq 272(%rsp), %r13 +; movq 280(%rsp), %r14 +; movq 288(%rsp), %r15 +; addq %rsp, $304, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -408,28 +404,53 @@ block0: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x90, %rsp -; block1: ; offset 0xb +; subq $0x130, %rsp +; movq %rbx, 0x100(%rsp) +; movq %r12, 0x108(%rsp) +; movq %r13, 0x110(%rsp) +; movq %r14, 0x118(%rsp) +; movq %r15, 0x120(%rsp) +; block1: ; offset 0x33 ; leaq (%rsp), %rdi -; callq 0x14 ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 -; movq (%rsp), %rax -; movq 8(%rsp), %rdx -; movq 0x10(%rsp), %r9 +; callq 0x3c ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 +; movq (%rsp), %r11 +; movq %r11, 0x90(%rsp) +; movq 8(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x10(%rsp), %r11 +; movq %r11, 0xa0(%rsp) ; movq 0x18(%rsp), %r11 -; movq 0x20(%rsp), %rdi -; movq 0x28(%rsp), %rcx -; movq 0x30(%rsp), %r8 -; movq 0x38(%rsp), %r10 -; movq 0x40(%rsp), %rsi -; movq 0x48(%rsp), %rax -; movq 0x50(%rsp), %rdx -; movq 0x58(%rsp), %r9 -; movq 0x60(%rsp), %r11 -; movq 0x68(%rsp), %rdi -; movq 0x70(%rsp), %rcx -; movq 0x78(%rsp), %r8 -; movq 0x80(%rsp), 
%rax -; addq $0x90, %rsp +; movq %r11, 0xa8(%rsp) +; movq 0x20(%rsp), %r11 +; movq %r11, 0xb0(%rsp) +; movq 0x28(%rsp), %r11 +; movq %r11, 0xb8(%rsp) +; movq 0x30(%rsp), %r11 +; movq %r11, 0xc0(%rsp) +; movq 0x38(%rsp), %r11 +; movq %r11, 0xc8(%rsp) +; movq 0x40(%rsp), %r11 +; movq %r11, 0xd0(%rsp) +; movq 0x48(%rsp), %r11 +; movq %r11, 0xd8(%rsp) +; movq 0x50(%rsp), %r11 +; movq %r11, 0xe0(%rsp) +; movq 0x58(%rsp), %r11 +; movq %r11, 0xe8(%rsp) +; movq 0x60(%rsp), %rbx +; movq 0x68(%rsp), %r12 +; movq 0x70(%rsp), %r13 +; movq 0x78(%rsp), %r14 +; movq 0x80(%rsp), %r15 +; movq 0x88(%rsp), %r11 +; movq %r11, 0xf0(%rsp) +; movq 0xf0(%rsp), %rax +; movq 0x100(%rsp), %rbx +; movq 0x108(%rsp), %r12 +; movq 0x110(%rsp), %r13 +; movq 0x118(%rsp), %r14 +; movq 0x120(%rsp), %r15 +; addq $0x130, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq @@ -461,52 +482,56 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq %r9, rsp(40 + virtual offset) ; movq rbp(stack args max - 168), %r10 ; movq %r10, rsp(48 + virtual offset) -; movq rbp(stack args max - 160), %r11 -; movq %r11, rsp(56 + virtual offset) -; movq rbp(stack args max - 152), %r11 -; movq %r11, rsp(64 + virtual offset) +; movq rbp(stack args max - 160), %r10 +; movq %r10, rsp(56 + virtual offset) +; movq rbp(stack args max - 152), %rcx +; movq %rcx, rsp(64 + virtual offset) ; movq rbp(stack args max - 144), %r8 +; movq %r8, rsp(72 + virtual offset) ; movq rbp(stack args max - 136), %r10 -; movq %r10, rsp(72 + virtual offset) +; movq %r10, rsp(80 + virtual offset) ; movq rbp(stack args max - 128), %rsi -; movq %rsi, rsp(80 + virtual offset) +; movq %rsi, rsp(88 + virtual offset) ; movq rbp(stack args max - 120), %rax -; movq %rax, rsp(88 + virtual offset) -; movq rbp(stack args max - 112), %r11 -; movq rbp(stack args max - 104), %r10 -; movq rbp(stack args max - 96), %r9 -; movq rbp(stack args max - 88), %rax ; movq %rax, rsp(96 + virtual offset) -; movq rbp(stack args max - 80), %rdx -; 
movq rbp(stack args max - 72), %rcx -; movq rbp(stack args max - 64), %rsi +; movq rbp(stack args max - 112), %r10 +; movq rbp(stack args max - 104), %r9 +; movq rbp(stack args max - 96), %r8 +; movq rbp(stack args max - 88), %rdx +; movq rbp(stack args max - 80), %rcx +; movq %rcx, rsp(104 + virtual offset) +; movq rbp(stack args max - 72), %rsi +; movq rbp(stack args max - 64), %r15 ; movq rbp(stack args max - 56), %r12 ; movq rbp(stack args max - 48), %r14 ; movq rbp(stack args max - 40), %rbx ; movq rbp(stack args max - 32), %r13 -; movq rbp(stack args max - 24), %r15 +; movq rbp(stack args max - 24), %r11 ; movq rbp(stack args max - 16), %rax -; movq %r8, 0(%rdi) -; movq rsp(72 + virtual offset), %r8 -; movq %r8, 8(%rdi) -; movq rsp(80 + virtual offset), %r8 -; movq %r8, 16(%rdi) -; movq rsp(88 + virtual offset), %r8 -; movq %r8, 24(%rdi) -; movq %r11, 32(%rdi) +; movq rsp(64 + virtual offset), %rcx +; movq %rcx, 0(%rdi) +; movq rsp(72 + virtual offset), %rcx +; movq %rcx, 8(%rdi) +; movq rsp(80 + virtual offset), %rcx +; movq %rcx, 16(%rdi) +; movq rsp(88 + virtual offset), %rcx +; movq %rcx, 24(%rdi) +; movq rsp(96 + virtual offset), %rcx +; movq %rcx, 32(%rdi) ; movq %r10, 40(%rdi) ; movq %r9, 48(%rdi) -; movq rsp(96 + virtual offset), %r8 ; movq %r8, 56(%rdi) ; movq %rdx, 64(%rdi) -; movq %rcx, 72(%rdi) +; movq rsp(104 + virtual offset), %rdx +; movq %rdx, 72(%rdi) ; movq %rsi, 80(%rdi) -; movq %r12, 88(%rdi) -; movq %r14, 96(%rdi) -; movq %rbx, 104(%rdi) -; movq %r13, 112(%rdi) -; movq %r15, 120(%rdi) -; movq %rax, 128(%rdi) +; movq %r15, 88(%rdi) +; movq %r12, 96(%rdi) +; movq %r14, 104(%rdi) +; movq %rbx, 112(%rdi) +; movq %r13, 120(%rdi) +; movq %r11, 128(%rdi) +; movq %rax, 136(%rdi) ; movq rsp(0 + virtual offset), %rax ; movq rsp(8 + virtual offset), %rcx ; movq rsp(16 + virtual offset), %rdx @@ -515,7 +540,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq rsp(40 + virtual offset), %r8 ; movq rsp(48 + 
virtual offset), %r9 ; movq rsp(56 + virtual offset), %r10 -; movq rsp(64 + virtual offset), %r11 ; movq 112(%rsp), %rbx ; movq 120(%rsp), %r12 ; movq 128(%rsp), %r13 @@ -546,52 +570,56 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq %r9, 0x28(%rsp) ; movq 0x18(%rbp), %r10 ; movq %r10, 0x30(%rsp) -; movq 0x20(%rbp), %r11 -; movq %r11, 0x38(%rsp) -; movq 0x28(%rbp), %r11 -; movq %r11, 0x40(%rsp) +; movq 0x20(%rbp), %r10 +; movq %r10, 0x38(%rsp) +; movq 0x28(%rbp), %rcx +; movq %rcx, 0x40(%rsp) ; movq 0x30(%rbp), %r8 +; movq %r8, 0x48(%rsp) ; movq 0x38(%rbp), %r10 -; movq %r10, 0x48(%rsp) +; movq %r10, 0x50(%rsp) ; movq 0x40(%rbp), %rsi -; movq %rsi, 0x50(%rsp) +; movq %rsi, 0x58(%rsp) ; movq 0x48(%rbp), %rax -; movq %rax, 0x58(%rsp) -; movq 0x50(%rbp), %r11 -; movq 0x58(%rbp), %r10 -; movq 0x60(%rbp), %r9 -; movq 0x68(%rbp), %rax ; movq %rax, 0x60(%rsp) -; movq 0x70(%rbp), %rdx -; movq 0x78(%rbp), %rcx -; movq 0x80(%rbp), %rsi +; movq 0x50(%rbp), %r10 +; movq 0x58(%rbp), %r9 +; movq 0x60(%rbp), %r8 +; movq 0x68(%rbp), %rdx +; movq 0x70(%rbp), %rcx +; movq %rcx, 0x68(%rsp) +; movq 0x78(%rbp), %rsi +; movq 0x80(%rbp), %r15 ; movq 0x88(%rbp), %r12 ; movq 0x90(%rbp), %r14 ; movq 0x98(%rbp), %rbx ; movq 0xa0(%rbp), %r13 -; movq 0xa8(%rbp), %r15 +; movq 0xa8(%rbp), %r11 ; movq 0xb0(%rbp), %rax -; movq %r8, (%rdi) -; movq 0x48(%rsp), %r8 -; movq %r8, 8(%rdi) -; movq 0x50(%rsp), %r8 -; movq %r8, 0x10(%rdi) -; movq 0x58(%rsp), %r8 -; movq %r8, 0x18(%rdi) -; movq %r11, 0x20(%rdi) +; movq 0x40(%rsp), %rcx +; movq %rcx, (%rdi) +; movq 0x48(%rsp), %rcx +; movq %rcx, 8(%rdi) +; movq 0x50(%rsp), %rcx +; movq %rcx, 0x10(%rdi) +; movq 0x58(%rsp), %rcx +; movq %rcx, 0x18(%rdi) +; movq 0x60(%rsp), %rcx +; movq %rcx, 0x20(%rdi) ; movq %r10, 0x28(%rdi) ; movq %r9, 0x30(%rdi) -; movq 0x60(%rsp), %r8 ; movq %r8, 0x38(%rdi) ; movq %rdx, 0x40(%rdi) -; movq %rcx, 0x48(%rdi) +; movq 0x68(%rsp), %rdx +; movq %rdx, 0x48(%rdi) ; movq %rsi, 0x50(%rdi) -; 
movq %r12, 0x58(%rdi) -; movq %r14, 0x60(%rdi) -; movq %rbx, 0x68(%rdi) -; movq %r13, 0x70(%rdi) -; movq %r15, 0x78(%rdi) -; movq %rax, 0x80(%rdi) +; movq %r15, 0x58(%rdi) +; movq %r12, 0x60(%rdi) +; movq %r14, 0x68(%rdi) +; movq %rbx, 0x70(%rdi) +; movq %r13, 0x78(%rdi) +; movq %r11, 0x80(%rdi) +; movq %rax, 0x88(%rdi) ; movq (%rsp), %rax ; movq 8(%rsp), %rcx ; movq 0x10(%rsp), %rdx @@ -600,7 +628,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq 0x28(%rsp), %r8 ; movq 0x30(%rsp), %r9 ; movq 0x38(%rsp), %r10 -; movq 0x40(%rsp), %r11 ; movq 0x70(%rsp), %rbx ; movq 0x78(%rsp), %r12 ; movq 0x80(%rsp), %r13 @@ -656,17 +683,17 @@ block0: ; movq %r15, 464(%rsp) ; block0: ; movl $10, %edx -; movq %rdx, rsp(96 + virtual offset) +; movq %rdx, rsp(88 + virtual offset) ; movl $15, %ecx -; movq %rcx, rsp(88 + virtual offset) +; movq %rcx, rsp(80 + virtual offset) ; movl $20, %r8d -; movq %r8, rsp(80 + virtual offset) +; movq %r8, rsp(72 + virtual offset) ; movl $25, %r9d -; movq %r9, rsp(72 + virtual offset) -; movl $30, %r9d ; movq %r9, rsp(64 + virtual offset) +; movl $30, %r9d +; movq %r9, rsp(56 + virtual offset) ; movl $35, %esi -; movq %rsi, rsp(56 + virtual offset) +; movq %rsi, rsp(48 + virtual offset) ; movl $40, %edi ; movl $45, %eax ; movl $50, %r10d @@ -681,20 +708,20 @@ block0: ; movl $95, %r8d ; movl $100, %r9d ; movl $105, %r11d -; movq %r11, rsp(48 + virtual offset) -; movl $110, %r11d ; movq %r11, rsp(40 + virtual offset) -; movl $115, %r11d +; movl $110, %r11d ; movq %r11, rsp(32 + virtual offset) -; movl $120, %r11d +; movl $115, %r11d ; movq %r11, rsp(24 + virtual offset) -; movl $125, %r11d +; movl $120, %r11d ; movq %r11, rsp(16 + virtual offset) -; movl $130, %r11d +; movl $125, %r11d ; movq %r11, rsp(8 + virtual offset) -; movl $135, %r11d +; movl $130, %r11d ; movq %r11, rsp(0 + virtual offset) -; movq rsp(56 + virtual offset), %r11 +; movl $135, %r11d +; movq %r11, rsp(96 + virtual offset) +; movq rsp(48 + 
virtual offset), %r11 ; movq %r11, 0(%rsp) ; movq %rdi, 8(%rsp) ; movq %rax, 16(%rsp) @@ -709,45 +736,29 @@ block0: ; movq %rcx, 88(%rsp) ; movq %r8, 96(%rsp) ; movq %r9, 104(%rsp) -; movq rsp(48 + virtual offset), %r11 -; movq %r11, 112(%rsp) ; movq rsp(40 + virtual offset), %r11 -; movq %r11, 120(%rsp) +; movq %r11, 112(%rsp) ; movq rsp(32 + virtual offset), %r11 -; movq %r11, 128(%rsp) +; movq %r11, 120(%rsp) ; movq rsp(24 + virtual offset), %r11 -; movq %r11, 136(%rsp) +; movq %r11, 128(%rsp) ; movq rsp(16 + virtual offset), %r11 -; movq %r11, 144(%rsp) +; movq %r11, 136(%rsp) ; movq rsp(8 + virtual offset), %r11 -; movq %r11, 152(%rsp) +; movq %r11, 144(%rsp) ; movq rsp(0 + virtual offset), %r11 +; movq %r11, 152(%rsp) +; movq rsp(96 + virtual offset), %r11 ; movq %r11, 160(%rsp) ; lea 176(%rsp), %rdi ; load_ext_name %tail_callee_stack_args_and_rets+0, %r10 -; movq rsp(80 + virtual offset), %rcx -; movq rsp(88 + virtual offset), %rdx -; movq rsp(96 + virtual offset), %rsi -; movq rsp(72 + virtual offset), %r8 -; movq rsp(64 + virtual offset), %r9 +; movq rsp(72 + virtual offset), %rcx +; movq rsp(80 + virtual offset), %rdx +; movq rsp(88 + virtual offset), %rsi +; movq rsp(64 + virtual offset), %r8 +; movq rsp(56 + virtual offset), %r9 ; call *%r10 -; movq 176(%rsp), %r8 -; movq 184(%rsp), %r10 -; movq 192(%rsp), %rsi -; movq 200(%rsp), %rax -; movq 208(%rsp), %rdx -; movq 216(%rsp), %r9 -; movq 224(%rsp), %r11 -; movq 232(%rsp), %rdi -; movq 240(%rsp), %rcx -; movq 248(%rsp), %r8 -; movq 256(%rsp), %r10 -; movq 264(%rsp), %rsi -; movq 272(%rsp), %rax -; movq 280(%rsp), %rdx -; movq 288(%rsp), %r9 -; movq 296(%rsp), %r11 -; movq 304(%rsp), %rax +; movq rsp(96 + virtual offset), %rax ; movq 432(%rsp), %rbx ; movq 440(%rsp), %r12 ; movq 448(%rsp), %r13 @@ -770,17 +781,17 @@ block0: ; movq %r15, 0x1d0(%rsp) ; block1: ; offset 0x33 ; movl $0xa, %edx -; movq %rdx, 0x1a0(%rsp) +; movq %rdx, 0x198(%rsp) ; movl $0xf, %ecx -; movq %rcx, 0x198(%rsp) +; movq %rcx, 
0x190(%rsp) ; movl $0x14, %r8d -; movq %r8, 0x190(%rsp) +; movq %r8, 0x188(%rsp) ; movl $0x19, %r9d -; movq %r9, 0x188(%rsp) -; movl $0x1e, %r9d ; movq %r9, 0x180(%rsp) +; movl $0x1e, %r9d +; movq %r9, 0x178(%rsp) ; movl $0x23, %esi -; movq %rsi, 0x178(%rsp) +; movq %rsi, 0x170(%rsp) ; movl $0x28, %edi ; movl $0x2d, %eax ; movl $0x32, %r10d @@ -795,20 +806,20 @@ block0: ; movl $0x5f, %r8d ; movl $0x64, %r9d ; movl $0x69, %r11d -; movq %r11, 0x170(%rsp) -; movl $0x6e, %r11d ; movq %r11, 0x168(%rsp) -; movl $0x73, %r11d +; movl $0x6e, %r11d ; movq %r11, 0x160(%rsp) -; movl $0x78, %r11d +; movl $0x73, %r11d ; movq %r11, 0x158(%rsp) -; movl $0x7d, %r11d +; movl $0x78, %r11d ; movq %r11, 0x150(%rsp) -; movl $0x82, %r11d +; movl $0x7d, %r11d ; movq %r11, 0x148(%rsp) -; movl $0x87, %r11d +; movl $0x82, %r11d ; movq %r11, 0x140(%rsp) -; movq 0x178(%rsp), %r11 +; movl $0x87, %r11d +; movq %r11, 0x1a0(%rsp) +; movq 0x170(%rsp), %r11 ; movq %r11, (%rsp) ; movq %rdi, 8(%rsp) ; movq %rax, 0x10(%rsp) @@ -823,46 +834,61 @@ block0: ; movq %rcx, 0x58(%rsp) ; movq %r8, 0x60(%rsp) ; movq %r9, 0x68(%rsp) -; movq 0x170(%rsp), %r11 -; movq %r11, 0x70(%rsp) ; movq 0x168(%rsp), %r11 -; movq %r11, 0x78(%rsp) +; movq %r11, 0x70(%rsp) ; movq 0x160(%rsp), %r11 -; movq %r11, 0x80(%rsp) +; movq %r11, 0x78(%rsp) ; movq 0x158(%rsp), %r11 -; movq %r11, 0x88(%rsp) +; movq %r11, 0x80(%rsp) ; movq 0x150(%rsp), %r11 -; movq %r11, 0x90(%rsp) +; movq %r11, 0x88(%rsp) ; movq 0x148(%rsp), %r11 -; movq %r11, 0x98(%rsp) +; movq %r11, 0x90(%rsp) ; movq 0x140(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x1a0(%rsp), %r11 ; movq %r11, 0xa0(%rsp) ; leaq 0xb0(%rsp), %rdi ; movabsq $0, %r10 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 -; movq 0x190(%rsp), %rcx -; movq 0x198(%rsp), %rdx -; movq 0x1a0(%rsp), %rsi -; movq 0x188(%rsp), %r8 -; movq 0x180(%rsp), %r9 +; movq 0x188(%rsp), %rcx +; movq 0x190(%rsp), %rdx +; movq 0x198(%rsp), %rsi +; movq 0x180(%rsp), %r8 +; movq 0x178(%rsp), %r9 ; callq *%r10 ; 
subq $0xb0, %rsp -; movq 0xb0(%rsp), %r8 -; movq 0xb8(%rsp), %r10 -; movq 0xc0(%rsp), %rsi -; movq 0xc8(%rsp), %rax -; movq 0xd0(%rsp), %rdx -; movq 0xd8(%rsp), %r9 +; movq 0xb0(%rsp), %r11 +; movq %r11, 0x140(%rsp) +; movq 0xb8(%rsp), %r11 +; movq %r11, 0x148(%rsp) +; movq 0xc0(%rsp), %r11 +; movq %r11, 0x150(%rsp) +; movq 0xc8(%rsp), %r11 +; movq %r11, 0x158(%rsp) +; movq 0xd0(%rsp), %r11 +; movq %r11, 0x160(%rsp) +; movq 0xd8(%rsp), %r11 +; movq %r11, 0x168(%rsp) ; movq 0xe0(%rsp), %r11 -; movq 0xe8(%rsp), %rdi -; movq 0xf0(%rsp), %rcx -; movq 0xf8(%rsp), %r8 -; movq 0x100(%rsp), %r10 -; movq 0x108(%rsp), %rsi -; movq 0x110(%rsp), %rax -; movq 0x118(%rsp), %rdx -; movq 0x120(%rsp), %r9 -; movq 0x128(%rsp), %r11 -; movq 0x130(%rsp), %rax +; movq %r11, 0x170(%rsp) +; movq 0xe8(%rsp), %r11 +; movq %r11, 0x178(%rsp) +; movq 0xf0(%rsp), %r11 +; movq %r11, 0x180(%rsp) +; movq 0xf8(%rsp), %r11 +; movq %r11, 0x188(%rsp) +; movq 0x100(%rsp), %r11 +; movq %r11, 0x190(%rsp) +; movq 0x108(%rsp), %r11 +; movq %r11, 0x198(%rsp) +; movq 0x110(%rsp), %rbx +; movq 0x118(%rsp), %r12 +; movq 0x120(%rsp), %r13 +; movq 0x128(%rsp), %r14 +; movq 0x130(%rsp), %r15 +; movq 0x138(%rsp), %r11 +; movq %r11, 0x1a0(%rsp) +; movq 0x1a0(%rsp), %rax ; movq 0x1b0(%rsp), %rbx ; movq 0x1b8(%rsp), %r12 ; movq 0x1c0(%rsp), %r13 diff --git a/cranelift/filetests/filetests/isa/x64/winch.clif b/cranelift/filetests/filetests/isa/x64/winch.clif index 2bda2fd16b7d..4b227d0ac7ab 100644 --- a/cranelift/filetests/filetests/isa/x64/winch.clif +++ b/cranelift/filetests/filetests/isa/x64/winch.clif @@ -291,25 +291,25 @@ block0(v0:i64): ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $64, %rsp -; movq %rbx, 16(%rsp) -; movq %r12, 24(%rsp) -; movq %r13, 32(%rsp) -; movq %r14, 40(%rsp) -; movq %r15, 48(%rsp) +; subq %rsp, $80, %rsp +; movq %rbx, 32(%rsp) +; movq %r12, 40(%rsp) +; movq %r13, 48(%rsp) +; movq %r14, 56(%rsp) +; movq %r15, 64(%rsp) ; block0: ; lea 0(%rsp), %rdi ; load_ext_name %g+0, %r10 ; call 
*%r10 -; movq 4(%rsp), %rax -; movq 0(%rsp), %r9 -; andl %r9d, %eax -; movq 16(%rsp), %rbx -; movq 24(%rsp), %r12 -; movq 32(%rsp), %r13 -; movq 40(%rsp), %r14 -; movq 48(%rsp), %r15 -; addq %rsp, $64, %rsp +; movq rsp(0 + virtual offset), %rax +; movq rsp(8 + virtual offset), %rdx +; andl %edx, %eax +; movq 32(%rsp), %rbx +; movq 40(%rsp), %r12 +; movq 48(%rsp), %r13 +; movq 56(%rsp), %r14 +; movq 64(%rsp), %r15 +; addq %rsp, $80, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -318,25 +318,29 @@ block0(v0:i64): ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x40, %rsp -; movq %rbx, 0x10(%rsp) -; movq %r12, 0x18(%rsp) -; movq %r13, 0x20(%rsp) -; movq %r14, 0x28(%rsp) -; movq %r15, 0x30(%rsp) +; subq $0x50, %rsp +; movq %rbx, 0x20(%rsp) +; movq %r12, 0x28(%rsp) +; movq %r13, 0x30(%rsp) +; movq %r14, 0x38(%rsp) +; movq %r15, 0x40(%rsp) ; block1: ; offset 0x21 ; leaq (%rsp), %rdi ; movabsq $0, %r10 ; reloc_external Abs8 %g 0 ; callq *%r10 -; movq 4(%rsp), %rax -; movq (%rsp), %r9 -; andl %r9d, %eax -; movq 0x10(%rsp), %rbx -; movq 0x18(%rsp), %r12 -; movq 0x20(%rsp), %r13 -; movq 0x28(%rsp), %r14 -; movq 0x30(%rsp), %r15 -; addq $0x40, %rsp +; movq 4(%rsp), %r11 +; movq %r11, 0x10(%rsp) +; movq (%rsp), %r11 +; movq %r11, 0x18(%rsp) +; movq 0x10(%rsp), %rax +; movq 0x18(%rsp), %rdx +; andl %edx, %eax +; movq 0x20(%rsp), %rbx +; movq 0x28(%rsp), %r12 +; movq 0x30(%rsp), %r13 +; movq 0x38(%rsp), %r14 +; movq 0x40(%rsp), %r15 +; addq $0x50, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/tests/disas/winch/x64/load/grow_load.wat b/tests/disas/winch/x64/load/grow_load.wat index 4b0bada9f6cf..dde1b16b81d3 100644 --- a/tests/disas/winch/x64/load/grow_load.wat +++ b/tests/disas/winch/x64/load/grow_load.wat @@ -65,7 +65,7 @@ ;; movq %r14, %rdi ;; movl 0xc(%rsp), %esi ;; movl $0, %edx -;; callq 0x2d6 +;; callq 0x2ed ;; addq $0xc, %rsp ;; addq $4, %rsp ;; movq 0x58(%rsp), %r14 From 37f245d8cc8a16c1bc51fb93076d2aa2fa27e91e Mon Sep 17 00:00:00 2001 From: Chris 
Fallin Date: Tue, 1 Apr 2025 15:28:28 -0700 Subject: [PATCH 2/5] Fix is_included_in_clobbers on aarch64: new defs must skip optimization. --- cranelift/codegen/src/isa/aarch64/abi.rs | 1 + cranelift/codegen/src/isa/aarch64/inst/mod.rs | 2 + cranelift/codegen/src/isa/riscv64/abi.rs | 1 + cranelift/codegen/src/isa/s390x/inst/mod.rs | 4 +- cranelift/codegen/src/isa/s390x/lower/isle.rs | 1 + cranelift/codegen/src/isa/x64/abi.rs | 1 + cranelift/codegen/src/machinst/abi.rs | 14 ++++ .../filetests/isa/aarch64/tail-call-conv.clif | 64 +++++++++++++++++-- .../filetests/isa/pulley32/call.clif | 14 ++-- .../filetests/isa/pulley32/extend.clif | 16 ++--- .../filetests/isa/pulley64/call.clif | 18 +++--- .../isa/pulley64/call_indirect_host.clif | 2 +- .../filetests/isa/pulley64/extend.clif | 16 ++--- 13 files changed, 114 insertions(+), 40 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 8cd36c76c68d..becc7d90dd50 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1094,6 +1094,7 @@ impl ABIMachineSpec for AArch64MachineDeps { caller_conv: call_conv, callee_conv: call_conv, callee_pop_size: 0, + has_non_abi_defs: false, }), }); insts diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 2dae98b929f3..25a16fbbc716 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -979,6 +979,8 @@ impl MachInst for Inst { fn is_included_in_clobbers(&self) -> bool { let (caller, callee) = match self { Inst::Args { .. 
} => return false, + Inst::Call { info } if info.has_non_abi_defs => return true, + Inst::CallInd { info } if info.has_non_abi_defs => return true, Inst::Call { info } => (info.caller_conv, info.callee_conv), Inst::CallInd { info } => (info.caller_conv, info.callee_conv), _ => return true, diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index bacb4e7b2a38..5056e75b66fc 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -614,6 +614,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { caller_conv: call_conv, callee_conv: call_conv, callee_pop_size: 0, + has_non_abi_defs: false, }), }); insts diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 09245bf056a9..64a0d8af4b40 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -1069,7 +1069,9 @@ impl MachInst for Inst { // registers. match self { &Inst::Args { .. } => false, - &Inst::Call { ref info, .. } => info.caller_conv != info.callee_conv, + &Inst::Call { ref info, .. } => { + info.caller_conv != info.callee_conv || info.has_non_abi_defs + } &Inst::CallInd { ref info, .. } => info.caller_conv != info.callee_conv, &Inst::ElfTlsGetOffset { .. 
} => false, _ => true, diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index c3e241f859fa..4589afcc4d89 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -999,6 +999,7 @@ impl IsleContext<'_, '_, MInst, S390xBackend> { callee_pop_size, caller_conv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), callee_conv: self.lower_ctx.sigs()[abi].call_conv(), + has_non_abi_defs: false, } } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index dda711ce535d..632b86ca1fb2 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -877,6 +877,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { defs: smallvec![], clobbers: Self::get_regs_clobbered_by_call(call_conv), callee_pop_size, + has_non_abi_defs: false, callee_conv: call_conv, caller_conv: call_conv, }))); diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index dfa9d515212d..d5a263070820 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -614,6 +614,10 @@ pub struct CallInfo { /// caller, if any. (Used for popping stack arguments with the `tail` /// calling convention.) pub callee_pop_size: u32, + /// Do the defs have any definitions outside of the ABI-implied + /// clobbers? If so, this instruction needs to be considered when + /// computing the function body's clobbered registers. 
+ pub has_non_abi_defs: bool, } impl CallInfo { @@ -628,6 +632,7 @@ impl CallInfo { caller_conv: call_conv, callee_conv: call_conv, callee_pop_size: 0, + has_non_abi_defs: false, } } @@ -641,6 +646,7 @@ impl CallInfo { caller_conv: self.caller_conv, callee_conv: self.callee_conv, callee_pop_size: self.callee_pop_size, + has_non_abi_defs: self.has_non_abi_defs, } } } @@ -2024,6 +2030,9 @@ pub struct CallSite { caller_conv: isa::CallConv, /// The settings controlling this compilation. flags: settings::Flags, + /// Has any defs that are not constrained to ABI-specified + /// registers. + has_non_abi_defs: bool, _mach: PhantomData, } @@ -2057,6 +2066,7 @@ impl CallSite { is_tail_call, caller_conv, flags, + has_non_abi_defs: false, _mach: PhantomData, } } @@ -2080,6 +2090,7 @@ impl CallSite { is_tail_call: IsTailCall::No, caller_conv, flags, + has_non_abi_defs: false, _mach: PhantomData, } } @@ -2103,6 +2114,7 @@ impl CallSite { is_tail_call, caller_conv, flags, + has_non_abi_defs: false, _mach: PhantomData, } } @@ -2355,6 +2367,7 @@ impl CallSite { vreg: into_reg, location: RetLocation::Stack(amode, ty), }); + self.has_non_abi_defs = true; into_regs.push(into_reg.to_reg()); } } @@ -2455,6 +2468,7 @@ impl CallSite { callee_conv: call_conv, caller_conv: self.caller_conv, callee_pop_size, + has_non_abi_defs: self.has_non_abi_defs, }, ) .into_iter() diff --git a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif index b650b9285764..ece17e8afdbc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif @@ -380,6 +380,15 @@ block0: ; VCode: ; stp fp, lr, [sp, #-16]! ; mov fp, sp +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! 
+; stp d8, d9, [sp, #-16]! ; sub sp, sp, #240 ; block0: ; mov x8, sp @@ -387,6 +396,15 @@ block0: ; blr x12 ; ldr x2, [sp, #232] ; add sp, sp, #240 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret ; @@ -394,11 +412,20 @@ block0: ; block0: ; offset 0x0 ; stp x29, x30, [sp, #-0x10]! ; mov x29, sp +; stp x27, x28, [sp, #-0x10]! +; stp x25, x26, [sp, #-0x10]! +; stp x23, x24, [sp, #-0x10]! +; stp x21, x22, [sp, #-0x10]! +; stp x19, x20, [sp, #-0x10]! +; stp d14, d15, [sp, #-0x10]! +; stp d12, d13, [sp, #-0x10]! +; stp d10, d11, [sp, #-0x10]! +; stp d8, d9, [sp, #-0x10]! ; sub sp, sp, #0xf0 -; block1: ; offset 0xc +; block1: ; offset 0x30 ; mov x8, sp -; ldr x12, #0x18 -; b #0x20 +; ldr x12, #0x3c +; b #0x44 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x12 @@ -434,6 +461,15 @@ block0: ; stur x9, [sp, #0xe8] ; ldur x2, [sp, #0xe8] ; add sp, sp, #0xf0 +; ldp d8, d9, [sp], #0x10 +; ldp d10, d11, [sp], #0x10 +; ldp d12, d13, [sp], #0x10 +; ldp d14, d15, [sp], #0x10 +; ldp x19, x20, [sp], #0x10 +; ldp x21, x22, [sp], #0x10 +; ldp x23, x24, [sp], #0x10 +; ldp x25, x26, [sp], #0x10 +; ldp x27, x28, [sp], #0x10 ; ldp x29, x30, [sp], #0x10 ; ret @@ -612,6 +648,10 @@ block0: ; stp x23, x24, [sp, #-16]! ; stp x21, x22, [sp, #-16]! ; stp x19, x20, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! 
; sub sp, sp, #400 ; block0: ; movz x2, #10 @@ -665,6 +705,10 @@ block0: ; blr x10 ; ldr x2, [sp, #392] ; add sp, sp, #400 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 ; ldp x19, x20, [sp], #16 ; ldp x21, x22, [sp], #16 ; ldp x23, x24, [sp], #16 @@ -682,8 +726,12 @@ block0: ; stp x23, x24, [sp, #-0x10]! ; stp x21, x22, [sp, #-0x10]! ; stp x19, x20, [sp, #-0x10]! +; stp d14, d15, [sp, #-0x10]! +; stp d12, d13, [sp, #-0x10]! +; stp d10, d11, [sp, #-0x10]! +; stp d8, d9, [sp, #-0x10]! ; sub sp, sp, #0x190 -; block1: ; offset 0x20 +; block1: ; offset 0x30 ; mov x2, #0xa ; mov x3, #0xf ; mov x4, #0x14 @@ -731,8 +779,8 @@ block0: ; stur x20, [sp, #0x90] ; stur x22, [sp, #0x98] ; add x8, sp, #0xa0 -; ldr x10, #0xe4 -; b #0xec +; ldr x10, #0xf4 +; b #0xfc ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x10 @@ -769,6 +817,10 @@ block0: ; str x9, [sp, #0x188] ; ldr x2, [sp, #0x188] ; add sp, sp, #0x190 +; ldp d8, d9, [sp], #0x10 +; ldp d10, d11, [sp], #0x10 +; ldp d12, d13, [sp], #0x10 +; ldp d14, d15, [sp], #0x10 ; ldp x19, x20, [sp], #0x10 ; ldp x21, x22, [sp], #0x10 ; ldp x23, x24, [sp], #0x10 diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 943300b2f3df..83eedf17b38e 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, 
location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; xone x0 ; pop_frame ; ret @@ -75,7 +75,7 @@ block0: ; xone x4 ; xconst8 x5, 2 ; xconst8 x6, 3 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, 
callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -149,7 +149,7 @@ block0: ; xmov x11, x14 ; xmov x12, x14 ; xmov x13, x14 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, 
callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame_restore 64, {} ; ret ; @@ -217,7 +217,7 @@ block0: ; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: 
PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: true } ; xadd64 x26, x0, x1 ; xadd64 x28, x2, x3 ; xadd64 x2, x4, x5 @@ -292,7 +292,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: 
Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley32/extend.clif b/cranelift/filetests/filetests/isa/pulley32/extend.clif index d82485e18853..b2f32b9be89c 100644 --- a/cranelift/filetests/filetests/isa/pulley32/extend.clif +++ b/cranelift/filetests/filetests/isa/pulley32/extend.clif @@ -12,7 +12,7 @@ block0(v0: i8): ; push_frame ; block0: ; zext8 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -34,7 +34,7 @@ block0(v0: i16): ; push_frame ; block0: ; zext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -55,7 +55,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: 
PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -75,7 +75,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -96,7 +96,7 @@ block0(v0: i8): ; push_frame ; block0: ; sext8 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -118,7 +118,7 @@ block0(v0: i16): ; push_frame ; block0: ; sext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false 
} ; pop_frame ; ret ; @@ -139,7 +139,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -159,7 +159,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index fb2c0e4c4cc7..0c92244e1b39 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, 
callee_pop_size: 0, has_non_abi_defs: false } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; xone x0 ; pop_frame ; ret @@ -75,7 +75,7 @@ block0: ; xone x4 ; xconst8 x5, 2 ; xconst8 x6, 3 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: 
Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -149,7 +149,7 @@ block0: ; xmov x11, x14 ; xmov x12, x14 ; xmov x13, x14 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame_restore 64, {} ; ret ; @@ -217,7 +217,7 @@ block0: ; push_frame_save 112, {x16, 
x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, 
CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: true } ; xadd64 x26, x0, x1 ; xadd64 x28, x2, x3 ; xadd64 x2, x4, x5 @@ -292,7 +292,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; 
indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -342,7 +342,7 @@ block0: ; xmov x11, x14 ; xmov x12, x14 ; xmov x13, x14 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame_restore 80, {} ; ret ; @@ -388,7 +388,7 @@ block0(v0: i32): ; xstore64 sp+1000008, x20 // flags = notrap aligned ; block0: ; xmov x20, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: 
[CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; xmov x5, x20 ; xadd32 x0, x5, x0 ; x20 = xload64 sp+1000008 // flags = notrap aligned diff --git a/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif b/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif index b81ab16c44c7..baa4454ee6c6 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif @@ -11,7 +11,7 @@ block0: ; VCode: ; push_frame ; block0: -; indirect_call_host CallInfo { dest: User(userextname0), uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: SystemV, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call_host CallInfo { dest: User(userextname0), uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: SystemV, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/extend.clif b/cranelift/filetests/filetests/isa/pulley64/extend.clif index 0efbfb6a9a5a..0c6632e0d5c8 100644 --- a/cranelift/filetests/filetests/isa/pulley64/extend.clif +++ b/cranelift/filetests/filetests/isa/pulley64/extend.clif @@ -12,7 +12,7 @@ block0(v0: i8): ; push_frame ; block0: ; zext8 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo 
{ dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -34,7 +34,7 @@ block0(v0: i16): ; push_frame ; block0: ; zext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -56,7 +56,7 @@ block0(v0: i32): ; push_frame ; block0: ; zext32 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -77,7 +77,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -98,7 +98,7 @@ block0(v0: i8): ; push_frame ; block0: ; sext8 x2, x0 -; call CallInfo { dest: 
PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -120,7 +120,7 @@ block0(v0: i16): ; push_frame ; block0: ; sext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -142,7 +142,7 @@ block0(v0: i32): ; push_frame ; block0: ; sext32 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; @@ -163,7 +163,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 
4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } ; pop_frame ; ret ; From 4e678ffdae303c6a3539e57e42345e845dcceb2d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 1 Apr 2025 15:37:59 -0700 Subject: [PATCH 3/5] Review feedback: add assert. --- cranelift/codegen/src/machinst/abi.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index d5a263070820..39447eacbf4b 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -2540,6 +2540,7 @@ impl CallInfo { )); } } else { + assert_ne!(*vreg, temp); emit(M::gen_load_stack(*amode, *vreg, *ty)); } } From ee5ccedf40d217fde9dad08f2d15161aee77ad69 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 2 Apr 2025 09:44:10 -0700 Subject: [PATCH 4/5] Review feedback: handle retval temp reg via ABI trait method. --- cranelift/codegen/src/isa/aarch64/abi.rs | 6 ++ .../codegen/src/isa/aarch64/inst/emit.rs | 6 -- .../codegen/src/isa/pulley_shared/abi.rs | 6 ++ .../src/isa/pulley_shared/inst/emit.rs | 6 -- cranelift/codegen/src/isa/riscv64/abi.rs | 6 ++ .../codegen/src/isa/riscv64/inst/emit.rs | 6 -- cranelift/codegen/src/isa/s390x/abi.rs | 4 + cranelift/codegen/src/isa/x64/abi.rs | 7 ++ cranelift/codegen/src/isa/x64/inst/emit.rs | 8 -- cranelift/codegen/src/machinst/abi.rs | 81 +++++++++++-------- 10 files changed, 78 insertions(+), 58 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index becc7d90dd50..6693485bd26d 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1195,6 +1195,12 @@ impl ABIMachineSpec for AArch64MachineDeps { clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x9 as a temp if needed: clobbered, not a + // retval. 
+ regs::writable_xreg(9) + } } impl AArch64MachineDeps { diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index cd4cdb95f343..0d6e11e32f80 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2954,9 +2954,6 @@ impl MachInstEmit for Inst { // Load any stack-carried return values. info.emit_retval_loads::( - // Use x9 as a temp if needed: clobbered, not a - // retval. - regs::writable_xreg(9), state.frame_layout().stackslots_size, |inst| inst.emit(sink, emit_info, state), |needed_space| Some(Inst::EmitIsland { needed_space }), @@ -2987,9 +2984,6 @@ impl MachInstEmit for Inst { // Load any stack-carried return values. info.emit_retval_loads::( - // Use x9 as a temp if needed: clobbered, not a - // retval. - regs::writable_xreg(9), state.frame_layout().stackslots_size, |inst| inst.emit(sink, emit_info, state), |needed_space| Some(Inst::EmitIsland { needed_space }), diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index b25988e36f56..dcf28b62ab53 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -594,6 +594,12 @@ where // Pulley doesn't need inline probestacks because it always checks stack // decrements. } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x15 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(15)) + } } /// Different styles of management of fp/lr and clobbered registers. diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 5788141b1a6d..1a5095e6fe21 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -191,9 +191,6 @@ fn pulley_emit

( // Load any stack-carried return values. info.emit_retval_loads::, _, _>( - // Use x15 as a temp if needed: clobbered, not a - // retval. - Writable::from_reg(regs::x_reg(15)), state.frame_layout().stackslots_size, |inst| inst.emit(sink, emit_info, state), |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), @@ -221,9 +218,6 @@ fn pulley_emit

( // Load any stack-carried return values. info.emit_retval_loads::, _, _>( - // Use x15 as a temp if needed: clobbered, not a - // retval. - Writable::from_reg(regs::x_reg(15)), state.frame_layout().stackslots_size, |inst| inst.emit(sink, emit_info, state), |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 5056e75b66fc..4f5b1fbf6818 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -722,6 +722,12 @@ impl ABIMachineSpec for Riscv64MachineDeps { }); } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x12 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(12)) + } } impl Riscv64ABICallSite { diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 17b4c2c165ed..36e72db5c2e8 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -1137,9 +1137,6 @@ impl Inst { // Load any stack-carried return values. info.emit_retval_loads::( - // Use x12 as a temp if needed: clobbered, not a - // retval. - Writable::from_reg(regs::x_reg(12)), state.frame_layout().stackslots_size, |inst| inst.emit(sink, emit_info, state), |needed_space| Some(Inst::EmitIsland { needed_space }), @@ -1171,9 +1168,6 @@ impl Inst { // Load any stack-carried return values. info.emit_retval_loads::( - // Use x12 as a temp if needed: clobbered, not a - // retval. 
- Writable::from_reg(regs::x_reg(12)), state.frame_layout().stackslots_size, |inst| inst.emit(sink, emit_info, state), |needed_space| Some(Inst::EmitIsland { needed_space }), diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 62818e6fdfb8..50acf177d004 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -991,6 +991,10 @@ impl ABIMachineSpec for S390xMachineDeps { clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + panic!("Should not be called"); + } } impl S390xMachineDeps { diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 632b86ca1fb2..d0e80437400b 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -975,6 +975,13 @@ impl ABIMachineSpec for X64ABIMachineSpec { clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use r11 as a temp: clobbered anyway, and + // not otherwise used as a return value in any of our + // supported calling conventions. + Writable::from_reg(regs::r11()) + } } impl X64CallSite { diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index f105f793f1e7..01f8e281e065 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1627,10 +1627,6 @@ pub(crate) fn emit( // Load any stack-carried return values. call_info.emit_retval_loads::( - // Use r11 as a temp if needed: clobbered anyway, and - // not otherwise used as a return value in any of our - // supported calling conventions. - Writable::from_reg(regs::r11()), state.frame_layout().stackslots_size, |inst| inst.emit(sink, info, state), |_space_needed| None, @@ -1720,10 +1716,6 @@ pub(crate) fn emit( // Load any stack-carried return values. 
call_info.emit_retval_loads::( - // Use r11 as a temp if needed: clobbered anyway, and - // not otherwise used as a return value in any of our - // supported calling conventions. - Writable::from_reg(regs::r11()), state.frame_layout().stackslots_size, |inst| inst.emit(sink, info, state), |_space_needed| None, diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 39447eacbf4b..189168091938 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -593,6 +593,12 @@ pub trait ABIMachineSpec { call_conv: isa::CallConv, specified: ir::ArgumentExtension, ) -> ir::ArgumentExtension; + + /// Get a temporary register that is available to use after a call + /// completes and that does not interfere with register-carried + /// return values. This is used to move stack-carried return + /// values directly into spillslots if needed. + fn retval_temp_reg(call_conv_of_callee: isa::CallConv) -> Writable; } /// Out-of-line data for calls, to keep the size of `Inst` down. @@ -2487,7 +2493,6 @@ impl CallInfo { IslandFn: Fn(u32) -> Option, >( &self, - temp: Writable, stackslots_size: u32, mut emit: EmitFn, emit_island: IslandFn, @@ -2509,39 +2514,51 @@ impl CallInfo { } } + let temp = M::retval_temp_reg(self.callee_conv); + // The temporary must be noted as clobbered. + debug_assert!(M::get_regs_clobbered_by_call(self.callee_conv) + .contains(PReg::from(temp.to_reg().to_real_reg().unwrap()))); + for CallRetPair { vreg, location } in &self.defs { - if let RetLocation::Stack(amode, ty) = location { - if let Some(spillslot) = vreg.to_reg().to_spillslot() { - // `temp` is an integer register of machine word - // width, but `ty` may be floating-point/vector, - // which (i) may not be loadable directly into an - // int reg, and (ii) may be wider than a machine - // word. For simplicity, and because there are not - // always easy choices for volatile float/vec regs - // (see e.g. 
x86-64, where fastcall clobbers only - // xmm0-xmm5, but tail uses xmm0-xmm7 for - // returns), we use the integer temp register in - // steps. - let parts = (ty.bytes() + M::word_bytes() - 1) / M::word_bytes(); - for part in 0..parts { - emit(M::gen_load_stack( - amode.offset_by(part * M::word_bytes()), - temp, - M::word_type(), - )); - emit(M::gen_store_stack( - StackAMode::Slot( - i64::from(stackslots_size) - + i64::from(M::word_bytes()) - * ((spillslot.index() as i64) + (part as i64)), - ), - temp.to_reg(), - M::word_type(), - )); + match location { + RetLocation::Reg(preg) => { + // The temporary must not also be an actual return + // value register. + debug_assert!(*preg != temp.to_reg()); + } + RetLocation::Stack(amode, ty) => { + if let Some(spillslot) = vreg.to_reg().to_spillslot() { + // `temp` is an integer register of machine word + // width, but `ty` may be floating-point/vector, + // which (i) may not be loadable directly into an + // int reg, and (ii) may be wider than a machine + // word. For simplicity, and because there are not + // always easy choices for volatile float/vec regs + // (see e.g. x86-64, where fastcall clobbers only + // xmm0-xmm5, but tail uses xmm0-xmm7 for + // returns), we use the integer temp register in + // steps. 
+ let parts = (ty.bytes() + M::word_bytes() - 1) / M::word_bytes(); + for part in 0..parts { + emit(M::gen_load_stack( + amode.offset_by(part * M::word_bytes()), + temp, + M::word_type(), + )); + emit(M::gen_store_stack( + StackAMode::Slot( + i64::from(stackslots_size) + + i64::from(M::word_bytes()) + * ((spillslot.index() as i64) + (part as i64)), + ), + temp.to_reg(), + M::word_type(), + )); + } + } else { + assert_ne!(*vreg, temp); + emit(M::gen_load_stack(*amode, *vreg, *ty)); } - } else { - assert_ne!(*vreg, temp); - emit(M::gen_load_stack(*amode, *vreg, *ty)); } } } From f0d403abe45983a64fb2a38a86199b7f8c537a27 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 4 Apr 2025 16:43:28 -0700 Subject: [PATCH 5/5] Update is_clobbered_in_inst to affect only clobbers, not all defs. --- cranelift/codegen/src/isa/aarch64/abi.rs | 1 - cranelift/codegen/src/isa/aarch64/inst/mod.rs | 2 - cranelift/codegen/src/isa/riscv64/abi.rs | 1 - cranelift/codegen/src/isa/s390x/inst/mod.rs | 4 +- cranelift/codegen/src/isa/s390x/lower/isle.rs | 1 - cranelift/codegen/src/isa/x64/abi.rs | 1 - cranelift/codegen/src/machinst/abi.rs | 14 ------ cranelift/codegen/src/machinst/mod.rs | 3 +- cranelift/codegen/src/machinst/vcode.rs | 34 +++++++++----- .../filetests/isa/aarch64/tail-call-conv.clif | 44 +++---------------- .../filetests/isa/pulley32/call.clif | 14 +++--- .../filetests/isa/pulley32/extend.clif | 16 +++---- .../filetests/isa/pulley64/call.clif | 18 ++++---- .../isa/pulley64/call_indirect_host.clif | 2 +- .../filetests/isa/pulley64/extend.clif | 16 +++---- .../filetests/filetests/isa/s390x/call.clif | 5 +++ 16 files changed, 69 insertions(+), 107 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 6693485bd26d..1c5eb7b96b70 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1094,7 +1094,6 @@ impl ABIMachineSpec for AArch64MachineDeps { caller_conv: call_conv, 
callee_conv: call_conv, callee_pop_size: 0, - has_non_abi_defs: false, }), }); insts diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 25a16fbbc716..2dae98b929f3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -979,8 +979,6 @@ impl MachInst for Inst { fn is_included_in_clobbers(&self) -> bool { let (caller, callee) = match self { Inst::Args { .. } => return false, - Inst::Call { info } if info.has_non_abi_defs => return true, - Inst::CallInd { info } if info.has_non_abi_defs => return true, Inst::Call { info } => (info.caller_conv, info.callee_conv), Inst::CallInd { info } => (info.caller_conv, info.callee_conv), _ => return true, diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 4f5b1fbf6818..5364b78dd0d2 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -614,7 +614,6 @@ impl ABIMachineSpec for Riscv64MachineDeps { caller_conv: call_conv, callee_conv: call_conv, callee_pop_size: 0, - has_non_abi_defs: false, }), }); insts diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 64a0d8af4b40..09245bf056a9 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -1069,9 +1069,7 @@ impl MachInst for Inst { // registers. match self { &Inst::Args { .. } => false, - &Inst::Call { ref info, .. } => { - info.caller_conv != info.callee_conv || info.has_non_abi_defs - } + &Inst::Call { ref info, .. } => info.caller_conv != info.callee_conv, &Inst::CallInd { ref info, .. } => info.caller_conv != info.callee_conv, &Inst::ElfTlsGetOffset { .. 
} => false, _ => true, diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index 4589afcc4d89..c3e241f859fa 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -999,7 +999,6 @@ impl IsleContext<'_, '_, MInst, S390xBackend> { callee_pop_size, caller_conv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), callee_conv: self.lower_ctx.sigs()[abi].call_conv(), - has_non_abi_defs: false, } } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index d0e80437400b..55a6821aa137 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -877,7 +877,6 @@ impl ABIMachineSpec for X64ABIMachineSpec { defs: smallvec![], clobbers: Self::get_regs_clobbered_by_call(call_conv), callee_pop_size, - has_non_abi_defs: false, callee_conv: call_conv, caller_conv: call_conv, }))); diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 189168091938..d661847bfb21 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -620,10 +620,6 @@ pub struct CallInfo { /// caller, if any. (Used for popping stack arguments with the `tail` /// calling convention.) pub callee_pop_size: u32, - /// Do the defs have any definitions outside of the ABI-implied - /// clobbers? If so, this instruction needs to be considered when - /// computing the function body's clobbered registers. 
- pub has_non_abi_defs: bool, } impl CallInfo { @@ -638,7 +634,6 @@ impl CallInfo { caller_conv: call_conv, callee_conv: call_conv, callee_pop_size: 0, - has_non_abi_defs: false, } } @@ -652,7 +647,6 @@ impl CallInfo { caller_conv: self.caller_conv, callee_conv: self.callee_conv, callee_pop_size: self.callee_pop_size, - has_non_abi_defs: self.has_non_abi_defs, } } } @@ -2036,9 +2030,6 @@ pub struct CallSite { caller_conv: isa::CallConv, /// The settings controlling this compilation. flags: settings::Flags, - /// Has any defs that are not constrained to ABI-specified - /// registers. - has_non_abi_defs: bool, _mach: PhantomData, } @@ -2072,7 +2063,6 @@ impl CallSite { is_tail_call, caller_conv, flags, - has_non_abi_defs: false, _mach: PhantomData, } } @@ -2096,7 +2086,6 @@ impl CallSite { is_tail_call: IsTailCall::No, caller_conv, flags, - has_non_abi_defs: false, _mach: PhantomData, } } @@ -2120,7 +2109,6 @@ impl CallSite { is_tail_call, caller_conv, flags, - has_non_abi_defs: false, _mach: PhantomData, } } @@ -2373,7 +2361,6 @@ impl CallSite { vreg: into_reg, location: RetLocation::Stack(amode, ty), }); - self.has_non_abi_defs = true; into_regs.push(into_reg.to_reg()); } } @@ -2474,7 +2461,6 @@ impl CallSite { callee_conv: call_conv, caller_conv: self.caller_conv, callee_pop_size, - has_non_abi_defs: self.has_non_abi_defs, }, ) .into_iter() diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 8ffc68b4e81f..fce309471ada 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -112,7 +112,8 @@ pub trait MachInst: Clone + Debug { /// Is this an "args" pseudoinst? fn is_args(&self) -> bool; - /// Should this instruction be included in the clobber-set? + /// Should this instruction's clobber-list be included in the + /// clobber-set? fn is_included_in_clobbers(&self) -> bool; /// Does this instruction access memory? 
diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index cee1d0009e1d..dd6f48668b75 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -672,16 +672,6 @@ impl VCode { } for (i, range) in self.operand_ranges.iter() { - // Skip this instruction if not "included in clobbers" as - // per the MachInst. (Some backends use this to implement - // ABI specifics; e.g., excluding calls of the same ABI as - // the current function from clobbers, because by - // definition everything clobbered by the call can be - // clobbered by this function without saving as well.) - if !self.insts[i].is_included_in_clobbers() { - continue; - } - let operands = &self.operands[range.clone()]; let allocs = &regalloc.allocs[range]; for (operand, alloc) in operands.iter().zip(allocs.iter()) { @@ -693,8 +683,28 @@ impl VCode { } // Also add explicitly-clobbered registers. - if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { - clobbered.union_from(inst_clobbered); + // + // Skip merging this instruction's clobber list if not + // "included in clobbers" as per the MachInst. (Some + // backends use this to implement ABI specifics; e.g., + // excluding calls of the same ABI as the current function + // from clobbers, because by definition everything + // clobbered by the call can be clobbered by this function + // without saving as well.) + // + // This is important for a particular optimization: when + // some registers are "half-clobbered", e.g. vector/float + // registers on aarch64, we want them to be seen as + // clobbered by regalloc so it avoids carrying values + // across calls in these registers but not seen as + // clobbered by prologue generation here (because the + // actual half-clobber implied by the clobber list fits + // within the clobbers that we allow without + // clobber-saves). 
+ if self.insts[i].is_included_in_clobbers() { + if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { + clobbered.union_from(inst_clobbered); + } } } diff --git a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif index ece17e8afdbc..9ba81c4ec643 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif @@ -385,10 +385,6 @@ block0: ; stp x23, x24, [sp, #-16]! ; stp x21, x22, [sp, #-16]! ; stp x19, x20, [sp, #-16]! -; stp d14, d15, [sp, #-16]! -; stp d12, d13, [sp, #-16]! -; stp d10, d11, [sp, #-16]! -; stp d8, d9, [sp, #-16]! ; sub sp, sp, #240 ; block0: ; mov x8, sp @@ -396,10 +392,6 @@ block0: ; blr x12 ; ldr x2, [sp, #232] ; add sp, sp, #240 -; ldp d8, d9, [sp], #16 -; ldp d10, d11, [sp], #16 -; ldp d12, d13, [sp], #16 -; ldp d14, d15, [sp], #16 ; ldp x19, x20, [sp], #16 ; ldp x21, x22, [sp], #16 ; ldp x23, x24, [sp], #16 @@ -417,15 +409,11 @@ block0: ; stp x23, x24, [sp, #-0x10]! ; stp x21, x22, [sp, #-0x10]! ; stp x19, x20, [sp, #-0x10]! -; stp d14, d15, [sp, #-0x10]! -; stp d12, d13, [sp, #-0x10]! -; stp d10, d11, [sp, #-0x10]! -; stp d8, d9, [sp, #-0x10]! ; sub sp, sp, #0xf0 -; block1: ; offset 0x30 +; block1: ; offset 0x20 ; mov x8, sp -; ldr x12, #0x3c -; b #0x44 +; ldr x12, #0x2c +; b #0x34 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x12 @@ -461,10 +449,6 @@ block0: ; stur x9, [sp, #0xe8] ; ldur x2, [sp, #0xe8] ; add sp, sp, #0xf0 -; ldp d8, d9, [sp], #0x10 -; ldp d10, d11, [sp], #0x10 -; ldp d12, d13, [sp], #0x10 -; ldp d14, d15, [sp], #0x10 ; ldp x19, x20, [sp], #0x10 ; ldp x21, x22, [sp], #0x10 ; ldp x23, x24, [sp], #0x10 @@ -648,10 +632,6 @@ block0: ; stp x23, x24, [sp, #-16]! ; stp x21, x22, [sp, #-16]! ; stp x19, x20, [sp, #-16]! -; stp d14, d15, [sp, #-16]! -; stp d12, d13, [sp, #-16]! 
-; stp d10, d11, [sp, #-16]! -; stp d8, d9, [sp, #-16]! ; sub sp, sp, #400 ; block0: ; movz x2, #10 @@ -705,10 +685,6 @@ block0: ; blr x10 ; ldr x2, [sp, #392] ; add sp, sp, #400 -; ldp d8, d9, [sp], #16 -; ldp d10, d11, [sp], #16 -; ldp d12, d13, [sp], #16 -; ldp d14, d15, [sp], #16 ; ldp x19, x20, [sp], #16 ; ldp x21, x22, [sp], #16 ; ldp x23, x24, [sp], #16 @@ -726,12 +702,8 @@ block0: ; stp x23, x24, [sp, #-0x10]! ; stp x21, x22, [sp, #-0x10]! ; stp x19, x20, [sp, #-0x10]! -; stp d14, d15, [sp, #-0x10]! -; stp d12, d13, [sp, #-0x10]! -; stp d10, d11, [sp, #-0x10]! -; stp d8, d9, [sp, #-0x10]! ; sub sp, sp, #0x190 -; block1: ; offset 0x30 +; block1: ; offset 0x20 ; mov x2, #0xa ; mov x3, #0xf ; mov x4, #0x14 @@ -779,8 +751,8 @@ block0: ; stur x20, [sp, #0x90] ; stur x22, [sp, #0x98] ; add x8, sp, #0xa0 -; ldr x10, #0xf4 -; b #0xfc +; ldr x10, #0xe4 +; b #0xec ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x10 @@ -817,10 +789,6 @@ block0: ; str x9, [sp, #0x188] ; ldr x2, [sp, #0x188] ; add sp, sp, #0x190 -; ldp d8, d9, [sp], #0x10 -; ldp d10, d11, [sp], #0x10 -; ldp d12, d13, [sp], #0x10 -; ldp d14, d15, [sp], #0x10 ; ldp x19, x20, [sp], #0x10 ; ldp x21, x22, [sp], #0x10 ; ldp x23, x24, [sp], #0x10 diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 83eedf17b38e..943300b2f3df 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), 
args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -75,7 +75,7 @@ block0: ; xone x4 ; xconst8 x5, 2 ; xconst8 x6, 3 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] 
}, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -149,7 +149,7 @@ block0: ; xmov x11, x14 ; xmov x12, x14 ; xmov x13, x14 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 
4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame_restore 64, {} ; ret ; @@ -217,7 +217,7 @@ block0: ; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, 
has_non_abi_defs: true } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x26, x0, x1 ; xadd64 x28, x2, x3 ; xadd64 x2, x4, x5 @@ -292,7 +292,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: 
Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley32/extend.clif b/cranelift/filetests/filetests/isa/pulley32/extend.clif index b2f32b9be89c..d82485e18853 100644 --- a/cranelift/filetests/filetests/isa/pulley32/extend.clif +++ b/cranelift/filetests/filetests/isa/pulley32/extend.clif @@ -12,7 +12,7 @@ block0(v0: i8): ; push_frame ; block0: ; zext8 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -34,7 +34,7 @@ block0(v0: i16): ; push_frame ; block0: ; zext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -55,7 +55,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, 
uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -75,7 +75,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -96,7 +96,7 @@ block0(v0: i8): ; push_frame ; block0: ; sext8 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -118,7 +118,7 @@ block0(v0: i16): ; push_frame ; block0: ; sext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, 
caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -139,7 +139,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -159,7 +159,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index 0c92244e1b39..fb2c0e4c4cc7 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 
65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -75,7 +75,7 @@ block0: ; xone x4 ; xconst8 x5, 2 ; xconst8 x6, 3 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: 
TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -149,7 +149,7 @@ block0: ; xmov x11, x14 ; xmov x12, x14 ; xmov x13, x14 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame_restore 64, {} ; ret ; @@ -217,7 +217,7 @@ 
block0: ; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: true } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair 
{ vreg: Writable { reg: p0i }, location: Reg(p0i) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x26, x0, x1 ; xadd64 x28, x2, x3 ; xadd64 x2, x4, x5 @@ -292,7 +292,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: 
Fast, callee_pop_size: 0, has_non_abi_defs: false } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -342,7 +342,7 @@ block0: ; xmov x11, x14 ; xmov x12, x14 ; xmov x13, x14 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame_restore 80, {} ; ret ; @@ -388,7 +388,7 @@ block0(v0: i32): ; xstore64 sp+1000008, x20 // flags = notrap aligned ; block0: ; xmov x20, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] 
}, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x5, x20 ; xadd32 x0, x5, x0 ; x20 = xload64 sp+1000008 // flags = notrap aligned diff --git a/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif b/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif index baa4454ee6c6..b81ab16c44c7 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif @@ -11,7 +11,7 @@ block0: ; VCode: ; push_frame ; block0: -; indirect_call_host CallInfo { dest: User(userextname0), uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: SystemV, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; indirect_call_host CallInfo { dest: User(userextname0), uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: SystemV, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/extend.clif b/cranelift/filetests/filetests/isa/pulley64/extend.clif index 0c6632e0d5c8..0efbfb6a9a5a 100644 --- a/cranelift/filetests/filetests/isa/pulley64/extend.clif +++ b/cranelift/filetests/filetests/isa/pulley64/extend.clif @@ -12,7 +12,7 @@ block0(v0: i8): ; push_frame ; block0: ; zext8 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 
0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -34,7 +34,7 @@ block0(v0: i16): ; push_frame ; block0: ; zext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -56,7 +56,7 @@ block0(v0: i32): ; push_frame ; block0: ; zext32 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -77,7 +77,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -98,7 +98,7 @@ block0(v0: i8): ; push_frame ; block0: ; sext8 x2, x0 -; call 
CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -120,7 +120,7 @@ block0(v0: i16): ; push_frame ; block0: ; sext16 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -142,7 +142,7 @@ block0(v0: i32): ; push_frame ; block0: ; sext32 x2, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -163,7 +163,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0, has_non_abi_defs: false } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], 
clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif index 13d6ae00f19a..92270306919e 100644 --- a/cranelift/filetests/filetests/isa/s390x/call.clif +++ b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -314,6 +314,7 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: } ; VCode: +; stmg %r6, %r15, 48(%r15) ; block0: ; vl %v1, 0(%r3) ; vl %v3, 0(%r4) @@ -335,10 +336,13 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: ; vaq %v5, %v6, %v7 ; vaq %v4, %v4, %v5 ; vst %v4, 0(%r2) +; lmg %r6, %r15, 48(%r15) ; br %r14 ; ; Disassembled: ; block0: ; offset 0x0 +; stmg %r6, %r15, 0x30(%r15) +; block1: ; offset 0x6 ; vl %v1, 0(%r3) ; vl %v3, 0(%r4) ; vl %v5, 0(%r5) @@ -359,6 +363,7 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: ; vaq %v5, %v6, %v7 ; vaq %v4, %v4, %v5 ; vst %v4, 0(%r2) +; lmg %r6, %r15, 0x30(%r15) ; br %r14 function %call_sret() -> i64 {