Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 106 additions & 19 deletions compiler/rustc_codegen_ssa/src/mir/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1140,19 +1140,50 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
(args, None)
};

// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
//
// Normally an indirect argument with `on_stack: false` would be passed as a pointer into
// the caller's stack frame. For tail calls, that would be unsound, because the caller's
// stack frame is overwritten by the callee's stack frame.
//
// Therefore we store the argument for the callee in the corresponding caller's slot.
// Because guaranteed tail calls demand that the caller's signature matches the callee's,
// the corresponding slot has the correct type.
//
// To handle cases like the one below, the tail call arguments must first be copied to a
// temporary, and only then copied to the caller's argument slots.
//
// ```
// // A struct big enough that it is not passed via registers.
// pub struct Big([u64; 4]);
//
// fn swapper(a: Big, b: Big) -> (Big, Big) {
// become swapper_helper(b, a);
// }
// ```
let mut tail_call_temporaries = vec![];
if kind == CallKind::Tail {
tail_call_temporaries = vec![None; first_args.len()];
// First copy the arguments of this call to temporary stack allocations.
for (i, arg) in first_args.iter().enumerate() {
if !matches!(fn_abi.args[i].mode, PassMode::Indirect { on_stack: false, .. }) {
continue;
}

let op = self.codegen_operand(bx, &arg.node);
let tmp = PlaceRef::alloca(bx, op.layout);
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
op.store_with_annotation(bx, tmp);

tail_call_temporaries[i] = Some(tmp);
}
}

// When generating arguments we sometimes introduce temporary allocations with lifetime
// that extend for the duration of a call. Keep track of those allocations and their sizes
// to generate `lifetime_end` when the call returns.
let mut lifetime_ends_after_call: Vec<(Bx::Value, Size)> = Vec::new();
'make_args: for (i, arg) in first_args.iter().enumerate() {
if kind == CallKind::Tail && matches!(fn_abi.args[i].mode, PassMode::Indirect { .. }) {
// FIXME: https://github.com/rust-lang/rust/pull/144232#discussion_r2218543841
span_bug!(
fn_span,
"arguments using PassMode::Indirect are currently not supported for tail calls"
);
}

let mut op = self.codegen_operand(bx, &arg.node);

if let (0, Some(ty::InstanceKind::Virtual(_, idx))) = (i, instance.map(|i| i.def)) {
Expand Down Expand Up @@ -1203,18 +1234,74 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
}
}

// The callee needs to own the argument memory if we pass it
// by-ref, so make a local copy of non-immediate constants.
match (&arg.node, op.val) {
(&mir::Operand::Copy(_), Ref(PlaceValue { llextra: None, .. }))
| (&mir::Operand::Constant(_), Ref(PlaceValue { llextra: None, .. })) => {
let tmp = PlaceRef::alloca(bx, op.layout);
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
op.store_with_annotation(bx, tmp);
op.val = Ref(tmp.val);
lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size));
match kind {
CallKind::Tail => {
match fn_abi.args[i].mode {
PassMode::Indirect { on_stack: false, .. } => {
let Some(tmp) = tail_call_temporaries[i].take() else {
span_bug!(
fn_span,
"missing temporary for indirect tail call argument #{i}"
)
};

let local = self.mir.args_iter().nth(i).unwrap();

match &self.locals[local] {
LocalRef::Place(arg) => {
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
op.val = Ref(arg.val);
}
LocalRef::Operand(arg) => {
let Ref(place_value) = arg.val else {
bug!("only `Ref` should use `PassMode::Indirect`");
};
bx.typed_place_copy(
place_value,
tmp.val,
fn_abi.args[i].layout,
);
op.val = arg.val;
}
LocalRef::UnsizedPlace(_) => {
span_bug!(fn_span, "unsized types are not supported")
}
LocalRef::PendingOperand => {
span_bug!(fn_span, "argument local should not be pending")
}
};

bx.lifetime_end(tmp.val.llval, tmp.layout.size);
}
PassMode::Indirect { on_stack: true, .. } => {
// FIXME: some LLVM backends (notably x86) do not correctly pass byval
// arguments to tail calls (as of LLVM 21). See also:
//
// - https://github.com/rust-lang/rust/pull/144232#discussion_r2218543841
// - https://github.com/rust-lang/rust/issues/144855
span_bug!(
fn_span,
"arguments using PassMode::Indirect {{ on_stack: true, .. }} are currently not supported for tail calls"
)
Comment on lines +1276 to +1285
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've kept the span_bug! for now.

It looks like a fix for x86 might get cherry-picked into LLVM 22. If so, I think that is enough support to allow this variant too. At that point riscv would be the next most commonly used target that would miscompile.

}
_ => (),
}
}
CallKind::Normal => {
// The callee needs to own the argument memory if we pass it
// by-ref, so make a local copy of non-immediate constants.
match (&arg.node, op.val) {
(&mir::Operand::Copy(_), Ref(PlaceValue { llextra: None, .. }))
| (&mir::Operand::Constant(_), Ref(PlaceValue { llextra: None, .. })) => {
let tmp = PlaceRef::alloca(bx, op.layout);
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
op.store_with_annotation(bx, tmp);
op.val = Ref(tmp.val);
lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size));
}
_ => {}
}
}
_ => {}
}

self.codegen_argument(
Expand Down
30 changes: 21 additions & 9 deletions compiler/rustc_mir_transform/src/deduce_param_attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,17 +121,29 @@ impl<'tcx> Visitor<'tcx> for DeduceParamAttrs {
// `f` passes. Note that function arguments are the only situation in which this problem can
// arise: every other use of `move` in MIR doesn't actually write to the value it moves
// from.
if let TerminatorKind::Call { ref args, .. } = terminator.kind {
for arg in args {
if let Operand::Move(place) = arg.node
&& !place.is_indirect_first_projection()
&& let Some(i) = self.as_param(place.local)
{
self.usage[i] |= UsageSummary::MUTATE;
self.usage[i] |= UsageSummary::CAPTURE;
match terminator.kind {
TerminatorKind::Call { ref args, .. } => {
for arg in args {
if let Operand::Move(place) = arg.node
&& !place.is_indirect_first_projection()
&& let Some(i) = self.as_param(place.local)
{
self.usage[i] |= UsageSummary::MUTATE;
self.usage[i] |= UsageSummary::CAPTURE;
}
}
}
};

// Like a call, but more conservative because the backend may introduce writes to an
// argument if the argument is passed as `PassMode::Indirect { on_stack: false, ... }`.
TerminatorKind::TailCall { .. } => {
for usage in self.usage.iter_mut() {
*usage |= UsageSummary::MUTATE;
*usage |= UsageSummary::CAPTURE;
}
}
_ => {}
}

self.super_terminator(terminator, location);
}
Expand Down
75 changes: 75 additions & 0 deletions tests/assembly-llvm/tail-call-indirect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//@ add-minicore
//@ assembly-output: emit-asm
//@ needs-llvm-components: x86
//@ compile-flags: --target=x86_64-unknown-linux-gnu
//@ compile-flags: -Copt-level=3 -C llvm-args=-x86-asm-syntax=intel

#![feature(no_core, explicit_tail_calls)]
#![expect(incomplete_features)]
#![no_core]
#![crate_type = "lib"]

// Test tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
//
// Normally an indirect argument with `on_stack: false` would be passed as a pointer to the
// caller's stack frame. For tail calls, that would be unsound, because the caller's stack
// frame is overwritten by the callee's stack frame.
//
// The solution is to write the argument into the caller's argument place (stored somewhere further
// up the stack), and forward that place.

extern crate minicore;
use minicore::*;

#[repr(C)]
struct S {
x: u64,
y: u64,
z: u64,
}

unsafe extern "C" {
safe fn force_usage(_: u64, _: u64, _: u64) -> u64;
}

// CHECK-LABEL: callee:
// CHECK-NEXT: .cfi_startproc
//
// CHECK-NEXT: mov rax, qword ptr [rdi]
// CHECK-NEXT: mov rsi, qword ptr [rdi + 8]
// CHECK-NEXT: mov rdx, qword ptr [rdi + 16]
// CHECK-NEXT: mov rdi, rax
//
// CHECK-NEXT: jmp qword ptr [rip + force_usage@GOTPCREL]
#[inline(never)]
#[unsafe(no_mangle)]
fn callee(s: S) -> u64 {
force_usage(s.x, s.y, s.z)
}

// CHECK-LABEL: caller1:
// CHECK-NEXT: .cfi_startproc
//
// Just forward the argument:
//
// CHECK-NEXT: jmp qword ptr [rip + callee@GOTPCREL]
#[unsafe(no_mangle)]
fn caller1(s: S) -> u64 {
become callee(s);
}

// CHECK-LABEL: caller2:
// CHECK-NEXT: .cfi_startproc
//
// Construct the S value directly into the argument slot:
//
// CHECK-NEXT: mov qword ptr [rdi], 1
// CHECK-NEXT: mov qword ptr [rdi + 8], 2
// CHECK-NEXT: mov qword ptr [rdi + 16], 3
//
// CHECK-NEXT: jmp qword ptr [rip + callee@GOTPCREL]
#[unsafe(no_mangle)]
fn caller2(_: S) -> u64 {
let s = S { x: 1, y: 2, z: 3 };
become callee(s);
}
38 changes: 38 additions & 0 deletions tests/codegen-llvm/tail-call-u128.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//@ add-minicore
//@ revisions: win linux
//
//@ compile-flags: -Copt-level=3
//@[linux] compile-flags: --target x86_64-unknown-linux-gnu
//@[linux] needs-llvm-components: x86
//@[win] compile-flags: --target x86_64-pc-windows-msvc
//@[win] needs-llvm-components: x86

#![crate_type = "lib"]
#![feature(no_core, lang_items, explicit_tail_calls)]
#![allow(incomplete_features)]
#![no_core]

extern crate minicore;
use minicore::*;

// linux: define noundef i128 @foo(i128 noundef %a, i128 noundef %b)
// win: define <16 x i8> @foo(ptr {{.*}} %a, ptr {{.*}} %b)
#[unsafe(no_mangle)]
#[inline(never)]
extern "C" fn foo(a: u128, b: u128) -> u128 {
// linux: start:
// linux-NEXT: musttail call noundef i128 @bar(i128 noundef %b, i128 noundef %a)
//
//
// win: start:
// win-NEXT: %0 = load i128, ptr %b
// win-NEXT: %1 = load i128, ptr %a
// win-NEXT: store i128 %0, ptr %a
// win-NEXT: store i128 %1, ptr %b
Comment on lines +28 to +31
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this swapping is a little unfortunate of course. I suspect this is a very rare pattern in practice though, so we just need a correct implementation, not necessarily the most optimal one from the get-go.

// win-NEXT: musttail call <16 x i8> @bar(ptr {{.*}} %a, ptr {{.*}} %b)
become bar(b, a)
}

unsafe extern "C" {
safe fn bar(a: u128, b: u128) -> u128;
}
42 changes: 0 additions & 42 deletions tests/crashes/144293-indirect-ops-llvm.rs

This file was deleted.

Loading
Loading