Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix floating point calling convention for DynamicFunc trampolines. #1271

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 81 additions & 1 deletion lib/runtime-core/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,10 +480,11 @@ impl InstanceImage {
}
}

/// Declarations for x86-64 registers.
/// X64-specific structures and methods that do not depend on an x64 machine to run.
#[cfg(unix)]
pub mod x64_decl {
use super::*;
use crate::types::Type;

/// General-purpose registers.
#[repr(u8)]
Expand Down Expand Up @@ -610,9 +611,88 @@ pub mod x64_decl {
_ => return None,
})
}

/// Returns the instruction prefix for `movq %this_reg, ?(%rsp)`.
///
/// To build an instruction, append the memory location as a 32-bit
/// offset to the stack pointer to this prefix.
///
/// Only `RDI`, `RSI`, `RDX`, `RCX`, `R8`, `R9` and `XMM0`..=`XMM7` have a
/// prefix here; every other register yields `None`.
pub fn prefix_mov_to_stack(&self) -> Option<&'static [u8]> {
    let prefix: &'static [u8] = match *self {
        // General-purpose registers: 4-byte prefixes.
        X64Register::GPR(GPR::RDI) => &[0x48, 0x89, 0xbc, 0x24],
        X64Register::GPR(GPR::RSI) => &[0x48, 0x89, 0xb4, 0x24],
        X64Register::GPR(GPR::RDX) => &[0x48, 0x89, 0x94, 0x24],
        X64Register::GPR(GPR::RCX) => &[0x48, 0x89, 0x8c, 0x24],
        X64Register::GPR(GPR::R8) => &[0x4c, 0x89, 0x84, 0x24],
        X64Register::GPR(GPR::R9) => &[0x4c, 0x89, 0x8c, 0x24],

        // XMM registers: 5-byte prefixes.
        X64Register::XMM(XMM::XMM0) => &[0x66, 0x0f, 0xd6, 0x84, 0x24],
        X64Register::XMM(XMM::XMM1) => &[0x66, 0x0f, 0xd6, 0x8c, 0x24],
        X64Register::XMM(XMM::XMM2) => &[0x66, 0x0f, 0xd6, 0x94, 0x24],
        X64Register::XMM(XMM::XMM3) => &[0x66, 0x0f, 0xd6, 0x9c, 0x24],
        X64Register::XMM(XMM::XMM4) => &[0x66, 0x0f, 0xd6, 0xa4, 0x24],
        X64Register::XMM(XMM::XMM5) => &[0x66, 0x0f, 0xd6, 0xac, 0x24],
        X64Register::XMM(XMM::XMM6) => &[0x66, 0x0f, 0xd6, 0xb4, 0x24],
        X64Register::XMM(XMM::XMM7) => &[0x66, 0x0f, 0xd6, 0xbc, 0x24],

        // Any register without an entry above has no prefix.
        X64Register::GPR(_) | X64Register::XMM(_) => return None,
    };
    Some(prefix)
}
}

/// Hands out registers for function arguments according to the System V ABI.
///
/// Integer and floating-point arguments are tracked with separate counters,
/// so exhausting one register class does not affect the other.
#[derive(Default)]
pub struct ArgumentRegisterAllocator {
    /// How many general-purpose argument registers have been handed out.
    n_gprs: usize,
    /// How many XMM argument registers have been handed out.
    n_xmms: usize,
}

impl ArgumentRegisterAllocator {
    /// Allocates a register for an argument of type `ty`.
    ///
    /// `I32`/`I64` arguments take the next register from
    /// `RDI, RSI, RDX, RCX, R8, R9`; `F32`/`F64` arguments take the next one
    /// from `XMM0`..=`XMM7`. Returns `None` if no register is available for
    /// this type; the matching counter is left unchanged in that case.
    ///
    /// # Panics
    ///
    /// Panics (via `todo!`) for any other type.
    pub fn next(&mut self, ty: Type) -> Option<X64Register> {
        const INT_ARG_REGS: [GPR; 6] =
            [GPR::RDI, GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
        const FLOAT_ARG_REGS: [XMM; 8] = [
            XMM::XMM0,
            XMM::XMM1,
            XMM::XMM2,
            XMM::XMM3,
            XMM::XMM4,
            XMM::XMM5,
            XMM::XMM6,
            XMM::XMM7,
        ];

        match ty {
            Type::I32 | Type::I64 => {
                // `?` bails out before the counter is bumped once the class is exhausted.
                let reg = INT_ARG_REGS.get(self.n_gprs).copied()?;
                self.n_gprs += 1;
                Some(X64Register::GPR(reg))
            }
            Type::F32 | Type::F64 => {
                let reg = FLOAT_ARG_REGS.get(self.n_xmms).copied()?;
                self.n_xmms += 1;
                Some(X64Register::XMM(reg))
            }
            _ => todo!(
                "ArgumentRegisterAllocator::next: Unsupported type: {:?}",
                ty
            ),
        }
    }
}
}

/// X64-specific structures and methods that only work on an x64 machine.
#[cfg(unix)]
pub mod x64 {
//! The x64 state module contains functions to generate state and code for x64 targets.
Expand Down
93 changes: 71 additions & 22 deletions lib/runtime-core/src/trampoline_x64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
//! Variadic functions are not supported because `rax` is used by the trampoline code.

use crate::loader::CodeMemory;
use crate::state::x64_decl::ArgumentRegisterAllocator;
use crate::types::Type;
use crate::vm::Ctx;
use std::collections::BTreeMap;
use std::fmt;
Expand Down Expand Up @@ -246,44 +248,49 @@ impl TrampolineBufferBuilder {
&mut self,
target: unsafe extern "C" fn(*const CallContext, *const u64) -> u64,
context: *const CallContext,
num_params: u32,
params: &[Type],
) -> usize {
let idx = self.offsets.len();
self.offsets.push(self.code.len());

let mut stack_offset: u32 = num_params.checked_mul(8).unwrap();
let mut stack_offset: u32 = params.len().checked_mul(8).unwrap() as u32;
if stack_offset % 16 == 0 {
stack_offset += 8;
}

self.code.extend_from_slice(&[0x48, 0x81, 0xec]); // sub ?, %rsp
self.code.extend_from_slice(value_to_bytes(&stack_offset));
for i in 0..num_params {
match i {
0..=5 => {
// mov %?, ?(%rsp)
let prefix: &[u8] = match i {
0 => &[0x48, 0x89, 0xbc, 0x24], // rdi
1 => &[0x48, 0x89, 0xb4, 0x24], // rsi
2 => &[0x48, 0x89, 0x94, 0x24], // rdx
3 => &[0x48, 0x89, 0x8c, 0x24], // rcx
4 => &[0x4c, 0x89, 0x84, 0x24], // r8
5 => &[0x4c, 0x89, 0x8c, 0x24], // r9
_ => unreachable!(),
};

let mut allocator = ArgumentRegisterAllocator::default();

let mut source_stack_count: u32 = 0; // # of allocated slots in the source stack.

for (i, ty) in params.iter().enumerate() {
match allocator.next(*ty) {
Some(reg) => {
// This argument is allocated to a register.

let prefix = reg
.prefix_mov_to_stack()
.expect("cannot get instruction prefix for argument register");
self.code.extend_from_slice(prefix);
self.code.extend_from_slice(value_to_bytes(&(i * 8u32)));
self.code
.extend_from_slice(value_to_bytes(&((i as u32) * 8u32)));
}
_ => {
None => {
// This argument is allocated to the stack.

self.code.extend_from_slice(&[
0x48, 0x8b, 0x84, 0x24, // mov ?(%rsp), %rax
]);
self.code.extend_from_slice(value_to_bytes(
&((i - 6) * 8u32 + stack_offset + 8/* ret addr */),
&(source_stack_count * 8u32 + stack_offset + 8/* ret addr */),
));
// mov %rax, ?(%rsp)
self.code.extend_from_slice(&[0x48, 0x89, 0x84, 0x24]);
self.code.extend_from_slice(value_to_bytes(&(i * 8u32)));
self.code
.extend_from_slice(value_to_bytes(&((i as u32) * 8u32)));
source_stack_count += 1;
}
}
}
Expand Down Expand Up @@ -395,8 +402,12 @@ mod tests {
}
let mut builder = TrampolineBufferBuilder::new();
let ctx = TestContext { value: 100 };
let idx =
builder.add_callinfo_trampoline(do_add, &ctx as *const TestContext as *const _, 8);
let param_types: Vec<Type> = (0..8).map(|_| Type::I32).collect();
let idx = builder.add_callinfo_trampoline(
do_add,
&ctx as *const TestContext as *const _,
&param_types,
);
let buf = builder.build();
let t = buf.get_trampoline(idx);
let ret = unsafe {
Expand All @@ -407,6 +418,43 @@ mod tests {
assert_eq!(ret, 136);
}

#[test]
fn test_trampolines_with_floating_point() {
    // Callback behind the trampoline: the context pointer is reused as a plain
    // count, and that many 64-bit slots are read from `args` and summed.
    unsafe extern "C" fn inner(n: *const CallContext, args: *const u64) -> u64 {
        let slot_count = n as usize;
        let mut total: u64 = 0;
        for slot in 0..slot_count {
            total += *args.add(slot);
        }
        total
    }

    // Native signature the generated trampoline is called through.
    type NativeFn = extern "C" fn(i32, i32, i32, f32, i32, i32, i32, i32) -> i32;

    let buffer = TrampBuffer::new(4096);
    let mut builder = TrampolineBufferBuilder::new();

    // Eight parameters; the fourth is a float, the rest are integers.
    let param_types = [
        Type::I32,
        Type::I32,
        Type::I32,
        Type::F32,
        Type::I32,
        Type::I32,
        Type::I32,
        Type::I32,
    ];
    builder.add_callinfo_trampoline(inner, 8 as _, &param_types);

    let ptr = buffer.insert(builder.code()).unwrap();
    let ret = unsafe {
        // SAFETY (assumed): `ptr` refers to the trampoline code inserted just
        // above, and that code accepts the `NativeFn` signature.
        let f: NativeFn = std::mem::transmute(ptr);
        // The float carries the raw bit pattern 4, so its slot adds 4 to the sum.
        f(1, 2, 3, f32::from_bits(4), 5, 6, 7, 8)
    };
    assert_eq!(ret, 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8);
}

#[test]
fn test_many_global_trampolines() {
unsafe extern "C" fn inner(n: *const CallContext, args: *const u64) -> u64 {
Expand All @@ -427,7 +475,8 @@ mod tests {
for i in 0..5000usize {
let mut builder = TrampolineBufferBuilder::new();
let n = i % 8;
builder.add_callinfo_trampoline(inner, n as _, n as _);
let param_types: Vec<_> = (0..n).map(|_| Type::I32).collect();
builder.add_callinfo_trampoline(inner, n as _, &param_types);
let ptr = buffer
.insert(builder.code())
.expect("cannot insert new code into global buffer");
Expand Down
6 changes: 5 additions & 1 deletion lib/runtime-core/src/typed_func.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,10 +360,14 @@ impl<'a> DynamicFunc<'a> {
func: Box::new(func),
});
let ctx = Box::into_raw(ctx);

let mut native_param_types = vec![Type::I32]; // vm::Ctx is the first parameter.
native_param_types.extend_from_slice(signature.params());

builder.add_callinfo_trampoline(
enter_host_polymorphic,
ctx as *const _,
(signature.params().len() + 1) as u32, // +vmctx
&native_param_types,
);
let ptr = builder
.insert_global()
Expand Down
Loading