Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][interp] Add 16 byte default alignment to interp frames #80700

Merged
merged 3 commits into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
#define TRACING_FLAG 0x1
#define PROFILING_FLAG 0x2

#define MINT_VT_ALIGNMENT 8
#define MINT_STACK_SLOT_SIZE (sizeof (stackval))
// This alignment provides us with straight forward support for Vector128
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)

#define INTERP_STACK_SIZE (1024*1024)
#define INTERP_REDZONE_SIZE (8*1024)
Expand Down
39 changes: 28 additions & 11 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,11 +403,11 @@ get_context (void)
ThreadContext *context = (ThreadContext *) mono_native_tls_get_value (thread_context_id);
if (context == NULL) {
context = g_new0 (ThreadContext, 1);
context->stack_start = (guchar*)mono_valloc (0, INTERP_STACK_SIZE, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
context->stack_start = (guchar*)mono_valloc_aligned (INTERP_STACK_SIZE, MINT_STACK_ALIGNMENT, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
context->stack_end = context->stack_start + INTERP_STACK_SIZE - INTERP_REDZONE_SIZE;
context->stack_real_end = context->stack_start + INTERP_STACK_SIZE;
/* We reserve a stack slot at the top of the interp stack to make temp objects visible to GC */
context->stack_pointer = context->stack_start + MINT_STACK_SLOT_SIZE;
context->stack_pointer = context->stack_start + MINT_STACK_ALIGNMENT;

frame_data_allocator_init (&context->data_stack, 8192);
/* Make sure all data is initialized before publishing the context */
Expand Down Expand Up @@ -2230,6 +2230,7 @@ interp_entry (InterpEntryData *data)
sp_args = STACK_ADD_BYTES (sp_args, size);
}
}
sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT);

InterpFrame frame = {0};
frame.imethod = data->rmethod;
Expand Down Expand Up @@ -2604,7 +2605,7 @@ init_jit_call_info (InterpMethod *rmethod, MonoError *error)
* that could end up doing a jit call.
*/
gint32 size = mono_class_value_size (klass, NULL);
cinfo->res_size = ALIGN_TO (size, MINT_VT_ALIGNMENT);
cinfo->res_size = ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
} else {
cinfo->res_size = MINT_STACK_SLOT_SIZE;
}
Expand Down Expand Up @@ -3107,6 +3108,7 @@ interp_entry_from_trampoline (gpointer ccontext_untyped, gpointer rmethod_untype
}
newsp = STACK_ADD_BYTES (newsp, size);
}
newsp = (stackval*)ALIGN_TO (newsp, MINT_STACK_ALIGNMENT);
context->stack_pointer = (guchar*)newsp;
g_assert (context->stack_pointer < context->stack_end);

Expand Down Expand Up @@ -3969,6 +3971,15 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
ip = frame->imethod->code;
MINT_IN_BREAK;
}
MINT_IN_CASE(MINT_CALL_ALIGN_STACK) {
int call_offset = ip [1];
int aligned_call_offset = call_offset + MINT_STACK_SLOT_SIZE;
int params_stack_size = ip [2];

memmove (locals + aligned_call_offset, locals + call_offset, params_stack_size);
ip += 3;
MINT_IN_BREAK;
}
MINT_IN_CASE(MINT_CALL_DELEGATE) {
// FIXME We don't need to encode the whole signature, just param_count
MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [4]];
Expand Down Expand Up @@ -4219,6 +4230,8 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
}

context->stack_pointer = (guchar*)frame->stack + cmethod->alloca_size;
g_assert_checked (((gsize)context->stack_pointer % MINT_STACK_ALIGNMENT) == 0);

if (G_UNLIKELY (context->stack_pointer >= context->stack_end)) {
context->stack_end = context->stack_real_end;
THROW_EX (mono_domain_get ()->stack_overflow_ex, ip);
Expand Down Expand Up @@ -5596,10 +5609,12 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
return_offset = ip [1];
call_args_offset = ip [1];
int aligned_call_args_offset = ALIGN_TO (call_args_offset, MINT_STACK_ALIGNMENT);

int param_size = ip [3];
if (param_size)
memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
memmove (locals + aligned_call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
call_args_offset = aligned_call_args_offset;
LOCAL_VAR (call_args_offset, gpointer) = NULL;
ip += 4;
goto call;
Expand Down Expand Up @@ -5715,19 +5730,21 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
gboolean is_vt = ret_size != 0;
if (!is_vt)
ret_size = MINT_STACK_SLOT_SIZE;
return_offset = call_args_offset;

cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];

MonoClass *newobj_class = cmethod->method->klass;

call_args_offset = ALIGN_TO (call_args_offset + ret_size, MINT_STACK_ALIGNMENT);
// We allocate space on the stack for return value and for this pointer, that is passed to ctor
// Here we use return_offset as meaning original call_args_offset
if (param_size)
memmove (locals + call_args_offset + ret_size + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + return_offset, param_size);

if (is_vt) {
this_ptr = locals + call_args_offset;
this_ptr = locals + return_offset;
memset (this_ptr, 0, ret_size);
call_args_offset += ret_size;
} else {
// FIXME push/pop LMF
MonoVTable *vtable = mono_class_vtable_checked (newobj_class, error);
Expand All @@ -5739,11 +5756,9 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
error_init_reuse (error);
this_ptr = mono_object_new_checked (newobj_class, error);
mono_interp_error_cleanup (error); // FIXME: do not swallow the error
LOCAL_VAR (call_args_offset, gpointer) = this_ptr; // return value
call_args_offset += MINT_STACK_SLOT_SIZE;
LOCAL_VAR (return_offset, gpointer) = this_ptr; // return value
}
LOCAL_VAR (call_args_offset, gpointer) = this_ptr;
return_offset = call_args_offset; // unused, prevent warning
ip += 5;
goto call;
}
Expand Down Expand Up @@ -7344,7 +7359,8 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
int len = LOCAL_VAR (ip [2], gint32);
gpointer mem;
if (len > 0) {
mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
// We align len to 8 so we can safely load all primitive types on all platforms
mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64)));

if (frame->imethod->init_locals)
memset (mem, 0, len);
Expand Down Expand Up @@ -7944,6 +7960,7 @@ interp_run_clause_with_il_state (gpointer il_state_ptr, int clause_index, MonoOb
}
findex ++;
}
sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT);

/* Allocate frame */
InterpFrame frame = {0};
Expand Down
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/jiterpreter.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ mono_jiterp_localloc (gpointer *destination, gint32 len, InterpFrame *frame)
ThreadContext *context = mono_jiterp_get_context();
gpointer mem;
if (len > 0) {
mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64)));

if (frame->imethod->init_locals)
memset (mem, 0, len);
Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/interp/mintops.def
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,8 @@ OPDEF(MINT_STRLEN, "strlen", 3, 1, 1, MintOpNoArgs)
OPDEF(MINT_ARRAY_RANK, "array_rank", 3, 1, 1, MintOpNoArgs)
OPDEF(MINT_ARRAY_ELEMENT_SIZE, "array_element_size", 3, 1, 1, MintOpNoArgs)

OPDEF(MINT_CALL_ALIGN_STACK, "call_align_stack", 3, 1, 0, MintOpShortInt)

/* Calls */
OPDEF(MINT_CALL, "call", 4, 1, 1, MintOpMethodToken)
OPDEF(MINT_CALLVIRT_FAST, "callvirt.fast", 5, 1, 1, MintOpMethodToken)
Expand Down
49 changes: 36 additions & 13 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,11 @@ interp_last_ins (InterpBasicBlock *bb)
return ret; \
} while (0)

// We want to allow any block of stack slots to get moved in order for them to be aligned to MINT_STACK_ALIGNMENT
#define ENSURE_STACK_SIZE(td, size) \
do { \
if ((size) > td->max_stack_size) \
td->max_stack_size = size; \
if ((size) >= td->max_stack_size) \
td->max_stack_size = ALIGN_TO (size + MINT_STACK_ALIGNMENT - MINT_STACK_SLOT_SIZE, MINT_STACK_ALIGNMENT); \
} while (0)

#define ENSURE_I4(td, sp_off) \
Expand Down Expand Up @@ -3229,7 +3230,6 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
MonoMethodSignature *csignature;
int is_virtual = *td->ip == CEE_CALLVIRT;
int calli = *td->ip == CEE_CALLI || *td->ip == CEE_MONO_CALLI_EXTRA_ARG;
guint32 res_size = 0;
int op = -1;
int native = 0;
int need_null_check = is_virtual;
Expand Down Expand Up @@ -3520,19 +3520,18 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
MonoClass *klass = mono_class_from_mono_type_internal (csignature->ret);

if (mt == MINT_TYPE_VT) {
guint32 res_size;
if (csignature->pinvoke && !csignature->marshalling_disabled && method->wrapper_type != MONO_WRAPPER_NONE)
res_size = mono_class_native_size (klass, NULL);
else
res_size = mono_class_value_size (klass, NULL);
push_type_vt (td, klass, res_size);
res_size = ALIGN_TO (res_size, MINT_VT_ALIGNMENT);
if (mono_class_has_failure (klass)) {
mono_error_set_for_class_failure (error, klass);
return FALSE;
}
} else {
push_type (td, stack_type[mt], klass);
res_size = MINT_STACK_SLOT_SIZE;
}
dreg = td->sp [-1].local;
} else {
Expand Down Expand Up @@ -3658,7 +3657,23 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
td->last_ins->flags |= INTERP_INST_FLAG_CALL;
}
td->ip += 5;
td->last_ins->info.call_args = call_args;
if (td->last_ins->flags & INTERP_INST_FLAG_CALL) {
td->last_ins->info.call_args = call_args;
if (!td->optimized) {
int call_dreg = td->last_ins->dreg;
int call_offset = td->locals [call_dreg].stack_offset;
if ((call_offset % MINT_STACK_ALIGNMENT) != 0) {
InterpInst *align_ins = interp_insert_ins_bb (td, td->cbb, interp_prev_ins (td->last_ins), MINT_CALL_ALIGN_STACK);
interp_ins_set_dreg (align_ins, call_dreg);
align_ins->data [0] = params_stack_size;
if (calli) {
// fp_sreg is at the top of the stack, make sure it is not overwritten by MINT_CALL_ALIGN_STACK
int offset = ALIGN_TO (call_offset, MINT_STACK_ALIGNMENT) - call_offset;
td->locals [fp_sreg].stack_offset += offset;
}
}
}
}

return TRUE;
}
Expand Down Expand Up @@ -4067,8 +4082,6 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
td->locals_capacity = td->locals_size;
offset = 0;

g_assert (MINT_STACK_SLOT_SIZE == MINT_VT_ALIGNMENT);

/*
* We will load arguments as if they are locals. Unlike normal locals, every argument
* is stored in a stackval sized slot and valuetypes have special semantics since we
Expand Down Expand Up @@ -4096,6 +4109,7 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
offset += MINT_STACK_SLOT_SIZE;
}
}
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

td->il_locals_offset = offset;
for (int i = 0; i < num_il_locals; ++i) {
Expand Down Expand Up @@ -4123,7 +4137,8 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
// Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals
offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
}
offset = ALIGN_TO (offset, MINT_VT_ALIGNMENT);
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

td->il_locals_size = offset - td->il_locals_offset;
td->total_locals_size = offset;

Expand Down Expand Up @@ -6238,8 +6253,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
MonoClass *field_klass = mono_class_from_mono_type_internal (ftype);
mt = mint_type (ftype);
int field_size = mono_class_value_size (field_klass, NULL);
int obj_size = mono_class_value_size (klass, NULL);
obj_size = ALIGN_TO (obj_size, MINT_VT_ALIGNMENT);

{
if (is_static) {
Expand Down Expand Up @@ -7998,6 +8011,9 @@ compute_native_offset_estimates (TransformData *td)
foreach_local_var (td, ins, NULL, alloc_unopt_global_local);
}
}

if (!td->optimized)
td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT);
return noe;
}

Expand Down Expand Up @@ -8241,8 +8257,11 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
// same offset. Use the dreg offset so we don't need to rely on existing call_args.
if (td->optimized)
offset = get_local_offset (td, ins->info.call_args [0]);
else
else if (opcode == MINT_NEWOBJ_ARRAY || opcode == MINT_LDELEMA_TC || opcode == MINT_LDELEMA)
// no alignment required since this is not a real call
offset = get_local_offset (td, ins->dreg);
else
offset = ALIGN_TO (get_local_offset (td, ins->dreg), MINT_STACK_ALIGNMENT);
*ip++ = GINT_TO_UINT16 (offset);
} else {
*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i]));
Expand Down Expand Up @@ -9997,6 +10016,7 @@ initialize_global_vars (TransformData *td)
foreach_local_var (td, ins, (gpointer)(gsize)bb->index, initialize_global_var_cb);
}
}
td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT);
}

// Data structure used for offset allocation of call args
Expand Down Expand Up @@ -10043,6 +10063,7 @@ get_call_param_size (TransformData *td, InterpInst *call)
call_args++;
var = *call_args;
}
param_size = ALIGN_TO (param_size, MINT_STACK_ALIGNMENT);
return param_size;
}

Expand Down Expand Up @@ -10364,6 +10385,7 @@ interp_alloc_offsets (TransformData *td)
ins_index++;
}
}
final_total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT);

// Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them)
// then also update td->total_locals_size to account for this space.
Expand All @@ -10375,7 +10397,7 @@ interp_alloc_offsets (TransformData *td)
final_total_locals_size = MAX (td->locals [i].offset + td->locals [i].size, final_total_locals_size);
}
}
td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_SLOT_SIZE);
td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT);
}

/*
Expand Down Expand Up @@ -10587,6 +10609,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
// When unoptimized, the param area is stored in the same order, within the IL execution stack.
g_assert (!td->optimized || !td->max_stack_size);
rtm->alloca_size = td->total_locals_size + td->max_stack_size;
g_assert ((rtm->alloca_size % MINT_STACK_ALIGNMENT) == 0);
rtm->locals_size = td->optimized ? td->param_area_offset : td->total_locals_size;
rtm->data_items = (gpointer*)mono_mem_manager_alloc0 (td->mem_manager, td->n_data_items * sizeof (td->data_items [0]));
memcpy (rtm->data_items, td->data_items, td->n_data_items * sizeof (td->data_items [0]));
Expand Down