From 157bdaac832f7b8a9ae6458aaff37823045e79db Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sat, 14 Jan 2023 12:14:45 +0200 Subject: [PATCH 1/3] [mono][interp] Remove MINT_VT_ALIGNMENT It serves no purpose nowadays. All vars are aligned to MINT_STACK_SLOT_SIZE. --- src/mono/mono/mini/interp/interp-internals.h | 1 - src/mono/mono/mini/interp/interp.c | 5 +++-- src/mono/mono/mini/interp/jiterpreter.c | 2 +- src/mono/mono/mini/interp/transform.c | 9 +-------- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h index 6d9bbb5586ff5..a079b2b76f69a 100644 --- a/src/mono/mono/mini/interp/interp-internals.h +++ b/src/mono/mono/mini/interp/interp-internals.h @@ -26,7 +26,6 @@ #define TRACING_FLAG 0x1 #define PROFILING_FLAG 0x2 -#define MINT_VT_ALIGNMENT 8 #define MINT_STACK_SLOT_SIZE (sizeof (stackval)) #define INTERP_STACK_SIZE (1024*1024) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 5b7214cb85b81..b5e2daf1a082a 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -2604,7 +2604,7 @@ init_jit_call_info (InterpMethod *rmethod, MonoError *error) * that could end up doing a jit call. */ gint32 size = mono_class_value_size (klass, NULL); - cinfo->res_size = ALIGN_TO (size, MINT_VT_ALIGNMENT); + cinfo->res_size = ALIGN_TO (size, MINT_STACK_SLOT_SIZE); } else { cinfo->res_size = MINT_STACK_SLOT_SIZE; } @@ -7344,7 +7344,8 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; int len = LOCAL_VAR (ip [2], gint32); gpointer mem; if (len > 0) { - mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT)); + // We align len to 8 so we can safely load all primitive types on all platforms + mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64))); if (frame->imethod->init_locals) memset (mem, 0, len); diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c index 6c6e94852a010..d1e60f91517b5 100644 --- a/src/mono/mono/mini/interp/jiterpreter.c +++ b/src/mono/mono/mini/interp/jiterpreter.c @@ -346,7 +346,7 @@ mono_jiterp_localloc (gpointer *destination, gint32 len, InterpFrame *frame) ThreadContext *context = mono_jiterp_get_context(); gpointer mem; if (len > 0) { - mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT)); + mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64))); if (frame->imethod->init_locals) memset (mem, 0, len); diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 76fac6389eb50..dd65e7d9c405e 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -3229,7 +3229,6 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target MonoMethodSignature *csignature; int is_virtual = *td->ip == CEE_CALLVIRT; int calli = *td->ip == CEE_CALLI || *td->ip == CEE_MONO_CALLI_EXTRA_ARG; - guint32 res_size = 0; int op = -1; int native = 0; int need_null_check = is_virtual; @@ -3520,19 +3519,18 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target MonoClass *klass = mono_class_from_mono_type_internal (csignature->ret); if (mt == MINT_TYPE_VT) { + guint32 res_size; if (csignature->pinvoke && !csignature->marshalling_disabled && method->wrapper_type != MONO_WRAPPER_NONE) res_size = mono_class_native_size (klass, NULL); else res_size = mono_class_value_size (klass, NULL); push_type_vt (td, klass, res_size); - res_size = ALIGN_TO (res_size, MINT_VT_ALIGNMENT); if (mono_class_has_failure (klass)) { mono_error_set_for_class_failure (error, klass); return FALSE; } } else { push_type (td, stack_type[mt], klass); - res_size = MINT_STACK_SLOT_SIZE; } dreg = td->sp [-1].local; } else { @@ -4067,8 +4065,6 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet td->locals_capacity = td->locals_size; offset = 0; - g_assert (MINT_STACK_SLOT_SIZE == MINT_VT_ALIGNMENT); - /* * We will load arguments as if they are locals. Unlike normal locals, every argument * is stored in a stackval sized slot and valuetypes have special semantics since we @@ -4123,7 +4119,6 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet // Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE); } - offset = ALIGN_TO (offset, MINT_VT_ALIGNMENT); td->il_locals_size = offset - td->il_locals_offset; td->total_locals_size = offset; @@ -6238,8 +6233,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, MonoClass *field_klass = mono_class_from_mono_type_internal (ftype); mt = mint_type (ftype); int field_size = mono_class_value_size (field_klass, NULL); - int obj_size = mono_class_value_size (klass, NULL); - obj_size = ALIGN_TO (obj_size, MINT_VT_ALIGNMENT); { if (is_static) { From 5c63ab0638210d913648b940ce4abab4e86f731e Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sat, 14 Jan 2023 13:58:10 +0200 Subject: [PATCH 2/3] [mono][interp] Add 16 byte default alignment to optimized code This will enable us to control alignment of Vector128 vars at compile time. --- src/mono/mono/mini/interp/interp-internals.h | 2 ++ src/mono/mono/mini/interp/interp.c | 9 +++++++-- src/mono/mono/mini/interp/transform.c | 9 ++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h index a079b2b76f69a..8657c964b4693 100644 --- a/src/mono/mono/mini/interp/interp-internals.h +++ b/src/mono/mono/mini/interp/interp-internals.h @@ -27,6 +27,8 @@ #define PROFILING_FLAG 0x2 #define MINT_STACK_SLOT_SIZE (sizeof (stackval)) +// This alignment provides us with straight forward support for Vector128 +#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE) #define INTERP_STACK_SIZE (1024*1024) #define INTERP_REDZONE_SIZE (8*1024) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index b5e2daf1a082a..24ca3aac1c6ef 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -403,11 +403,11 @@ get_context (void) ThreadContext *context = (ThreadContext *) mono_native_tls_get_value (thread_context_id); if (context == NULL) { context = g_new0 (ThreadContext, 1); - context->stack_start = (guchar*)mono_valloc (0, INTERP_STACK_SIZE, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK); + context->stack_start = (guchar*)mono_valloc_aligned (INTERP_STACK_SIZE, MINT_STACK_ALIGNMENT, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK); context->stack_end = context->stack_start + INTERP_STACK_SIZE - INTERP_REDZONE_SIZE; context->stack_real_end = context->stack_start + INTERP_STACK_SIZE; /* We reserve a stack slot at the top of the interp stack to make temp objects visible to GC */ - context->stack_pointer = context->stack_start + MINT_STACK_SLOT_SIZE; + context->stack_pointer = context->stack_start + MINT_STACK_ALIGNMENT; frame_data_allocator_init (&context->data_stack, 8192); /* Make sure all data is initialized before publishing the context */ @@ -2230,6 +2230,7 @@ interp_entry (InterpEntryData *data) sp_args = STACK_ADD_BYTES (sp_args, size); } } + sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT); InterpFrame frame = {0}; frame.imethod = data->rmethod; @@ -3107,6 +3108,7 @@ interp_entry_from_trampoline (gpointer ccontext_untyped, gpointer rmethod_untype } newsp = STACK_ADD_BYTES (newsp, size); } + newsp = (stackval*)ALIGN_TO (newsp, MINT_STACK_ALIGNMENT); context->stack_pointer = (guchar*)newsp; g_assert (context->stack_pointer < context->stack_end); @@ -4219,6 +4221,8 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause } context->stack_pointer = (guchar*)frame->stack + cmethod->alloca_size; + g_assert_checked (((gsize)context->stack_pointer % MINT_STACK_ALIGNMENT) == 0); + if (G_UNLIKELY (context->stack_pointer >= context->stack_end)) { context->stack_end = context->stack_real_end; THROW_EX (mono_domain_get ()->stack_overflow_ex, ip); @@ -7945,6 +7949,7 @@ interp_run_clause_with_il_state (gpointer il_state_ptr, int clause_index, MonoOb } findex ++; } + sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT); /* Allocate frame */ InterpFrame frame = {0}; diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index dd65e7d9c405e..689ea5bade273 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -4092,6 +4092,7 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet offset += MINT_STACK_SLOT_SIZE; } } + offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT); td->il_locals_offset = offset; for (int i = 0; i < num_il_locals; ++i) { @@ -4119,6 +4120,8 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet // Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE); } + offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT); + td->il_locals_size = offset - td->il_locals_offset; td->total_locals_size = offset; @@ -9990,6 +9993,7 @@ initialize_global_vars (TransformData *td) foreach_local_var (td, ins, (gpointer)(gsize)bb->index, initialize_global_var_cb); } } + td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT); } // Data structure used for offset allocation of call args @@ -10036,6 +10040,7 @@ get_call_param_size (TransformData *td, InterpInst *call) call_args++; var = *call_args; } + param_size = ALIGN_TO (param_size, MINT_STACK_ALIGNMENT); return param_size; } @@ -10357,6 +10362,7 @@ interp_alloc_offsets (TransformData *td) ins_index++; } } + final_total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT); // Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them) // then also update td->total_locals_size to account for this space. @@ -10368,7 +10374,7 @@ interp_alloc_offsets (TransformData *td) final_total_locals_size = MAX (td->locals [i].offset + td->locals [i].size, final_total_locals_size); } } - td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_SLOT_SIZE); + td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT); } /* @@ -10580,6 +10586,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG // When unoptimized, the param area is stored in the same order, within the IL execution stack. g_assert (!td->optimized || !td->max_stack_size); rtm->alloca_size = td->total_locals_size + td->max_stack_size; + g_assert ((rtm->alloca_size % MINT_STACK_ALIGNMENT) == 0); rtm->locals_size = td->optimized ? td->param_area_offset : td->total_locals_size; rtm->data_items = (gpointer*)mono_mem_manager_alloc0 (td->mem_manager, td->n_data_items * sizeof (td->data_items [0])); memcpy (rtm->data_items, td->data_items, td->n_data_items * sizeof (td->data_items [0])); From 6f57e7dc7f5ce0877e8b558f220075be7b425421 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 16 Jan 2023 17:31:01 +0200 Subject: [PATCH 3/3] [mono][interp] Add stack alignment for unoptimized code For normal calls, we introduce a new opcode before the call that will move all the arguments to aligned stack location. When emitting the code for the call, we emit directly the aligned call args offset instead. Unoptimized code has its own opcodes that do moving of param, we tweak them to copy them into aligned location. --- src/mono/mono/mini/interp/interp.c | 25 +++++++++++++++------ src/mono/mono/mini/interp/mintops.def | 2 ++ src/mono/mono/mini/interp/transform.c | 31 +++++++++++++++++++++++---- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 24ca3aac1c6ef..a53d6e6067186 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -3971,6 +3971,15 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause ip = frame->imethod->code; MINT_IN_BREAK; } + MINT_IN_CASE(MINT_CALL_ALIGN_STACK) { + int call_offset = ip [1]; + int aligned_call_offset = call_offset + MINT_STACK_SLOT_SIZE; + int params_stack_size = ip [2]; + + memmove (locals + aligned_call_offset, locals + call_offset, params_stack_size); + ip += 3; + MINT_IN_BREAK; + } MINT_IN_CASE(MINT_CALL_DELEGATE) { // FIXME We don't need to encode the whole signature, just param_count MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [4]]; @@ -5600,10 +5609,12 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]]; return_offset = ip [1]; call_args_offset = ip [1]; + int aligned_call_args_offset = ALIGN_TO (call_args_offset, MINT_STACK_ALIGNMENT); int param_size = ip [3]; if (param_size) - memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size); + memmove (locals + aligned_call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size); + call_args_offset = aligned_call_args_offset; LOCAL_VAR (call_args_offset, gpointer) = NULL; ip += 4; goto call; @@ -5719,19 +5730,21 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; gboolean is_vt = ret_size != 0; if (!is_vt) ret_size = MINT_STACK_SLOT_SIZE; + return_offset = call_args_offset; cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]]; MonoClass *newobj_class = cmethod->method->klass; + call_args_offset = ALIGN_TO (call_args_offset + ret_size, MINT_STACK_ALIGNMENT); // We allocate space on the stack for return value and for this pointer, that is passed to ctor + // Here we use return_offset as meaning original call_args_offset if (param_size) - memmove (locals + call_args_offset + ret_size + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size); + memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + return_offset, param_size); if (is_vt) { - this_ptr = locals + call_args_offset; + this_ptr = locals + return_offset; memset (this_ptr, 0, ret_size); - call_args_offset += ret_size; } else { // FIXME push/pop LMF MonoVTable *vtable = mono_class_vtable_checked (newobj_class, error); @@ -5743,11 +5756,9 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; error_init_reuse (error); this_ptr = mono_object_new_checked (newobj_class, error); mono_interp_error_cleanup (error); // FIXME: do not swallow the error - LOCAL_VAR (call_args_offset, gpointer) = this_ptr; // return value - call_args_offset += MINT_STACK_SLOT_SIZE; + LOCAL_VAR (return_offset, gpointer) = this_ptr; // return value } LOCAL_VAR (call_args_offset, gpointer) = this_ptr; - return_offset = call_args_offset; // unused, prevent warning ip += 5; goto call; } diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 444ceb417b59d..0fc232f837b32 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -682,6 +682,8 @@ OPDEF(MINT_STRLEN, "strlen", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_ARRAY_RANK, "array_rank", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_ARRAY_ELEMENT_SIZE, "array_element_size", 3, 1, 1, MintOpNoArgs) +OPDEF(MINT_CALL_ALIGN_STACK, "call_align_stack", 3, 1, 0, MintOpShortInt) + /* Calls */ OPDEF(MINT_CALL, "call", 4, 1, 1, MintOpMethodToken) OPDEF(MINT_CALLVIRT_FAST, "callvirt.fast", 5, 1, 1, MintOpMethodToken) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 689ea5bade273..b1bc1499f52c5 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -348,10 +348,11 @@ interp_last_ins (InterpBasicBlock *bb) return ret; \ } while (0) +// We want to allow any block of stack slots to get moved in order for them to be aligned to MINT_STACK_ALIGNMENT #define ENSURE_STACK_SIZE(td, size) \ do { \ - if ((size) > td->max_stack_size) \ - td->max_stack_size = size; \ + if ((size) >= td->max_stack_size) \ + td->max_stack_size = ALIGN_TO (size + MINT_STACK_ALIGNMENT - MINT_STACK_SLOT_SIZE, MINT_STACK_ALIGNMENT); \ } while (0) #define ENSURE_I4(td, sp_off) \ @@ -3656,7 +3657,23 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target td->last_ins->flags |= INTERP_INST_FLAG_CALL; } td->ip += 5; - td->last_ins->info.call_args = call_args; + if (td->last_ins->flags & INTERP_INST_FLAG_CALL) { + td->last_ins->info.call_args = call_args; + if (!td->optimized) { + int call_dreg = td->last_ins->dreg; + int call_offset = td->locals [call_dreg].stack_offset; + if ((call_offset % MINT_STACK_ALIGNMENT) != 0) { + InterpInst *align_ins = interp_insert_ins_bb (td, td->cbb, interp_prev_ins (td->last_ins), MINT_CALL_ALIGN_STACK); + interp_ins_set_dreg (align_ins, call_dreg); + align_ins->data [0] = params_stack_size; + if (calli) { + // fp_sreg is at the top of the stack, make sure it is not overwritten by MINT_CALL_ALIGN_STACK + int offset = ALIGN_TO (call_offset, MINT_STACK_ALIGNMENT) - call_offset; + td->locals [fp_sreg].stack_offset += offset; + } + } + } + } return TRUE; } @@ -7994,6 +8011,9 @@ compute_native_offset_estimates (TransformData *td) foreach_local_var (td, ins, NULL, alloc_unopt_global_local); } } + + if (!td->optimized) + td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT); return noe; } @@ -8237,8 +8257,11 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in // same offset. Use the dreg offset so we don't need to rely on existing call_args. if (td->optimized) offset = get_local_offset (td, ins->info.call_args [0]); - else + else if (opcode == MINT_NEWOBJ_ARRAY || opcode == MINT_LDELEMA_TC || opcode == MINT_LDELEMA) + // no alignment required since this is not a real call offset = get_local_offset (td, ins->dreg); + else + offset = ALIGN_TO (get_local_offset (td, ins->dreg), MINT_STACK_ALIGNMENT); *ip++ = GINT_TO_UINT16 (offset); } else { *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i]));