From ef8e3f744fd9d36d92524243242b27c0bb3888d3 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Thu, 31 Aug 2023 12:40:20 +0200 Subject: [PATCH 01/10] Moving towards DivRem intrinsic. --- .../Runtime/Intrinsics/X86/X86Base.Mono.cs | 22 +++++++++++++++++++ src/mono/mono/mini/cpu-amd64.mdesc | 2 ++ src/mono/mono/mini/mini-amd64.c | 2 ++ src/mono/mono/mini/mini-ops.h | 5 +++++ src/mono/mono/mini/simd-intrinsics.c | 16 ++++++++++++++ src/mono/mono/mini/simd-methods.h | 1 + 6 files changed, 48 insertions(+) diff --git a/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs index f29203516cc374..a78eb66ef77dc3 100644 --- a/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs +++ b/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs @@ -9,5 +9,27 @@ public abstract partial class X86Base { [MethodImplAttribute(MethodImplOptions.InternalCall)] private static extern unsafe void __cpuidex(int* cpuInfo, int functionId, int subFunctionId); + + [Intrinsic] + private static void DivRemInternal(long lower, long upper, long divisor, out long quotient, out long remainder) => + DivRemInternal (lower, upper, divisor, out quotient, out remainder); + + [Intrinsic] + private static void DivRemInternal(ulong lower, ulong upper, ulong divisor, out ulong quotient, out ulong remainder) => + DivRemInternal (lower, upper, divisor, out quotient, out remainder); + + [MethodImpl(AggressiveInlining)] + (long quotient, long remainder) DivRem (long lower, long upper, long divisor) + { + DivRemInternal(lower, upper, divisor, out long quotient, out long remainder); + return (quotient, remainder); + } + + [MethodImpl(AggressiveInlining)] + (ulong quotient, ulong remainder) DivRem (ulong lower, ulong upper, ulong divisor) + { + DivRemInternal(lower, upper, divisor, out ulong quotient, out ulong remainder); + return (quotient, remainder); 
+ } } } diff --git a/src/mono/mono/mini/cpu-amd64.mdesc b/src/mono/mono/mini/cpu-amd64.mdesc index 06321f83169e20..814ba28164afb6 100644 --- a/src/mono/mono/mini/cpu-amd64.mdesc +++ b/src/mono/mono/mini/cpu-amd64.mdesc @@ -98,6 +98,8 @@ long_div: dest:a src1:a src2:i len:16 clob:d long_div_un: dest:a src1:a src2:i len:16 clob:d long_rem: dest:d src1:a src2:i len:16 clob:a long_rem_un: dest:d src1:a src2:i len:16 clob:a +long_divrem: dest:a src1:a src2:d src3:i len:16 clob:d +long_divrem_un: dest:a src1:a src2:d src3:i len:16 clob:d long_and: dest:i src1:i src2:i len:3 clob:1 long_or: dest:i src1:i src2:i len:3 clob:1 long_xor: dest:i src1:i src2:i len:3 clob:1 diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index 1a2f9fff59d34e..854e8f172c4722 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -5083,6 +5083,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } case OP_LDIV: case OP_LREM: + case OP_X86_LDIVREM: /* Regalloc magic makes the div/rem cases the same */ if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); @@ -5095,6 +5096,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_LDIV_UN: case OP_LREM_UN: + case OP_X86_LDIVREMU: if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX); diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index d95f0948073402..a2dca4c89ba923 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1184,6 +1184,11 @@ MINI_OP3(OP_MULX_HL64, "mulxhl64", LREG, LREG, LREG, LREG) #endif +#ifdef defined(TARGET_X86) || defined(TARGET_AMD64) +MINI_OP3(OP_X86_LDIVREM, "long_divrem", LREG, LREG, LREG, LREG) +MINI_OP3(OP_X86_LDIVREMU, "long_divrem_un", LREG, LREG, LREG, LREG) +#endif + MINI_OP(OP_CREATE_SCALAR_UNSAFE, "create_scalar_unsafe", XREG, 
XREG, NONE) MINI_OP(OP_CREATE_SCALAR, "create_scalar", XREG, XREG, NONE) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 1b6fe4d2447b5f..a2d7188cbf56d2 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -4599,6 +4599,7 @@ static SimdIntrinsic bmi2_methods [] = { static SimdIntrinsic x86base_methods [] = { {SN_BitScanForward}, {SN_BitScanReverse}, + {SN_DivRemInternal}, {SN_Pause, OP_XOP, INTRINS_SSE_PAUSE}, {SN_get_IsSupported} }; @@ -5246,6 +5247,21 @@ emit_x86_intrinsics ( ins->type = is_64bit ? STACK_I8 : STACK_I4; MONO_ADD_INS (cfg->cbb, ins); return ins; + case SN_DivRemInternal: + if (type_enum_is_unsigned (arg0_type)) { + MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_LDIVREMU : 0); + } else { + MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_LDIVREM : 0); + } + ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg); + ins->sreg1 = args [0]->dreg; + ins->sreg2 = args [1]->dreg; + ins->sreg3 = args [2]->dreg; + ins->type = is_64bit ? STACK_I8 : STACK_I4; + MONO_ADD_INS (cfg->cbb, ins); + + // TODO: copy to args[3], args[4] + return ins; default: g_assert_not_reached (); } diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h index 6f237c145c57e4..e8f3074195501d 100644 --- a/src/mono/mono/mini/simd-methods.h +++ b/src/mono/mono/mini/simd-methods.h @@ -293,6 +293,7 @@ METHOD(ComputeCrc32C) // X86Base METHOD(BitScanForward) METHOD(BitScanReverse) +METHOD(DivRem) METHOD(Pause) // Crypto METHOD(FixedRotate) From 9addc9a893429be2b442bca1839ca226030ba001 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Thu, 31 Aug 2023 16:27:57 +0200 Subject: [PATCH 02/10] DivRem for x64. 
--- .../Runtime/Intrinsics/X86/X86Base.Mono.cs | 22 ------- src/mono/mono/mini/cpu-amd64.mdesc | 2 + src/mono/mono/mini/mini-amd64.c | 2 + src/mono/mono/mini/mini-ops.h | 4 +- src/mono/mono/mini/simd-intrinsics.c | 61 +++++++++++++++---- 5 files changed, 57 insertions(+), 34 deletions(-) diff --git a/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs index a78eb66ef77dc3..f29203516cc374 100644 --- a/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs +++ b/src/mono/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.Mono.cs @@ -9,27 +9,5 @@ public abstract partial class X86Base { [MethodImplAttribute(MethodImplOptions.InternalCall)] private static extern unsafe void __cpuidex(int* cpuInfo, int functionId, int subFunctionId); - - [Intrinsic] - private static void DivRemInternal(long lower, long upper, long divisor, out long quotient, out long remainder) => - DivRemInternal (lower, upper, divisor, out quotient, out remainder); - - [Intrinsic] - private static void DivRemInternal(ulong lower, ulong upper, ulong divisor, out ulong quotient, out ulong remainder) => - DivRemInternal (lower, upper, divisor, out quotient, out remainder); - - [MethodImpl(AggressiveInlining)] - (long quotient, long remainder) DivRem (long lower, long upper, long divisor) - { - DivRemInternal(lower, upper, divisor, out long quotient, out long remainder); - return (quotient, remainder); - } - - [MethodImpl(AggressiveInlining)] - (ulong quotient, ulong remainder) DivRem (ulong lower, ulong upper, ulong divisor) - { - DivRemInternal(lower, upper, divisor, out ulong quotient, out ulong remainder); - return (quotient, remainder); - } } } diff --git a/src/mono/mono/mini/cpu-amd64.mdesc b/src/mono/mono/mini/cpu-amd64.mdesc index 814ba28164afb6..f1bd38e8941095 100644 --- a/src/mono/mono/mini/cpu-amd64.mdesc +++ 
b/src/mono/mono/mini/cpu-amd64.mdesc @@ -413,6 +413,8 @@ int_div: dest:a src1:a src2:i clob:d len:32 int_div_un: dest:a src1:a src2:i clob:d len:32 int_rem: dest:d src1:a src2:i clob:a len:32 int_rem_un: dest:d src1:a src2:i clob:a len:32 +int_divrem: dest:a src1:a src2:d src3:i len:32 clob:d +int_divrem_un: dest:a src1:a src2:d src3:i len:32 clob:d int_and: dest:i src1:i src2:i clob:1 len:4 int_or: dest:i src1:i src2:i clob:1 len:4 int_xor: dest:i src1:i src2:i clob:1 len:4 diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index 854e8f172c4722..e6d9393753d2ed 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -5108,6 +5108,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_IDIV: case OP_IREM: + case OP_X86_IDIVREM: if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); amd64_cdq_size (code, 4); @@ -5119,6 +5120,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_IDIV_UN: case OP_IREM_UN: + case OP_X86_IDIVREM_UN: if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX); diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index a2dca4c89ba923..dc6bda3ec7b092 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1184,9 +1184,11 @@ MINI_OP3(OP_MULX_HL64, "mulxhl64", LREG, LREG, LREG, LREG) #endif -#ifdef defined(TARGET_X86) || defined(TARGET_AMD64) +#if defined(TARGET_X86) || defined(TARGET_AMD64) MINI_OP3(OP_X86_LDIVREM, "long_divrem", LREG, LREG, LREG, LREG) MINI_OP3(OP_X86_LDIVREMU, "long_divrem_un", LREG, LREG, LREG, LREG) +MINI_OP3(OP_X86_IDIVREM, "int_divrem", IREG, IREG, IREG, IREG) +MINI_OP3(OP_X86_IDIVREMU, "int_divrem_un", IREG, IREG, IREG, IREG) #endif MINI_OP(OP_CREATE_SCALAR_UNSAFE, "create_scalar_unsafe", XREG, XREG, NONE) diff --git 
a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index a2d7188cbf56d2..d85824d379228a 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -5247,21 +5247,60 @@ emit_x86_intrinsics ( ins->type = is_64bit ? STACK_I8 : STACK_I4; MONO_ADD_INS (cfg->cbb, ins); return ins; - case SN_DivRemInternal: + case SN_DivRemInternal: { + g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs + + const int divtype = is_64bit ? STACK_I8 : STACK_I4; + const int movtype = is_64bit ? OP_LMOVE : OP_MOVE; + const int storetype = is_64bit ? OP_STOREI8_MEMBASE_IMM : OP_STOREI4_MEMBASE_IMM; + + MonoInst* upper_and_remainder; // the register must be backed up, since it will be overwritten by DIV/IDIV + MONO_INST_NEW (cfg, upper_and_remainder, movtype); + upper_and_remainder->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg); + upper_and_remainder->sreg1 = args [1]->dreg; + upper_and_remainder->type = divtype; + MONO_ADD_INS (cfg->cbb, upper_and_remainder); + + MonoInst* div; // the division itself upper.remainder:lower / divisor -> quotient, upper.remainder if (type_enum_is_unsigned (arg0_type)) { - MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_LDIVREMU : 0); + MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREMU : OP_X86_IDIVREMU); } else { - MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_LDIVREM : 0); + MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREM : OP_X86_IDIVREM); } - ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg); - ins->sreg1 = args [0]->dreg; - ins->sreg2 = args [1]->dreg; - ins->sreg3 = args [2]->dreg; - ins->type = is_64bit ? STACK_I8 : STACK_I4; - MONO_ADD_INS (cfg->cbb, ins); - - // TODO: copy to args[3], args[4] + div->dreg = is_64bit ? 
alloc_lreg (cfg) : alloc_ireg (cfg); + div->sreg1 = args [0]->dreg; + div->sreg2 = upper_and_remainder->dreg; // the contents of this will also be destroyed by DIV/IDIV + div->sreg3 = args [2]->dreg; + div->type = divtype; + MONO_ADD_INS (cfg->cbb, div); + + MonoInst* tuple = mono_compile_create_var (cfg, fsig->ret, OP_LOCAL); + MonoInst* tuple_addr; + EMIT_NEW_TEMPLOADA (cfg, tuple_addr, tuple->inst_c0); + + MonoClassField* field1 = mono_class_get_field_from_name_full (span->klass, "Item1", NULL); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, span_addr->dreg, field1->offset - obj_size, div->dreg); + MonoClassField* field2 = mono_class_get_field_from_name_full (span->klass, "Item2", NULL); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, span_addr->dreg, field2->offset - obj_size, upper_and_remainder->dreg); + EMIT_NEW_TEMPLOAD (cfg, ins, span->inst_c0); return ins; + + /*MonoInst* rem; // moving the remainder to the correct reg + MONO_INST_NEW (cfg, rem, movtype); + rem->dreg = args [4]->dreg; + rem->sreg1 = upper_and_remainder->dreg; + rem->type = divtype; + MONO_ADD_INS (cfg->cbb, rem); + + MonoInst* quo; // moving the quotient to the correct reg + MONO_INST_NEW (cfg, quo, movtype); + quo->dreg = args [3]->dreg; + quo->sreg1 = div->dreg; + quo->type = divtype; + MONO_ADD_INS (cfg->cbb, quo); + + return quo;*/ + } default: g_assert_not_reached (); } From 21ab677b459301d398e347d76da54489f91530c8 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Thu, 31 Aug 2023 16:44:36 +0200 Subject: [PATCH 03/10] Typos. 
--- src/mono/mono/mini/simd-intrinsics.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index d85824d379228a..42862706e9d18f 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -5278,11 +5278,11 @@ emit_x86_intrinsics ( MonoInst* tuple_addr; EMIT_NEW_TEMPLOADA (cfg, tuple_addr, tuple->inst_c0); - MonoClassField* field1 = mono_class_get_field_from_name_full (span->klass, "Item1", NULL); - MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, span_addr->dreg, field1->offset - obj_size, div->dreg); - MonoClassField* field2 = mono_class_get_field_from_name_full (span->klass, "Item2", NULL); - MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, span_addr->dreg, field2->offset - obj_size, upper_and_remainder->dreg); - EMIT_NEW_TEMPLOAD (cfg, ins, span->inst_c0); + MonoClassField* field1 = mono_class_get_field_from_name_full (tuple->klass, "Item1", NULL); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field1->offset - obj_size, div->dreg); + MonoClassField* field2 = mono_class_get_field_from_name_full (tuple->klass, "Item2", NULL); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field2->offset - obj_size, upper_and_remainder->dreg); + EMIT_NEW_TEMPLOAD (cfg, ins, tuple->inst_c0); return ins; /*MonoInst* rem; // moving the remainder to the correct reg From ed989dc1e4cbfb7e920d7cc3ca5f3bf2881a6e80 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Thu, 31 Aug 2023 16:55:02 +0200 Subject: [PATCH 04/10] Fixed intrinsic name. --- src/mono/mono/mini/simd-intrinsics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 42862706e9d18f..61d4883a3389a2 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -5247,7 +5247,7 @@ emit_x86_intrinsics ( ins->type = is_64bit ? 
STACK_I8 : STACK_I4; MONO_ADD_INS (cfg->cbb, ins); return ins; - case SN_DivRemInternal: { + case SN_DivRem: { g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs const int divtype = is_64bit ? STACK_I8 : STACK_I4; From 0b87526c8def866d0deb5ff40fe4f338394e065d Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Thu, 31 Aug 2023 17:04:20 +0200 Subject: [PATCH 05/10] Fix. --- src/mono/mono/mini/simd-intrinsics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 61d4883a3389a2..a62c277181d6f0 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -4599,7 +4599,7 @@ static SimdIntrinsic bmi2_methods [] = { static SimdIntrinsic x86base_methods [] = { {SN_BitScanForward}, {SN_BitScanReverse}, - {SN_DivRemInternal}, + {SN_DivRem}, {SN_Pause, OP_XOP, INTRINS_SSE_PAUSE}, {SN_get_IsSupported} }; From 0a51a05ab72001d67a11ba5568d818ffb43b123e Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Tue, 5 Sep 2023 14:50:30 +0200 Subject: [PATCH 06/10] x86Base intrinsics in mini and LLVM. 
--- src/mono/mono/arch/amd64/amd64-codegen.h | 3 ++ src/mono/mono/arch/x86/x86-codegen.h | 3 ++ src/mono/mono/mini/cpu-amd64.mdesc | 14 ++++-- src/mono/mono/mini/cpu-x86.mdesc | 7 +++ src/mono/mono/mini/mini-amd64.c | 38 +++++++++++++-- src/mono/mono/mini/mini-llvm.c | 39 ++++++++++++++++ src/mono/mono/mini/mini-ops.h | 2 + src/mono/mono/mini/mini-x86.c | 19 ++++++++ src/mono/mono/mini/simd-intrinsics.c | 59 ++++++++++-------------- 9 files changed, 141 insertions(+), 43 deletions(-) diff --git a/src/mono/mono/arch/amd64/amd64-codegen.h b/src/mono/mono/arch/amd64/amd64-codegen.h index 304ff3cb743776..3d4d3845b3df32 100644 --- a/src/mono/mono/arch/amd64/amd64-codegen.h +++ b/src/mono/mono/arch/amd64/amd64-codegen.h @@ -1206,6 +1206,9 @@ typedef union { #define amd64_movsb_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsb(inst); amd64_codegen_post(inst); } while (0) #define amd64_movsl_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsl(inst); amd64_codegen_post(inst); } while (0) #define amd64_movsd_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsd(inst); amd64_codegen_post(inst); } while (0) +#define amd64_bsf_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_bsf ((inst),(dreg)&0x7,(reg)&0x7); amd64_codegen_post (inst); } while (0) +#define amd64_bsr_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_bsr ((inst),(dreg)&0x7,(reg)&0x7); amd64_codegen_post (inst); } while (0) + #define amd64_prefix_size(inst,p,size) do { x86_prefix((inst), p); } while (0) #define amd64_rdtsc_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_rdtsc(inst); amd64_codegen_post(inst); } while (0) #define amd64_cmpxchg_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex 
((inst),(size),(dreg),0,(reg)); x86_cmpxchg_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0) diff --git a/src/mono/mono/arch/x86/x86-codegen.h b/src/mono/mono/arch/x86/x86-codegen.h index aca2b659ca058b..69372e89724799 100644 --- a/src/mono/mono/arch/x86/x86-codegen.h +++ b/src/mono/mono/arch/x86/x86-codegen.h @@ -1963,6 +1963,9 @@ mono_x86_patch_inline (guchar* code, gpointer target) #define x86_leave(inst) do { x86_byte (inst, 0xc9); } while (0) #define x86_sahf(inst) do { x86_byte (inst, 0x9e); } while (0) +#define x86_bsf(inst,dreg,reg) do { x86_byte (inst, 0x0f); x86_byte (inst, 0xbc); x86_reg_emit ((inst), (dreg), (reg)); } while (0) +#define x86_bsr(inst,dreg,reg) do { x86_byte (inst, 0x0f); x86_byte (inst, 0xbd); x86_reg_emit ((inst), (dreg), (reg)); } while (0) + #define x86_fsin(inst) do { x86_codegen_pre(&(inst), 2); x86_byte (inst, 0xd9); x86_byte (inst, 0xfe); } while (0) #define x86_fcos(inst) do { x86_codegen_pre(&(inst), 2); x86_byte (inst, 0xd9); x86_byte (inst, 0xff); } while (0) #define x86_fabs(inst) do { x86_codegen_pre(&(inst), 2); x86_byte (inst, 0xd9); x86_byte (inst, 0xe1); } while (0) diff --git a/src/mono/mono/mini/cpu-amd64.mdesc b/src/mono/mono/mini/cpu-amd64.mdesc index f1bd38e8941095..e0973099b59b84 100644 --- a/src/mono/mono/mini/cpu-amd64.mdesc +++ b/src/mono/mono/mini/cpu-amd64.mdesc @@ -98,8 +98,9 @@ long_div: dest:a src1:a src2:i len:16 clob:d long_div_un: dest:a src1:a src2:i len:16 clob:d long_rem: dest:d src1:a src2:i len:16 clob:a long_rem_un: dest:d src1:a src2:i len:16 clob:a -long_divrem: dest:a src1:a src2:d src3:i len:16 clob:d -long_divrem_un: dest:a src1:a src2:d src3:i len:16 clob:d +long_divrem: dest:a src1:a src2:d src3:i len:16 clob:x +long_divrem_un: dest:a src1:a src2:d src3:i len:16 clob:x +long_divrem2: dest:d len:3 long_and: dest:i src1:i src2:i len:3 clob:1 long_or: dest:i src1:i src2:i len:3 clob:1 long_xor: dest:i src1:i src2:i len:3 clob:1 @@ -339,6 +340,10 @@ 
amd64_lea_membase: dest:i src1:i len:11 x86_xchg: src1:i src2:i clob:x len:2 x86_fpop: src1:f len:3 x86_seteq_membase: src1:b len:9 +x86_bsf32: dest:i src1:i len:4 +x86_bsf64: dest:l src1:l len:4 +x86_bsr32: dest:i src1:i len:4 +x86_bsr64: dest:l src1:l len:4 x86_add_reg_membase: dest:i src1:i src2:b clob:1 len:13 x86_sub_reg_membase: dest:i src1:i src2:b clob:1 len:13 @@ -413,8 +418,9 @@ int_div: dest:a src1:a src2:i clob:d len:32 int_div_un: dest:a src1:a src2:i clob:d len:32 int_rem: dest:d src1:a src2:i clob:a len:32 int_rem_un: dest:d src1:a src2:i clob:a len:32 -int_divrem: dest:a src1:a src2:d src3:i len:32 clob:d -int_divrem_un: dest:a src1:a src2:d src3:i len:32 clob:d +int_divrem: dest:a src1:a src2:d src3:i clob:x len:15 +int_divrem_un: dest:a src1:a src2:d src3:i clob:x len:15 +int_divrem2: dest:d len:3 int_and: dest:i src1:i src2:i clob:1 len:4 int_or: dest:i src1:i src2:i clob:1 len:4 int_xor: dest:i src1:i src2:i clob:1 len:4 diff --git a/src/mono/mono/mini/cpu-x86.mdesc b/src/mono/mono/mini/cpu-x86.mdesc index e68d6138724336..2e82fc3f6024c2 100644 --- a/src/mono/mono/mini/cpu-x86.mdesc +++ b/src/mono/mono/mini/cpu-x86.mdesc @@ -111,6 +111,9 @@ int_div: dest:a src1:a src2:i len:15 clob:d int_div_un: dest:a src1:a src2:i len:15 clob:d int_rem: dest:d src1:a src2:i len:15 clob:a int_rem_un: dest:d src1:a src2:i len:15 clob:a +int_divrem: dest:a src1:a src2:d src3:i clob:x len:15 +int_divrem_un: dest:a src1:a src2:d src3:i clob:x len:15 +int_divrem2: dest:d len:3 int_and: dest:i src1:i src2:i clob:1 len:2 int_or: dest:i src1:i src2:i clob:1 len:2 int_xor: dest:i src1:i src2:i clob:1 len:2 @@ -303,6 +306,10 @@ x86_fp_load_i8: dest:f src1:b len:7 x86_fp_load_i4: dest:f src1:b len:7 x86_seteq_membase: src1:b len:7 x86_setne_membase: src1:b len:7 +x86_bsf32: dest:i src1:i len:4 +x86_bsf64: dest:l src1:l len:4 +x86_bsr32: dest:i src1:i len:4 +x86_bsr64: dest:l src1:l len:4 x86_add_reg_membase: dest:i src1:i src2:b clob:1 len:11 x86_sub_reg_membase: dest:i 
src1:i src2:b clob:1 len:11 diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index e6d9393753d2ed..d8a43fdfb92cd0 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -5083,7 +5083,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } case OP_LDIV: case OP_LREM: - case OP_X86_LDIVREM: /* Regalloc magic makes the div/rem cases the same */ if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); @@ -5096,7 +5095,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_LDIV_UN: case OP_LREM_UN: - case OP_X86_LDIVREMU: if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX); @@ -5108,7 +5106,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_IDIV: case OP_IREM: - case OP_X86_IDIVREM: if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); amd64_cdq_size (code, 4); @@ -5120,7 +5117,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_IDIV_UN: case OP_IREM_UN: - case OP_X86_IDIVREM_UN: if (ins->sreg2 == AMD64_RDX) { amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8); amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX); @@ -5130,6 +5126,27 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_div_reg_size (code, ins->sreg2, FALSE, 4); } break; + case OP_X86_LDIVREM: + amd64_div_reg (code, ins->sreg3, TRUE); + break; + case OP_X86_IDIVREM: + amd64_div_reg_size (code, ins->sreg3, TRUE, 4); + break; + case OP_X86_LDIVREMU: + amd64_div_reg (code, ins->sreg3, FALSE); + break; + case OP_X86_IDIVREMU: + amd64_div_reg_size (code, ins->sreg3, FALSE, 4); + break; + case OP_X86_IDIVREM2: + if (ins->dreg != AMD64_RDX) + amd64_mov_reg_reg (code, ins->dreg, AMD64_RDX, 4); + break; + case OP_X86_LDIVREM2: + if 
(ins->dreg != AMD64_RDX) + amd64_mov_reg_reg (code, ins->dreg, AMD64_RDX, 8); + break; + case OP_LMUL_OVF: amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2); EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException"); @@ -5689,6 +5706,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_X86_XCHG: amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4); break; + case OP_X86_BSF32: + amd64_bsf_size (code, ins->dreg, ins->sreg1, 4); + break; + case OP_X86_BSF64: + amd64_bsf_size (code, ins->dreg, ins->sreg1, 8); + break; + case OP_X86_BSR32: + amd64_bsr_size (code, ins->dreg, ins->sreg1, 4); + break; + case OP_X86_BSR64: + amd64_bsr_size (code, ins->dreg, ins->sreg1, 8); + break; + case OP_LOCALLOC: /* keep alignment */ amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1); diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index a6c23aa59b67a2..38cddb31d9d5e2 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -5640,6 +5640,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) LLVMBuilderRef builder; gboolean has_terminator; LLVMValueRef lhs, rhs, arg3; + LLVMValueRef last_divrem; int nins = 0; cbb = get_end_bb (ctx, bb); @@ -6638,6 +6639,44 @@ MONO_RESTORE_WARNING values [ins->dreg] = LLVMBuildXor (builder, tz, width, dname); break; } + case OP_X86_IDIVREM: + case OP_X86_LDIVREM: { + const LLVMTypeRef part_type = ins->opcode==OP_X86_IDIVREM ? LLVMInt32Type () : LLVMInt64Type (); + const LLVMTypeRef full_type = ins->opcode==OP_X86_IDIVREM ? LLVMInt64Type () : LLVMInt128Type (); + const LLVMValueRef shift_amount = ins->opcode==OP_X86_IDIVREM ? 
const_int32 (32) : const_int32 (64); + + LLVMValueRef dividend_low = LLVMBuildZExt (builder, convert (ctx, lhs, part_type), full_type, ""); + LLVMValueRef dividend_high = LLVMBuildSExt (builder, convert (ctx, rhs, part_type), full_type, ""); + LLVMValueRef dividend = LLVMBuildOr (builder, dividend_low, + LLVMBuildShl (builder, dividend_high, shift_amount, ""), ""); + LLVMValueRef divisor = LLVMBuildSExt (builder, convert (ctx, arg3, part_type), full_type, ""); + // LLVM should fuse the individual Div and Rem instructions into one DIV/IDIV on x86 + values [ins->dreg] = LLVMBuildTrunc (builder, LLVMBuildSDiv (builder, dividend, divisor, ""), part_type, ""); + last_divrem = LLVMBuildTrunc (builder, LLVMBuildSRem (builder, dividend, divisor, ""), part_type, ""); + break; + } + case OP_X86_IDIVREMU: + case OP_X86_LDIVREMU: { + const LLVMTypeRef part_type = ins->opcode==OP_X86_IDIVREMU ? LLVMInt32Type () : LLVMInt64Type (); + const LLVMTypeRef full_type = ins->opcode==OP_X86_IDIVREMU ? LLVMInt64Type () : LLVMInt128Type (); + const LLVMValueRef shift_amount = ins->opcode==OP_X86_IDIVREMU ? 
const_int32 (32) : const_int32 (64); + + LLVMValueRef dividend_low = LLVMBuildZExt (builder, convert (ctx, lhs, part_type), full_type, ""); + LLVMValueRef dividend_high = LLVMBuildZExt (builder, convert (ctx, rhs, part_type), full_type, ""); + LLVMValueRef dividend = LLVMBuildOr (builder, dividend_low, + LLVMBuildShl (builder, dividend_high, shift_amount, ""), ""); + LLVMValueRef divisor = LLVMBuildZExt (builder, convert (ctx, arg3, part_type), full_type, ""); + values [ins->dreg] = LLVMBuildTrunc (builder, LLVMBuildUDiv (builder, dividend, divisor, ""), part_type, ""); + last_divrem = LLVMBuildTrunc (builder, LLVMBuildURem (builder, dividend, divisor, ""), part_type, ""); + break; + } + case OP_X86_IDIVREM2: + case OP_X86_LDIVREM2: { + g_assert (last_divrem); + values [ins->dreg] = last_divrem; + last_divrem = NULL; + break; + } #endif case OP_ICONV_TO_I1: diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index dc6bda3ec7b092..bbc8037426ac69 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1187,8 +1187,10 @@ MINI_OP3(OP_MULX_HL64, "mulxhl64", LREG, LREG, LREG, LREG) #if defined(TARGET_X86) || defined(TARGET_AMD64) MINI_OP3(OP_X86_LDIVREM, "long_divrem", LREG, LREG, LREG, LREG) MINI_OP3(OP_X86_LDIVREMU, "long_divrem_un", LREG, LREG, LREG, LREG) +MINI_OP3(OP_X86_LDIVREM2, "long_divrem2", LREG, NONE, NONE, NONE) MINI_OP3(OP_X86_IDIVREM, "int_divrem", IREG, IREG, IREG, IREG) MINI_OP3(OP_X86_IDIVREMU, "int_divrem_un", IREG, IREG, IREG, IREG) +MINI_OP3(OP_X86_IDIVREM2, "int_divrem2", IREG, NONE, NONE, NONE) #endif MINI_OP(OP_CREATE_SCALAR_UNSAFE, "create_scalar_unsafe", XREG, XREG, NONE) diff --git a/src/mono/mono/mini/mini-x86.c b/src/mono/mono/mini/mini-x86.c index 65dea3f7086198..7062f43104b0a0 100644 --- a/src/mono/mono/mini/mini-x86.c +++ b/src/mono/mono/mini/mini-x86.c @@ -2877,6 +2877,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; } + case OP_X86_LDIVREM: + case 
OP_X86_LDIVREMU: + case OP_X86_LDIVREM2: + g_assert_not_reached (); + break; + case OP_X86_IDIVREM: + case OP_X86_IDIVREMU: + x86_div_reg (code, ins->sreg3, ins->opcode==OP_X86_IDIVREM); + break; + case OP_X86_IDIVREM2: + if (ins->dreg != X86_EDX) + x86_mov_reg_reg (code, ins->dreg, X86_EDX); + break; case OP_IOR: x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2); break; @@ -3309,6 +3322,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_X86_XCHG: x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4); break; + case OP_X86_BSF32: + x86_bsf (code, ins->dreg, ins->sreg1); + break; + case OP_X86_BSR32: + x86_bsr (code, ins->dreg, ins->sreg1); + break; case OP_LOCALLOC: /* keep alignment */ x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index a62c277181d6f0..9a0aec4553bf44 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -4621,7 +4621,7 @@ static const IntrinGroup supported_x86_intrinsics [] = { { "Sse41", MONO_CPU_X86_SSE41, sse41_methods, sizeof (sse41_methods) }, { "Sse42", MONO_CPU_X86_SSE42, sse42_methods, sizeof (sse42_methods) }, { "Ssse3", MONO_CPU_X86_SSSE3, ssse3_methods, sizeof (ssse3_methods) }, - { "X86Base", 0, x86base_methods, sizeof (x86base_methods) }, + { "X86Base", MONO_CPU_INITED, x86base_methods, sizeof (x86base_methods), TRUE }, { "X86Serialize", 0, unsupported, sizeof (unsupported) }, }; @@ -5248,32 +5248,37 @@ emit_x86_intrinsics ( MONO_ADD_INS (cfg->cbb, ins); return ins; case SN_DivRem: { - g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs - + g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs const int divtype = is_64bit ? STACK_I8 : STACK_I4; - const int movtype = is_64bit ? OP_LMOVE : OP_MOVE; - const int storetype = is_64bit ? 
OP_STOREI8_MEMBASE_IMM : OP_STOREI4_MEMBASE_IMM; - - MonoInst* upper_and_remainder; // the register must be backed up, since it will be overwritten by DIV/IDIV - MONO_INST_NEW (cfg, upper_and_remainder, movtype); - upper_and_remainder->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg); - upper_and_remainder->sreg1 = args [1]->dreg; - upper_and_remainder->type = divtype; - MONO_ADD_INS (cfg->cbb, upper_and_remainder); - - MonoInst* div; // the division itself upper.remainder:lower / divisor -> quotient, upper.remainder - if (type_enum_is_unsigned (arg0_type)) { + const int storetype = is_64bit ? OP_STOREI8_MEMBASE_REG : OP_STOREI4_MEMBASE_REG; + const int obj_size = MONO_ABI_SIZEOF (MonoObject); + + // We must decide by the second argument, the first is always unsigned here + MonoTypeEnum arg1_type = fsig->param_count > 1 ? get_underlying_type (fsig->params [1]) : MONO_TYPE_VOID; + MonoInst* div; + MonoInst* div2; + + if (type_enum_is_unsigned (arg1_type)) { MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREMU : OP_X86_IDIVREMU); } else { MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREM : OP_X86_IDIVREM); } div->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg); - div->sreg1 = args [0]->dreg; - div->sreg2 = upper_and_remainder->dreg; // the contents of this will also be destroyed by DIV/IDIV + div->sreg1 = args [0]->dreg; // we can use this directly, reg alloc knows that the contents will be destroyed + div->sreg2 = args [1]->dreg; // same here as ^ div->sreg3 = args [2]->dreg; div->type = divtype; MONO_ADD_INS (cfg->cbb, div); + // Protect the contents of edx/rdx by assigning it a vreg. The instruction must + // immediately follow DIV/IDIV so that edx content is not modified. + // In LLVM the remainder is already calculated, just need to capture it in a vreg. + MONO_INST_NEW (cfg, div2, is_64bit ? OP_X86_LDIVREM2 : OP_X86_IDIVREM2); + div2->dreg = is_64bit ? 
alloc_lreg (cfg) : alloc_ireg (cfg); + div2->type = divtype; + MONO_ADD_INS (cfg->cbb, div2); + + // TODO: Can the creation of tuple be elided? (e.g. if deconstruction is used) MonoInst* tuple = mono_compile_create_var (cfg, fsig->ret, OP_LOCAL); MonoInst* tuple_addr; EMIT_NEW_TEMPLOADA (cfg, tuple_addr, tuple->inst_c0); @@ -5281,26 +5286,10 @@ emit_x86_intrinsics ( MonoClassField* field1 = mono_class_get_field_from_name_full (tuple->klass, "Item1", NULL); MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field1->offset - obj_size, div->dreg); MonoClassField* field2 = mono_class_get_field_from_name_full (tuple->klass, "Item2", NULL); - MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field2->offset - obj_size, upper_and_remainder->dreg); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field2->offset - obj_size, div2->dreg); EMIT_NEW_TEMPLOAD (cfg, ins, tuple->inst_c0); return ins; - - /*MonoInst* rem; // moving the remainder to the correct reg - MONO_INST_NEW (cfg, rem, movtype); - rem->dreg = args [4]->dreg; - rem->sreg1 = upper_and_remainder->dreg; - rem->type = divtype; - MONO_ADD_INS (cfg->cbb, rem); - - MonoInst* quo; // moving the quotient to the correct reg - MONO_INST_NEW (cfg, quo, movtype); - quo->dreg = args [3]->dreg; - quo->sreg1 = div->dreg; - quo->type = divtype; - MONO_ADD_INS (cfg->cbb, quo); - - return quo;*/ - } + } default: g_assert_not_reached (); } From 7a87ebaa183008be4217d630e13b565e14eb0092 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Tue, 5 Sep 2023 15:29:57 +0200 Subject: [PATCH 07/10] Assert on unconsumed remainder in fused DivRem. 
--- src/mono/mono/mini/mini-llvm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 38cddb31d9d5e2..43130fc0611414 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -5640,7 +5640,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) LLVMBuilderRef builder; gboolean has_terminator; LLVMValueRef lhs, rhs, arg3; - LLVMValueRef last_divrem; + LLVMValueRef last_divrem = NULL; int nins = 0; cbb = get_end_bb (ctx, bb); @@ -12082,6 +12082,8 @@ MONO_RESTORE_WARNING } } + g_assert (last_divrem == NULL); + if (!ctx_ok (ctx)) return; From 56de5df8f7498f71db4a7a2e79768ce1dd360d6e Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Wed, 6 Sep 2023 14:28:19 +0200 Subject: [PATCH 08/10] Reenable x86base tests. --- src/tests/issues.targets | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/tests/issues.targets b/src/tests/issues.targets index e142ff1292411b..45850d1d473656 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -1219,15 +1219,6 @@ https://github.com/dotnet/runtime/issues/54185 - - https://github.com/dotnet/runtime/issues/75767 - - - https://github.com/dotnet/runtime/issues/75767 - - - https://github.com/dotnet/runtime/issues/75767 - Mono does not define out of range fp to int conversions From 33f4b22773582fc00ac1099f354e75031956e585 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Wed, 6 Sep 2023 15:38:59 +0200 Subject: [PATCH 09/10] Fixed narrowing assignment. 
--- src/mono/mono/mini/simd-intrinsics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 9a0aec4553bf44..e796c2da4f731f 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -5249,7 +5249,7 @@ emit_x86_intrinsics ( return ins; case SN_DivRem: { g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs - const int divtype = is_64bit ? STACK_I8 : STACK_I4; + const MonoStackType divtype = is_64bit ? STACK_I8 : STACK_I4; const int storetype = is_64bit ? OP_STOREI8_MEMBASE_REG : OP_STOREI4_MEMBASE_REG; const int obj_size = MONO_ABI_SIZEOF (MonoObject); From c7019d6c3181b942163cb63ce57c884693273604 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Wed, 6 Sep 2023 16:11:59 +0200 Subject: [PATCH 10/10] Documented the divrem operation pair. --- src/mono/mono/mini/mini-ops.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index bbc8037426ac69..2813ae20351600 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1185,6 +1185,19 @@ MINI_OP3(OP_MULX_HL64, "mulxhl64", LREG, LREG, LREG, LREG) #endif #if defined(TARGET_X86) || defined(TARGET_AMD64) +/* + * These operations exist to facilitate simultaneous int/uint division + * and remainder on x86/x86-64. On that platform the DIV/IDIV instructions + * operate as follows edx:eax/reg32 -> (eax=quotient,edx=remainder). Mono + * ops only support one destination register, so two operations are needed + * to obtain two result values. One would use {long,int}_divrem[_un] first, + * and the corresponding {long,int}_divrem2 immediately afterwards. The + * first instruction returns the quotient and leaves the remainder in the + * edx(rdx) register. The second instruction puts a virtual register over + * edx, so that its value can be used. 
Note that if the first instruction + * is emitted, the second must be also (there is an assert). This works + * both in LLVM and mini. + */ MINI_OP3(OP_X86_LDIVREM, "long_divrem", LREG, LREG, LREG, LREG) MINI_OP3(OP_X86_LDIVREMU, "long_divrem_un", LREG, LREG, LREG, LREG) MINI_OP3(OP_X86_LDIVREM2, "long_divrem2", LREG, NONE, NONE, NONE)