From fdcad017bd22fd3e29dbb1918de002a8a1c43b2d Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Tue, 16 Jan 2024 11:10:38 +0000 Subject: [PATCH 1/4] [Mono] Add amd64 intrinsics for Vector128 Abs --- src/mono/mono/arch/amd64/amd64-codegen.h | 5 +++++ src/mono/mono/mini/cpu-amd64.mdesc | 1 + src/mono/mono/mini/mini-amd64.c | 16 ++++++++++++++++ src/mono/mono/mini/simd-intrinsics.c | 23 +++++++++++++++++++---- 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/arch/amd64/amd64-codegen.h b/src/mono/mono/arch/amd64/amd64-codegen.h index e92018f97385d..1978b47117795 100644 --- a/src/mono/mono/arch/amd64/amd64-codegen.h +++ b/src/mono/mono/arch/amd64/amd64-codegen.h @@ -1194,6 +1194,11 @@ typedef union { #define amd64_sse_phaddw_reg_reg(inst, dreg, sreg) emit_sse_reg_reg_op4((inst), (dreg), (sreg), 0x66, 0x0f, 0x38, 0x01) #define amd64_sse_phaddd_reg_reg(inst, dreg, sreg) emit_sse_reg_reg_op4((inst), (dreg), (sreg), 0x66, 0x0f, 0x38, 0x02) #define amd64_sse_blendpd_reg_reg(inst,dreg,sreg,imm) emit_sse_reg_reg_op4_imm((inst), (dreg), (sreg), 0x66, 0x0f, 0x3a, 0x0d, (imm)) + +#define amd64_sse_pabsb_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1c) +#define amd64_sse_pabsw_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1d) +#define amd64_sse_pabsd_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1e) + #define amd64_movq_reg_reg(inst,dreg,sreg) emit_sse_reg_reg ((inst), (dreg), (sreg), 0xf3, 0x0f, 0x7e) /* Generated from x86-codegen.h */ diff --git a/src/mono/mono/mini/cpu-amd64.mdesc b/src/mono/mono/mini/cpu-amd64.mdesc index 2658508e72b0d..ca5f1019ab15a 100644 --- a/src/mono/mono/mini/cpu-amd64.mdesc +++ b/src/mono/mono/mini/cpu-amd64.mdesc @@ -843,6 +843,7 @@ ssse3_shuffle: dest:x src1:x src2:x len:6 clob:1 sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1 sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1 vector_andnot: dest:x src1:x src2:x len:7 clob:1 +vector_integer_abs: dest:x src1:x len:16 roundp: dest:x src1:x len:10 diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index e58703d3e2b64..8119ae5f24406 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -7607,6 +7607,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_SSSE3_SHUFFLE: amd64_sse_pshufb_reg_reg (code, ins->dreg, ins->sreg2); break; + case OP_VECTOR_IABS: + switch (ins->inst_c1) { + case MONO_TYPE_I1: + amd64_sse_pabsb_reg_reg(code, ins->dreg, ins->sreg1); + break; + case MONO_TYPE_I2: + amd64_sse_pabsw_reg_reg(code, ins->dreg, ins->sreg1); + break; + case MONO_TYPE_I4: + amd64_sse_pabsd_reg_reg(code, ins->dreg, ins->sreg1); + break; + default: + g_assert_not_reached (); + break; + } + break; case OP_SSE41_ROUNDP: { if (ins->inst_c1 == MONO_TYPE_R8) amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index eb1c1c687be5b..6bb9636c5e629 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1475,10 +1475,25 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi ins->inst_c1 = arg0_type; return ins; } else { - if (!COMPILE_LLVM (cfg)) - // FIXME: - return NULL; - return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); + if (COMPILE_LLVM (cfg)) + return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); + + // SSSE3 does not support i64 + if (arg0_type == MONO_TYPE_I8) { + MonoInst *zero = emit_xzero (cfg, klass); + MonoInst *neg = emit_simd_ins (cfg, klass, OP_XBINOP, zero->dreg, args [0]->dreg); + neg->inst_c0 = OP_ISUB; + neg->inst_c1 = MONO_TYPE_I8; + MonoInst *ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, neg->dreg); + ins->inst_c0 = OP_IMAX; + ins->inst_c1 = MONO_TYPE_I8; + return ins; + } + + if (is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3)) + return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); + + return NULL; } #elif defined(TARGET_WASM) if (type_enum_is_float(arg0_type)) { From ed3cecfda40dc01aebf206e67a285c925ccb6f4d Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Tue, 16 Jan 2024 14:21:20 +0000 Subject: [PATCH 2/4] added i64 pointer support --- src/mono/mono/mini/mini-amd64.c | 1 + src/mono/mono/mini/simd-intrinsics.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index 8119ae5f24406..7f77b2107f056 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -7616,6 +7616,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_sse_pabsw_reg_reg(code, ins->dreg, ins->sreg1); break; case MONO_TYPE_I4: + case MONO_TYPE_I: amd64_sse_pabsd_reg_reg(code, ins->dreg, ins->sreg1); break; default: diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 6bb9636c5e629..23887334a8de5 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1479,7 +1479,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); // SSSE3 does not support i64 - if (arg0_type == MONO_TYPE_I8) { + if (arg0_type == MONO_TYPE_I8 || (TARGET_SIZEOF_VOID_P == 8 && arg0_type == MONO_TYPE_I)) { MonoInst *zero = emit_xzero (cfg, klass); MonoInst *neg = emit_simd_ins (cfg, klass, OP_XBINOP, zero->dreg, args [0]->dreg); neg->inst_c0 = OP_ISUB; From 8bf19c620ab0427d347f8a3d9673487cacdfa1b1 Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Wed, 17 Jan 2024 12:04:35 +0000 Subject: [PATCH 3/4] applied review suggestions --- src/mono/mono/arch/amd64/amd64-codegen.h | 6 +++--- src/mono/mono/mini/cpu-amd64.mdesc | 2 +- src/mono/mono/mini/mini-amd64.c | 8 +++++--- src/mono/mono/mini/simd-intrinsics.c | 25 +++++++++++------------- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/mono/mono/arch/amd64/amd64-codegen.h b/src/mono/mono/arch/amd64/amd64-codegen.h index 1978b47117795..e20e43357cee6 100644 --- a/src/mono/mono/arch/amd64/amd64-codegen.h +++ b/src/mono/mono/arch/amd64/amd64-codegen.h @@ -1195,9 +1195,9 @@ typedef union { #define amd64_sse_phaddd_reg_reg(inst, dreg, sreg) emit_sse_reg_reg_op4((inst), (dreg), (sreg), 0x66, 0x0f, 0x38, 0x02) #define amd64_sse_blendpd_reg_reg(inst,dreg,sreg,imm) emit_sse_reg_reg_op4_imm((inst), (dreg), (sreg), 0x66, 0x0f, 0x3a, 0x0d, (imm)) -#define amd64_sse_pabsb_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1c) -#define amd64_sse_pabsw_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1d) -#define amd64_sse_pabsd_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1e) +#define amd64_ssse3_pabsb_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1c) +#define amd64_ssse3_pabsw_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1d) +#define amd64_ssse3_pabsd_reg_reg(inst, dreg, reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x1e) #define amd64_movq_reg_reg(inst,dreg,sreg) emit_sse_reg_reg ((inst), (dreg), (sreg), 0xf3, 0x0f, 0x7e) diff --git a/src/mono/mono/mini/cpu-amd64.mdesc b/src/mono/mono/mini/cpu-amd64.mdesc index ca5f1019ab15a..631b49b5aeea2 100644 --- a/src/mono/mono/mini/cpu-amd64.mdesc +++ b/src/mono/mono/mini/cpu-amd64.mdesc @@ -843,7 +843,7 @@ ssse3_shuffle: dest:x src1:x src2:x len:6 clob:1 sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1 sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1 vector_andnot: dest:x src1:x src2:x len:7 clob:1 -vector_integer_abs: dest:x src1:x len:16 +vector_integer_abs: dest:x src1:x len:6 roundp: dest:x src1:x len:10 diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index 7f77b2107f056..c0f18b73e7e2d 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -7610,14 +7610,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_VECTOR_IABS: switch (ins->inst_c1) { case MONO_TYPE_I1: - amd64_sse_pabsb_reg_reg(code, ins->dreg, ins->sreg1); + amd64_ssse3_pabsb_reg_reg(code, ins->dreg, ins->sreg1); break; case MONO_TYPE_I2: - amd64_sse_pabsw_reg_reg(code, ins->dreg, ins->sreg1); + amd64_ssse3_pabsw_reg_reg(code, ins->dreg, ins->sreg1); break; case MONO_TYPE_I4: +#if TARGET_SIZEOF_VOID_P == 4 case MONO_TYPE_I: - amd64_sse_pabsd_reg_reg(code, ins->dreg, ins->sreg1); +#endif + amd64_ssse3_pabsd_reg_reg(code, ins->dreg, ins->sreg1); break; default: g_assert_not_reached (); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 23887334a8de5..ca88724df01f2 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1477,23 +1477,20 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } else { if (COMPILE_LLVM (cfg)) return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); - - // SSSE3 does not support i64 - if (arg0_type == MONO_TYPE_I8 || (TARGET_SIZEOF_VOID_P == 8 && arg0_type == MONO_TYPE_I)) { - MonoInst *zero = emit_xzero (cfg, klass); - MonoInst *neg = emit_simd_ins (cfg, klass, OP_XBINOP, zero->dreg, args [0]->dreg); - neg->inst_c0 = OP_ISUB; - neg->inst_c1 = MONO_TYPE_I8; - MonoInst *ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, neg->dreg); - ins->inst_c0 = OP_IMAX; - ins->inst_c1 = MONO_TYPE_I8; - return ins; - } - if (is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3)) + // SSSE3 does not support i64 + if (is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3) && + !(arg0_type == MONO_TYPE_I8 || (TARGET_SIZEOF_VOID_P == 8 && arg0_type == MONO_TYPE_I))) return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); - return NULL; + MonoInst *zero = emit_xzero (cfg, klass); + MonoInst *neg = emit_simd_ins (cfg, klass, OP_XBINOP, zero->dreg, args [0]->dreg); + neg->inst_c0 = OP_ISUB; + neg->inst_c1 = MONO_TYPE_I8; + MonoInst *ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, neg->dreg); + ins->inst_c0 = OP_IMAX; + ins->inst_c1 = MONO_TYPE_I8; + return ins; } #elif defined(TARGET_WASM) if (type_enum_is_float(arg0_type)) { From cbc905455132a59e1be4ec8d46ff7a39eacc749c Mon Sep 17 00:00:00 2001 From: Jeremi Kurdek Date: Wed, 17 Jan 2024 12:07:03 +0000 Subject: [PATCH 4/4] fixed types in non ssse3 fallback --- src/mono/mono/mini/simd-intrinsics.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index ca88724df01f2..3004bb6aa268c 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1486,10 +1486,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoInst *zero = emit_xzero (cfg, klass); MonoInst *neg = emit_simd_ins (cfg, klass, OP_XBINOP, zero->dreg, args [0]->dreg); neg->inst_c0 = OP_ISUB; - neg->inst_c1 = MONO_TYPE_I8; + neg->inst_c1 = arg0_type; + MonoInst *ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, neg->dreg); ins->inst_c0 = OP_IMAX; - ins->inst_c1 = MONO_TYPE_I8; + ins->inst_c1 = arg0_type; return ins; } #elif defined(TARGET_WASM)