diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index 9f0e409698ae4e..67c0864ed91bd7 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -1126,6 +1126,7 @@ arm_encode_arith_imm (int imm, guint32 *shift) #define arm_neon_umov(p, type, rd, rn, index) arm_neon_cpy_opcode ((p), (type == TYPE_I64) ? 0b1 : 0b0, 0b0, (0b00001 << (type)) | ((index) << ((type) + 1)), 0b0111, (rd), (rn)) #define arm_neon_dup_e(p, width, type, rd, rn, index) arm_neon_cpy_opcode ((p), (width), 0b0, (0b00001 << (type)) | ((index) << ((type)+1)), 0b0000, (rd), (rn)) #define arm_neon_fdup_e(p, width, type, rd, rn, index) arm_neon_dup_e ((p), (width), (type) + TYPE_I32, (rd), (rn), (index)) +#define arm_neon_dup_g(p, width, type, rd, rn) arm_neon_cpy_opcode ((p), (width), 0b0, (0b00001 << (type)), 0b0001, (rd), (rn)) /* Generic counterpart of the arm_neon_dup_g_* specific opcodes: same arm_neon_cpy_opcode arguments (0b0, 0b0001), but the width is supplied by the caller and the fourth argument is computed as 0b00001 << (type) instead of being hard-coded. */ // Specific opcodes: #define arm_neon_dup_g_8b(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_LOW, 0b0, 0b00001, 0b0001, (rd), (rn)) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 40da7ff07d5589..1b075a0926dcc2 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -514,6 +514,12 @@ extract_r4: dest:f src1:x len:4 extract_r8: dest:f src1:x len:4 arm64_xaddv: dest:x src1:x len:8 xop_ovr_x_x: dest:x src1:x len:4 +expand_i1: dest:x src1:i len:4 +expand_i2: dest:x src1:i len:4 +expand_i4: dest:x src1:i len:4 +expand_i8: dest:x src1:i len:4 +expand_r4: dest:x src1:f len:4 +expand_r8: dest:x src1:f len:4 generic_class_init: src1:a len:44 clob:c gc_safe_point: src1:i len:12 clob:c diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 512e0df0814c9d..96822e9b9b4fa5 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -3746,7 +3746,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } case OP_XCAST: break; - + case OP_EXPAND_I1: + case 
OP_EXPAND_I2: + case OP_EXPAND_I4: + case OP_EXPAND_I8: { + const int t = get_type_size_macro (ins->inst_c1); + arm_neon_dup_g (code, VREG_FULL, t, ins->dreg, ins->sreg1); + break; + } + case OP_EXPAND_R4: + case OP_EXPAND_R8: { + const int t = get_type_size_macro (ins->inst_c1); + arm_neon_fdup_e (code, VREG_FULL, t, ins->dreg, ins->sreg1, 0); + break; + } case OP_EXTRACT_I1: case OP_EXTRACT_I2: case OP_EXTRACT_I4: diff --git a/src/mono/mono/mini/simd-arm64.h b/src/mono/mono/mini/simd-arm64.h index 746bf5f3b6e1b6..e832bea74fa9c8 100644 --- a/src/mono/mono/mini/simd-arm64.h +++ b/src/mono/mono/mini/simd-arm64.h @@ -59,6 +59,9 @@ SIMD_OP (128, OP_XBINOP, OP_FMAX, WTDSS, _UNDEF, SIMD_OP (128, OP_XBINOP, OP_IMIN, WTDSS, arm_neon_smin, arm_neon_smin, arm_neon_smin, _SKIP, _UNDEF, _UNDEF) SIMD_OP (128, OP_XBINOP, OP_IMIN_UN, WTDSS, arm_neon_umin, arm_neon_umin, arm_neon_umin, _SKIP, _UNDEF, _UNDEF) SIMD_OP (128, OP_XBINOP, OP_FMIN, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmin, arm_neon_fmin) +SIMD_OP (128, OP_XBINOP, OP_IMUL, WTDSS, arm_neon_mul, arm_neon_mul, arm_neon_mul, arm_neon_mul, _UNDEF, _UNDEF) +SIMD_OP (128, OP_XBINOP, OP_FMUL, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmul, arm_neon_fmul) +SIMD_OP (128, OP_XBINOP, OP_FDIV, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fdiv, arm_neon_fdiv) SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_AND, WDSS, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and) SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_OR, WDSS, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr) SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_XOR, WDSS, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 40547d9affea2c..633167c6335841 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -268,6 
+268,28 @@ emit_simd_ins_for_sig (MonoCompile *cfg, MonoClass *klass, int opcode, int instc static gboolean type_enum_is_unsigned (MonoTypeEnum type); static gboolean type_enum_is_float (MonoTypeEnum type); +static int type_to_expand_op (MonoTypeEnum type); /* forward declaration: the definition appears further down in this file */ +/* handle_mul_div_by_scalar: emits the IR for a vector-by-scalar operation. The value in scalar_reg is first turned into a vector (OP_CREATE_SCALAR_UNSAFE when compiling with LLVM, otherwise the OP_EXPAND_* opcode returned by type_to_expand_op (arg_type)) and is then combined with vector_reg through OP_XBINOP_BYSCALAR (LLVM) or OP_XBINOP (non-LLVM), with sub_op stored in inst_c0. Callers in this file pass OP_FDIV, OP_FMUL or OP_IMUL as sub_op. Returns the last emitted instruction, i.e. the binary operation. */ +static MonoInst* +handle_mul_div_by_scalar (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum arg_type, int scalar_reg, int vector_reg, int sub_op) +{ + MonoInst* ins; + + if (COMPILE_LLVM (cfg)) { /* LLVM path: same two-instruction sequence the call sites used to emit inline */ + ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, scalar_reg, -1); + ins->inst_c1 = arg_type; /* element type recorded on the scalar-to-vector instruction */ + ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, vector_reg, ins->dreg); + ins->inst_c0 = sub_op; /* sub_op selects the arithmetic operation */ + } else { /* non-LLVM path: expand the scalar into a vector, then emit an ordinary vector-by-vector OP_XBINOP; NOTE(review): presumably the non-LLVM backend has no by-scalar binary op, confirm */ + ins = emit_simd_ins (cfg, klass, type_to_expand_op (arg_type), scalar_reg, -1); + ins->inst_c1 = arg_type; /* element type recorded on the expand instruction */ + ins = emit_simd_ins (cfg, klass, OP_XBINOP, vector_reg, ins->dreg); + ins->inst_c0 = sub_op; + ins->inst_c1 = arg_type; /* unlike the LLVM path, the binary op itself is also tagged with the element type */ + } + + return ins; /* the binary operation emitted last */ +} static MonoInst* emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSignature *fsig, MonoInst **args, MonoTypeEnum arg_type, int id) @@ -304,13 +326,9 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna case SN_op_Division: { const char *class_name = m_class_get_name (klass); if (strcmp ("Vector2", class_name) && strcmp ("Vector4", class_name) && strcmp ("Quaternion", class_name) && strcmp ("Plane", class_name)) { - if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type != MONO_TYPE_GENERICINST)) { - MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [1]->dreg, -1); - ins->inst_c1 = arg_type; - ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [0]->dreg, ins->dreg); - ins->inst_c0 = OP_FDIV; - return ins; - } else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) { + if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type != MONO_TYPE_GENERICINST)) + return 
handle_mul_div_by_scalar (cfg, klass, arg_type, args [1]->dreg, args [0]->dreg, OP_FDIV); + else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) { instc0 = OP_FDIV; break; } else { @@ -330,19 +348,11 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna case SN_op_Multiply: { const char *class_name = m_class_get_name (klass); if (strcmp ("Vector2", class_name) && strcmp ("Vector4", class_name) && strcmp ("Quaternion", class_name) && strcmp ("Plane", class_name)) { - if (fsig->params [1]->type != MONO_TYPE_GENERICINST) { - MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [1]->dreg, -1); - ins->inst_c1 = arg_type; - ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [0]->dreg, ins->dreg); - ins->inst_c0 = OP_FMUL; - return ins; - } else if (fsig->params [0]->type != MONO_TYPE_GENERICINST) { - MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [0]->dreg, -1); - ins->inst_c1 = arg_type; - ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [1]->dreg, ins->dreg); - ins->inst_c0 = OP_FMUL; - return ins; - } else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) { + if (fsig->params [1]->type != MONO_TYPE_GENERICINST) + return handle_mul_div_by_scalar (cfg, klass, arg_type, args [1]->dreg, args [0]->dreg, OP_FMUL); + else if (fsig->params [0]->type != MONO_TYPE_GENERICINST) + return handle_mul_div_by_scalar (cfg, klass, arg_type, args [0]->dreg, args [1]->dreg, OP_FMUL); + else if ((fsig->params [0]->type == MONO_TYPE_GENERICINST) && (fsig->params [1]->type == MONO_TYPE_GENERICINST)) { instc0 = OP_FMUL; break; } else { @@ -375,22 +385,18 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna instc0 = type_enum_is_unsigned (arg_type) ? 
OP_IMIN_UN : OP_IMIN; break; case SN_Multiply: - case SN_op_Multiply: - if (fsig->params [1]->type != MONO_TYPE_GENERICINST) { - MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [1]->dreg, -1); - ins->inst_c1 = arg_type; - ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [0]->dreg, ins->dreg); - ins->inst_c0 = OP_IMUL; - return ins; - } else if (fsig->params [0]->type != MONO_TYPE_GENERICINST) { - MonoInst* ins = emit_simd_ins (cfg, klass, OP_CREATE_SCALAR_UNSAFE, args [0]->dreg, -1); - ins->inst_c1 = arg_type; - ins = emit_simd_ins (cfg, klass, OP_XBINOP_BYSCALAR, args [1]->dreg, ins->dreg); - ins->inst_c0 = OP_IMUL; - return ins; - } + case SN_op_Multiply: { +#ifdef TARGET_ARM64 + if (!COMPILE_LLVM (cfg) && (arg_type == MONO_TYPE_I8 || arg_type == MONO_TYPE_U8)) + return NULL; +#endif + if (fsig->params [1]->type != MONO_TYPE_GENERICINST) + return handle_mul_div_by_scalar (cfg, klass, arg_type, args [1]->dreg, args [0]->dreg, OP_IMUL); + else if (fsig->params [0]->type != MONO_TYPE_GENERICINST) + return handle_mul_div_by_scalar (cfg, klass, arg_type, args [0]->dreg, args [1]->dreg, OP_IMUL); instc0 = OP_IMUL; break; + } case SN_Subtract: case SN_op_Subtraction: instc0 = OP_ISUB; @@ -799,9 +805,9 @@ type_enum_is_float (MonoTypeEnum type) } static int -type_to_expand_op (MonoType *type) +type_to_expand_op (MonoTypeEnum type) { - switch (type->type) { + switch (type) { case MONO_TYPE_I1: case MONO_TYPE_U1: return OP_EXPAND_I1; @@ -1262,6 +1268,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi case SN_ToScalar: case SN_Floor: case SN_Ceiling: + case SN_Divide: + case SN_Multiply: break; default: return NULL; @@ -1447,7 +1455,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) return NULL; if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) - return emit_simd_ins (cfg, klass, type_to_expand_op 
(etype), args [0]->dreg, -1); + return emit_simd_ins (cfg, klass, type_to_expand_op (etype->type), args [0]->dreg, -1); else if (is_create_from_half_vectors_overload (fsig)) return emit_simd_ins (cfg, klass, OP_XCONCAT, args [0]->dreg, args [1]->dreg); else if (is_elementwise_create_overload (fsig, etype)) @@ -1940,7 +1948,7 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign #ifdef TARGET_ARM64 if (!COMPILE_LLVM (cfg)) { return NULL; - /*if (size != 16) + if (size != 16) return NULL; switch (id) { case SN_get_One: @@ -1955,10 +1963,12 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign case SN_op_ExclusiveOr: case SN_op_Equality: case SN_op_Inequality: + case SN_op_Division: + case SN_op_Multiply: break; default: return NULL; - }*/ + } } #endif @@ -2166,7 +2176,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f gboolean indirect = FALSE; int dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect); - int opcode = type_to_expand_op (etype); + int opcode = type_to_expand_op (etype->type); ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1); for (int i = 1; i < fsig->param_count; ++i) { @@ -2639,7 +2649,7 @@ emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSig if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) { int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL); - int opcode = type_to_expand_op (etype); + int opcode = type_to_expand_op (etype->type); ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1); ins->dreg = dreg; return ins; @@ -3408,7 +3418,7 @@ emit_arm64_intrinsics ( break; } } - return emit_simd_ins (cfg, ret_klass, type_to_expand_op (rtype), scalar_src_reg, -1); + return emit_simd_ins (cfg, ret_klass, type_to_expand_op (rtype->type), scalar_src_reg, -1); } case SN_Extract: { int extract_op = type_to_xextract_op (arg0_type); @@ -3448,7 +3458,7 @@ emit_arm64_intrinsics ( 
MonoType *etype = get_vector_t_elem_type (fsig->ret); gboolean is_unsigned = type_is_unsigned (fsig->ret); gboolean scalar = id == SN_ShiftLeftLogicalSaturateScalar; - int s2v = scalar ? OP_CREATE_SCALAR_UNSAFE : type_to_expand_op (etype); + int s2v = scalar ? OP_CREATE_SCALAR_UNSAFE : type_to_expand_op (etype->type); int xop = scalar ? OP_XOP_OVR_SCALAR_X_X_X : OP_XOP_OVR_X_X_X; int iid = is_unsigned ? INTRINS_AARCH64_ADV_SIMD_UQSHL : INTRINS_AARCH64_ADV_SIMD_SQSHL; MonoInst *shift_vector = emit_simd_ins (cfg, ret_klass, s2v, args [1]->dreg, -1); @@ -4851,7 +4861,7 @@ emit_wasm_supported_intrinsics ( case SN_Splat: { MonoType *etype = get_vector_t_elem_type (fsig->ret); g_assert (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)); - return emit_simd_ins (cfg, klass, type_to_expand_op (etype), args [0]->dreg, -1); + return emit_simd_ins (cfg, klass, type_to_expand_op (etype->type), args [0]->dreg, -1); } case SN_Dot: return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, INTRINS_WASM_DOT, -1, fsig, args);