diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 0f8a3a2d234550..99f55aedb3a6ef 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -122,7 +122,7 @@ r8const: dest:f len:20 label: len:0 store_membase_imm: dest:b len:20 store_membase_reg: dest:b src1:i len:20 -storex_membase: dest:b src1:x len:16 +storex_membase: dest:b src1:x len:20 storei1_membase_imm: dest:b len:20 storei1_membase_reg: dest:b src1:i len:12 storei2_membase_imm: dest:b len:20 @@ -136,7 +136,7 @@ storei1_memindex: dest:b src1:i src2:i len:4 storei2_memindex: dest:b src1:i src2:i len:4 storei4_memindex: dest:b src1:i src2:i len:4 load_membase: dest:i src1:b len:20 -loadx_membase: dest:x src1:b len:16 +loadx_membase: dest:x src1:b len:20 loadi1_membase: dest:i src1:b len:32 loadu1_membase: dest:i src1:b len:32 loadi2_membase: dest:i src1:b len:32 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index dafaabaa5ba06f..0e3da92fd41c0d 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -437,6 +437,7 @@ get_vector_size_macro (MonoInst *ins) g_assert (ins->klass); int size = mono_class_value_size (ins->klass, NULL); switch (size) { + case 12: case 16: return VREG_FULL; case 8: @@ -4064,13 +4065,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_STOREX_MEMBASE: if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8) code = emit_strfpx (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); - else + else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12) { + arm_neon_ins_e (code, SIZE_4, ARMREG_IP0, sreg1, 0, 2); + code = emit_strfpx (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); + code = emit_strfpw (code, ARMREG_IP0, dreg , GTMREG_TO_INT (ins->inst_offset + 8)); + } else code = emit_strfpq (code, sreg1, dreg, GTMREG_TO_INT (ins->inst_offset)); break; case OP_LOADX_MEMBASE: if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8) code = emit_ldrfpx (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); - else + else if (ins->klass && mono_class_value_size (ins->klass, NULL) == 12) { + code = emit_ldrfpx (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); + code = emit_ldrfpw (code, ARMREG_IP0, sreg1, GTMREG_TO_INT (ins->inst_offset + 8)); + arm_neon_ins_e (code, SIZE_4, dreg, ARMREG_IP0, 2, 0); + } else code = emit_ldrfpq (code, dreg, sreg1, GTMREG_TO_INT (ins->inst_offset)); break; case OP_XMOVE: diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 77e389a51cf9a2..a0b197866a0dbb 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -469,12 +469,12 @@ ovr_tag_from_mono_vector_class (MonoClass *klass) llvm_ovr_tag_t ret = 0; switch (size) { case 8: ret |= INTRIN_vector64; break; + case 12: ret |= INTRIN_vector128; break; case 16: ret |= INTRIN_vector128; break; } const char *class_name = m_class_get_name (klass); - if (!strcmp ("Vector2", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Quaternion", class_name) || !strcmp ("Plane", class_name)) { - // FIXME: Support Vector3 + if (!strcmp ("Vector2", class_name) || !strcmp ("Vector3", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Quaternion", class_name) || !strcmp ("Plane", class_name)) { return ret | INTRIN_float32; } @@ -507,6 +507,7 @@ ovr_tag_from_llvm_type (LLVMTypeRef type) unsigned int bits = mono_llvm_get_prim_size_bits (type); switch (bits) { case 64: ret |= INTRIN_vector64; break; + case 96: ret |= INTRIN_vector128; break; case 128: ret |= INTRIN_vector128; break; default: g_assert_not_reached (); } @@ -4168,9 +4169,18 @@ emit_entry_bb (EmitContext *ctx, LLVMBuilderRef builder) case LLVMArgVtypeByRef: case LLVMArgAsFpArgs: { - if (mini_class_is_simd (ctx->cfg, mono_class_from_mono_type_internal (ainfo->type))) - /* Treat these as normal values */ - ctx->values [reg] = LLVMBuildLoad2 (builder, ctx->addresses [reg]->type, ctx->addresses [reg]->value, "simd_vtype"); + MonoClass *klass = mono_class_from_mono_type_internal (ainfo->type); + if (mini_class_is_simd (ctx->cfg, klass)) { + LLVMValueRef loadedVector = LLVMBuildLoad2 (builder, ctx->addresses [reg]->type, ctx->addresses [reg]->value, "simd_vtype"); + + if (mono_class_value_size (klass, NULL) == 12) { + LLVMValueRef zero = LLVMConstReal (LLVMFloatType (), 0.0); + LLVMValueRef index = LLVMConstInt (LLVMInt32Type (), 3, 0); + loadedVector = LLVMBuildInsertElement (builder, loadedVector, zero, index, "insert_zero"); + } + + ctx->values [reg] = loadedVector; + } break; } default: @@ -6195,13 +6205,17 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case LLVMArgFpStruct: { LLVMTypeRef ret_type = LLVMGetReturnType (ctx->lmethod_type); LLVMValueRef retval, elem; - gboolean is_simd = mini_class_is_simd (ctx->cfg, mono_class_from_mono_type_internal (sig->ret)); + + MonoClass *klass= mono_class_from_mono_type_internal (sig->ret); + gboolean is_simd = mini_class_is_simd (ctx->cfg, klass); if (is_simd) { retval = LLVMConstNull(ret_type); if (lhs) { - int len = LLVMGetVectorSize (LLVMTypeOf (lhs)); + // Vector3: ret_type is Vector3, lhs is Vector3 represented as a Vector4 (three elements + zero). We need to extract only the first 3 elements from lhs. + int len = mono_class_value_size (klass, NULL) == 12 ? 3 : LLVMGetVectorSize (LLVMTypeOf (lhs)); + for (int i = 0; i < len; i++) { elem = LLVMBuildExtractElement (builder, lhs, const_int32 (i), "extract_elem"); retval = LLVMBuildInsertValue (builder, retval, elem, i, "insert_val_struct"); @@ -8301,6 +8315,11 @@ MONO_RESTORE_WARNING src = convert (ctx, LLVMBuildAdd (builder, convert (ctx, values [ins->inst_basereg], IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_offset, FALSE), ""), pointer_type (t)); values [ins->dreg] = mono_llvm_build_aligned_load (builder, t, src, "", FALSE, 1); + if (mono_class_value_size (ins->klass, NULL) == 12) { + LLVMValueRef zero = LLVMConstReal (LLVMFloatType (), 0.0); + LLVMValueRef index = LLVMConstInt (LLVMInt32Type (), 3, 0); + values [ins->dreg] = LLVMBuildInsertElement (builder, values [ins->dreg], zero, index, "insert_zero"); + } break; } case OP_STOREX_MEMBASE: { diff --git a/src/mono/mono/mini/mini-runtime.c b/src/mono/mono/mini/mini-runtime.c index 2a4a2d75c16ac7..f5212c9143948a 100644 --- a/src/mono/mono/mini/mini-runtime.c +++ b/src/mono/mono/mini/mini-runtime.c @@ -4489,8 +4489,7 @@ init_class (MonoClass *klass) #ifdef TARGET_ARM64 if (!strcmp (m_class_get_name_space (klass), "System.Numerics")) { - // FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501 - if (!strcmp (name, "Vector2") || !strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane")) + if (!strcmp (name, "Vector2") || !strcmp (name, "Vector3") ||!strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane")) mono_class_set_is_simd_type (klass, TRUE); } #endif diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index b6935d312c6ee8..ff424783275204 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -4591,6 +4591,10 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) } else if (!strcmp (klass_name, "Vector2")) { *nelems = 2; return MONO_TYPE_R4; + } else if (!strcmp (klass_name, "Vector3")) { + // For LLVM SIMD support, Vector3 is treated as a 4-element vector (three elements + zero). + *nelems = 4; + return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0]; int size = mono_class_value_size (klass, NULL); @@ -4602,3 +4606,4 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) return MONO_TYPE_VOID; } } + diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index cd2cad8e7dcece..c9adaf023f7e97 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2998,7 +2998,7 @@ mini_class_is_simd (MonoCompile *cfg, MonoClass *klass) return TRUE; int size = mono_type_size (m_class_get_byval_arg (klass), NULL); #ifdef TARGET_ARM64 - if (size == 8 || size == 16) + if (size == 8 || size == 12 || size == 16) return TRUE; #else if (size == 16) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 8955ede9b10506..60f81123512893 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -554,13 +554,14 @@ static MonoInst* emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, MonoInst *arg1, MonoInst *arg2) { #ifdef TARGET_ARM64 + gint32 simd_size = mono_class_value_size (klass, NULL); if (!COMPILE_LLVM (cfg)) { MonoInst* cmp = emit_xcompare (cfg, klass, element_type, arg1, arg2); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); return ret; - } else if (mono_class_value_size (klass, NULL) == 16) { + } else if (simd_size == 12 || simd_size == 16) { return emit_simd_ins (cfg, klass, OP_XEQUAL_ARM64_V128_FAST, arg1->dreg, arg2->dreg); } else { return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); @@ -649,10 +650,15 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t MonoClass *vector_class = mono_class_from_mono_type_internal (vector_type); int vector_size = mono_class_value_size (vector_class, NULL); int element_size; - - // FIXME: Support Vector3 + guint32 nelems; - mini_get_simd_type_info (vector_class, &nelems); + mini_get_simd_type_info (vector_class, &nelems); + + // Override nelems for Vector3, with actual number of elements, instead of treating it as a 4-element vector (three elements + zero). + const char *klass_name = m_class_get_name (vector_class); + if (!strcmp (klass_name, "Vector3")) + nelems = 3; + element_size = vector_size / nelems; gboolean has_single_element = vector_size == element_size; @@ -2721,6 +2727,17 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f for (int i = 1; i < fsig->param_count; ++i) ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, args [i + 1], i, FALSE); + if (len == 3) { + static float r4_0 = 0; + MonoInst *zero; + int zero_dreg = alloc_freg (cfg); + MONO_INST_NEW (cfg, zero, OP_R4CONST); + zero->inst_p0 = (void*)&r4_0; + zero->dreg = zero_dreg; + MONO_ADD_INS (cfg->cbb, zero); + ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 3, FALSE); + } + ins->dreg = dreg; if (indirect) { @@ -5923,7 +5940,7 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom if (!strcmp (class_ns, "System.Numerics")) { // FIXME: Support Vector2 https://github.com/dotnet/runtime/issues/81501 - if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector4") || + if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector3") || !strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane")) return emit_vector_2_3_4 (cfg, cmethod, fsig, args); }