Skip to content

Commit 6486250

Browse files
authored
[mono][jit] Fuse SIMD extract and insert on arm64 (#92714)
* Fuse extract and insert into arm64 ins. * Extending to vector4. * Ins index is now a function. Removed newline. * Refactoring. * SIMD extract ops have no side effects (to facilitate their elimination). Fixed bug. * Fixed arm32 build.
1 parent 55945c6 commit 6486250

File tree

3 files changed

+54
-26
lines changed

3 files changed

+54
-26
lines changed

src/mono/mono/mini/method-to-ir.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12746,6 +12746,14 @@ mono_op_no_side_effects (int opcode)
1274612746
case OP_NOT_NULL:
1274712747
case OP_IL_SEQ_POINT:
1274812748
case OP_RTTYPE:
12749+
#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_ARM64)
12750+
case OP_EXTRACT_I1:
12751+
case OP_EXTRACT_I2:
12752+
case OP_EXTRACT_I4:
12753+
case OP_EXTRACT_I8:
12754+
case OP_EXTRACT_R4:
12755+
case OP_EXTRACT_R8:
12756+
#endif
1274912757
return TRUE;
1275012758
default:
1275112759
return FALSE;

src/mono/mono/mini/mini-arm64.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4061,18 +4061,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
40614061
break;
40624062
}
40634063

4064+
int idx_to = GTMREG_TO_UINT32 (ins->inst_c0) & 0xff;
4065+
int idx_from = GTMREG_TO_UINT32 (ins->inst_c0) >> 8;
40644066
if (dreg != sreg1) {
40654067
if (dreg != sreg2) {
40664068
arm_neon_mov (code, dreg, sreg1);
4067-
arm_neon_ins_e(code, t, dreg, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0);
4069+
arm_neon_ins_e(code, t, dreg, sreg2, idx_to, idx_from);
40684070
} else {
40694071
arm_neon_mov (code, NEON_TMP_REG, sreg1);
4070-
arm_neon_ins_e(code, t, NEON_TMP_REG, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0);
4072+
arm_neon_ins_e(code, t, NEON_TMP_REG, sreg2, idx_to, idx_from);
40714073
arm_neon_mov (code, dreg, NEON_TMP_REG);
40724074
}
40734075
} else {
40744076
g_assert (dreg != sreg2);
4075-
arm_neon_ins_e(code, t, dreg, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0);
4077+
arm_neon_ins_e(code, t, dreg, sreg2, idx_to, idx_from);
40764078
}
40774079
break;
40784080
}

src/mono/mono/mini/simd-intrinsics.c

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,21 +1068,42 @@ emit_hardware_intrinsics (
10681068
return custom_emit (cfg, fsig, args, klass, intrin_group, info, id, arg0_type, is_64bit);
10691069
}
10701070

/*
 * emit_vector_insert_element:
 *
 *   Emit the IR that writes ELEMENT into lane INDEX of the vector produced by
 * INS, and return the instruction that yields the resulting vector.
 *
 * cfg:            compilation context the new instruction is emitted into.
 * vklass:         vector class passed through to emit_simd_ins ().
 * ins:            instruction whose dreg holds the vector being updated; it is
 *                 returned unchanged when no insert needs to be emitted.
 * type:           element type; selects the insert opcode via
 *                 type_to_insert_op () and is stored in inst_c1.
 * element:        instruction producing the value to insert.
 * index:          destination lane, stored in inst_c0.
 * is_zero_inited: TRUE when the caller guarantees the vector in INS is already
 *                 all zeros, which allows zero constants to be skipped.
 */
1071+
static MonoInst*
1072+
emit_vector_insert_element (
1073+
MonoCompile* cfg, MonoClass* vklass, MonoInst* ins, MonoTypeEnum type, MonoInst* element,
1074+
int index, gboolean is_zero_inited)
1075+
{
1076+
int op = type_to_insert_op (type);
1077+
1078+
if (is_zero_inited && is_zero_const (element)) {
1079+
// The lane is already zero in a zero-initialized vector: emit nothing and return INS as is.
1080+
#ifdef TARGET_ARM64
1081+
} else if (!COMPILE_LLVM (cfg) && element->opcode == type_to_extract_op (type) &&
1082+
(type == MONO_TYPE_R4 || type == MONO_TYPE_R8)) {
1083+
// ELEMENT is itself a lane extract of the same float type, so fuse the two:
// read straight from the extract's source vector (element->sreg1) and pack
// both lane numbers into inst_c0 (destination lane in the low 8 bits, source
// lane, taken from the extract's inst_c0, shifted left by 8).
// OP_INSERT_Ix inserts from GP reg, not SIMD. Cannot optimize for int types.
1084+
ins = emit_simd_ins (cfg, vklass, op, ins->dreg, element->sreg1);
1085+
ins->inst_c0 = index | ((element->inst_c0) << 8);
1086+
ins->inst_c1 = type;
1087+
#endif
1088+
} else {
// General case: insert the value held in ELEMENT's dreg into lane INDEX.
1089+
ins = emit_simd_ins (cfg, vklass, op, ins->dreg, element->dreg);
1090+
ins->inst_c0 = index;
1091+
ins->inst_c1 = type;
1092+
}
1093+
1094+
return ins;
1095+
}
1096+
10711097
static MonoInst *
10721098
emit_vector_create_elementwise (
10731099
MonoCompile *cfg, MonoMethodSignature *fsig, MonoType *vtype,
10741100
MonoTypeEnum type, MonoInst **args)
10751101
{
1076-
int op = type_to_insert_op (type);
10771102
MonoClass *vklass = mono_class_from_mono_type_internal (vtype);
10781103
MonoInst *ins = emit_xzero (cfg, vklass);
1079-
for (int i = 0; i < fsig->param_count; ++i) {
1080-
if (!is_zero_const (args [i])) {
1081-
ins = emit_simd_ins (cfg, vklass, op, ins->dreg, args [i]->dreg);
1082-
ins->inst_c0 = i;
1083-
ins->inst_c1 = type;
1084-
}
1085-
}
1104+
for (int i = 0; i < fsig->param_count; ++i)
1105+
ins = emit_vector_insert_element (cfg, vklass, ins, type, args[i], i, TRUE);
1106+
10861107
return ins;
10871108
}
10881109

@@ -2282,17 +2303,12 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
22822303
if (args [1]->opcode == OP_ICONST) {
22832304
// If the index is provably a constant, we can generate vastly better code.
22842305
int index = GTMREG_TO_INT (args[1]->inst_c0);
2285-
22862306
if (index < 0 || index >= elems) {
22872307
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
22882308
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
22892309
}
22902310

2291-
int insert_op = type_to_insert_op (arg0_type);
2292-
MonoInst *ins = emit_simd_ins (cfg, klass, insert_op, args [0]->dreg, args [2]->dreg);
2293-
ins->inst_c0 = index;
2294-
ins->inst_c1 = arg0_type;
2295-
return ins;
2311+
return emit_vector_insert_element (cfg, klass, args [0], arg0_type, args [2], index, FALSE);
22962312
}
22972313

22982314
if (!COMPILE_LLVM (cfg) && fsig->params [0]->type != MONO_TYPE_GENERICINST)
@@ -2690,11 +2706,9 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
26902706
ins->dreg = dreg;
26912707
ins->inst_c1 = MONO_TYPE_R4;
26922708

2693-
for (int i = 1; i < fsig->param_count; ++i) {
2694-
ins = emit_simd_ins (cfg, klass, OP_INSERT_R4, ins->dreg, args [i + 1]->dreg);
2695-
ins->inst_c0 = i;
2696-
ins->inst_c1 = MONO_TYPE_R4;
2697-
}
2709+
for (int i = 1; i < fsig->param_count; ++i)
2710+
ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, args [i + 1], i, FALSE);
2711+
26982712
ins->dreg = dreg;
26992713

27002714
if (indirect) {
@@ -2835,10 +2849,14 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
28352849
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
28362850
}
28372851

2838-
ins = emit_simd_ins (cfg, klass, OP_INSERT_R4, dreg, args [2]->dreg);
2839-
ins->inst_c0 = index;
2840-
ins->inst_c1 = MONO_TYPE_R4;
2841-
ins->dreg = dreg;
2852+
if (args [0]->dreg == dreg) {
2853+
ins = emit_vector_insert_element (cfg, klass, args [0], MONO_TYPE_R4, args [2], index, FALSE);
2854+
} else {
2855+
ins = emit_simd_ins (cfg, klass, OP_INSERT_R4, dreg, args [2]->dreg);
2856+
ins->inst_c0 = index;
2857+
ins->inst_c1 = MONO_TYPE_R4;
2858+
ins->dreg = dreg;
2859+
}
28422860

28432861
if (indirect) {
28442862
EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, args [0]->dreg, 0, dreg);

0 commit comments

Comments
 (0)