Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -4065,18 +4065,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
break;
}

int idx_to = GTMREG_TO_UINT32 (ins->inst_c0) & 0xff;
int idx_from = GTMREG_TO_UINT32 (ins->inst_c0) >> 8;
if (dreg != sreg1) {
if (dreg != sreg2) {
arm_neon_mov (code, dreg, sreg1);
arm_neon_ins_e(code, t, dreg, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0);
arm_neon_ins_e(code, t, dreg, sreg2, idx_to, idx_from);
} else {
arm_neon_mov (code, NEON_TMP_REG, sreg1);
arm_neon_ins_e(code, t, NEON_TMP_REG, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0);
arm_neon_ins_e(code, t, NEON_TMP_REG, sreg2, idx_to, idx_from);
arm_neon_mov (code, dreg, NEON_TMP_REG);
}
} else {
g_assert (dreg != sreg2);
arm_neon_ins_e(code, t, dreg, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0);
arm_neon_ins_e(code, t, dreg, sreg2, idx_to, idx_from);
}
break;
}
Expand Down
37 changes: 32 additions & 5 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1077,7 +1077,18 @@ emit_vector_create_elementwise (
MonoClass *vklass = mono_class_from_mono_type_internal (vtype);
MonoInst *ins = emit_xzero (cfg, vklass);
for (int i = 0; i < fsig->param_count; ++i) {
if (!is_zero_const (args [i])) {
if (is_zero_const (args [i])) {
// Nothing to emit: the vector produced by emit_xzero already holds zero in this element.
#ifdef TARGET_ARM64
} else if (!COMPILE_LLVM (cfg) && args [i]->opcode == type_to_extract_op (type) &&
(type == MONO_TYPE_R4 || type == MONO_TYPE_R8)) {
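// Float types only: OP_INSERT_Ix takes its source from a general-purpose register, not a SIMD register, so integer element types cannot be folded this way.
// inst_c0 packs the destination element index in the low 8 bits and the source element index in the bits above.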
int srcidx = args [i]->inst_c0;
ins = emit_simd_ins (cfg, vklass, op, ins->dreg, args [i]->sreg1);
ins->inst_c0 = i | (srcidx << 8);
ins->inst_c1 = type;
#endif
} else {
ins = emit_simd_ins (cfg, vklass, op, ins->dreg, args [i]->dreg);
ins->inst_c0 = i;
ins->inst_c1 = type;
Expand All @@ -1086,6 +1097,7 @@ emit_vector_create_elementwise (
return ins;
}


#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_WASM)

static int
Expand Down Expand Up @@ -2287,10 +2299,25 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
}

int insert_op = type_to_insert_op (arg0_type);
MonoInst *ins = emit_simd_ins (cfg, klass, insert_op, args [0]->dreg, args [2]->dreg);
ins->inst_c0 = index;
ins->inst_c1 = arg0_type;
return ins;

#ifdef TARGET_ARM64
if (!COMPILE_LLVM (cfg) && args [2]->opcode == type_to_extract_op (arg0_type) && (arg0_type == MONO_TYPE_R4 || arg0_type == MONO_TYPE_R8)) {
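// On arm64, fold WithElement(v, const_2, GetElement(x, const_1)) into a single ins (element) instruction.
// Float types only: OP_INSERT_Ix takes its source from a general-purpose register, not a SIMD register, so integer element types cannot be folded this way.
// inst_c0 packs the destination element index in the low 8 bits and the source element index in the bits above.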
int srcidx = args [2]->inst_c0;
MonoInst* ins = emit_simd_ins (cfg, klass, insert_op, args [0]->dreg, args [2]->sreg1);
ins->inst_c0 = index | (srcidx << 8);
ins->inst_c1 = arg0_type;
return ins;
}
else
#endif
{
MonoInst *ins = emit_simd_ins (cfg, klass, insert_op, args [0]->dreg, args [2]->dreg);
ins->inst_c0 = index;
ins->inst_c1 = arg0_type;
return ins;
}
}

if (!COMPILE_LLVM (cfg) && fsig->params [0]->type != MONO_TYPE_GENERICINST)
Expand Down