@@ -1068,21 +1068,42 @@ emit_hardware_intrinsics (
10681068 return custom_emit (cfg , fsig , args , klass , intrin_group , info , id , arg0_type , is_64bit );
10691069}
10701070
/*
 * emit_vector_insert_element:
 *
 *   Emit the instruction that writes ELEMENT into lane INDEX of the vector held in
 * INS->dreg, and return the instruction that holds the resulting vector.
 *
 *   cfg            - compile context, forwarded to emit_simd_ins ().
 *   vklass         - vector class, forwarded to emit_simd_ins () for the new instruction.
 *   ins            - instruction whose dreg is used as the vector source operand.
 *   type           - element type; selects the opcode and is stored in inst_c1.
 *   element        - instruction producing the value to insert.
 *   index          - destination lane, stored in inst_c0.
 *   is_zero_inited - TRUE when the caller states that the vector is already all-zero,
 *                    which allows a zero-constant ELEMENT to be skipped entirely.
 *
 *   Returns INS unchanged when nothing had to be emitted, otherwise the newly emitted
 * insert instruction.
 */
1071+ static MonoInst *
1072+ emit_vector_insert_element (
1073+ MonoCompile * cfg , MonoClass * vklass , MonoInst * ins , MonoTypeEnum type , MonoInst * element ,
1074+ int index , gboolean is_zero_inited )
1075+ {
1076+ int op = type_to_insert_op (type ); // opcode chosen from the element type (presumably one of the OP_INSERT_* opcodes)
1077+
1078+ if (is_zero_inited && is_zero_const (element )) {
1079+ // element already set to zero: emit nothing, INS is returned as-is
1080+ #ifdef TARGET_ARM64
1081+ } else if (!COMPILE_LLVM (cfg ) && element -> opcode == type_to_extract_op (type ) &&
1082+ (type == MONO_TYPE_R4 || type == MONO_TYPE_R8 )) {
1083+ // OP_INSERT_Ix inserts from GP reg, not SIMD. Cannot optimize for int types.
1084+ ins = emit_simd_ins (cfg , vklass , op , ins -> dreg , element -> sreg1 ); // second source is the extract's input (element->sreg1), not its result
1085+ ins -> inst_c0 = index | ((element -> inst_c0 ) << 8 ); // low 8 bits: destination lane; bits 8 and up: element->inst_c0 (presumably the extract's source lane - TODO confirm)
1086+ ins -> inst_c1 = type ;
1087+ #endif
1088+ } else {
1089+ ins = emit_simd_ins (cfg , vklass , op , ins -> dreg , element -> dreg ); // generic path: insert the value found in element->dreg
1090+ ins -> inst_c0 = index ;
1091+ ins -> inst_c1 = type ;
1092+ }
1093+
1094+ return ins ;
1095+ }
1096+
/*
 * emit_vector_create_elementwise:
 *
 *   Build a vector of the class derived from VTYPE out of the individual arguments in
 * ARGS: start from the emit_xzero () result and insert args [i] at lane i for each of
 * the fsig->param_count parameters. Zero-constant arguments emit no insert: the removed
 * ('-') lines test is_zero_const () inline, the added ('+') lines pass
 * is_zero_inited == TRUE to emit_vector_insert_element ().
 *
 *   Returns the instruction whose result is the finished vector (the emit_xzero ()
 * instruction itself when every argument was a zero constant).
 */
10711097static MonoInst *
10721098emit_vector_create_elementwise (
10731099 MonoCompile * cfg , MonoMethodSignature * fsig , MonoType * vtype ,
10741100 MonoTypeEnum type , MonoInst * * args )
10751101{
1076- int op = type_to_insert_op (type );
10771102 MonoClass * vklass = mono_class_from_mono_type_internal (vtype );
10781103 MonoInst * ins = emit_xzero (cfg , vklass ); // starting value of the insert chain; treated as all-zero below
1079- for (int i = 0 ; i < fsig -> param_count ; ++ i ) {
1080- if (!is_zero_const (args [i ])) {
1081- ins = emit_simd_ins (cfg , vklass , op , ins -> dreg , args [i ]-> dreg );
1082- ins -> inst_c0 = i ;
1083- ins -> inst_c1 = type ;
1084- }
1085- }
1104+ for (int i = 0 ; i < fsig -> param_count ; ++ i )
1105+ ins = emit_vector_insert_element (cfg , vklass , ins , type , args [i ], i , TRUE); // TRUE: the vector starts out zeroed, so zero constants need no insert
1106+
10861107 return ins ;
10871108}
10881109
@@ -2282,17 +2303,12 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
22822303 if (args [1 ]-> opcode == OP_ICONST ) {
22832304 // If the index is provably a constant, we can generate vastly better code.
22842305 int index = GTMREG_TO_INT (args [1 ]-> inst_c0 );
2285-
22862306 if (index < 0 || index >= elems ) {
22872307 MONO_EMIT_NEW_BIALU_IMM (cfg , OP_COMPARE_IMM , -1 , args [1 ]-> dreg , elems );
22882308 MONO_EMIT_NEW_COND_EXC (cfg , GE_UN , "ArgumentOutOfRangeException" );
22892309 }
22902310
2291- int insert_op = type_to_insert_op (arg0_type );
2292- MonoInst * ins = emit_simd_ins (cfg , klass , insert_op , args [0 ]-> dreg , args [2 ]-> dreg );
2293- ins -> inst_c0 = index ;
2294- ins -> inst_c1 = arg0_type ;
2295- return ins ;
2311+ return emit_vector_insert_element (cfg , klass , args [0 ], arg0_type , args [2 ], index , FALSE);
22962312 }
22972313
22982314 if (!COMPILE_LLVM (cfg ) && fsig -> params [0 ]-> type != MONO_TYPE_GENERICINST )
@@ -2690,11 +2706,9 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
26902706 ins -> dreg = dreg ;
26912707 ins -> inst_c1 = MONO_TYPE_R4 ;
26922708
2693- for (int i = 1 ; i < fsig -> param_count ; ++ i ) {
2694- ins = emit_simd_ins (cfg , klass , OP_INSERT_R4 , ins -> dreg , args [i + 1 ]-> dreg );
2695- ins -> inst_c0 = i ;
2696- ins -> inst_c1 = MONO_TYPE_R4 ;
2697- }
2709+ for (int i = 1 ; i < fsig -> param_count ; ++ i )
2710+ ins = emit_vector_insert_element (cfg , klass , ins , MONO_TYPE_R4 , args [i + 1 ], i , FALSE);
2711+
26982712 ins -> dreg = dreg ;
26992713
27002714 if (indirect ) {
@@ -2835,10 +2849,14 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
28352849 MONO_EMIT_NEW_COND_EXC (cfg , GE_UN , "ArgumentOutOfRangeException" );
28362850 }
28372851
2838- ins = emit_simd_ins (cfg , klass , OP_INSERT_R4 , dreg , args [2 ]-> dreg );
2839- ins -> inst_c0 = index ;
2840- ins -> inst_c1 = MONO_TYPE_R4 ;
2841- ins -> dreg = dreg ;
2852+ if (args [0 ]-> dreg == dreg ) {
2853+ ins = emit_vector_insert_element (cfg , klass , args [0 ], MONO_TYPE_R4 , args [2 ], index , FALSE);
2854+ } else {
2855+ ins = emit_simd_ins (cfg , klass , OP_INSERT_R4 , dreg , args [2 ]-> dreg );
2856+ ins -> inst_c0 = index ;
2857+ ins -> inst_c1 = MONO_TYPE_R4 ;
2858+ ins -> dreg = dreg ;
2859+ }
28422860
28432861 if (indirect ) {
28442862 EMIT_NEW_STORE_MEMBASE (cfg , ins , OP_STOREX_MEMBASE , args [0 ]-> dreg , 0 , dreg );
0 commit comments