diff --git a/src/mono/mono/mini/interp/interp-simd-intrins.def b/src/mono/mono/mini/interp/interp-simd-intrins.def index 68f535c70fc49..ba317d6a13d24 100644 --- a/src/mono/mono/mini/interp/interp-simd-intrins.def +++ b/src/mono/mono/mini/interp/interp-simd-intrins.def @@ -1,34 +1,46 @@ // FIXME: SIMD causes compile errors on WASI #ifdef HOST_BROWSER #ifndef INTERP_WASM_SIMD_INTRINSIC_V_P -#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode) #endif #ifndef INTERP_WASM_SIMD_INTRINSIC_V_V -#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode) +#endif +#ifndef INTERP_WASM_SIMD_INTRINSIC_V_C1 +#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, c_function, wasm_opcode) #endif #ifndef INTERP_WASM_SIMD_INTRINSIC_I_V -#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode) #endif #ifndef INTERP_WASM_SIMD_INTRINSIC_V_VV -#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(id, _mono_interp_simd_ ## id, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) 
INTERP_SIMD_INTRINSIC_P_PP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode) #endif #ifndef INTERP_WASM_SIMD_INTRINSIC_V_VI -#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(id, _mono_interp_simd_ ## id, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode) +#endif +#ifndef INTERP_WASM_SIMD_INTRINSIC_V_C2 +#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, c_function, wasm_opcode) #endif #ifndef INTERP_WASM_SIMD_INTRINSIC_V_VVV -#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PPP(id, _mono_interp_simd_ ## id, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PPP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode) +#endif +#ifndef INTERP_WASM_SIMD_INTRINSIC_V_C3 +#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PPP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, c_function, wasm_opcode) #endif #else // HOST_BROWSER -#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) -#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) -#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) -#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) -#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) -#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, 
wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode) #endif // HOST_BROWSER // The third argument is the wasm opcode that corresponds to this simd intrinsic, if any. -// Specify 0 if there is no exact 1:1 mapping (the opcode can still be implemented manually in the jiterpreter.) +// Specify -1 if there is no exact 1:1 mapping (the opcode can still be implemented manually in the jiterpreter.) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_ADD, interp_v128_i1_op_addition, 110) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_ADD, interp_v128_i2_op_addition, 142) @@ -43,12 +55,12 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_SUB, interp_v128_r4_op INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_AND, interp_v128_op_bitwise_and, 78) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_op_bitwise_or, 80) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, 0) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, 0) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply, 0) +INTERP_SIMD_INTRINSIC_P_PP 
(INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_MULTIPLY, interp_v128_i2_op_multiply, 149) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_MULTIPLY, interp_v128_i4_op_multiply, 181) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_r4_op_multiply, 230) @@ -92,23 +104,23 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_EQUALS, interp_v128_i4 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_EQUALS, interp_v128_r4_equals, 65) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_EQUALS, interp_v128_i8_equals, 214) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE_SCALAR, interp_v128_i1_create_scalar, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE_SCALAR, interp_v128_i2_create_scalar, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE_SCALAR, interp_v128_i4_create_scalar, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE_SCALAR, interp_v128_i8_create_scalar, 0) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE_SCALAR, interp_v128_i1_create_scalar, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE_SCALAR, interp_v128_i2_create_scalar, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE_SCALAR, interp_v128_i4_create_scalar, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE_SCALAR, interp_v128_i8_create_scalar, -1) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_EXTRACT_MSB, interp_v128_i1_extract_msb, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_EXTRACT_MSB, interp_v128_i2_extract_msb, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_EXTRACT_MSB, interp_v128_i4_extract_msb, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_EXTRACT_MSB, interp_v128_i8_extract_msb, 0) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_EXTRACT_MSB, 
interp_v128_i1_extract_msb, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_EXTRACT_MSB, interp_v128_i2_extract_msb, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_EXTRACT_MSB, interp_v128_i4_extract_msb, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_EXTRACT_MSB, interp_v128_i8_extract_msb, -1) -// wasm opcode is 0 because it has a different calling convention +// wasm opcode is -1 (no 1:1 mapping) because it has a different calling convention -INTERP_SIMD_INTRINSIC_P_PPP (INTERP_SIMD_INTRINSIC_V128_CONDITIONAL_SELECT, interp_v128_conditional_select, 0) +INTERP_SIMD_INTRINSIC_P_PPP (INTERP_SIMD_INTRINSIC_V128_CONDITIONAL_SELECT, interp_v128_conditional_select, -1) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE, interp_v128_i1_create, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE, interp_v128_i2_create, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE, interp_v128_i4_create, 0) -INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE, interp_v128_i8_create, 0) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE, interp_v128_i1_create, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE, interp_v128_i2_create, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE, interp_v128_i4_create, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE, interp_v128_i8_create, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_AND_NOT, interp_v128_and_not, 79) @@ -116,72 +128,259 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U2_LESS_THAN_EQUAL, inter // wasm only has a swizzle opcode for i8x16, none of the others // jiterp has special handling for i1 shuffles to secure a v8 optimization -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SHUFFLE, interp_v128_i1_shuffle, 0) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SHUFFLE, interp_v128_i2_shuffle, 0) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SHUFFLE, interp_v128_i4_shuffle, 0)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_SHUFFLE, interp_v128_i8_shuffle, 0) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SHUFFLE, interp_v128_i1_shuffle, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SHUFFLE, interp_v128_i2_shuffle, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SHUFFLE, interp_v128_i4_shuffle, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_SHUFFLE, interp_v128_i8_shuffle, -1) // Wasm PackedSimd (see PackedSimd.cs) // We automatically generate C wrappers around clang's wasm simd intrinsics for each of these intrinsics -// The 2nd argument is the name of the clang intrinsic and the 3rd argument is the wasm opcode. - -INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I8X16_SPLAT, wasm_v128_load8_splat, 0x07) -INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I16X8_SPLAT, wasm_v128_load16_splat, 0x08) -INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I32X4_SPLAT, wasm_v128_load32_splat, 0x09) -INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I64X2_SPLAT, wasm_v128_load64_splat, 0x0a) -// FIXME: ExtractLane and ReplaceLane -// FIXME: Shuffle -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_SWIZZLE, wasm_i8x16_swizzle, 0x0e) -// FIXME: f32/f64 versions of add/subtract/multiply/negate are missing -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_ADD, wasm_i8x16_add, 0x6e) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_ADD, wasm_i16x8_add, 0x8e) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_ADD, wasm_i32x4_add, 0xae) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_ADD, wasm_i64x2_add, 0xce) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_SUBTRACT, wasm_i8x16_sub, 0x71) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_SUBTRACT, wasm_i16x8_sub, 0x91) -INTERP_WASM_SIMD_INTRINSIC_V_VV 
(INTERP_SIMD_INTRINSIC_WASM_I32X4_SUBTRACT, wasm_i32x4_sub, 0xb1) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_SUBTRACT, wasm_i64x2_sub, 0xd1) +// The last two arguments are the name of the clang C intrinsic and the wasm simd opcode for the operation. +// The first argument is the name of the corresponding method(s) and must be a case sensitive match. +// The second argument specifies the type(s) that are valid in the first argument slot for the method. +// ANY = all types, X4 = any 4-byte type (I4/U4/R4), D4 = any 4-byte integer (I4/U4), or name one exact type with an I/U/R prefix (e.g. I4) +// For the VV and VVV versions, all the relevant signatures are symmetric, so we only specify one type, +// e.g. 'V_VV(Add, D1)' -> Add(Vector128, Vector128). +// For VI the second arg is always Int32 so that is omitted as well. +// The V_C1/C2/C3 variants use hand-written C functions with the interp intrinsic calling convention. +// Method names here are case sensitive and must match the names in PackedSimd.cs. + +INTERP_WASM_SIMD_INTRINSIC_V_P (Splat, X1, wasm_v128_load8_splat, 0x07) +INTERP_WASM_SIMD_INTRINSIC_V_P (Splat, X2, wasm_v128_load16_splat, 0x08) +INTERP_WASM_SIMD_INTRINSIC_V_P (Splat, X4, wasm_v128_load32_splat, 0x09) +INTERP_WASM_SIMD_INTRINSIC_V_P (Splat, X8, wasm_v128_load64_splat, 0x0a) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, I1, interp_packedsimd_extractlane_i1, 0x15) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, U1, interp_packedsimd_extractlane_u1, 0x16) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, I2, interp_packedsimd_extractlane_i2, 0x18) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, U2, interp_packedsimd_extractlane_u2, 0x19) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, D4, interp_packedsimd_extractlane_i4, 0x1b) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, D8, interp_packedsimd_extractlane_i8, 0x1d) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, R4, interp_packedsimd_extractlane_r4, 0x1f) +INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, R8,
interp_packedsimd_extractlane_r8, 0x21) +INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D1, interp_packedsimd_replacelane_i1, 0x17) +INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D2, interp_packedsimd_replacelane_i2, 0x1a) +INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D4, interp_packedsimd_replacelane_i4, 0x1c) +INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D8, interp_packedsimd_replacelane_i8, 0x1e) +INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, R4, interp_packedsimd_replacelane_r4, 0x20) +INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, R8, interp_packedsimd_replacelane_r8, 0x22) +// Shuffle has a dedicated implementation in the jiterpreter so the wasm opcode is -1 +INTERP_WASM_SIMD_INTRINSIC_V_C3 (Shuffle, D1, interp_packedsimd_shuffle, -1) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Swizzle, D1, wasm_i8x16_swizzle, 0x0e) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D1, wasm_i8x16_add, 0x6e) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D2, wasm_i16x8_add, 0x8e) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D4, wasm_i32x4_add, 0xae) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D8, wasm_i64x2_add, 0xce) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, R4, wasm_f32x4_add, 0xe4) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, R8, wasm_f64x2_add, 0xf0) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D1, wasm_i8x16_sub, 0x71) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D2, wasm_i16x8_sub, 0x91) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D4, wasm_i32x4_sub, 0xb1) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D8, wasm_i64x2_sub, 0xd1) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, R4, wasm_f32x4_sub, 0xe5) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, R8, wasm_f64x2_sub, 0xf1) // There is no i8x16 mul opcode -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_MULTIPLY, _interp_wasm_simd_assert_not_reached, 0x0) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_MULTIPLY, wasm_i16x8_mul, 0x95) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_MULTIPLY, wasm_i32x4_mul, 0xb5) 
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_MULTIPLY, wasm_i64x2_mul, 0xd5) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_DOT_I16X8, wasm_i32x4_dot_i16x8, 0xba) -INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I8X16_NEGATE, wasm_i8x16_neg, 0x61) -INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I16X8_NEGATE, wasm_i16x8_neg, 0x81) -INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I32X4_NEGATE, wasm_i32x4_neg, 0xa1) -INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I64X2_NEGATE, wasm_i64x2_neg, 0xc1) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTLEFT, wasm_i8x16_shl, 0x6b) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTLEFT, wasm_i16x8_shl, 0x8b) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTLEFT, wasm_i32x4_shl, 0xab) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTLEFT, wasm_i64x2_shl, 0xcb) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTARITHMETIC, wasm_i8x16_shr, 0x6c) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTRIGHTARITHMETIC, wasm_i16x8_shr, 0x8c) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTRIGHTARITHMETIC, wasm_i32x4_shr, 0xac) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTRIGHTARITHMETIC, wasm_i64x2_shr, 0xcc) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTLOGICAL, wasm_u8x16_shr, 0x6d) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTRIGHTLOGICAL, wasm_u16x8_shr, 0x8d) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTRIGHTLOGICAL, wasm_u32x4_shr, 0xad) -INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTRIGHTLOGICAL, wasm_u64x2_shr, 0xcd) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_AND, wasm_v128_and, 0x4e) -// FIXME: NOT, OR, XOR 
-INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I8X16_BITMASK, wasm_i8x16_bitmask, 0x64) -INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I16X8_BITMASK, wasm_i16x8_bitmask, 0x84) -INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I32X4_BITMASK, wasm_i32x4_bitmask, 0xa4) -INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I64X2_BITMASK, wasm_i64x2_bitmask, 0xc4) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, wasm_i8x16_eq, 0x23) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPAREEQUAL, wasm_i16x8_eq, 0x2d) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPAREEQUAL, wasm_i32x4_eq, 0x37) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPAREEQUAL, wasm_i64x2_eq, 0xd6) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPAREEQUAL, wasm_f32x4_eq, 0x41) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPAREEQUAL, wasm_f64x2_eq, 0x47) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPARENOTEQUAL, wasm_i8x16_ne, 0x24) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPARENOTEQUAL, wasm_i16x8_ne, 0x2e) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPARENOTEQUAL, wasm_i32x4_ne, 0x38) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPARENOTEQUAL, wasm_i64x2_ne, 0xd7) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPARENOTEQUAL, wasm_f32x4_ne, 0x42) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPARENOTEQUAL, wasm_f64x2_ne, 0x48) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_S, wasm_i8x16_narrow_i16x8, 0x65) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_S, wasm_i16x8_narrow_i32x4, 0x85) -INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_U, wasm_u8x16_narrow_i16x8, 0x66) 
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_U, wasm_u16x8_narrow_i32x4, 0x86) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, D2, wasm_i16x8_mul, 0x95) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, D4, wasm_i32x4_mul, 0xb5) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, D8, wasm_i64x2_mul, 0xd5) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, R4, wasm_f32x4_mul, 0xe6) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, R8, wasm_f64x2_mul, 0xf2) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Divide, R4, wasm_f32x4_div, 0xe7) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Divide, R8, wasm_f64x2_div, 0xf3) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Dot, I2, wasm_i32x4_dot_i16x8, 0xba) +INTERP_WASM_SIMD_INTRINSIC_V_V (Negate, D1, wasm_i8x16_neg, 0x61) +INTERP_WASM_SIMD_INTRINSIC_V_V (Negate, D2, wasm_i16x8_neg, 0x81) +INTERP_WASM_SIMD_INTRINSIC_V_V (Negate, D4, wasm_i32x4_neg, 0xa1) +INTERP_WASM_SIMD_INTRINSIC_V_V (Negate, D8, wasm_i64x2_neg, 0xc1) +INTERP_WASM_SIMD_INTRINSIC_V_V (Negate, R4, wasm_f32x4_neg, 0xe1) +INTERP_WASM_SIMD_INTRINSIC_V_V (Negate, R8, wasm_f64x2_neg, 0xed) +INTERP_WASM_SIMD_INTRINSIC_V_V (Sqrt, R4, wasm_f32x4_sqrt, 0xe3) +INTERP_WASM_SIMD_INTRINSIC_V_V (Sqrt, R8, wasm_f64x2_sqrt, 0xef) +INTERP_WASM_SIMD_INTRINSIC_V_V (Ceiling, R4, wasm_f32x4_ceil, 0x67) +INTERP_WASM_SIMD_INTRINSIC_V_V (Ceiling, R8, wasm_f64x2_ceil, 0x74) +INTERP_WASM_SIMD_INTRINSIC_V_V (Floor, R4, wasm_f32x4_floor, 0x68) +INTERP_WASM_SIMD_INTRINSIC_V_V (Floor, R8, wasm_f64x2_floor, 0x75) +INTERP_WASM_SIMD_INTRINSIC_V_V (Truncate, R4, wasm_f32x4_trunc, 0x69) +INTERP_WASM_SIMD_INTRINSIC_V_V (Truncate, R8, wasm_f64x2_trunc, 0x7a) +INTERP_WASM_SIMD_INTRINSIC_V_V (RoundToNearest, R4, wasm_f32x4_nearest, 0x6a) +INTERP_WASM_SIMD_INTRINSIC_V_V (RoundToNearest, R8, wasm_f64x2_nearest, 0x94) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D1, wasm_i8x16_shl, 0x6b) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D2, wasm_i16x8_shl, 0x8b) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D4, wasm_i32x4_shl, 0xab) 
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D8, wasm_i64x2_shl, 0xcb) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D1, wasm_i8x16_shr, 0x6c) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D2, wasm_i16x8_shr, 0x8c) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D4, wasm_i32x4_shr, 0xac) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D8, wasm_i64x2_shr, 0xcc) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D1, wasm_u8x16_shr, 0x6d) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D2, wasm_u16x8_shr, 0x8d) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D4, wasm_u32x4_shr, 0xad) +INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D8, wasm_u64x2_shr, 0xcd) +INTERP_WASM_SIMD_INTRINSIC_V_VV (And, ANY, wasm_v128_and, 0x4e) +INTERP_WASM_SIMD_INTRINSIC_V_VV (AndNot, ANY, wasm_v128_andnot, 0x4f) +INTERP_WASM_SIMD_INTRINSIC_V_V (Not, ANY, wasm_v128_not, 0x4d) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Or, ANY, wasm_v128_or, 0x50) +INTERP_WASM_SIMD_INTRINSIC_V_VV (Xor, ANY, wasm_v128_xor, 0x51) +INTERP_WASM_SIMD_INTRINSIC_V_VVV(BitwiseSelect, ANY, wasm_v128_bitselect, 0x52) +INTERP_WASM_SIMD_INTRINSIC_I_V (AnyTrue, ANY, wasm_v128_any_true, 0x53) +INTERP_WASM_SIMD_INTRINSIC_I_V (AllTrue, D1, wasm_i8x16_all_true, 0x63) +INTERP_WASM_SIMD_INTRINSIC_I_V (AllTrue, D2, wasm_i16x8_all_true, 0x83) +INTERP_WASM_SIMD_INTRINSIC_I_V (AllTrue, D4, wasm_i32x4_all_true, 0xa3) +INTERP_WASM_SIMD_INTRINSIC_I_V (AllTrue, D8, wasm_i64x2_all_true, 0xc3) +INTERP_WASM_SIMD_INTRINSIC_V_V (PopCount, U1, wasm_i8x16_popcnt, 0x62) +INTERP_WASM_SIMD_INTRINSIC_I_V (Bitmask, D1, wasm_i8x16_bitmask, 0x64) +INTERP_WASM_SIMD_INTRINSIC_I_V (Bitmask, D2, wasm_i16x8_bitmask, 0x84) +INTERP_WASM_SIMD_INTRINSIC_I_V (Bitmask, D4, wasm_i32x4_bitmask, 0xa4) +INTERP_WASM_SIMD_INTRINSIC_I_V (Bitmask, D8, wasm_i64x2_bitmask, 0xc4) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D1, wasm_i8x16_eq, 0x23) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D2, wasm_i16x8_eq, 0x2d) 
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D4, wasm_i32x4_eq, 0x37) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D8, wasm_i64x2_eq, 0xd6) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, R4, wasm_f32x4_eq, 0x41) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, R8, wasm_f64x2_eq, 0x47) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D1, wasm_i8x16_ne, 0x24) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D2, wasm_i16x8_ne, 0x2e) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D4, wasm_i32x4_ne, 0x38) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D8, wasm_i64x2_ne, 0xd7) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, R4, wasm_f32x4_ne, 0x42) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, R8, wasm_f64x2_ne, 0x48) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I1, wasm_i8x16_lt, 0x25) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U1, wasm_u8x16_lt, 0x26) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I2, wasm_i16x8_lt, 0x2f) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U2, wasm_u16x8_lt, 0x30) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I4, wasm_i32x4_lt, 0x39) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U4, wasm_u32x4_lt, 0x3a) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I8, wasm_i64x2_lt, 0xd8) +// INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U8, wasm_u64x2_lt, -1) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, R4, wasm_f32x4_lt, 0x43) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, R8, wasm_f64x2_lt, 0x49) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I1, wasm_i8x16_le, 0x29) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U1, wasm_u8x16_le, 0x2a) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I2, wasm_i16x8_le, 0x33) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U2, wasm_u16x8_le, 0x34) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I4, wasm_i32x4_le, 0x3d) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U4, 
wasm_u32x4_le, 0x3e) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I8, wasm_i64x2_le, 0xda) +// INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U8, wasm_u64x2_le, -1) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, R4, wasm_f32x4_le, 0x45) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, R8, wasm_f64x2_le, 0x4b) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I1, wasm_i8x16_gt, 0x27) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, U1, wasm_u8x16_gt, 0x28) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I2, wasm_i16x8_gt, 0x31) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, U2, wasm_u16x8_gt, 0x32) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I4, wasm_i32x4_gt, 0x3b) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, U4, wasm_u32x4_gt, 0x3c) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I8, wasm_i64x2_gt, 0xd9) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, R4, wasm_f32x4_gt, 0x44) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, R8, wasm_f64x2_gt, 0x4a) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I1, wasm_i8x16_ge, 0x2b) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, U1, wasm_u8x16_ge, 0x2c) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I2, wasm_i16x8_ge, 0x35) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, U2, wasm_u16x8_ge, 0x36) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I4, wasm_i32x4_ge, 0x3f) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, U4, wasm_u32x4_ge, 0x40) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I8, wasm_i64x2_ge, 0xdb) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, R4, wasm_f32x4_ge, 0x46) +INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, R8, wasm_f64x2_ge, 0x4c) +INTERP_WASM_SIMD_INTRINSIC_V_VV (ConvertNarrowingSignedSaturate, I2, wasm_i8x16_narrow_i16x8, 0x65) +INTERP_WASM_SIMD_INTRINSIC_V_VV 
(ConvertNarrowingSignedSaturate, I4, wasm_i16x8_narrow_i32x4, 0x85) +INTERP_WASM_SIMD_INTRINSIC_V_VV (ConvertNarrowingUnsignedSaturate, I2, wasm_u8x16_narrow_i16x8, 0x66) +INTERP_WASM_SIMD_INTRINSIC_V_VV (ConvertNarrowingUnsignedSaturate, I4, wasm_u16x8_narrow_i32x4, 0x86) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, I1, wasm_i16x8_extmul_low_i8x16, 0x9c) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, I2, wasm_i32x4_extmul_low_i16x8, 0xbc) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, I4, wasm_i64x2_extmul_low_i32x4, 0xdc) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, U1, wasm_u16x8_extmul_low_u8x16, 0x9e) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, U2, wasm_u32x4_extmul_low_u16x8, 0xbe) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, U4, wasm_u64x2_extmul_low_u32x4, 0xde) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, I1, wasm_i16x8_extmul_high_i8x16, 0x9d) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, I2, wasm_i32x4_extmul_high_i16x8, 0xbd) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, I4, wasm_i64x2_extmul_high_i32x4, 0xdd) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, U1, wasm_u16x8_extmul_high_u8x16, 0x9f) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, U2, wasm_u32x4_extmul_high_u16x8, 0xbf) +INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, U4, wasm_u64x2_extmul_high_u32x4, 0xdf) +INTERP_WASM_SIMD_INTRINSIC_V_V (AddPairwiseWidening, I1, wasm_i16x8_extadd_pairwise_i8x16, 0x7c) +INTERP_WASM_SIMD_INTRINSIC_V_V (AddPairwiseWidening, U1, wasm_u16x8_extadd_pairwise_u8x16, 0x7d) +INTERP_WASM_SIMD_INTRINSIC_V_V (AddPairwiseWidening, I2, wasm_i32x4_extadd_pairwise_i16x8, 0x7e) +INTERP_WASM_SIMD_INTRINSIC_V_V (AddPairwiseWidening, U2, wasm_u32x4_extadd_pairwise_u16x8, 0x7f) +INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, I1, wasm_i8x16_add_sat, 0x6f) +INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, U1, wasm_u8x16_add_sat, 0x70) 
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, I2, wasm_i16x8_add_sat, 0x8f)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, U2, wasm_u16x8_add_sat, 0x90)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, I1, wasm_i8x16_sub_sat, 0x72)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, U1, wasm_u8x16_sub_sat, 0x73)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, I2, wasm_i16x8_sub_sat, 0x92)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, U2, wasm_u16x8_sub_sat, 0x93)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyRoundedSaturateQ15, I2, wasm_i16x8_q15mulr_sat, 0x82)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, I1, wasm_i8x16_min, 0x76)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, I2, wasm_i16x8_min, 0x96)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, I4, wasm_i32x4_min, 0xb6)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, U1, wasm_u8x16_min, 0x77)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, U2, wasm_u16x8_min, 0x97)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, U4, wasm_u32x4_min, 0xb7)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, I1, wasm_i8x16_max, 0x78)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, I2, wasm_i16x8_max, 0x98)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, I4, wasm_i32x4_max, 0xb8)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, U1, wasm_u8x16_max, 0x79)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, U2, wasm_u16x8_max, 0x99)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, U4, wasm_u32x4_max, 0xb9)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AverageRounded, U1, wasm_u8x16_avgr, 0x7b)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AverageRounded, U2, wasm_u16x8_avgr, 0x9b)
+INTERP_WASM_SIMD_INTRINSIC_V_V (Abs, I1, wasm_i8x16_abs, 0x60)
+INTERP_WASM_SIMD_INTRINSIC_V_V (Abs, I2, wasm_i16x8_abs, 0x80)
+INTERP_WASM_SIMD_INTRINSIC_V_V (Abs, I4, wasm_i32x4_abs, 0xa0)
+INTERP_WASM_SIMD_INTRINSIC_V_V (Abs, I8, wasm_i64x2_abs, 0xc0)
+INTERP_WASM_SIMD_INTRINSIC_V_V (Abs, R4, wasm_f32x4_abs, 0xe0)
+INTERP_WASM_SIMD_INTRINSIC_V_V (Abs, R8, wasm_f64x2_abs, 0xec)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, R4, wasm_f32x4_min, 0xe8)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, R8, wasm_f64x2_min, 0xf4)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, R4, wasm_f32x4_max, 0xe9)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, R8, wasm_f64x2_max, 0xf5)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMin, R4, wasm_f32x4_pmin, 0xea)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMin, R8, wasm_f64x2_pmin, 0xf6)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMax, R4, wasm_f32x4_pmax, 0xeb)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMax, R8, wasm_f64x2_pmax, 0xf7)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToSingle, I4, wasm_f32x4_convert_i32x4, 0xfa)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToSingle, U4, wasm_f32x4_convert_u32x4, 0xfb)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToSingle, R8, wasm_f32x4_demote_f64x2_zero, 0x5e)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToDoubleLower, I4, wasm_f64x2_convert_low_i32x4, 0xfe)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToDoubleLower, U4, wasm_f64x2_convert_low_u32x4, 0xff)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToDoubleLower, R4, wasm_f64x2_promote_low_f32x4, 0x5f)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToInt32Saturate, R4, wasm_i32x4_trunc_sat_f32x4, 0xf8)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToUnsignedInt32Saturate, R4, wasm_u32x4_trunc_sat_f32x4, 0xf9)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToInt32Saturate, R8, wasm_i32x4_trunc_sat_f64x2_zero, 0xfc)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ConvertToUnsignedInt32Saturate, R8, wasm_u32x4_trunc_sat_f64x2_zero, 0xfd)
+INTERP_WASM_SIMD_INTRINSIC_V_V (SignExtendWideningLower, D1, wasm_i16x8_extend_low_i8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (SignExtendWideningLower, D2, wasm_i32x4_extend_low_i16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (SignExtendWideningLower, D4, wasm_i64x2_extend_low_i32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (SignExtendWideningUpper, D1, wasm_i16x8_extend_high_i8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (SignExtendWideningUpper, D2, wasm_i32x4_extend_high_i16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (SignExtendWideningUpper, D4, wasm_i64x2_extend_high_i32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ZeroExtendWideningLower, D1, wasm_u16x8_extend_low_u8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ZeroExtendWideningLower, D2, wasm_u32x4_extend_low_u16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ZeroExtendWideningLower, D4, wasm_u64x2_extend_low_u32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ZeroExtendWideningUpper, D1, wasm_u16x8_extend_high_u8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ZeroExtendWideningUpper, D2, wasm_u32x4_extend_high_u16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (ZeroExtendWideningUpper, D4, wasm_u64x2_extend_high_u32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadVector128, ANY, interp_packedsimd_load128, 0x0)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarVector128, X4, interp_packedsimd_load32_zero, 0x5c)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarVector128, X8, interp_packedsimd_load64_zero, 0x5d)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X1, interp_packedsimd_load8_splat, 0x07)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X2, interp_packedsimd_load16_splat, 0x08)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X4, interp_packedsimd_load32_splat, 0x09)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X8, interp_packedsimd_load64_splat, 0x0a)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, I1, interp_packedsimd_load8x8_s, 0x01)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, U1, interp_packedsimd_load8x8_u, 0x02)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, I2, interp_packedsimd_load16x4_s, 0x03)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, U2, interp_packedsimd_load16x4_u, 0x04)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, I4, interp_packedsimd_load32x2_s, 0x05)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, U4, interp_packedsimd_load32x2_u, 0x06)
+// FIXME: Specify opcodes (and add jiterp support) for the load-lane intrinsics below, which still use -1
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X1, interp_packedsimd_load8_lane, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X2, interp_packedsimd_load16_lane, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X4, interp_packedsimd_load32_lane, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X8, interp_packedsimd_load64_lane, -1)
+// HACK: These store methods don't return a value but the custom calling convention is sufficient
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (Store, ANY, interp_packedsimd_store, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X1, interp_packedsimd_store8_lane, 0x58)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X2, interp_packedsimd_store16_lane, 0x59)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X4, interp_packedsimd_store32_lane, 0x5a)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X8, interp_packedsimd_store64_lane, 0x5b)
diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c
index 8031b1f79b4b1..a824b940d04c1 100644
--- a/src/mono/mono/mini/interp/interp-simd.c
+++ b/src/mono/mono/mini/interp/interp-simd.c
@@ -585,42 +585,276 @@ _interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs) {
 	g_assert_not_reached ();
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) \
+#define LANE_COUNT(lane_type) (sizeof(v128_t) / sizeof(lane_type))
+
+// Read the lane index byte and wrap it into [0, LANE_COUNT) so it is always valid (in AOT an invalid lane would fail to compile)
+#define WRAP_LANE(lane_type, lane_ptr) \
+	(*((unsigned char *)(lane_ptr)) & (LANE_COUNT(lane_type) - 1))
+
+#define EXTRACT_LANE(result_type, lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); \
+	*((result_type *)res) = ((lane_type *)vec)[_lane];
+
+#define REPLACE_LANE(lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); \
+	v128_t temp = *((v128_t *)vec); \
+	((lane_type *)&temp)[_lane] = *(lane_type *)value; \
+	*((v128_t *)res) = temp;
+
+static void
+interp_packedsimd_extractlane_i1 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, gint8);
+}
+
+static void
+interp_packedsimd_extractlane_u1 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, guint8);
+}
+
+static void
+interp_packedsimd_extractlane_i2 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, gint16);
+}
+
+static void
+interp_packedsimd_extractlane_u2 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, guint16);
+}
+
+static void
+interp_packedsimd_extractlane_i4 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, gint32);
+}
+
+static void
+interp_packedsimd_extractlane_i8 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint64, gint64);
+}
+
+static void
+interp_packedsimd_extractlane_r4 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(float, float);
+}
+
+static void
+interp_packedsimd_extractlane_r8 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(double, double);
+}
+
+static void
+interp_packedsimd_replacelane_i1 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint8);
+}
+
+static void
+interp_packedsimd_replacelane_i2 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint16);
+}
+
+static void
+interp_packedsimd_replacelane_i4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint32);
+}
+
+static void
+interp_packedsimd_replacelane_i8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint64);
+}
+
+static void
+interp_packedsimd_replacelane_r4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(float);
+}
+
+static void
+interp_packedsimd_replacelane_r8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(double);
+}
+
+static void
+interp_packedsimd_shuffle (gpointer res, gpointer _lower, gpointer _upper, gpointer _indices) {
+	v128_i1 indices = *((v128_i1 *)_indices),
+		lower = *((v128_i1 *)_lower),
+		upper = *((v128_i1 *)_upper),
+		result = { 0 };
+
+	for (int i = 0; i < 16; i++) {
+		int index = indices[i] & 31;
+		if (index > 15)
+			result[i] = upper[index - 16];
+		else
+			result[i] = lower[index];
+	}
+
+	*((v128_i1 *)res) = result;
+}
+
+#define INDIRECT_LOAD(fn) \
+	*(v128_t*)res = fn(*(void **)addr_of_addr);
+
+static void
+interp_packedsimd_load128 (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load);
+}
+
+static void
+interp_packedsimd_load32_zero (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load32_zero);
+}
+
+static void
+interp_packedsimd_load64_zero (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load64_zero);
+}
+
+static void
+interp_packedsimd_load8_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load8_splat);
+}
+
+static void
+interp_packedsimd_load16_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load16_splat);
+}
+
+static void
+interp_packedsimd_load32_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load32_splat);
+}
+
+static void
+interp_packedsimd_load64_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load64_splat);
+}
+
+static void
+interp_packedsimd_load8x8_s (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_i16x8_load8x8);
+}
+
+static void
+interp_packedsimd_load8x8_u (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_u16x8_load8x8);
+}
+
+static void
+interp_packedsimd_load16x4_s (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_i32x4_load16x4);
+}
+
+static void
+interp_packedsimd_load16x4_u (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_u32x4_load16x4);
+}
+
+static void
+interp_packedsimd_load32x2_s (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_i64x2_load32x2);
+}
+
+static void
+interp_packedsimd_load32x2_u (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_u64x2_load32x2);
+}
+
+static void
+interp_packedsimd_store (gpointer res, gpointer addr_of_addr, gpointer vec) {
+	// HACK: Result is unused because Store has a void return value
+	wasm_v128_store (*(void **)addr_of_addr, *(v128_t *)vec);
+}
+
+#define INDIRECT_STORE_LANE(lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); \
+	**(lane_type **)addr_of_addr = ((lane_type *)vec)[_lane];
+
+static void
+interp_packedsimd_store8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint8);
+}
+
+static void
+interp_packedsimd_store16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint16);
+}
+
+static void
+interp_packedsimd_store32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint32);
+}
+
+static void
+interp_packedsimd_store64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint64);
+}
+
+#define INDIRECT_LOAD_LANE(lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); \
+	/* we need temporary storage to do this since res may be the same as vec, addr_of_addr, or lane */ \
+	lane_type lanes[LANE_COUNT(lane_type)]; \
+	memcpy (lanes, vec, sizeof(lanes)); \
+	lanes[_lane] = **(lane_type **)addr_of_addr; \
+	memcpy (res, lanes, sizeof(lanes));
+
+static void
+interp_packedsimd_load8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint8);
+}
+
+static void
+interp_packedsimd_load16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint16);
+}
+
+static void
+interp_packedsimd_load32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint32);
+}
+
+static void
+interp_packedsimd_load64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint64);
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
 	*((v128_t *)res) = c_intrinsic (v1); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
 	*((int32_t *)res) = c_intrinsic (*((v128_t *)v1)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((int *)v2)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2), *((v128_t *)v3)); \
 }
 
+#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1,
c_function, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode) +#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode) + #include "interp-simd-intrins.def" #undef INTERP_WASM_SIMD_INTRINSIC_V_P @@ -629,6 +863,9 @@ _mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) { #undef INTERP_WASM_SIMD_INTRINSIC_V_VV #undef INTERP_WASM_SIMD_INTRINSIC_V_VI #undef INTERP_WASM_SIMD_INTRINSIC_V_VVV +#undef INTERP_WASM_SIMD_INTRINSIC_V_C1 +#undef INTERP_WASM_SIMD_INTRINSIC_V_C2 +#undef INTERP_WASM_SIMD_INTRINSIC_V_C3 // Now generate the wasm opcode tables for the intrinsics diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index 0ed06ab397135..cbdf7ccfc2cfe 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -5,6 +5,7 @@ #include "config.h" #include #include +#include // We use the same approach as jit/aot for identifying simd methods. 
// FIXME Consider sharing the code @@ -111,71 +112,10 @@ static guint16 sn_vector_t_methods [] = { }; static guint16 sri_packedsimd_methods [] = { - SN_Add, - SN_And, - SN_Bitmask, - SN_CompareEqual, - SN_CompareNotEqual, - SN_ConvertNarrowingSignedSaturate, - SN_ConvertNarrowingUnsignedSaturate, - SN_Dot, - SN_Multiply, - SN_Negate, - SN_ShiftLeft, - SN_ShiftRightArithmetic, - SN_ShiftRightLogical, - SN_Splat, - SN_Subtract, - SN_Swizzle, SN_get_IsHardwareAccelerated, SN_get_IsSupported, }; -#if HOST_BROWSER - -/* - * maps from INTERP_SIMD_INTRINSIC_WASM_I8X16_xxx to the correct one for the return type, - * assuming that they are laid out sequentially like this: - * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, wasm_i8x16_eq, 0x0) - * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPAREEQUAL, wasm_i16x8_eq, 0x0) - * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPAREEQUAL, wasm_i32x4_eq, 0x0) - * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPAREEQUAL, wasm_i64x2_eq, 0x0) - * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPAREEQUAL, wasm_f32x4_eq, 0x0) - * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPAREEQUAL, wasm_f64x2_eq, 0x0) - * It is your responsibility to ensure that it's actually laid out this way! 
- */ - -static int sri_packedsimd_offset_from_atype [] = { - -1, // MONO_TYPE_END = 0x00, - -1, // MONO_TYPE_VOID = 0x01, - -1, // MONO_TYPE_BOOLEAN = 0x02, - -1, // MONO_TYPE_CHAR = 0x03, - 0, // MONO_TYPE_I1 = 0x04, - 0, // MONO_TYPE_U1 = 0x05, - 1, // MONO_TYPE_I2 = 0x06, - 1, // MONO_TYPE_U2 = 0x07, - 2, // MONO_TYPE_I4 = 0x08, - 2, // MONO_TYPE_U4 = 0x09, - 3, // MONO_TYPE_I8 = 0x0a, - 3, // MONO_TYPE_U8 = 0x0b, - 4, // MONO_TYPE_R4 = 0x0c, - 5, // MONO_TYPE_R8 = 0x0d, - -1, // MONO_TYPE_STRING = 0x0e, - -1, // MONO_TYPE_PTR = 0x0f, - -1, // MONO_TYPE_BYREF = 0x10, - -1, // MONO_TYPE_VALUETYPE = 0x11, - -1, // MONO_TYPE_CLASS = 0x12, - -1, // MONO_TYPE_VAR = 0x13, - -1, // MONO_TYPE_ARRAY = 0x14, - -1, // MONO_TYPE_GENERICINST= 0x15, - -1, // MONO_TYPE_TYPEDBYREF = 0x16, - 2, // MONO_TYPE_I = 0x18, - 2, // MONO_TYPE_U = 0x19, -}; - -static const int sri_packedsimd_offset_from_atype_length = sizeof(sri_packedsimd_offset_from_atype) / sizeof(sri_packedsimd_offset_from_atype[0]); -#endif // HOST_BROWSER - // Returns if opcode was added static gboolean emit_common_simd_operations (TransformData *td, int id, int atype, int vector_size, int arg_size, int scalar_arg, gint16 *simd_opcode, gint16 *simd_intrins) @@ -349,21 +289,24 @@ get_common_simd_info (MonoClass *vector_klass, MonoMethodSignature *csignature, } static void -emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMethodSignature *csignature, int vector_size) +emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMethodSignature *csignature, int vector_size, gboolean allow_void) { td->sp -= csignature->param_count; for (int i = 0; i < csignature->param_count; i++) td->last_ins->sregs [i] = td->sp [i].local; - g_assert (csignature->ret->type != MONO_TYPE_VOID); int ret_mt = mono_mint_type (csignature->ret); - if (ret_mt == MINT_TYPE_VT) { + if (csignature->ret->type == MONO_TYPE_VOID) { + g_assert (allow_void); + interp_ins_set_dummy_dreg (td->last_ins, td); + } 
else if (ret_mt == MINT_TYPE_VT) { // For these intrinsics, if we return a VT then it is a V128 push_type_vt (td, vector_klass, vector_size); + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); } else { push_simple_type (td, stack_type [ret_mt]); + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); } - interp_ins_set_dreg (td->last_ins, td->sp [-1].local); td->ip += 5; } @@ -527,7 +470,7 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature td->last_ins->data [0] = simd_intrins; opcode_added: - emit_common_simd_epilogue (td, vector_klass, csignature, vector_size); + emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE); return TRUE; } @@ -559,7 +502,7 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur td->last_ins->data [0] = simd_intrins; opcode_added: - emit_common_simd_epilogue (td, vector_klass, csignature, vector_size); + emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE); return TRUE; } @@ -591,22 +534,213 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c td->last_ins->data [0] = simd_intrins; opcode_added: - emit_common_simd_epilogue (td, vector_klass, csignature, vector_size); + emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE); return TRUE; } #if HOST_BROWSER + +#define PSIMD_ARGTYPE_I1 MONO_TYPE_I1 +#define PSIMD_ARGTYPE_I2 MONO_TYPE_I2 +#define PSIMD_ARGTYPE_I4 MONO_TYPE_I4 +#define PSIMD_ARGTYPE_I8 MONO_TYPE_I8 +#define PSIMD_ARGTYPE_U1 MONO_TYPE_U1 +#define PSIMD_ARGTYPE_U2 MONO_TYPE_U2 +#define PSIMD_ARGTYPE_U4 MONO_TYPE_U4 +#define PSIMD_ARGTYPE_U8 MONO_TYPE_U8 +#define PSIMD_ARGTYPE_R4 MONO_TYPE_R4 +#define PSIMD_ARGTYPE_R8 MONO_TYPE_R8 +#define PSIMD_ARGTYPE_D1 0xF01 +#define PSIMD_ARGTYPE_D2 0xF02 +#define PSIMD_ARGTYPE_D4 0xF04 +#define PSIMD_ARGTYPE_D8 0xF08 +#define PSIMD_ARGTYPE_X1 0xF11 +#define PSIMD_ARGTYPE_X2 0xF12 +#define PSIMD_ARGTYPE_X4 0xF14 +#define 
PSIMD_ARGTYPE_X8 0xF18
+#define PSIMD_ARGTYPE_ANY 0xFFF
+
+static gboolean
+packedsimd_type_matches (MonoTypeEnum type, int expected_type)
+{
+	if (expected_type == PSIMD_ARGTYPE_ANY)
+		return TRUE;
+	else if (type == expected_type)
+		return TRUE;
+
+	switch (expected_type) {
+		case PSIMD_ARGTYPE_D1:
+		case PSIMD_ARGTYPE_X1:
+			return (type == MONO_TYPE_I1) || (type == MONO_TYPE_U1);
+		case PSIMD_ARGTYPE_D2:
+		case PSIMD_ARGTYPE_X2:
+			return (type == MONO_TYPE_I2) || (type == MONO_TYPE_U2);
+		case PSIMD_ARGTYPE_D4:
+			return (type == MONO_TYPE_I4) || (type == MONO_TYPE_U4);
+		case PSIMD_ARGTYPE_D8:
+			return (type == MONO_TYPE_I8) || (type == MONO_TYPE_U8);
+		case PSIMD_ARGTYPE_X4:
+			return (type == MONO_TYPE_I4) || (type == MONO_TYPE_U4) || (type == MONO_TYPE_R4);
+		case PSIMD_ARGTYPE_X8:
+			return (type == MONO_TYPE_I8) || (type == MONO_TYPE_U8) || (type == MONO_TYPE_R8);
+		default:
+			return FALSE;
+	}
+}
+
+typedef struct {
+	const char *name, *intrinsic_name;
+	int arg_type, interp_opcode, simd_intrins;
+} PackedSimdIntrinsicInfo;
+
+#define INTRINS_COMMON(_name, arg1, c_intrinsic, _interp_opcode, _id) \
+	{ #_name, #c_intrinsic, PSIMD_ARGTYPE_ ## arg1, _interp_opcode, _id },
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_P
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_V
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
+#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_I_V
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
+#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PPP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C3
+#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PPP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+static PackedSimdIntrinsicInfo unsorted_packedsimd_intrinsic_infos[] = {
+#include "interp-simd-intrins.def"
+};
+#undef INTERP_WASM_SIMD_INTRINSIC_V_P
+#undef INTERP_WASM_SIMD_INTRINSIC_V_V
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
+#undef INTERP_WASM_SIMD_INTRINSIC_I_V
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C3
+
+static PackedSimdIntrinsicInfo *sorted_packedsimd_intrinsic_infos;
+
 static int
-map_packedsimd_intrins_based_on_atype (MonoTypeEnum atype, int base_intrins, gboolean allow_float)
+compare_packedsimd_intrinsic_info (const void *_lhs, const void *_rhs)
 {
-	int max_offset = allow_float ? 5 : 3;
-	if ((atype < 0) || (atype >= sri_packedsimd_offset_from_atype_length))
-		return -1;
-	int offset = sri_packedsimd_offset_from_atype [atype];
-	if ((offset < 0) || (offset > max_offset))
-		return -1;
-	return base_intrins + offset;
+	g_assert (_lhs);
+	g_assert (_rhs);
+	const PackedSimdIntrinsicInfo *lhs = _lhs, *rhs = _rhs;
+	return strcmp (lhs->name, rhs->name);
 }
+
+static PackedSimdIntrinsicInfo *
+lookup_packedsimd_intrinsic (const char *name, MonoType *arg1)
+{
+	MonoClass *vector_klass = mono_class_from_mono_type_internal (arg1);
+	MonoType *arg_type = NULL;
+
+	if (m_class_is_simd_type (vector_klass)) {
+		arg_type = mono_class_get_context (vector_klass)->class_inst->type_argv [0];
+	} else if (arg1->type == MONO_TYPE_PTR) {
+		arg_type = arg1->data.type;
+	} else {
+		// g_printf ("%s arg1 type was not pointer or simd type: %s\n", name, m_class_get_name (vector_klass));
+		return NULL;
+	}
+
+	if (!mono_type_is_primitive (arg_type)) {
+		// g_printf ("%s arg1 inner type was not primitive\n", name);
+		return NULL;
+	}
+
+	int arg_type_enum = arg_type->type,
+		search_begin_index,
+		num_intrinsics = sizeof(unsorted_packedsimd_intrinsic_infos) / sizeof(PackedSimdIntrinsicInfo);
+	if (arg_type_enum == MONO_TYPE_BOOLEAN)
+		return NULL;
+
+	PackedSimdIntrinsicInfo *result = NULL, *search_begin;
+	PackedSimdIntrinsicInfo search_key = { name, name };
+
+	// Lazily build a name-sorted copy of the table; it is published with a CAS and our copy is freed if another one is already installed
+	if (!sorted_packedsimd_intrinsic_infos) {
+		int buf_size = sizeof(unsorted_packedsimd_intrinsic_infos);
+		PackedSimdIntrinsicInfo *temp_sorted = g_malloc0 (buf_size);
+		memcpy (temp_sorted, unsorted_packedsimd_intrinsic_infos, buf_size);
+		mono_qsort (temp_sorted, num_intrinsics, sizeof(PackedSimdIntrinsicInfo), compare_packedsimd_intrinsic_info);
+		mono_atomic_cas_ptr ((gpointer*)&sorted_packedsimd_intrinsic_infos, (gpointer)temp_sorted, NULL);
+		if (sorted_packedsimd_intrinsic_infos != temp_sorted)
+			g_free (temp_sorted);
+	}
+
+	// Binary search by name to find a suitable starting location for our search
+	search_begin = (PackedSimdIntrinsicInfo*)mono_binary_search (
+		&search_key, sorted_packedsimd_intrinsic_infos,
+		num_intrinsics, sizeof(PackedSimdIntrinsicInfo),
+		compare_packedsimd_intrinsic_info
+	);
+	if (!search_begin) {
+		// g_printf ("No matching PackedSimd intrinsics for name %s\n", name);
+		return NULL;
+	}
+
+	search_begin_index = search_begin - sorted_packedsimd_intrinsic_infos;
+
+	// Search upwards and downwards through the table simultaneously from our starting location,
+	//  looking for an intrinsic with a matching name that also has a compatible argument type
+	// NOTE: If there are two suitable matches because you got the table wrong, this is nondeterministic
+	for (int low = search_begin_index, high = search_begin_index;
+		(low >= 0) || (high < num_intrinsics);
+		--low, ++high) {
+		PackedSimdIntrinsicInfo *low_info = (low >= 0) ? &sorted_packedsimd_intrinsic_infos[low] : NULL,
+			*high_info = (high < num_intrinsics) ? &sorted_packedsimd_intrinsic_infos[high] : NULL;
+		// As long as either the low or high offset are within range and have a name match, we keep going
+		gboolean low_name_matches = low_info && !strcmp (name, low_info->name),
+			high_name_matches = high_info && !strcmp (name, high_info->name);
+		if (!low_name_matches && !high_name_matches)
+			break;
+
+		// Now see whether we have a matching type and name at either offset
+		if (low_name_matches && packedsimd_type_matches (arg_type_enum, low_info->arg_type)) {
+			result = low_info;
+			break;
+		}
+		if (high_name_matches && packedsimd_type_matches (arg_type_enum, high_info->arg_type)) {
+			result = high_info;
+			break;
+		}
+	}
+
+	/*
+	if (!result)
+		g_printf ("No matching PackedSimd intrinsic for %s[%s]\n", name, m_class_get_name (mono_class_from_mono_type_internal (arg_type)));
+	*/
+	return result;
+}
+
 #endif
 
 static gboolean
@@ -616,7 +750,8 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 	// We don't early-out for an unrecognized method, we will generate an NIY later
 
 	MonoClass *vector_klass = mono_class_from_mono_type_internal (csignature->ret);
-	int vector_size = -1;
+	MonoTypeEnum atype;
+	int vector_size = -1, arg_size, scalar_arg;
 
 	// NOTE: Linker substitutions (used in AOT) will prevent this from running.
if ((id == SN_get_IsSupported) || (id == SN_get_IsHardwareAccelerated)) { @@ -628,13 +763,25 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature goto opcode_added; } + get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg); + #if HOST_BROWSER - if (id < 0) { - g_print ("MONO interpreter: Unimplemented method: System.Runtime.Intrinsics.Wasm.PackedSimd.%s\n", cmethod->name); + gint16 simd_opcode = -1; + gint16 simd_intrins = -1; + + PackedSimdIntrinsicInfo *info = lookup_packedsimd_intrinsic (cmethod->name, csignature->params[0]); + + if (info && info->interp_opcode && info->simd_intrins) { + simd_opcode = info->interp_opcode; + simd_intrins = info->simd_intrins; + // g_print ("%s %d -> %s %d %s\n", info->name, info->arg_type, mono_interp_opname (simd_opcode), simd_intrins, info->intrinsic_name); + } else { + g_warning ("MONO interpreter: Unimplemented method: System.Runtime.Intrinsics.Wasm.PackedSimd.%s\n", cmethod->name); // If we're missing a packedsimd method but the packedsimd method was AOT'd, we can // just let the interpreter generate a native call to the AOT method instead of // generating an NIY that will halt execution + // FIXME: Should we remove this now that the interpreter supports all of the methods? 
ERROR_DECL (error); gpointer addr = mono_aot_get_method (cmethod, error); if (addr) @@ -646,109 +793,6 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature goto opcode_added; } - gint16 simd_opcode = -1; - gint16 simd_intrins = -1; - - MonoTypeEnum atype; - int scalar_arg; - if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, NULL, &scalar_arg)) - return FALSE; - - switch (id) { - case SN_Splat: { - simd_opcode = MINT_SIMD_INTRINS_P_P; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SPLAT, FALSE); - break; - } - case SN_Swizzle: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_SWIZZLE; - break; - } - case SN_Add: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_ADD, FALSE); - break; - } - case SN_Subtract: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SUBTRACT, FALSE); - break; - } - case SN_Multiply: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_MULTIPLY, FALSE); - break; - } - case SN_Dot: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I32X4_DOT_I16X8; - break; - } - case SN_Negate: { - simd_opcode = MINT_SIMD_INTRINS_P_P; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_NEGATE, FALSE); - break; - } - case SN_ShiftLeft: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTLEFT, FALSE); - break; - } - case SN_ShiftRightArithmetic: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, 
INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTARITHMETIC, FALSE); - break; - } - case SN_ShiftRightLogical: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTLOGICAL, FALSE); - break; - } - case SN_And: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = INTERP_SIMD_INTRINSIC_WASM_AND; - break; - } - case SN_Bitmask: { - simd_opcode = MINT_SIMD_INTRINS_P_P; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_BITMASK, FALSE); - break; - } - case SN_CompareEqual: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, TRUE); - break; - } - case SN_CompareNotEqual: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPARENOTEQUAL, TRUE); - break; - } - case SN_ConvertNarrowingSignedSaturate: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - if (atype == MONO_TYPE_I1) - simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_S; - else if (atype == MONO_TYPE_I2) - simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_S; - break; - } - case SN_ConvertNarrowingUnsignedSaturate: { - simd_opcode = MINT_SIMD_INTRINS_P_PP; - if (atype == MONO_TYPE_U1) - simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_U; - else if (atype == MONO_TYPE_U2) - simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_U; - break; - } - default: - return FALSE; - } - - if (simd_opcode == -1 || simd_intrins == -1) { - return FALSE; - } - interp_add_ins (td, simd_opcode); td->last_ins->data [0] = simd_intrins; #else // HOST_BROWSER @@ -756,7 +800,7 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature #endif // HOST_BROWSER opcode_added: - emit_common_simd_epilogue (td, vector_klass, csignature, vector_size); + 
emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, TRUE); return TRUE; } diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index f416818ca7358..ce047b2677bcf 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -403,6 +403,15 @@ create_interp_local_explicit (TransformData *td, MonoType *type, int size) } +static void +create_interp_dummy_var (TransformData *td) +{ + g_assert (td->dummy_var < 0); + td->dummy_var = create_interp_local_explicit (td, m_class_get_byval_arg (mono_defaults.void_class), 8); + td->locals [td->dummy_var].offset = 0; + td->locals [td->dummy_var].flags = INTERP_LOCAL_FLAG_GLOBAL; +} + static int get_tos_offset (TransformData *td) { @@ -1281,11 +1290,6 @@ interp_get_icall_sig (MonoMethodSignature *sig); static void interp_generate_icall_throw (TransformData *td, MonoJitICallInfo *icall_info, gpointer arg1, gpointer arg2) { - // Allocate dreg for call, only void calls are supported - push_simple_type (td, STACK_TYPE_I4); - td->sp--; - int dummy_dreg = td->sp [0].local; - int num_args = icall_info->sig->param_count; if (num_args > 0) emit_ldptr (td, arg1); @@ -1295,7 +1299,7 @@ interp_generate_icall_throw (TransformData *td, MonoJitICallInfo *icall_info, gp td->sp -= num_args; interp_add_ins (td, MINT_ICALL); - interp_ins_set_dreg (td->last_ins, dummy_dreg); + interp_ins_set_dummy_dreg (td->last_ins, td); interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG); td->last_ins->data [0] = interp_get_icall_sig (icall_info->sig); td->last_ins->data [1] = get_data_item_index (td, (gpointer)icall_info->func); @@ -11046,6 +11050,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG td->mem_manager = m_method_get_mem_manager (method); td->n_data_items = 0; td->max_data_items = 0; + td->dummy_var = -1; td->data_items = NULL; td->data_hash = g_hash_table_new (NULL, NULL); #ifdef ENABLE_EXPERIMENT_TIERED diff --git 
a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index fcc51c25f9c44..7ff3f503a9fd4 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -224,6 +224,7 @@ typedef struct gint32 total_locals_size; gint32 max_stack_size; InterpLocal *locals; + int dummy_var; int *local_ref_count; unsigned int il_locals_offset; unsigned int il_locals_size; @@ -284,6 +285,11 @@ typedef struct #define STACK_TYPE_I STACK_TYPE_I4 #endif +#define interp_ins_set_dummy_dreg(ins,td) do { \ + if (td->dummy_var < 0) \ + create_interp_dummy_var (td); \ + ins->dreg = td->dummy_var; \ +} while (0) #define interp_ins_set_dreg(ins,dr) do { \ ins->dreg = dr; \ diff --git a/src/mono/wasm/runtime/genmintops.py b/src/mono/wasm/runtime/genmintops.py index 604c870ae0fd8..33c370690f8c0 100755 --- a/src/mono/wasm/runtime/genmintops.py +++ b/src/mono/wasm/runtime/genmintops.py @@ -22,9 +22,9 @@ tab = " " header_lines = src.read().splitlines() # strip preprocessing directives -simd_header_lines = (l for l in simd_src.read().splitlines() if not l.startswith("#")) +simd_header_lines = (l for l in simd_src.read().splitlines() if not (l.startswith("#") or l.startswith("//"))) # strip preprocessing directives and add indentation for tslint/eslint -header = "\n".join((tab + l) for l in header_lines if not l.startswith("#")) +header = "\n".join((tab + l) for l in header_lines if not (l.startswith("#") or l.startswith("//"))) src.close() simd_src.close() @@ -42,10 +42,13 @@ "INTERP_SIMD_INTRINSIC_P_PPP": simd_values_3, "INTERP_WASM_SIMD_INTRINSIC_V_P": simd_values_1, "INTERP_WASM_SIMD_INTRINSIC_V_V": simd_values_1, + "INTERP_WASM_SIMD_INTRINSIC_V_C1": simd_values_1, "INTERP_WASM_SIMD_INTRINSIC_I_V": simd_values_1, "INTERP_WASM_SIMD_INTRINSIC_V_VV": simd_values_2, + "INTERP_WASM_SIMD_INTRINSIC_V_C2": simd_values_2, "INTERP_WASM_SIMD_INTRINSIC_V_VI": simd_values_2, "INTERP_WASM_SIMD_INTRINSIC_V_VVV": simd_values_3, + 
"INTERP_WASM_SIMD_INTRINSIC_V_C3": simd_values_3, } for line in simd_header_lines: @@ -53,7 +56,11 @@ idx2 = line.index(",") if "," in line else None if (idx1 and idx2): key = line[0:idx1].strip() - simd_disp[key].append(line[(idx1 + 1):idx2].strip().replace("INTERP_SIMD_INTRINSIC_", "")) + vals = line[(idx1 + 1):].strip().split(",") + id = vals[0].replace("INTERP_SIMD_INTRINSIC_", "").strip() + if (len(vals) == 4): + id += vals[1].strip() + simd_disp[key].append(id) splitter = ",\n " splitter2 = ",\n " diff --git a/src/mono/wasm/runtime/jiterpreter-support.ts b/src/mono/wasm/runtime/jiterpreter-support.ts index 742ae737a29c8..6260174b8d343 100644 --- a/src/mono/wasm/runtime/jiterpreter-support.ts +++ b/src/mono/wasm/runtime/jiterpreter-support.ts @@ -635,9 +635,21 @@ export class WasmBuilder { exportCount++; this.beginFunction(func.typeName, func.locals); - func.blob = func.generator(); - if (!func.blob) - func.blob = this.endFunction(false); + try { + func.blob = func.generator(); + } finally { + // If func.generator failed due to an error or didn't return a blob, we want + // to call endFunction to pop the stack and create the blob automatically. 
+ // We may be in the middle of handling an exception so don't let this automatic + // logic throw and suppress the original exception being handled + try { + if (!func.blob) + func.blob = this.endFunction(false); + } catch { + // eslint-disable-next-line @typescript-eslint/no-extra-semi + ; + } + } } this._generateImportSection(includeFunctionTable); diff --git a/src/mono/wasm/runtime/jiterpreter-tables.ts b/src/mono/wasm/runtime/jiterpreter-tables.ts index 0fef162c42109..c3f4b42f57eae 100644 --- a/src/mono/wasm/runtime/jiterpreter-tables.ts +++ b/src/mono/wasm/runtime/jiterpreter-tables.ts @@ -2,7 +2,7 @@ import { WasmOpcode, WasmSimdOpcode, JiterpSpecialOpcode } from "./jiterpreter-opcodes"; import { - MintOpcode, SimdIntrinsic2, SimdIntrinsic3 + MintOpcode, SimdIntrinsic2, SimdIntrinsic3, SimdIntrinsic4 } from "./mintops"; export const ldcTable: { [opcode: number]: [WasmOpcode, number] } = { @@ -357,14 +357,57 @@ export const simdShiftTable = new Set([ SimdIntrinsic3.V128_I8_URIGHT_SHIFT, ]); -export const bitmaskTable : { [intrinsic: number]: WasmSimdOpcode } = { +export const simdExtractTable: { [intrinsic: number]: [laneCount: number, laneStoreOpcode: WasmOpcode] } = { + [SimdIntrinsic3.ExtractLaneI1]: [16, WasmOpcode.i32_store], + [SimdIntrinsic3.ExtractLaneU1]: [16, WasmOpcode.i32_store], + [SimdIntrinsic3.ExtractLaneI2]: [8, WasmOpcode.i32_store], + [SimdIntrinsic3.ExtractLaneU2]: [8, WasmOpcode.i32_store], + [SimdIntrinsic3.ExtractLaneD4]: [4, WasmOpcode.i32_store], + [SimdIntrinsic3.ExtractLaneR4]: [4, WasmOpcode.f32_store], + [SimdIntrinsic3.ExtractLaneD8]: [2, WasmOpcode.i64_store], + [SimdIntrinsic3.ExtractLaneR8]: [2, WasmOpcode.f64_store], +}; + +export const simdReplaceTable: { [intrinsic: number]: [laneCount: number, laneLoadOpcode: WasmOpcode] } = { + [SimdIntrinsic4.ReplaceLaneD1]: [16, WasmOpcode.i32_load], + [SimdIntrinsic4.ReplaceLaneD2]: [8, WasmOpcode.i32_load], + [SimdIntrinsic4.ReplaceLaneD4]: [4, WasmOpcode.i32_load], + 
[SimdIntrinsic4.ReplaceLaneR4]: [4, WasmOpcode.f32_load], + [SimdIntrinsic4.ReplaceLaneD8]: [2, WasmOpcode.i64_load], + [SimdIntrinsic4.ReplaceLaneR8]: [2, WasmOpcode.f64_load], +}; + +export const simdLoadTable = new Set([ + SimdIntrinsic2.LoadVector128ANY, + SimdIntrinsic2.LoadScalarAndSplatVector128X1, + SimdIntrinsic2.LoadScalarAndSplatVector128X2, + SimdIntrinsic2.LoadScalarAndSplatVector128X4, + SimdIntrinsic2.LoadScalarAndSplatVector128X8, + SimdIntrinsic2.LoadScalarVector128X4, + SimdIntrinsic2.LoadScalarVector128X8, + SimdIntrinsic2.LoadWideningVector128I1, + SimdIntrinsic2.LoadWideningVector128U1, + SimdIntrinsic2.LoadWideningVector128I2, + SimdIntrinsic2.LoadWideningVector128U2, + SimdIntrinsic2.LoadWideningVector128I4, + SimdIntrinsic2.LoadWideningVector128U4, +]); + +export const simdStoreTable: { [intrinsic: number]: [laneCount: number] } = { + [SimdIntrinsic4.StoreSelectedScalarX1]: [16], + [SimdIntrinsic4.StoreSelectedScalarX2]: [8], + [SimdIntrinsic4.StoreSelectedScalarX4]: [4], + [SimdIntrinsic4.StoreSelectedScalarX8]: [2], +}; + +export const bitmaskTable: { [intrinsic: number]: WasmSimdOpcode } = { [SimdIntrinsic2.V128_I1_EXTRACT_MSB]: WasmSimdOpcode.i8x16_bitmask, [SimdIntrinsic2.V128_I2_EXTRACT_MSB]: WasmSimdOpcode.i16x8_bitmask, [SimdIntrinsic2.V128_I4_EXTRACT_MSB]: WasmSimdOpcode.i32x4_bitmask, [SimdIntrinsic2.V128_I8_EXTRACT_MSB]: WasmSimdOpcode.i64x2_bitmask, }; -export const createScalarTable : { [intrinsic: number]: [WasmOpcode, WasmSimdOpcode] } = { +export const createScalarTable: { [intrinsic: number]: [WasmOpcode, WasmSimdOpcode] } = { [SimdIntrinsic2.V128_I1_CREATE_SCALAR]: [WasmOpcode.i32_load8_s, WasmSimdOpcode.i8x16_replace_lane], [SimdIntrinsic2.V128_I2_CREATE_SCALAR]: [WasmOpcode.i32_load16_s, WasmSimdOpcode.i16x8_replace_lane], [SimdIntrinsic2.V128_I4_CREATE_SCALAR]: [WasmOpcode.i32_load, WasmSimdOpcode.i32x4_replace_lane], diff --git a/src/mono/wasm/runtime/jiterpreter-trace-generator.ts 
b/src/mono/wasm/runtime/jiterpreter-trace-generator.ts index ea16a95230124..d6b2d8d6b6b98 100644 --- a/src/mono/wasm/runtime/jiterpreter-trace-generator.ts +++ b/src/mono/wasm/runtime/jiterpreter-trace-generator.ts @@ -49,6 +49,8 @@ import { simdCreateLoadOps, simdCreateSizes, simdCreateStoreOps, simdShiftTable, bitmaskTable, createScalarTable, + simdExtractTable, simdReplaceTable, + simdLoadTable, simdStoreTable, } from "./jiterpreter-tables"; import { mono_log_error, mono_log_info } from "./logging"; @@ -3496,10 +3498,19 @@ function append_simd_4_load(builder: WasmBuilder, ip: MintOpcodePtr) { function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic2): boolean { const simple = cwraps.mono_jiterp_get_simd_opcode(1, index); - if (simple) { - append_simd_2_load(builder, ip); - builder.appendSimd(simple); - append_simd_store(builder, ip); + if (simple >= 0) { + if (simdLoadTable.has(index)) { + // Indirect load, so v1 is T** and res is Vector128* + builder.local("pLocals"); + append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load); + builder.appendSimd(simple); + builder.appendMemarg(0, 0); + append_simd_store(builder, ip); + } else { + append_simd_2_load(builder, ip); + builder.appendSimd(simple); + append_simd_store(builder, ip); + } return true; } @@ -3554,14 +3565,34 @@ function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins function emit_simd_3(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic3): boolean { const simple = cwraps.mono_jiterp_get_simd_opcode(2, index); - if (simple) { - const isShift = simdShiftTable.has(index); + if (simple >= 0) { + const isShift = simdShiftTable.has(index), + extractTup = simdExtractTable[index]; + if (isShift) { builder.local("pLocals"); append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.i32_load); builder.appendSimd(simple); append_simd_store(builder, ip); + } else if 
(Array.isArray(extractTup)) { + const lane = get_known_constant_value(builder, getArgU16(ip, 3)), + laneCount = extractTup[0]; + if (typeof (lane) !== "number") { + mono_log_error (`${builder.functions[0].name}: Non-constant lane index passed to ExtractLane`); + return false; + } else if ((lane >= laneCount) || (lane < 0)) { + mono_log_error (`${builder.functions[0].name}: ExtractLane index ${lane} out of range (0 - ${laneCount - 1})`); + return false; + } + + // load vec onto stack and then emit extract + lane imm + builder.local("pLocals"); + append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); + builder.appendSimd(simple); + builder.appendU8(lane); + // Store using the opcode from the tuple + append_stloc_tail(builder, getArgU16(ip, 1), extractTup[1]); } else { append_simd_3_load(builder, ip); builder.appendSimd(simple); @@ -3571,6 +3602,13 @@ function emit_simd_3(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins } switch (index) { + case SimdIntrinsic3.StoreANY: + // Indirect store where args are [V128**, V128*] + append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load); + append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); + builder.appendSimd(WasmSimdOpcode.v128_store); + builder.appendMemarg(0, 0); + return true; case SimdIntrinsic3.V128_BITWISE_EQUALITY: case SimdIntrinsic3.V128_BITWISE_INEQUALITY: append_simd_3_load(builder, ip); @@ -3682,10 +3720,49 @@ function emit_shuffle(builder: WasmBuilder, ip: MintOpcodePtr, elementCount: num function emit_simd_4(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic4): boolean { const simple = cwraps.mono_jiterp_get_simd_opcode(3, index); - if (simple) { - append_simd_4_load(builder, ip); - builder.appendSimd(simple); - append_simd_store(builder, ip); + if (simple >= 0) { + // [lane count, value load opcode] + const rtup = simdReplaceTable[index], + stup = simdStoreTable[index]; + if (Array.isArray(rtup)) { + const 
laneCount = rtup[0], + lane = get_known_constant_value(builder, getArgU16(ip, 3)); + if (typeof (lane) !== "number") { + mono_log_error (`${builder.functions[0].name}: Non-constant lane index passed to ReplaceLane`); + return false; + } else if ((lane >= laneCount) || (lane < 0)) { + mono_log_error (`${builder.functions[0].name}: ReplaceLane index ${lane} out of range (0 - ${laneCount - 1})`); + return false; + } + + // arrange stack as [vec, value] and then write replace + lane imm + builder.local("pLocals"); + append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); + append_ldloc(builder, getArgU16(ip, 4), rtup[1]); + builder.appendSimd(simple); + builder.appendU8(lane); + append_simd_store(builder, ip); + } else if (Array.isArray(stup)) { + // Indirect store where args are [Scalar**, V128*] + const laneCount = stup[0], + lane = get_known_constant_value(builder, getArgU16(ip, 4)); + if (typeof (lane) !== "number") { + mono_log_error (`${builder.functions[0].name}: Non-constant lane index passed to store method`); + return false; + } else if ((lane >= laneCount) || (lane < 0)) { + mono_log_error (`${builder.functions[0].name}: Store lane ${lane} out of range (0 - ${laneCount - 1})`); + return false; + } + append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load); + append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); + builder.appendSimd(simple); + builder.appendMemarg(0, 0); + builder.appendU8(lane); + } else { + append_simd_4_load(builder, ip); + builder.appendSimd(simple); + append_simd_store(builder, ip); + } return true; } @@ -3700,6 +3777,28 @@ function emit_simd_4(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins builder.appendSimd(WasmSimdOpcode.v128_bitselect); append_simd_store(builder, ip); return true; + case SimdIntrinsic4.ShuffleD1: { + const indices = get_known_constant_value(builder, getArgU16(ip, 4)); + if (typeof (indices) !== "object") { + mono_log_error 
(`${builder.functions[0].name}: Non-constant indices passed to PackedSimd.Shuffle`); + return false; + } + for (let i = 0; i < 16; i++) { /* i8x16.shuffle encodes exactly 16 lane immediates; each may select any of the 32 input bytes */ + const lane = indices[i]; + if ((lane < 0) || (lane > 31)) { + mono_log_error (`${builder.functions[0].name}: Shuffle lane index #${i} (${lane}) out of range (0 - 31)`); + return false; + } + } + + builder.local("pLocals"); + append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); + append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); + builder.appendSimd(WasmSimdOpcode.i8x16_shuffle); + builder.appendBytes(indices); + append_simd_store(builder, ip); + return true; + } default: return false; }