diff --git a/src/mono/mono/mini/interp/interp-simd-intrins.def b/src/mono/mono/mini/interp/interp-simd-intrins.def
index 68f535c70fc49..ba317d6a13d24 100644
--- a/src/mono/mono/mini/interp/interp-simd-intrins.def
+++ b/src/mono/mono/mini/interp/interp-simd-intrins.def
@@ -1,34 +1,46 @@
 // FIXME: SIMD causes compile errors on WASI
 #ifdef HOST_BROWSER
 #ifndef INTERP_WASM_SIMD_INTRINSIC_V_P
-#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode)
 #endif
 #ifndef INTERP_WASM_SIMD_INTRINSIC_V_V
-#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_C1
+#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, c_function, wasm_opcode)
 #endif
 #ifndef INTERP_WASM_SIMD_INTRINSIC_I_V
-#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode)
 #endif
 #ifndef INTERP_WASM_SIMD_INTRINSIC_V_VV
-#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(id, _mono_interp_simd_ ## id, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode)
 #endif
 #ifndef INTERP_WASM_SIMD_INTRINSIC_V_VI
-#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(id, _mono_interp_simd_ ## id, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_C2
+#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, c_function, wasm_opcode)
 #endif
 #ifndef INTERP_WASM_SIMD_INTRINSIC_V_VVV
-#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PPP(id, _mono_interp_simd_ ## id, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PPP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, _mono_interp_simd_ ## c_intrinsic, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_C3
+#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PPP(INTERP_SIMD_INTRINSIC_ ## name ## arg1, c_function, wasm_opcode)
 #endif
 #else // HOST_BROWSER
-#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode)
-#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode)
-#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode)
-#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode)
-#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode)
-#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode)
 #endif // HOST_BROWSER
 
 // The third argument is the wasm opcode that corresponds to this simd intrinsic, if any.
-// Specify 0 if there is no exact 1:1 mapping (the opcode can still be implemented manually in the jiterpreter.)
+// Specify -1 if there is no exact 1:1 mapping (the opcode can still be implemented manually in the jiterpreter.)
 
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_ADD, interp_v128_i1_op_addition, 110)
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_ADD, interp_v128_i2_op_addition, 142)
@@ -43,12 +55,12 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_SUB, interp_v128_r4_op
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_AND, interp_v128_op_bitwise_and, 78)
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_op_bitwise_or, 80)
 
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, 0)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, 0)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, -1)
 
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81)
 
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply, 0)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply, -1)
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_MULTIPLY, interp_v128_i2_op_multiply, 149)
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_MULTIPLY, interp_v128_i4_op_multiply, 181)
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_r4_op_multiply, 230)
@@ -92,23 +104,23 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_EQUALS, interp_v128_i4
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_EQUALS, interp_v128_r4_equals, 65)
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_EQUALS, interp_v128_i8_equals, 214)
 
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE_SCALAR, interp_v128_i1_create_scalar, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE_SCALAR, interp_v128_i2_create_scalar, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE_SCALAR, interp_v128_i4_create_scalar, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE_SCALAR, interp_v128_i8_create_scalar, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE_SCALAR, interp_v128_i1_create_scalar, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE_SCALAR, interp_v128_i2_create_scalar, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE_SCALAR, interp_v128_i4_create_scalar, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE_SCALAR, interp_v128_i8_create_scalar, -1)
 
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_EXTRACT_MSB, interp_v128_i1_extract_msb, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_EXTRACT_MSB, interp_v128_i2_extract_msb, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_EXTRACT_MSB, interp_v128_i4_extract_msb, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_EXTRACT_MSB, interp_v128_i8_extract_msb, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_EXTRACT_MSB, interp_v128_i1_extract_msb, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_EXTRACT_MSB, interp_v128_i2_extract_msb, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_EXTRACT_MSB, interp_v128_i4_extract_msb, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_EXTRACT_MSB, interp_v128_i8_extract_msb, -1)
 
-// wasm opcode is 0 because it has a different calling convention
+// wasm opcode is -1 because it has a different calling convention
-INTERP_SIMD_INTRINSIC_P_PPP (INTERP_SIMD_INTRINSIC_V128_CONDITIONAL_SELECT, interp_v128_conditional_select, 0)
+INTERP_SIMD_INTRINSIC_P_PPP (INTERP_SIMD_INTRINSIC_V128_CONDITIONAL_SELECT, interp_v128_conditional_select, -1)
 
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE, interp_v128_i1_create, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE, interp_v128_i2_create, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE, interp_v128_i4_create, 0)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE, interp_v128_i8_create, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE, interp_v128_i1_create, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE, interp_v128_i2_create, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE, interp_v128_i4_create, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE, interp_v128_i8_create, -1)
 
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_AND_NOT, interp_v128_and_not, 79)
 
@@ -116,72 +128,259 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U2_LESS_THAN_EQUAL, inter
 
 // wasm only has a swizzle opcode for i8x16, none of the others
 // jiterp has special handling for i1 shuffles to secure a v8 optimization
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SHUFFLE, interp_v128_i1_shuffle, 0)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SHUFFLE, interp_v128_i2_shuffle, 0)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SHUFFLE, interp_v128_i4_shuffle, 0)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_SHUFFLE, interp_v128_i8_shuffle, 0)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SHUFFLE, interp_v128_i1_shuffle, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SHUFFLE, interp_v128_i2_shuffle, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SHUFFLE, interp_v128_i4_shuffle, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_SHUFFLE, interp_v128_i8_shuffle, -1)
 
 // Wasm PackedSimd (see PackedSimd.cs)
 // We automatically generate C wrappers around clang's wasm simd intrinsics for each of these intrinsics
-// The 2nd argument is the name of the clang intrinsic and the 3rd argument is the wasm opcode.
-
-INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I8X16_SPLAT, wasm_v128_load8_splat, 0x07)
-INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I16X8_SPLAT, wasm_v128_load16_splat, 0x08)
-INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I32X4_SPLAT, wasm_v128_load32_splat, 0x09)
-INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I64X2_SPLAT, wasm_v128_load64_splat, 0x0a)
-// FIXME: ExtractLane and ReplaceLane
-// FIXME: Shuffle
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_SWIZZLE, wasm_i8x16_swizzle, 0x0e)
-// FIXME: f32/f64 versions of add/subtract/multiply/negate are missing
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_ADD, wasm_i8x16_add, 0x6e)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_ADD, wasm_i16x8_add, 0x8e)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_ADD, wasm_i32x4_add, 0xae)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_ADD, wasm_i64x2_add, 0xce)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_SUBTRACT, wasm_i8x16_sub, 0x71)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_SUBTRACT, wasm_i16x8_sub, 0x91)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_SUBTRACT, wasm_i32x4_sub, 0xb1)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_SUBTRACT, wasm_i64x2_sub, 0xd1)
+// The last two arguments are the name of the clang C intrinsic and the wasm simd opcode for the operation.
+// The first argument is the name of the corresponding PackedSimd.cs method(s) and must be a case-sensitive match.
+// The second argument specifies the type(s) that are valid in the first argument slot for the method.
+// ANY = all types, X4 = any 4-byte type (I4/U4/R4), D4 = any 4-byte integer (I4/U4); I, U and R on their
+//  own select a signed integer, unsigned integer or floating-point type of that size (e.g. I1, U2, R8).
+// For the VV and VVV versions, all the relevant signatures are symmetric, so we only specify one type,
+//  i.e. 'V_VV(Add, D1)' -> Add(Vector128<byte>, Vector128<byte>).
+// For VI the second arg is always Int32 so that is omitted as well.
+// The V_C1/C2/C3 variants use hand-written C functions with the interp intrinsic calling convention.
+
+INTERP_WASM_SIMD_INTRINSIC_V_P  (Splat, X1, wasm_v128_load8_splat, 0x07)
+INTERP_WASM_SIMD_INTRINSIC_V_P  (Splat, X2, wasm_v128_load16_splat, 0x08)
+INTERP_WASM_SIMD_INTRINSIC_V_P  (Splat, X4, wasm_v128_load32_splat, 0x09)
+INTERP_WASM_SIMD_INTRINSIC_V_P  (Splat, X8, wasm_v128_load64_splat, 0x0a)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, I1, interp_packedsimd_extractlane_i1, 0x15)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, U1, interp_packedsimd_extractlane_u1, 0x16)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, I2, interp_packedsimd_extractlane_i2, 0x18)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, U2, interp_packedsimd_extractlane_u2, 0x19)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, D4, interp_packedsimd_extractlane_i4, 0x1b)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, D8, interp_packedsimd_extractlane_i8, 0x1d)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, R4, interp_packedsimd_extractlane_r4, 0x1f)
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (ExtractLane, R8, interp_packedsimd_extractlane_r8, 0x21)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D1, interp_packedsimd_replacelane_i1, 0x17)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D2, interp_packedsimd_replacelane_i2, 0x1a)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D4, interp_packedsimd_replacelane_i4, 0x1c)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, D8, interp_packedsimd_replacelane_i8, 0x1e)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, R4, interp_packedsimd_replacelane_r4, 0x20)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (ReplaceLane, R8, interp_packedsimd_replacelane_r8, 0x22)
+// Shuffle has a dedicated implementation in the jiterpreter so the wasm opcode is -1
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (Shuffle, D1, interp_packedsimd_shuffle, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Swizzle, D1, wasm_i8x16_swizzle, 0x0e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D1, wasm_i8x16_add, 0x6e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D2, wasm_i16x8_add, 0x8e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D4, wasm_i32x4_add, 0xae)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, D8, wasm_i64x2_add, 0xce)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, R4, wasm_f32x4_add, 0xe4)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Add, R8, wasm_f64x2_add, 0xf0)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D1, wasm_i8x16_sub, 0x71)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D2, wasm_i16x8_sub, 0x91)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D4, wasm_i32x4_sub, 0xb1)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, D8, wasm_i64x2_sub, 0xd1)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, R4, wasm_f32x4_sub, 0xe5)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Subtract, R8, wasm_f64x2_sub, 0xf1)
 // There is no i8x16 mul opcode
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_MULTIPLY, _interp_wasm_simd_assert_not_reached, 0x0)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_MULTIPLY, wasm_i16x8_mul, 0x95)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_MULTIPLY, wasm_i32x4_mul, 0xb5)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_MULTIPLY, wasm_i64x2_mul, 0xd5)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_DOT_I16X8, wasm_i32x4_dot_i16x8, 0xba)
-INTERP_WASM_SIMD_INTRINSIC_V_V  (INTERP_SIMD_INTRINSIC_WASM_I8X16_NEGATE, wasm_i8x16_neg, 0x61)
-INTERP_WASM_SIMD_INTRINSIC_V_V  (INTERP_SIMD_INTRINSIC_WASM_I16X8_NEGATE, wasm_i16x8_neg, 0x81)
-INTERP_WASM_SIMD_INTRINSIC_V_V  (INTERP_SIMD_INTRINSIC_WASM_I32X4_NEGATE, wasm_i32x4_neg, 0xa1)
-INTERP_WASM_SIMD_INTRINSIC_V_V  (INTERP_SIMD_INTRINSIC_WASM_I64X2_NEGATE, wasm_i64x2_neg, 0xc1)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTLEFT, wasm_i8x16_shl, 0x6b)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTLEFT, wasm_i16x8_shl, 0x8b)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTLEFT, wasm_i32x4_shl, 0xab)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTLEFT, wasm_i64x2_shl, 0xcb)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTARITHMETIC, wasm_i8x16_shr, 0x6c)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTRIGHTARITHMETIC, wasm_i16x8_shr, 0x8c)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTRIGHTARITHMETIC, wasm_i32x4_shr, 0xac)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTRIGHTARITHMETIC, wasm_i64x2_shr, 0xcc)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTLOGICAL, wasm_u8x16_shr, 0x6d)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTRIGHTLOGICAL, wasm_u16x8_shr, 0x8d)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTRIGHTLOGICAL, wasm_u32x4_shr, 0xad)
-INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTRIGHTLOGICAL, wasm_u64x2_shr, 0xcd)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_AND, wasm_v128_and, 0x4e)
-// FIXME: NOT, OR, XOR
-INTERP_WASM_SIMD_INTRINSIC_I_V  (INTERP_SIMD_INTRINSIC_WASM_I8X16_BITMASK, wasm_i8x16_bitmask, 0x64)
-INTERP_WASM_SIMD_INTRINSIC_I_V  (INTERP_SIMD_INTRINSIC_WASM_I16X8_BITMASK, wasm_i16x8_bitmask, 0x84)
-INTERP_WASM_SIMD_INTRINSIC_I_V  (INTERP_SIMD_INTRINSIC_WASM_I32X4_BITMASK, wasm_i32x4_bitmask, 0xa4)
-INTERP_WASM_SIMD_INTRINSIC_I_V  (INTERP_SIMD_INTRINSIC_WASM_I64X2_BITMASK, wasm_i64x2_bitmask, 0xc4)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, wasm_i8x16_eq, 0x23)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPAREEQUAL, wasm_i16x8_eq, 0x2d)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPAREEQUAL, wasm_i32x4_eq, 0x37)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPAREEQUAL, wasm_i64x2_eq, 0xd6)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPAREEQUAL, wasm_f32x4_eq, 0x41)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPAREEQUAL, wasm_f64x2_eq, 0x47)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPARENOTEQUAL, wasm_i8x16_ne, 0x24)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPARENOTEQUAL, wasm_i16x8_ne, 0x2e)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPARENOTEQUAL, wasm_i32x4_ne, 0x38)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPARENOTEQUAL, wasm_i64x2_ne, 0xd7)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPARENOTEQUAL, wasm_f32x4_ne, 0x42)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPARENOTEQUAL, wasm_f64x2_ne, 0x48)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_S, wasm_i8x16_narrow_i16x8, 0x65)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_S, wasm_i16x8_narrow_i32x4, 0x85)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_U, wasm_u8x16_narrow_i16x8, 0x66)
-INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_U, wasm_u16x8_narrow_i32x4, 0x86)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, D2, wasm_i16x8_mul, 0x95)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, D4, wasm_i32x4_mul, 0xb5)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, D8, wasm_i64x2_mul, 0xd5)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, R4, wasm_f32x4_mul, 0xe6)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Multiply, R8, wasm_f64x2_mul, 0xf2)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Divide, R4, wasm_f32x4_div, 0xe7)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Divide, R8, wasm_f64x2_div, 0xf3)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Dot, I2, wasm_i32x4_dot_i16x8, 0xba)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Negate, D1, wasm_i8x16_neg, 0x61)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Negate, D2, wasm_i16x8_neg, 0x81)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Negate, D4, wasm_i32x4_neg, 0xa1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Negate, D8, wasm_i64x2_neg, 0xc1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Negate, R4, wasm_f32x4_neg, 0xe1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Negate, R8, wasm_f64x2_neg, 0xed)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Sqrt, R4, wasm_f32x4_sqrt, 0xe3)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Sqrt, R8, wasm_f64x2_sqrt, 0xef)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Ceiling, R4, wasm_f32x4_ceil, 0x67)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Ceiling, R8, wasm_f64x2_ceil, 0x74)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Floor, R4, wasm_f32x4_floor, 0x68)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Floor, R8, wasm_f64x2_floor, 0x75)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Truncate, R4, wasm_f32x4_trunc, 0x69)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Truncate, R8, wasm_f64x2_trunc, 0x7a)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (RoundToNearest, R4, wasm_f32x4_nearest, 0x6a)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (RoundToNearest, R8, wasm_f64x2_nearest, 0x94)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D1, wasm_i8x16_shl, 0x6b)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D2, wasm_i16x8_shl, 0x8b)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D4, wasm_i32x4_shl, 0xab)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftLeft, D8, wasm_i64x2_shl, 0xcb)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D1, wasm_i8x16_shr, 0x6c)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D2, wasm_i16x8_shr, 0x8c)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D4, wasm_i32x4_shr, 0xac)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightArithmetic, D8, wasm_i64x2_shr, 0xcc)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D1, wasm_u8x16_shr, 0x6d)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D2, wasm_u16x8_shr, 0x8d)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D4, wasm_u32x4_shr, 0xad)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (ShiftRightLogical, D8, wasm_u64x2_shr, 0xcd)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (And, ANY, wasm_v128_and, 0x4e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AndNot, ANY, wasm_v128_andnot, 0x4f)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Not, ANY, wasm_v128_not, 0x4d)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Or,  ANY, wasm_v128_or,  0x50)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Xor, ANY, wasm_v128_xor, 0x51)
+INTERP_WASM_SIMD_INTRINSIC_V_VVV(BitwiseSelect, ANY, wasm_v128_bitselect, 0x52)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (AnyTrue, ANY, wasm_v128_any_true, 0x53)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (AllTrue, D1, wasm_i8x16_all_true, 0x63)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (AllTrue, D2, wasm_i16x8_all_true, 0x83)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (AllTrue, D4, wasm_i32x4_all_true, 0xa3)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (AllTrue, D8, wasm_i64x2_all_true, 0xc3)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (PopCount, U1, wasm_i8x16_popcnt, 0x62)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (Bitmask, D1, wasm_i8x16_bitmask, 0x64)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (Bitmask, D2, wasm_i16x8_bitmask, 0x84)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (Bitmask, D4, wasm_i32x4_bitmask, 0xa4)
+INTERP_WASM_SIMD_INTRINSIC_I_V  (Bitmask, D8, wasm_i64x2_bitmask, 0xc4)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D1, wasm_i8x16_eq, 0x23)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D2, wasm_i16x8_eq, 0x2d)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D4, wasm_i32x4_eq, 0x37)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, D8, wasm_i64x2_eq, 0xd6)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, R4, wasm_f32x4_eq, 0x41)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareEqual, R8, wasm_f64x2_eq, 0x47)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D1, wasm_i8x16_ne, 0x24)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D2, wasm_i16x8_ne, 0x2e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D4, wasm_i32x4_ne, 0x38)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, D8, wasm_i64x2_ne, 0xd7)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, R4, wasm_f32x4_ne, 0x42)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareNotEqual, R8, wasm_f64x2_ne, 0x48)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I1, wasm_i8x16_lt, 0x25)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U1, wasm_u8x16_lt, 0x26)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I2, wasm_i16x8_lt, 0x2f)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U2, wasm_u16x8_lt, 0x30)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I4, wasm_i32x4_lt, 0x39)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U4, wasm_u32x4_lt, 0x3a)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, I8, wasm_i64x2_lt, 0xd8)
+// INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, U8, wasm_u64x2_lt, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, R4, wasm_f32x4_lt, 0x43)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThan, R8, wasm_f64x2_lt, 0x49)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I1, wasm_i8x16_le, 0x29)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U1, wasm_u8x16_le, 0x2a)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I2, wasm_i16x8_le, 0x33)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U2, wasm_u16x8_le, 0x34)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I4, wasm_i32x4_le, 0x3d)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U4, wasm_u32x4_le, 0x3e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, I8, wasm_i64x2_le, 0xda)
+// INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, U8, wasm_u64x2_le, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, R4, wasm_f32x4_le, 0x45)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareLessThanOrEqual, R8, wasm_f64x2_le, 0x4b)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I1, wasm_i8x16_gt, 0x27)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, U1, wasm_u8x16_gt, 0x28)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I2, wasm_i16x8_gt, 0x31)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, U2, wasm_u16x8_gt, 0x32)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I4, wasm_i32x4_gt, 0x3b)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, U4, wasm_u32x4_gt, 0x3c)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, I8, wasm_i64x2_gt, 0xd9)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, R4, wasm_f32x4_gt, 0x44)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThan, R8, wasm_f64x2_gt, 0x4a)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I1, wasm_i8x16_ge, 0x2b)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, U1, wasm_u8x16_ge, 0x2c)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I2, wasm_i16x8_ge, 0x35)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, U2, wasm_u16x8_ge, 0x36)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I4, wasm_i32x4_ge, 0x3f)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, U4, wasm_u32x4_ge, 0x40)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, I8, wasm_i64x2_ge, 0xdb)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, R4, wasm_f32x4_ge, 0x46)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (CompareGreaterThanOrEqual, R8, wasm_f64x2_ge, 0x4c)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (ConvertNarrowingSignedSaturate, I2, wasm_i8x16_narrow_i16x8, 0x65)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (ConvertNarrowingSignedSaturate, I4, wasm_i16x8_narrow_i32x4, 0x85)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (ConvertNarrowingUnsignedSaturate, I2, wasm_u8x16_narrow_i16x8, 0x66)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (ConvertNarrowingUnsignedSaturate, I4, wasm_u16x8_narrow_i32x4, 0x86)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, I1, wasm_i16x8_extmul_low_i8x16, 0x9c)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, I2, wasm_i32x4_extmul_low_i16x8, 0xbc)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, I4, wasm_i64x2_extmul_low_i32x4, 0xdc)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, U1, wasm_u16x8_extmul_low_u8x16, 0x9e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, U2, wasm_u32x4_extmul_low_u16x8, 0xbe)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningLower, U4, wasm_u64x2_extmul_low_u32x4, 0xde)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, I1, wasm_i16x8_extmul_high_i8x16, 0x9d)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, I2, wasm_i32x4_extmul_high_i16x8, 0xbd)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, I4, wasm_i64x2_extmul_high_i32x4, 0xdd)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, U1, wasm_u16x8_extmul_high_u8x16, 0x9f)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, U2, wasm_u32x4_extmul_high_u16x8, 0xbf)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyWideningUpper, U4, wasm_u64x2_extmul_high_u32x4, 0xdf)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (AddPairwiseWidening, I1, wasm_i16x8_extadd_pairwise_i8x16, 0x7c)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (AddPairwiseWidening, U1, wasm_u16x8_extadd_pairwise_u8x16, 0x7d)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (AddPairwiseWidening, I2, wasm_i32x4_extadd_pairwise_i16x8, 0x7e)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (AddPairwiseWidening, U2, wasm_u32x4_extadd_pairwise_u16x8, 0x7f)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, I1, wasm_i8x16_add_sat, 0x6f)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, U1, wasm_u8x16_add_sat, 0x70)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, I2, wasm_i16x8_add_sat, 0x8f)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AddSaturate, U2, wasm_u16x8_add_sat, 0x90)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, I1, wasm_i8x16_sub_sat, 0x72)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, U1, wasm_u8x16_sub_sat, 0x73)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, I2, wasm_i16x8_sub_sat, 0x92)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (SubtractSaturate, U2, wasm_u16x8_sub_sat, 0x93)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (MultiplyRoundedSaturateQ15, I2, wasm_i16x8_q15mulr_sat, 0x82)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, I1, wasm_i8x16_min, 0x76)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, I2, wasm_i16x8_min, 0x96)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, I4, wasm_i32x4_min, 0xb6)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, U1, wasm_u8x16_min, 0x77)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, U2, wasm_u16x8_min, 0x97)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, U4, wasm_u32x4_min, 0xb7)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, I1, wasm_i8x16_max, 0x78)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, I2, wasm_i16x8_max, 0x98)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, I4, wasm_i32x4_max, 0xb8)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, U1, wasm_u8x16_max, 0x79)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, U2, wasm_u16x8_max, 0x99)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, U4, wasm_u32x4_max, 0xb9)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AverageRounded, U1, wasm_u8x16_avgr, 0x7b)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (AverageRounded, U2, wasm_u16x8_avgr, 0x9b)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Abs, I1, wasm_i8x16_abs, 0x60)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Abs, I2, wasm_i16x8_abs, 0x80)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Abs, I4, wasm_i32x4_abs, 0xa0)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Abs, I8, wasm_i64x2_abs, 0xc0)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Abs, R4, wasm_f32x4_abs, 0xe0)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (Abs, R8, wasm_f64x2_abs, 0xec)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, R4, wasm_f32x4_min, 0xe8)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Min, R8, wasm_f64x2_min, 0xf4)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, R4, wasm_f32x4_max, 0xe9)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (Max, R8, wasm_f64x2_max, 0xf5)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMin, R4, wasm_f32x4_pmin, 0xea)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMin, R8, wasm_f64x2_pmin, 0xf6)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMax, R4, wasm_f32x4_pmax, 0xeb)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (PseudoMax, R8, wasm_f64x2_pmax, 0xf7)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToSingle, I4, wasm_f32x4_convert_i32x4, 0xfa)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToSingle, U4, wasm_f32x4_convert_u32x4, 0xfb)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToSingle, R8, wasm_f32x4_demote_f64x2_zero, 0x5e)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToDoubleLower, I4, wasm_f64x2_convert_low_i32x4, 0xfe)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToDoubleLower, U4, wasm_f64x2_convert_low_u32x4, 0xff)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToDoubleLower, R4, wasm_f64x2_promote_low_f32x4, 0x5f) // keyed on the argument's element type (float), not the result's
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToInt32Saturate, R4, wasm_i32x4_trunc_sat_f32x4, 0xf8)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToUnsignedInt32Saturate, R4, wasm_u32x4_trunc_sat_f32x4, 0xf9)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToInt32Saturate, R8, wasm_i32x4_trunc_sat_f64x2_zero, 0xfc)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ConvertToUnsignedInt32Saturate, R8, wasm_u32x4_trunc_sat_f64x2_zero, 0xfd)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (SignExtendWideningLower, D1, wasm_i16x8_extend_low_i8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (SignExtendWideningLower, D2, wasm_i32x4_extend_low_i16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (SignExtendWideningLower, D4, wasm_i64x2_extend_low_i32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (SignExtendWideningUpper, D1, wasm_i16x8_extend_high_i8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (SignExtendWideningUpper, D2, wasm_i32x4_extend_high_i16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (SignExtendWideningUpper, D4, wasm_i64x2_extend_high_i32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ZeroExtendWideningLower, D1, wasm_u16x8_extend_low_u8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ZeroExtendWideningLower, D2, wasm_u32x4_extend_low_u16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ZeroExtendWideningLower, D4, wasm_u64x2_extend_low_u32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ZeroExtendWideningUpper, D1, wasm_u16x8_extend_high_u8x16, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ZeroExtendWideningUpper, D2, wasm_u32x4_extend_high_u16x8, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_V  (ZeroExtendWideningUpper, D4, wasm_u64x2_extend_high_u32x4, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadVector128, ANY, interp_packedsimd_load128, 0x0)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarVector128, X4, interp_packedsimd_load32_zero, 0x5c)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarVector128, X8, interp_packedsimd_load64_zero, 0x5d)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X1, interp_packedsimd_load8_splat, 0x07)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X2, interp_packedsimd_load16_splat, 0x08)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X4, interp_packedsimd_load32_splat, 0x09)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadScalarAndSplatVector128, X8, interp_packedsimd_load64_splat, 0x0a)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, I1, interp_packedsimd_load8x8_s, 0x01)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, U1, interp_packedsimd_load8x8_u, 0x02)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, I2, interp_packedsimd_load16x4_s, 0x03)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, U2, interp_packedsimd_load16x4_u, 0x04)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, I4, interp_packedsimd_load32x2_s, 0x05)
+INTERP_WASM_SIMD_INTRINSIC_V_C1 (LoadWideningVector128, U4, interp_packedsimd_load32x2_u, 0x06)
+// FIXME: Specify the wasm opcodes (currently -1) for these lane loads and add jiterp support for them
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X1, interp_packedsimd_load8_lane, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X2, interp_packedsimd_load16_lane, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X4, interp_packedsimd_load32_lane, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (LoadScalarAndInsert, X8, interp_packedsimd_load64_lane, -1)
+// HACK: These store methods return void, but the custom calling convention still suffices because the result pointer is simply left unused
+INTERP_WASM_SIMD_INTRINSIC_V_C2 (Store, ANY, interp_packedsimd_store, -1)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X1, interp_packedsimd_store8_lane, 0x58)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X2, interp_packedsimd_store16_lane, 0x59)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X4, interp_packedsimd_store32_lane, 0x5a)
+INTERP_WASM_SIMD_INTRINSIC_V_C3 (StoreSelectedScalar, X8, interp_packedsimd_store64_lane, 0x5b)
diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c
index 8031b1f79b4b1..a824b940d04c1 100644
--- a/src/mono/mono/mini/interp/interp-simd.c
+++ b/src/mono/mono/mini/interp/interp-simd.c
@@ -585,42 +585,276 @@ _interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs) {
 	g_assert_not_reached ();
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) \
+#define LANE_COUNT(lane_type) (sizeof(v128_t) / sizeof(lane_type))
+
+// ensure the lane is valid by wrapping it (in AOT it would fail to compile); the mask relies on LANE_COUNT being a power of two
+#define WRAP_LANE(lane_type, lane_ptr)  \
+	(*((unsigned char *)(lane_ptr)) & (LANE_COUNT(lane_type) - 1))
+
+#define EXTRACT_LANE(result_type, lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); \
+	*((result_type *)res) = ((lane_type *)vec)[_lane];
+
+#define REPLACE_LANE(lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); \
+	v128_t temp = *((v128_t *)vec); \
+	((lane_type *)&temp)[_lane] = *(lane_type *)value; \
+	*((v128_t *)res) = temp;
+
+static void
+interp_packedsimd_extractlane_i1 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, gint8);
+}
+
+static void
+interp_packedsimd_extractlane_u1 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, guint8);
+}
+
+static void
+interp_packedsimd_extractlane_i2 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, gint16);
+}
+
+static void
+interp_packedsimd_extractlane_u2 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, guint16);
+}
+
+static void
+interp_packedsimd_extractlane_i4 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint32, gint32);
+}
+
+static void
+interp_packedsimd_extractlane_i8 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(gint64, gint64);
+}
+
+static void
+interp_packedsimd_extractlane_r4 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(float, float);
+}
+
+static void
+interp_packedsimd_extractlane_r8 (gpointer res, gpointer vec, gpointer lane) {
+	EXTRACT_LANE(double, double);
+}
+
+static void
+interp_packedsimd_replacelane_i1 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint8);
+}
+
+static void
+interp_packedsimd_replacelane_i2 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint16);
+}
+
+static void
+interp_packedsimd_replacelane_i4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint32);
+}
+
+static void
+interp_packedsimd_replacelane_i8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(gint64);
+}
+
+static void
+interp_packedsimd_replacelane_r4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(float);
+}
+
+static void
+interp_packedsimd_replacelane_r8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
+	REPLACE_LANE(double);
+}
+
+static void
+interp_packedsimd_shuffle (gpointer res, gpointer _lower, gpointer _upper, gpointer _indices) {
+	// Work on local copies; res is only written once every output byte is known
+	v128_i1 selectors = *((v128_i1 *)_indices);
+	v128_i1 lo = *((v128_i1 *)_lower);
+	v128_i1 hi = *((v128_i1 *)_upper);
+	v128_i1 shuffled = { 0 };
+
+	for (int lane = 0; lane < 16; lane++) {
+		// Wrap to 0..31: selectors 0..15 read from _lower, 16..31 read from _upper
+		int selector = selectors[lane] & 31;
+		gboolean from_upper = selector > 15;
+		shuffled[lane] = from_upper ? hi[selector - 16] : lo[selector];
+	}
+
+	*((v128_i1 *)res) = shuffled;
+}
+
+#define INDIRECT_LOAD(fn) \
+	*(v128_t*)res = fn(*(void **)addr_of_addr);
+
+static void
+interp_packedsimd_load128 (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load);
+}
+
+static void
+interp_packedsimd_load32_zero (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load32_zero);
+}
+
+static void
+interp_packedsimd_load64_zero (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load64_zero);
+}
+
+static void
+interp_packedsimd_load8_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load8_splat);
+}
+
+static void
+interp_packedsimd_load16_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load16_splat);
+}
+
+static void
+interp_packedsimd_load32_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load32_splat);
+}
+
+static void
+interp_packedsimd_load64_splat (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_v128_load64_splat);
+}
+
+static void
+interp_packedsimd_load8x8_s (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_i16x8_load8x8);
+}
+
+static void
+interp_packedsimd_load8x8_u (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_u16x8_load8x8);
+}
+
+static void
+interp_packedsimd_load16x4_s (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_i32x4_load16x4);
+}
+
+static void
+interp_packedsimd_load16x4_u (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_u32x4_load16x4);
+}
+
+static void
+interp_packedsimd_load32x2_s (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_i64x2_load32x2);
+}
+
+static void
+interp_packedsimd_load32x2_u (gpointer res, gpointer addr_of_addr) {
+	INDIRECT_LOAD(wasm_u64x2_load32x2);
+}
+
+static void
+interp_packedsimd_store (gpointer res, gpointer addr_of_addr, gpointer vec) {
+	// HACK: Result is unused because Store has a void return value. Uses the store intrinsic (like the loads) so the target needs no 16-byte alignment
+	wasm_v128_store (*(void **)addr_of_addr, *(v128_t *)vec);
+}
+
+#define INDIRECT_STORE_LANE(lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); /* lane index wrapped into range */ \
+	memcpy (*(void **)addr_of_addr, &((lane_type *)vec)[_lane], sizeof (lane_type)); /* memcpy: target may be unaligned */
+
+static void
+interp_packedsimd_store8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint8);
+}
+
+static void
+interp_packedsimd_store16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint16);
+}
+
+static void
+interp_packedsimd_store32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint32);
+}
+
+static void
+interp_packedsimd_store64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_STORE_LANE(guint64);
+}
+
+#define INDIRECT_LOAD_LANE(lane_type) \
+	int _lane = WRAP_LANE(lane_type, lane); \
+	/* we need temporary storage to do this since res may be the same as vec, addr_of_addr, or lane */ \
+	lane_type lanes[LANE_COUNT(lane_type)]; \
+	memcpy (lanes, vec, sizeof (lanes)); \
+	memcpy (&lanes[_lane], *(void **)addr_of_addr, sizeof (lane_type)); /* memcpy: source may be unaligned */ \
+	memcpy (res, lanes, sizeof (lanes));
+
+static void
+interp_packedsimd_load8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint8);
+}
+
+static void
+interp_packedsimd_load16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint16);
+}
+
+static void
+interp_packedsimd_load32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint32);
+}
+
+static void
+interp_packedsimd_load64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
+	INDIRECT_LOAD_LANE(guint64);
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
 	*((v128_t *)res) = c_intrinsic (v1); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
 	*((int32_t *)res) = c_intrinsic (*((v128_t *)v1)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((int *)v2)); \
 }
 
-#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) \
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) \
 static void \
-_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
+_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
 	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2), *((v128_t *)v3)); \
 }
 
+#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode)
+
 #include "interp-simd-intrins.def"
 
 #undef INTERP_WASM_SIMD_INTRINSIC_V_P
@@ -629,6 +863,9 @@ _mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) {
 #undef INTERP_WASM_SIMD_INTRINSIC_V_VV
 #undef INTERP_WASM_SIMD_INTRINSIC_V_VI
 #undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C3
 
 // Now generate the wasm opcode tables for the intrinsics
 
diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c
index 0ed06ab397135..cbdf7ccfc2cfe 100644
--- a/src/mono/mono/mini/interp/transform-simd.c
+++ b/src/mono/mono/mini/interp/transform-simd.c
@@ -5,6 +5,7 @@
 #include "config.h"
 #include <glib.h>
 #include <mono/utils/bsearch.h>
+#include <mono/metadata/class-internals.h>
 
 // We use the same approach as jit/aot for identifying simd methods.
 // FIXME Consider sharing the code
@@ -111,71 +112,10 @@ static guint16 sn_vector_t_methods [] = {
 };
 
 static guint16 sri_packedsimd_methods [] = {
-	SN_Add,
-	SN_And,
-	SN_Bitmask,
-	SN_CompareEqual,
-	SN_CompareNotEqual,
-	SN_ConvertNarrowingSignedSaturate,
-	SN_ConvertNarrowingUnsignedSaturate,
-	SN_Dot,
-	SN_Multiply,
-	SN_Negate,
-	SN_ShiftLeft,
-	SN_ShiftRightArithmetic,
-	SN_ShiftRightLogical,
-	SN_Splat,
-	SN_Subtract,
-	SN_Swizzle,
 	SN_get_IsHardwareAccelerated,
 	SN_get_IsSupported,
 };
 
-#if HOST_BROWSER
-
-/*
- * maps from INTERP_SIMD_INTRINSIC_WASM_I8X16_xxx to the correct one for the return type,
- * assuming that they are laid out sequentially like this:
- * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, wasm_i8x16_eq, 0x0)
- * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPAREEQUAL, wasm_i16x8_eq, 0x0)
- * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPAREEQUAL, wasm_i32x4_eq, 0x0)
- * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPAREEQUAL, wasm_i64x2_eq, 0x0)
- * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPAREEQUAL, wasm_f32x4_eq, 0x0)
- * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPAREEQUAL, wasm_f64x2_eq, 0x0)
- * It is your responsibility to ensure that it's actually laid out this way!
- */
-
-static int sri_packedsimd_offset_from_atype [] = {
-	-1, // MONO_TYPE_END        = 0x00,
-	-1, // MONO_TYPE_VOID       = 0x01,
-	-1, // MONO_TYPE_BOOLEAN    = 0x02,
-	-1, // MONO_TYPE_CHAR       = 0x03,
-	0, // MONO_TYPE_I1         = 0x04,
-	0, // MONO_TYPE_U1         = 0x05,
-	1, // MONO_TYPE_I2         = 0x06,
-	1, // MONO_TYPE_U2         = 0x07,
-	2, // MONO_TYPE_I4         = 0x08,
-	2, // MONO_TYPE_U4         = 0x09,
-	3, // MONO_TYPE_I8         = 0x0a,
-	3, // MONO_TYPE_U8         = 0x0b,
-	4, // MONO_TYPE_R4         = 0x0c,
-	5, // MONO_TYPE_R8         = 0x0d,
-	-1, // MONO_TYPE_STRING     = 0x0e,
-	-1, // MONO_TYPE_PTR        = 0x0f,
-	-1, // MONO_TYPE_BYREF      = 0x10,
-	-1, // MONO_TYPE_VALUETYPE  = 0x11,
-	-1, // MONO_TYPE_CLASS      = 0x12,
-	-1, // MONO_TYPE_VAR	     = 0x13,
-	-1, // MONO_TYPE_ARRAY      = 0x14,
-	-1, // MONO_TYPE_GENERICINST= 0x15,
-	-1, // MONO_TYPE_TYPEDBYREF = 0x16,
-	2, // MONO_TYPE_I          = 0x18,
-	2, // MONO_TYPE_U          = 0x19,
-};
-
-static const int sri_packedsimd_offset_from_atype_length = sizeof(sri_packedsimd_offset_from_atype) / sizeof(sri_packedsimd_offset_from_atype[0]);
-#endif // HOST_BROWSER
-
 // Returns if opcode was added
 static gboolean
 emit_common_simd_operations (TransformData *td, int id, int atype, int vector_size, int arg_size, int scalar_arg, gint16 *simd_opcode, gint16 *simd_intrins)
@@ -349,21 +289,24 @@ get_common_simd_info (MonoClass *vector_klass, MonoMethodSignature *csignature,
 }
 
 static void
-emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMethodSignature *csignature, int vector_size)
+emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMethodSignature *csignature, int vector_size, gboolean allow_void)
 {
 	td->sp -= csignature->param_count;
 	for (int i = 0; i < csignature->param_count; i++)
 		td->last_ins->sregs [i] = td->sp [i].local;
 
-	g_assert (csignature->ret->type != MONO_TYPE_VOID);
 	int ret_mt = mono_mint_type (csignature->ret);
-	if (ret_mt == MINT_TYPE_VT) {
+	if (csignature->ret->type == MONO_TYPE_VOID) {
+		g_assert (allow_void);
+		interp_ins_set_dummy_dreg (td->last_ins, td);
+	} else if (ret_mt == MINT_TYPE_VT) {
 		// For these intrinsics, if we return a VT then it is a V128
 		push_type_vt (td, vector_klass, vector_size);
+		interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 	} else {
 		push_simple_type (td, stack_type [ret_mt]);
+		interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 	}
-	interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 	td->ip += 5;
 }
 
@@ -527,7 +470,7 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 	td->last_ins->data [0] = simd_intrins;
 
 opcode_added:
-	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size);
+	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE);
 	return TRUE;
 }
 
@@ -559,7 +502,7 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur
 	td->last_ins->data [0] = simd_intrins;
 
 opcode_added:
-	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size);
+	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE);
 	return TRUE;
 }
 
@@ -591,22 +534,213 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c
 	td->last_ins->data [0] = simd_intrins;
 
 opcode_added:
-	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size);
+	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE);
 	return TRUE;
 }
 
 #if HOST_BROWSER
+
+#define PSIMD_ARGTYPE_I1 MONO_TYPE_I1 // concrete types: the element type must be exactly this MONO_TYPE_xx
+#define PSIMD_ARGTYPE_I2 MONO_TYPE_I2
+#define PSIMD_ARGTYPE_I4 MONO_TYPE_I4
+#define PSIMD_ARGTYPE_I8 MONO_TYPE_I8
+#define PSIMD_ARGTYPE_U1 MONO_TYPE_U1
+#define PSIMD_ARGTYPE_U2 MONO_TYPE_U2
+#define PSIMD_ARGTYPE_U4 MONO_TYPE_U4
+#define PSIMD_ARGTYPE_U8 MONO_TYPE_U8
+#define PSIMD_ARGTYPE_R4 MONO_TYPE_R4
+#define PSIMD_ARGTYPE_R8 MONO_TYPE_R8
+#define PSIMD_ARGTYPE_D1 0xF01 // matches I1 or U1
+#define PSIMD_ARGTYPE_D2 0xF02 // matches I2 or U2
+#define PSIMD_ARGTYPE_D4 0xF04 // matches I4 or U4
+#define PSIMD_ARGTYPE_D8 0xF08 // matches I8 or U8
+#define PSIMD_ARGTYPE_X1 0xF11 // matches I1 or U1 (same set as D1)
+#define PSIMD_ARGTYPE_X2 0xF12 // matches I2 or U2 (same set as D2)
+#define PSIMD_ARGTYPE_X4 0xF14 // matches I4, U4 or R4
+#define PSIMD_ARGTYPE_X8 0xF18 // matches I8, U8 or R8
+#define PSIMD_ARGTYPE_ANY 0xFFF // matches every element type
+
+static gboolean
+packedsimd_type_matches (MonoTypeEnum type, int expected_type)
+{
+	// Integer width classes of the actual element type
+	const gboolean is_int8 = (type == MONO_TYPE_I1) || (type == MONO_TYPE_U1);
+	const gboolean is_int16 = (type == MONO_TYPE_I2) || (type == MONO_TYPE_U2);
+	const gboolean is_int32 = (type == MONO_TYPE_I4) || (type == MONO_TYPE_U4);
+	const gboolean is_int64 = (type == MONO_TYPE_I8) || (type == MONO_TYPE_U8);
+	// The wildcard accepts anything; a concrete MONO_TYPE_xx must match exactly
+	if ((expected_type == PSIMD_ARGTYPE_ANY) || (type == expected_type))
+		return TRUE;
+
+	// Dn accepts the n-byte integers; X4/X8 additionally accept the float of that size
+	if ((expected_type == PSIMD_ARGTYPE_D1) || (expected_type == PSIMD_ARGTYPE_X1))
+		return is_int8;
+	if ((expected_type == PSIMD_ARGTYPE_D2) || (expected_type == PSIMD_ARGTYPE_X2))
+		return is_int16;
+	if (expected_type == PSIMD_ARGTYPE_D4)
+		return is_int32;
+	if (expected_type == PSIMD_ARGTYPE_D8)
+		return is_int64;
+	if (expected_type == PSIMD_ARGTYPE_X4)
+		return is_int32 || (type == MONO_TYPE_R4);
+	if (expected_type == PSIMD_ARGTYPE_X8)
+		return is_int64 || (type == MONO_TYPE_R8);
+	return FALSE;
+}
+
+typedef struct { // one row of interp-simd-intrins.def, as filled in by INTRINS_COMMON
+	const char *name, *intrinsic_name; // stringified table name (lookup/sort key); stringified C intrinsic or function name
+	int arg_type, interp_opcode, simd_intrins; // PSIMD_ARGTYPE_xx; MINT_SIMD_INTRINS_xx opcode; INTERP_SIMD_INTRINSIC_xx id
+} PackedSimdIntrinsicInfo;
+
+#define INTRINS_COMMON(_name, arg1, c_intrinsic, _interp_opcode, _id) \
+	{ #_name, #c_intrinsic, PSIMD_ARGTYPE_ ## arg1, _interp_opcode, _id },
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_P
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_V
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
+#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_I_V
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_P, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
+#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PPP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C3
+#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_intrinsic, wasm_opcode) \
+	INTRINS_COMMON(name, arg1, c_intrinsic, MINT_SIMD_INTRINS_P_PPP, INTERP_SIMD_INTRINSIC_ ## name ## arg1)
+
+static PackedSimdIntrinsicInfo unsorted_packedsimd_intrinsic_infos[] = {
+#include "interp-simd-intrins.def"
+};
+#undef INTERP_WASM_SIMD_INTRINSIC_V_P
+#undef INTERP_WASM_SIMD_INTRINSIC_V_V
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
+#undef INTERP_WASM_SIMD_INTRINSIC_I_V
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
+#undef INTERP_WASM_SIMD_INTRINSIC_V_C3
+
+static PackedSimdIntrinsicInfo *sorted_packedsimd_intrinsic_infos;
+
 static int
-map_packedsimd_intrins_based_on_atype (MonoTypeEnum atype, int base_intrins, gboolean allow_float)
+compare_packedsimd_intrinsic_info (const void *_lhs, const void *_rhs)
 {
-	int max_offset = allow_float ? 5 : 3;
-	if ((atype < 0) || (atype >= sri_packedsimd_offset_from_atype_length))
-		return -1;
-	int offset = sri_packedsimd_offset_from_atype [atype];
-	if ((offset < 0) || (offset > max_offset))
-		return -1;
-	return base_intrins + offset;
+	g_assert (_lhs);
+	g_assert (_rhs);
+	const PackedSimdIntrinsicInfo *lhs = _lhs, *rhs = _rhs;
+	return strcmp (lhs->name, rhs->name);
 }
+// Finds the table entry called 'name' whose arg type accepts arg1's element type; returns NULL when there is none
+static PackedSimdIntrinsicInfo *
+lookup_packedsimd_intrinsic (const char *name, MonoType *arg1)
+{
+	MonoClass *vector_klass = mono_class_from_mono_type_internal (arg1);
+	MonoType *arg_type = NULL;
+	// The element type is the simd type's first generic argument or, for a pointer argument, the pointee type
+	if (m_class_is_simd_type (vector_klass)) {
+		arg_type = mono_class_get_context (vector_klass)->class_inst->type_argv [0];
+	} else if (arg1->type == MONO_TYPE_PTR) {
+		arg_type = arg1->data.type;
+	} else {
+		// g_printf ("%s arg1 type was not pointer or simd type: %s\n", name, m_class_get_name (vector_klass));
+		return NULL;
+	}
+
+	if (!mono_type_is_primitive (arg_type)) {
+		// g_printf ("%s arg1 inner type was not primitive\n", name);
+		return NULL;
+	}
+
+	int arg_type_enum = arg_type->type,
+		search_begin_index,
+		num_intrinsics = sizeof(unsorted_packedsimd_intrinsic_infos) / sizeof(PackedSimdIntrinsicInfo);
+	if (arg_type_enum == MONO_TYPE_BOOLEAN)
+		return NULL;
+
+	PackedSimdIntrinsicInfo *result = NULL, *search_begin;
+	PackedSimdIntrinsicInfo search_key = { name, name };
+
+	// Ensure we have a sorted version of the intrinsics table
+	if (!sorted_packedsimd_intrinsic_infos) {
+		int buf_size = sizeof(unsorted_packedsimd_intrinsic_infos);
+		PackedSimdIntrinsicInfo *temp_sorted = g_malloc0 (buf_size);
+		memcpy (temp_sorted, unsorted_packedsimd_intrinsic_infos, buf_size);
+		mono_qsort (temp_sorted, num_intrinsics, sizeof(PackedSimdIntrinsicInfo), compare_packedsimd_intrinsic_info);
+		mono_atomic_cas_ptr ((gpointer*)&sorted_packedsimd_intrinsic_infos, (gpointer)temp_sorted, NULL);
+		if (sorted_packedsimd_intrinsic_infos != temp_sorted) // another thread published its copy first
+			g_free (temp_sorted);
+	}
+
+	// Binary search by name to find a suitable starting location for our search
+	search_begin = (PackedSimdIntrinsicInfo*)mono_binary_search (
+		&search_key, sorted_packedsimd_intrinsic_infos,
+		num_intrinsics, sizeof(PackedSimdIntrinsicInfo),
+		compare_packedsimd_intrinsic_info
+	);
+	if (!search_begin) {
+		// g_printf ("No matching PackedSimd intrinsics for name %s\n", name);
+		return NULL;
+	}
+
+	search_begin_index = search_begin - sorted_packedsimd_intrinsic_infos;
+
+	// Search upwards and downwards through the table simultaneously from our starting location,
+	//  looking for an intrinsic with a matching name that also has a compatible argument type
+	// NOTE: If there are two suitable matches because you got the table wrong, this is nondeterministic
+	for (int low = search_begin_index, high = search_begin_index;
+		(low >= 0) || (high < num_intrinsics);
+		--low, ++high) {
+		PackedSimdIntrinsicInfo *low_info = (low >= 0) ? &sorted_packedsimd_intrinsic_infos[low] : NULL,
+			*high_info = (high < num_intrinsics) ? &sorted_packedsimd_intrinsic_infos[high] : NULL;
+		// As long as either the low or high offset are within range and have a name match, we keep going
+		gboolean low_name_matches = low_info && !strcmp (name, low_info->name),
+			high_name_matches = high_info && !strcmp (name, high_info->name);
+		if (!low_name_matches && !high_name_matches)
+			break;
+
+		// Now see whether we have a matching type and name at either offset
+		if (low_name_matches && packedsimd_type_matches (arg_type_enum, low_info->arg_type)) {
+			result = low_info;
+			break;
+		}
+		if (high_name_matches && packedsimd_type_matches (arg_type_enum, high_info->arg_type)) {
+			result = high_info;
+			break;
+		}
+	}
+
+	/*
+	if (!result)
+		g_printf ("No matching PackedSimd intrinsic for %s[%s]\n", name, m_class_get_name (mono_class_from_mono_type_internal (arg_type)));
+	*/
+	return result;
+}
+
 #endif
 
 static gboolean
@@ -616,7 +750,8 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 	// We don't early-out for an unrecognized method, we will generate an NIY later
 
 	MonoClass *vector_klass = mono_class_from_mono_type_internal (csignature->ret);
-	int vector_size = -1;
+	MonoTypeEnum atype;
+	int vector_size = -1, arg_size, scalar_arg;
 
 	// NOTE: Linker substitutions (used in AOT) will prevent this from running.
 	if ((id == SN_get_IsSupported) || (id == SN_get_IsHardwareAccelerated)) {
@@ -628,13 +763,25 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 		goto opcode_added;
 	}
 
+	get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg);
+
 #if HOST_BROWSER
-	if (id < 0) {
-		g_print ("MONO interpreter: Unimplemented method: System.Runtime.Intrinsics.Wasm.PackedSimd.%s\n", cmethod->name);
+	gint16 simd_opcode = -1;
+	gint16 simd_intrins = -1;
+
+	PackedSimdIntrinsicInfo *info = lookup_packedsimd_intrinsic (cmethod->name, csignature->params[0]);
+
+	if (info && info->interp_opcode && info->simd_intrins) {
+		simd_opcode = info->interp_opcode;
+		simd_intrins = info->simd_intrins;
+		// g_print ("%s %d -> %s %d %s\n", info->name, info->arg_type, mono_interp_opname (simd_opcode), simd_intrins, info->intrinsic_name);
+	} else {
+		g_warning ("MONO interpreter: Unimplemented method: System.Runtime.Intrinsics.Wasm.PackedSimd.%s\n", cmethod->name);
 
 		// If we're missing a packedsimd method but the packedsimd method was AOT'd, we can
 		//  just let the interpreter generate a native call to the AOT method instead of
 		//  generating an NIY that will halt execution
+		// FIXME: Should we remove this now that the interpreter supports all of the methods?
 		ERROR_DECL (error);
 		gpointer addr = mono_aot_get_method (cmethod, error);
 		if (addr)
@@ -646,109 +793,6 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 		goto opcode_added;
 	}
 
-	gint16 simd_opcode = -1;
-	gint16 simd_intrins = -1;
-
-	MonoTypeEnum atype;
-	int scalar_arg;
-	if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, NULL, &scalar_arg))
-		return FALSE;
-
-	switch (id) {
-		case SN_Splat: {
-			simd_opcode = MINT_SIMD_INTRINS_P_P;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SPLAT, FALSE);
-			break;
-		}
-		case SN_Swizzle: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_SWIZZLE;
-			break;
-		}
-		case SN_Add: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_ADD, FALSE);
-			break;
-		}
-		case SN_Subtract: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SUBTRACT, FALSE);
-			break;
-		}
-		case SN_Multiply: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_MULTIPLY, FALSE);
-			break;
-		}
-		case SN_Dot: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I32X4_DOT_I16X8;
-			break;
-		}
-		case SN_Negate: {
-			simd_opcode = MINT_SIMD_INTRINS_P_P;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_NEGATE, FALSE);
-			break;
-		}
-		case SN_ShiftLeft: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTLEFT, FALSE);
-			break;
-		}
-		case SN_ShiftRightArithmetic: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTARITHMETIC, FALSE);
-			break;
-		}
-		case SN_ShiftRightLogical: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTLOGICAL, FALSE);
-			break;
-		}
-		case SN_And: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = INTERP_SIMD_INTRINSIC_WASM_AND;
-			break;
-		}
-		case SN_Bitmask: {
-			simd_opcode = MINT_SIMD_INTRINS_P_P;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_BITMASK, FALSE);
-			break;
-		}
-		case SN_CompareEqual: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, TRUE);
-			break;
-		}
-		case SN_CompareNotEqual: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPARENOTEQUAL, TRUE);
-			break;
-		}
-		case SN_ConvertNarrowingSignedSaturate: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			if (atype == MONO_TYPE_I1)
-				simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_S;
-			else if (atype == MONO_TYPE_I2)
-				simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_S;
-			break;
-		}
-		case SN_ConvertNarrowingUnsignedSaturate: {
-			simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			if (atype == MONO_TYPE_U1)
-				simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_U;
-			else if (atype == MONO_TYPE_U2)
-				simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_U;
-			break;
-		}
-		default:
-			return FALSE;
-	}
-
-	if (simd_opcode == -1 || simd_intrins == -1) {
-		return FALSE;
-	}
-
 	interp_add_ins (td, simd_opcode);
 	td->last_ins->data [0] = simd_intrins;
 #else // HOST_BROWSER
@@ -756,7 +800,7 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 #endif // HOST_BROWSER
 
 opcode_added:
-	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size);
+	emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, TRUE);
 	return TRUE;
 }
 
diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c
index f416818ca7358..ce047b2677bcf 100644
--- a/src/mono/mono/mini/interp/transform.c
+++ b/src/mono/mono/mini/interp/transform.c
@@ -403,6 +403,15 @@ create_interp_local_explicit (TransformData *td, MonoType *type, int size)
 
 }
 
+static void
+create_interp_dummy_var (TransformData *td) // Creates td's dummy var, used as the dreg of instructions that produce no value (see interp_ins_set_dummy_dreg)
+{
+	g_assert (td->dummy_var < 0); // a negative index means the dummy var has not been created yet, so this must run at most once per td
+	td->dummy_var = create_interp_local_explicit (td, m_class_get_byval_arg (mono_defaults.void_class), 8); // void-typed local with an explicit size of 8
+	td->locals [td->dummy_var].offset = 0; // offset is assigned right here, at creation time
+	td->locals [td->dummy_var].flags = INTERP_LOCAL_FLAG_GLOBAL;
+}
+
 static int
 get_tos_offset (TransformData *td)
 {
@@ -1281,11 +1290,6 @@ interp_get_icall_sig (MonoMethodSignature *sig);
 static void
 interp_generate_icall_throw (TransformData *td, MonoJitICallInfo *icall_info, gpointer arg1, gpointer arg2)
 {
-	// Allocate dreg for call, only void calls are supported
-	push_simple_type (td, STACK_TYPE_I4);
-	td->sp--;
-	int dummy_dreg = td->sp [0].local;
-
 	int num_args = icall_info->sig->param_count;
 	if (num_args > 0)
 		emit_ldptr (td, arg1);
@@ -1295,7 +1299,7 @@ interp_generate_icall_throw (TransformData *td, MonoJitICallInfo *icall_info, gp
 	td->sp -= num_args;
 
 	interp_add_ins (td, MINT_ICALL);
-	interp_ins_set_dreg (td->last_ins, dummy_dreg);
+	interp_ins_set_dummy_dreg (td->last_ins, td);
 	interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG);
 	td->last_ins->data [0] = interp_get_icall_sig (icall_info->sig);
 	td->last_ins->data [1] = get_data_item_index (td, (gpointer)icall_info->func);
@@ -11046,6 +11050,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 	td->mem_manager = m_method_get_mem_manager (method);
 	td->n_data_items = 0;
 	td->max_data_items = 0;
+	td->dummy_var = -1;
 	td->data_items = NULL;
 	td->data_hash = g_hash_table_new (NULL, NULL);
 #ifdef ENABLE_EXPERIMENT_TIERED
diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h
index fcc51c25f9c44..7ff3f503a9fd4 100644
--- a/src/mono/mono/mini/interp/transform.h
+++ b/src/mono/mono/mini/interp/transform.h
@@ -224,6 +224,7 @@ typedef struct
 	gint32 total_locals_size;
 	gint32 max_stack_size;
 	InterpLocal *locals;
+	int dummy_var;
 	int *local_ref_count;
 	unsigned int il_locals_offset;
 	unsigned int il_locals_size;
@@ -284,6 +285,11 @@ typedef struct
 #define STACK_TYPE_I STACK_TYPE_I4
 #endif
 
+#define interp_ins_set_dummy_dreg(ins,td) do { /* make ins write its (unused) result to td's dummy var */ \
+	if (td->dummy_var < 0) /* negative index: dummy var not created yet, create it on first use */ \
+		create_interp_dummy_var (td); \
+	ins->dreg = td->dummy_var; \
+} while (0)
 
 #define interp_ins_set_dreg(ins,dr) do { \
         ins->dreg = dr; \
diff --git a/src/mono/wasm/runtime/genmintops.py b/src/mono/wasm/runtime/genmintops.py
index 604c870ae0fd8..33c370690f8c0 100755
--- a/src/mono/wasm/runtime/genmintops.py
+++ b/src/mono/wasm/runtime/genmintops.py
@@ -22,9 +22,9 @@
 tab = "    "
 header_lines = src.read().splitlines()
 # strip preprocessing directives
-simd_header_lines = (l for l in simd_src.read().splitlines() if not l.startswith("#"))
+simd_header_lines = (l for l in simd_src.read().splitlines() if not (l.startswith("#") or l.startswith("//")))
 # strip preprocessing directives and add indentation for tslint/eslint
-header = "\n".join((tab + l) for l in header_lines if not l.startswith("#"))
+header = "\n".join((tab + l) for l in header_lines if not (l.startswith("#") or l.startswith("//")))
 src.close()
 simd_src.close()
 
@@ -42,10 +42,13 @@
     "INTERP_SIMD_INTRINSIC_P_PPP": simd_values_3,
     "INTERP_WASM_SIMD_INTRINSIC_V_P": simd_values_1,
     "INTERP_WASM_SIMD_INTRINSIC_V_V": simd_values_1,
+    "INTERP_WASM_SIMD_INTRINSIC_V_C1": simd_values_1,
     "INTERP_WASM_SIMD_INTRINSIC_I_V": simd_values_1,
     "INTERP_WASM_SIMD_INTRINSIC_V_VV": simd_values_2,
+    "INTERP_WASM_SIMD_INTRINSIC_V_C2": simd_values_2,
     "INTERP_WASM_SIMD_INTRINSIC_V_VI": simd_values_2,
     "INTERP_WASM_SIMD_INTRINSIC_V_VVV": simd_values_3,
+    "INTERP_WASM_SIMD_INTRINSIC_V_C3": simd_values_3,
 }
 
 for line in simd_header_lines:
@@ -53,7 +56,11 @@
     idx2 = line.index(",") if "," in line else None
     if (idx1 and idx2):
         key = line[0:idx1].strip()
-        simd_disp[key].append(line[(idx1 + 1):idx2].strip().replace("INTERP_SIMD_INTRINSIC_", ""))
+        vals = line[(idx1 + 1):].strip().split(",")  # comma-separated macro arguments following idx1 (presumably the opening paren)
+        id = vals[0].replace("INTERP_SIMD_INTRINSIC_", "").strip()  # first argument without the common enum prefix
+        if (len(vals) == 4):  # four-argument form, e.g. (name, arg1, c_intrinsic, wasm_opcode)
+            id += vals[1].strip()  # the .def macros paste name ## arg1, so the generated name is the concatenation
+        simd_disp[key].append(id)
 
 splitter = ",\n    "
 splitter2 = ",\n        "
diff --git a/src/mono/wasm/runtime/jiterpreter-support.ts b/src/mono/wasm/runtime/jiterpreter-support.ts
index 742ae737a29c8..6260174b8d343 100644
--- a/src/mono/wasm/runtime/jiterpreter-support.ts
+++ b/src/mono/wasm/runtime/jiterpreter-support.ts
@@ -635,9 +635,21 @@ export class WasmBuilder {
                 exportCount++;
 
             this.beginFunction(func.typeName, func.locals);
-            func.blob = func.generator();
-            if (!func.blob)
-                func.blob = this.endFunction(false);
+            try {
+                func.blob = func.generator();
+            } finally {
+                // If func.generator failed due to an error or didn't return a blob, we want
+                //  to call endFunction to pop the stack and create the blob automatically.
+                // We may be in the middle of handling an exception so don't let this automatic
+                //  logic throw and suppress the original exception being handled
+                try {
+                    if (!func.blob)
+                        func.blob = this.endFunction(false);
+                } catch {
+                    // eslint-disable-next-line @typescript-eslint/no-extra-semi
+                    ;
+                }
+            }
         }
 
         this._generateImportSection(includeFunctionTable);
diff --git a/src/mono/wasm/runtime/jiterpreter-tables.ts b/src/mono/wasm/runtime/jiterpreter-tables.ts
index 0fef162c42109..c3f4b42f57eae 100644
--- a/src/mono/wasm/runtime/jiterpreter-tables.ts
+++ b/src/mono/wasm/runtime/jiterpreter-tables.ts
@@ -2,7 +2,7 @@ import {
     WasmOpcode, WasmSimdOpcode, JiterpSpecialOpcode
 } from "./jiterpreter-opcodes";
 import {
-    MintOpcode, SimdIntrinsic2, SimdIntrinsic3
+    MintOpcode, SimdIntrinsic2, SimdIntrinsic3, SimdIntrinsic4
 } from "./mintops";
 
 export const ldcTable: { [opcode: number]: [WasmOpcode, number] } = {
@@ -357,14 +357,57 @@ export const simdShiftTable = new Set<SimdIntrinsic3>([
     SimdIntrinsic3.V128_I8_URIGHT_SHIFT,
 ]);
 
-export const bitmaskTable : { [intrinsic: number]: WasmSimdOpcode } = {
+export const simdExtractTable: { [intrinsic: number]: [laneCount: number, laneStoreOpcode: WasmOpcode] } = { // emit_simd_3 range-checks the constant lane index against laneCount and stores the extracted lane with laneStoreOpcode
+    [SimdIntrinsic3.ExtractLaneI1]: [16, WasmOpcode.i32_store],
+    [SimdIntrinsic3.ExtractLaneU1]: [16, WasmOpcode.i32_store],
+    [SimdIntrinsic3.ExtractLaneI2]: [8, WasmOpcode.i32_store],
+    [SimdIntrinsic3.ExtractLaneU2]: [8, WasmOpcode.i32_store],
+    [SimdIntrinsic3.ExtractLaneD4]: [4, WasmOpcode.i32_store],
+    [SimdIntrinsic3.ExtractLaneR4]: [4, WasmOpcode.f32_store],
+    [SimdIntrinsic3.ExtractLaneD8]: [2, WasmOpcode.i64_store],
+    [SimdIntrinsic3.ExtractLaneR8]: [2, WasmOpcode.f64_store],
+};
+
+export const simdReplaceTable: { [intrinsic: number]: [laneCount: number, laneLoadOpcode: WasmOpcode] } = { // emit_simd_4 range-checks the constant lane index against laneCount and loads the replacement value with laneLoadOpcode
+    [SimdIntrinsic4.ReplaceLaneD1]: [16, WasmOpcode.i32_load],
+    [SimdIntrinsic4.ReplaceLaneD2]: [8, WasmOpcode.i32_load],
+    [SimdIntrinsic4.ReplaceLaneD4]: [4, WasmOpcode.i32_load],
+    [SimdIntrinsic4.ReplaceLaneR4]: [4, WasmOpcode.f32_load],
+    [SimdIntrinsic4.ReplaceLaneD8]: [2, WasmOpcode.i64_load],
+    [SimdIntrinsic4.ReplaceLaneR8]: [2, WasmOpcode.f64_load],
+};
+
+export const simdLoadTable = new Set<SimdIntrinsic2>([ // intrinsics that emit_simd_2 emits as a memory load through the pointer held in the argument local (opcode followed by a memarg)
+    SimdIntrinsic2.LoadVector128ANY,
+    SimdIntrinsic2.LoadScalarAndSplatVector128X1,
+    SimdIntrinsic2.LoadScalarAndSplatVector128X2,
+    SimdIntrinsic2.LoadScalarAndSplatVector128X4,
+    SimdIntrinsic2.LoadScalarAndSplatVector128X8,
+    SimdIntrinsic2.LoadScalarVector128X4,
+    SimdIntrinsic2.LoadScalarVector128X8,
+    SimdIntrinsic2.LoadWideningVector128I1,
+    SimdIntrinsic2.LoadWideningVector128U1,
+    SimdIntrinsic2.LoadWideningVector128I2,
+    SimdIntrinsic2.LoadWideningVector128U2,
+    SimdIntrinsic2.LoadWideningVector128I4,
+    SimdIntrinsic2.LoadWideningVector128U4,
+]);
+
+export const simdStoreTable: { [intrinsic: number]: [laneCount: number] } = { // emit_simd_4 range-checks the constant lane index against laneCount before emitting the store opcode with its memarg and lane immediate
+    [SimdIntrinsic4.StoreSelectedScalarX1]: [16],
+    [SimdIntrinsic4.StoreSelectedScalarX2]: [8],
+    [SimdIntrinsic4.StoreSelectedScalarX4]: [4],
+    [SimdIntrinsic4.StoreSelectedScalarX8]: [2],
+};
+
+export const bitmaskTable: { [intrinsic: number]: WasmSimdOpcode } = {
     [SimdIntrinsic2.V128_I1_EXTRACT_MSB]: WasmSimdOpcode.i8x16_bitmask,
     [SimdIntrinsic2.V128_I2_EXTRACT_MSB]: WasmSimdOpcode.i16x8_bitmask,
     [SimdIntrinsic2.V128_I4_EXTRACT_MSB]: WasmSimdOpcode.i32x4_bitmask,
     [SimdIntrinsic2.V128_I8_EXTRACT_MSB]: WasmSimdOpcode.i64x2_bitmask,
 };
 
-export const createScalarTable : { [intrinsic: number]: [WasmOpcode, WasmSimdOpcode] } = {
+export const createScalarTable: { [intrinsic: number]: [WasmOpcode, WasmSimdOpcode] } = {
     [SimdIntrinsic2.V128_I1_CREATE_SCALAR]: [WasmOpcode.i32_load8_s, WasmSimdOpcode.i8x16_replace_lane],
     [SimdIntrinsic2.V128_I2_CREATE_SCALAR]: [WasmOpcode.i32_load16_s, WasmSimdOpcode.i16x8_replace_lane],
     [SimdIntrinsic2.V128_I4_CREATE_SCALAR]: [WasmOpcode.i32_load, WasmSimdOpcode.i32x4_replace_lane],
diff --git a/src/mono/wasm/runtime/jiterpreter-trace-generator.ts b/src/mono/wasm/runtime/jiterpreter-trace-generator.ts
index ea16a95230124..d6b2d8d6b6b98 100644
--- a/src/mono/wasm/runtime/jiterpreter-trace-generator.ts
+++ b/src/mono/wasm/runtime/jiterpreter-trace-generator.ts
@@ -49,6 +49,8 @@ import {
     simdCreateLoadOps, simdCreateSizes,
     simdCreateStoreOps, simdShiftTable,
     bitmaskTable, createScalarTable,
+    simdExtractTable, simdReplaceTable,
+    simdLoadTable, simdStoreTable,
 } from "./jiterpreter-tables";
 import { mono_log_error, mono_log_info } from "./logging";
 
@@ -3496,10 +3498,19 @@ function append_simd_4_load(builder: WasmBuilder, ip: MintOpcodePtr) {
 
 function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic2): boolean {
     const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(1, index);
-    if (simple) {
-        append_simd_2_load(builder, ip);
-        builder.appendSimd(simple);
-        append_simd_store(builder, ip);
+    if (simple >= 0) {
+        if (simdLoadTable.has(index)) {
+            // Indirect load, so v1 is T** and res is Vector128*
+            builder.local("pLocals");
+            append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
+            builder.appendSimd(simple);
+            builder.appendMemarg(0, 0);
+            append_simd_store(builder, ip);
+        } else {
+            append_simd_2_load(builder, ip);
+            builder.appendSimd(simple);
+            append_simd_store(builder, ip);
+        }
         return true;
     }
 
@@ -3554,14 +3565,34 @@ function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins
 
 function emit_simd_3(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic3): boolean {
     const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(2, index);
-    if (simple) {
-        const isShift = simdShiftTable.has(index);
+    if (simple >= 0) {
+        const isShift = simdShiftTable.has(index),
+            extractTup = simdExtractTable[index];
+
         if (isShift) {
             builder.local("pLocals");
             append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
             append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.i32_load);
             builder.appendSimd(simple);
             append_simd_store(builder, ip);
+        } else if (Array.isArray(extractTup)) {
+            const lane = get_known_constant_value(builder, getArgU16(ip, 3)),
+                laneCount = extractTup[0];
+            if (typeof (lane) !== "number") {
+                mono_log_error (`${builder.functions[0].name}: Non-constant lane index passed to ExtractLane`);
+                return false;
+            } else if ((lane >= laneCount) || (lane < 0)) {
+                mono_log_error (`${builder.functions[0].name}: ExtractLane index ${lane} out of range (0 - ${laneCount - 1})`);
+                return false;
+            }
+
+            // load vec onto stack and then emit extract + lane imm
+            builder.local("pLocals");
+            append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+            builder.appendSimd(simple);
+            builder.appendU8(lane);
+            // Store using the opcode from the tuple
+            append_stloc_tail(builder, getArgU16(ip, 1), extractTup[1]);
         } else {
             append_simd_3_load(builder, ip);
             builder.appendSimd(simple);
@@ -3571,6 +3602,13 @@ function emit_simd_3(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins
     }
 
     switch (index) {
+        case SimdIntrinsic3.StoreANY:
+            // Indirect store where args are [V128**, V128*]
+            append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
+            append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+            builder.appendSimd(WasmSimdOpcode.v128_store);
+            builder.appendMemarg(0, 0);
+            return true;
         case SimdIntrinsic3.V128_BITWISE_EQUALITY:
         case SimdIntrinsic3.V128_BITWISE_INEQUALITY:
             append_simd_3_load(builder, ip);
@@ -3682,10 +3720,49 @@ function emit_shuffle(builder: WasmBuilder, ip: MintOpcodePtr, elementCount: num
 
 function emit_simd_4(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic4): boolean {
     const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(3, index);
-    if (simple) {
-        append_simd_4_load(builder, ip);
-        builder.appendSimd(simple);
-        append_simd_store(builder, ip);
+    if (simple >= 0) {
+        // [lane count, value load opcode]
+        const rtup = simdReplaceTable[index],
+            stup = simdStoreTable[index];
+        if (Array.isArray(rtup)) {
+            const laneCount = rtup[0],
+                lane = get_known_constant_value(builder, getArgU16(ip, 3));
+            if (typeof (lane) !== "number") {
+                mono_log_error (`${builder.functions[0].name}: Non-constant lane index passed to ReplaceLane`);
+                return false;
+            } else if ((lane >= laneCount) || (lane < 0)) {
+                mono_log_error (`${builder.functions[0].name}: ReplaceLane index ${lane} out of range (0 - ${laneCount - 1})`);
+                return false;
+            }
+
+            // arrange stack as [vec, value] and then write replace + lane imm
+            builder.local("pLocals");
+            append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+            append_ldloc(builder, getArgU16(ip, 4), rtup[1]);
+            builder.appendSimd(simple);
+            builder.appendU8(lane);
+            append_simd_store(builder, ip);
+        } else if (Array.isArray(stup)) {
+            // Indirect store where args are [Scalar**, V128*]
+            const laneCount = stup[0],
+                lane = get_known_constant_value(builder, getArgU16(ip, 4));
+            if (typeof (lane) !== "number") {
+                mono_log_error (`${builder.functions[0].name}: Non-constant lane index passed to store method`);
+                return false;
+            } else if ((lane >= laneCount) || (lane < 0)) {
+                mono_log_error (`${builder.functions[0].name}: Store lane ${lane} out of range (0 - ${laneCount - 1})`);
+                return false;
+            }
+            append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
+            append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+            builder.appendSimd(simple);
+            builder.appendMemarg(0, 0);
+            builder.appendU8(lane);
+        } else {
+            append_simd_4_load(builder, ip);
+            builder.appendSimd(simple);
+            append_simd_store(builder, ip);
+        }
         return true;
     }
 
@@ -3700,6 +3777,28 @@ function emit_simd_4(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins
             builder.appendSimd(WasmSimdOpcode.v128_bitselect);
             append_simd_store(builder, ip);
             return true;
+        case SimdIntrinsic4.ShuffleD1: { // PackedSimd.Shuffle -> i8x16.shuffle, whose 16 lane indices are immediates and so must be known constants
+            const indices = get_known_constant_value(builder, getArgU16(ip, 4));
+            if (typeof (indices) !== "object") {
+                mono_log_error (`${builder.functions[0].name}: Non-constant indices passed to PackedSimd.Shuffle`);
+                return false;
+            }
+            for (let i = 0; i < 16; i++) { // one index per output byte lane (16 total); each value selects one of the 32 bytes of the two inputs
+                const lane = indices[i];
+                if ((lane < 0) || (lane > 31)) {
+                    mono_log_error (`${builder.functions[0].name}: Shuffle lane index #${i} (${lane}) out of range (0 - 31)`);
+                    return false;
+                }
+            }
+
+            builder.local("pLocals"); // pushed for the trailing append_simd_store, as in the other cases of this function
+            append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+            append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+            builder.appendSimd(WasmSimdOpcode.i8x16_shuffle);
+            builder.appendBytes(indices); // the validated lane indices become the instruction's immediate bytes
+            append_simd_store(builder, ip);
+            return true;
+        }
         default:
             return false;
     }