Skip to content

Commit

Permalink
[wasm] Implement complete PackedSimd feature set in interpreter (#87903)
Browse files Browse the repository at this point in the history
Implement all PackedSIMD APIs in wasm builds of the interpreter based on a table and auto-generated code
Implement most PackedSIMD APIs in jiterpreter
Detect constant simd arguments in jiterpreter and verify them
Simplify unused/temporary dregs in interp
  • Loading branch information
kg authored Jul 1, 2023
1 parent d64c06f commit 0185099
Show file tree
Hide file tree
Showing 9 changed files with 966 additions and 314 deletions.
387 changes: 293 additions & 94 deletions src/mono/mono/mini/interp/interp-simd-intrins.def

Large diffs are not rendered by default.

261 changes: 249 additions & 12 deletions src/mono/mono/mini/interp/interp-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -585,42 +585,276 @@ _interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs) {
g_assert_not_reached ();
}

// Number of lanes of type `lane_type` that fit in a single v128_t.
// (The stale three-parameter INTERP_WASM_SIMD_INTRINSIC_V_P header that used to
// precede this line ended in a line continuation and swallowed this definition;
// the four-parameter form of that macro is defined further down in this file.)
#define LANE_COUNT(lane_type) (sizeof (v128_t) / sizeof (lane_type))

// ensure the lane is valid by wrapping it (in AOT it would fail to compile)
// Reads the lane index as a single unsigned byte from `lane_ptr` and masks it
// with LANE_COUNT(lane_type) - 1, so the result is always a valid index for the
// power-of-two lane counts used in this file.
// The argument and the whole expansion are parenthesized so the macro can be
// used safely inside larger expressions (e.g. comparisons or arithmetic).
#define WRAP_LANE(lane_type, lane_ptr) \
	(*((unsigned char *)(lane_ptr)) & (LANE_COUNT(lane_type) - 1))

// Function-body helper: expects `res`, `vec` and `lane` pointers to be in scope.
// Views `vec` as an array of `lane_type`, picks the lane selected by the
// (wrapped) index stored at `lane`, and stores it to `res` converted to
// `result_type`.
#define EXTRACT_LANE(result_type, lane_type) \
	const int _lane = WRAP_LANE(lane_type, lane); \
	const lane_type *_lanes = (const lane_type *)vec; \
	*(result_type *)res = _lanes[_lane];

// Function-body helper: expects `res`, `vec`, `lane` and `value` pointers to be
// in scope. Produces a copy of the vector at `vec` in which the lane selected by
// the (wrapped) index stored at `lane` is replaced by the `lane_type` read from
// `value`, and writes that copy to `res`. All inputs are read before `res` is
// written.
#define REPLACE_LANE(lane_type) \
	const int _lane = WRAP_LANE(lane_type, lane); \
	const lane_type _new_value = *(lane_type *)value; \
	v128_t _copy = *(v128_t *)vec; \
	((lane_type *)&_copy)[_lane] = _new_value; \
	*(v128_t *)res = _copy;

// Extracts one gint8 lane from the vector at `vec` and stores it to `res` as a
// gint32. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_i1 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, gint8);
}

// Extracts one guint8 lane from the vector at `vec` and stores it to `res` as a
// gint32. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_u1 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, guint8);
}

// Extracts one gint16 lane from the vector at `vec` and stores it to `res` as a
// gint32. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_i2 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, gint16);
}

// Extracts one guint16 lane from the vector at `vec` and stores it to `res` as
// a gint32. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_u2 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, guint16);
}

// Extracts one gint32 lane from the vector at `vec` and stores it to `res` as a
// gint32. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_i4 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, gint32);
}

// Extracts one gint64 lane from the vector at `vec` and stores it to `res` as a
// gint64. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_i8 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint64, gint64);
}

// Extracts one float lane from the vector at `vec` and stores it to `res` as a
// float. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_r4 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(float, float);
}

// Extracts one double lane from the vector at `vec` and stores it to `res` as a
// double. The lane index is the first byte at `lane`, wrapped into range by
// WRAP_LANE (via EXTRACT_LANE).
static void
interp_packedsimd_extractlane_r8 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(double, double);
}

// Writes to `res` a copy of the vector at `vec` in which one gint8 lane has
// been replaced by the gint8 read from `value`. The lane index is the first
// byte at `lane`, wrapped into range by WRAP_LANE (via REPLACE_LANE).
static void
interp_packedsimd_replacelane_i1 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint8);
}

// Writes to `res` a copy of the vector at `vec` in which one gint16 lane has
// been replaced by the gint16 read from `value`. The lane index is the first
// byte at `lane`, wrapped into range by WRAP_LANE (via REPLACE_LANE).
static void
interp_packedsimd_replacelane_i2 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint16);
}

// Writes to `res` a copy of the vector at `vec` in which one gint32 lane has
// been replaced by the gint32 read from `value`. The lane index is the first
// byte at `lane`, wrapped into range by WRAP_LANE (via REPLACE_LANE).
static void
interp_packedsimd_replacelane_i4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint32);
}

// Writes to `res` a copy of the vector at `vec` in which one gint64 lane has
// been replaced by the gint64 read from `value`. The lane index is the first
// byte at `lane`, wrapped into range by WRAP_LANE (via REPLACE_LANE).
static void
interp_packedsimd_replacelane_i8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint64);
}

// Writes to `res` a copy of the vector at `vec` in which one float lane has
// been replaced by the float read from `value`. The lane index is the first
// byte at `lane`, wrapped into range by WRAP_LANE (via REPLACE_LANE).
static void
interp_packedsimd_replacelane_r4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(float);
}

// Writes to `res` a copy of the vector at `vec` in which one double lane has
// been replaced by the double read from `value`. The lane index is the first
// byte at `lane`, wrapped into range by WRAP_LANE (via REPLACE_LANE).
static void
interp_packedsimd_replacelane_r8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(double);
}

// Byte shuffle across two source vectors.
// For each of the 16 output bytes, the matching byte of `_indices` (masked to
// its low five bits) selects the source: 0-15 picks that byte of `_lower`,
// 16-31 picks byte (selector - 16) of `_upper`. All three inputs are copied
// before `res` is written.
static void
interp_packedsimd_shuffle (gpointer res, gpointer _lower, gpointer _upper, gpointer _indices) {
	v128_i1 selectors = *(v128_i1 *)_indices;
	v128_i1 low_bytes = *(v128_i1 *)_lower;
	v128_i1 high_bytes = *(v128_i1 *)_upper;
	v128_i1 shuffled = { 0 };

	for (int dst = 0; dst < 16; dst++) {
		// keep only the low five bits so the selector is always in 0..31
		int src = selectors[dst] & 31;
		shuffled[dst] = (src <= 15) ? low_bytes[src] : high_bytes[src - 16];
	}

	*(v128_i1 *)res = shuffled;
}

// Function-body helper: expects `res` and `addr_of_addr` to be in scope.
// `addr_of_addr` points at a pointer; that pointer is fetched and handed to the
// load function/intrinsic `fn`, whose v128_t result is stored to `res`.
#define INDIRECT_LOAD(fn) \
	const void *_src = *(void **)addr_of_addr; \
	*(v128_t *)res = fn (_src);

// Calls wasm_v128_load on the pointer stored at `addr_of_addr` (a pointer to
// the source address, not the address itself) and writes the resulting v128_t
// to `res`.
static void
interp_packedsimd_load128 (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load);
}

// Calls wasm_v128_load32_zero on the pointer stored at `addr_of_addr` and
// writes the resulting v128_t to `res`.
// NOTE(review): per the intrinsic's name this loads 32 bits and zeroes the
// remaining lanes — confirm against wasm_simd128.h.
static void
interp_packedsimd_load32_zero (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load32_zero);
}

// Calls wasm_v128_load64_zero on the pointer stored at `addr_of_addr` and
// writes the resulting v128_t to `res`.
// NOTE(review): per the intrinsic's name this loads 64 bits and zeroes the
// remaining lanes — confirm against wasm_simd128.h.
static void
interp_packedsimd_load64_zero (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load64_zero);
}

// Calls wasm_v128_load8_splat on the pointer stored at `addr_of_addr` and
// writes the resulting v128_t to `res`.
// NOTE(review): per the intrinsic's name this loads one 8-bit value and
// replicates it to every lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load8_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load8_splat);
}

// Calls wasm_v128_load16_splat on the pointer stored at `addr_of_addr` and
// writes the resulting v128_t to `res`.
// NOTE(review): per the intrinsic's name this loads one 16-bit value and
// replicates it to every lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load16_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load16_splat);
}

// Calls wasm_v128_load32_splat on the pointer stored at `addr_of_addr` and
// writes the resulting v128_t to `res`.
// NOTE(review): per the intrinsic's name this loads one 32-bit value and
// replicates it to every lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load32_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load32_splat);
}

// Calls wasm_v128_load64_splat on the pointer stored at `addr_of_addr` and
// writes the resulting v128_t to `res`.
// NOTE(review): per the intrinsic's name this loads one 64-bit value and
// replicates it to every lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load64_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load64_splat);
}

// Calls wasm_i16x8_load8x8 on the pointer stored at `addr_of_addr` and writes
// the resulting v128_t to `res`.
// NOTE(review): presumably loads eight 8-bit values and sign-extends each to a
// 16-bit lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load8x8_s (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_i16x8_load8x8);
}

// Calls wasm_u16x8_load8x8 on the pointer stored at `addr_of_addr` and writes
// the resulting v128_t to `res`.
// NOTE(review): presumably loads eight 8-bit values and zero-extends each to a
// 16-bit lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load8x8_u (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_u16x8_load8x8);
}

// Calls wasm_i32x4_load16x4 on the pointer stored at `addr_of_addr` and writes
// the resulting v128_t to `res`.
// NOTE(review): presumably loads four 16-bit values and sign-extends each to a
// 32-bit lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load16x4_s (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_i32x4_load16x4);
}

// Calls wasm_u32x4_load16x4 on the pointer stored at `addr_of_addr` and writes
// the resulting v128_t to `res`.
// NOTE(review): presumably loads four 16-bit values and zero-extends each to a
// 32-bit lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load16x4_u (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_u32x4_load16x4);
}

// Calls wasm_i64x2_load32x2 on the pointer stored at `addr_of_addr` and writes
// the resulting v128_t to `res`.
// NOTE(review): presumably loads two 32-bit values and sign-extends each to a
// 64-bit lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load32x2_s (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_i64x2_load32x2);
}

// Calls wasm_u64x2_load32x2 on the pointer stored at `addr_of_addr` and writes
// the resulting v128_t to `res`.
// NOTE(review): presumably loads two 32-bit values and zero-extends each to a
// 64-bit lane — confirm against wasm_simd128.h.
static void
interp_packedsimd_load32x2_u (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_u64x2_load32x2);
}

// Stores the 128-bit vector at `vec` to the address held in `*addr_of_addr`
// (`addr_of_addr` points at the destination pointer, not at the destination).
// `res` is never written.
static void
interp_packedsimd_store (gpointer res, gpointer addr_of_addr, gpointer vec) {
	// HACK: Result is unused because Store has a void return value
	// The destination is an arbitrary caller-supplied address, so do not write
	// through a v128_t * (which would assert v128_t alignment to the compiler).
	// Snapshot the vector first so an overlapping destination is still safe,
	// then copy it out bytewise.
	v128_t value = *(v128_t *)vec;
	memcpy (*(void **)addr_of_addr, &value, sizeof (value));
}

// Function-body helper: expects `addr_of_addr`, `vec` and `lane` to be in scope.
// Picks the `lane_type` lane of the vector at `vec` selected by the (wrapped)
// index stored at `lane` and writes it to the address held in `*addr_of_addr`.
// The destination is an arbitrary address, so the lane is copied out with
// memcpy instead of a typed store that would assume `lane_type` alignment.
#define INDIRECT_STORE_LANE(lane_type) \
	int _lane = WRAP_LANE(lane_type, lane); \
	lane_type _value = ((lane_type *)vec)[_lane]; \
	memcpy (*(void **)addr_of_addr, &_value, sizeof (lane_type));

// Writes one guint8 lane of the vector at `vec` to the address stored at
// `addr_of_addr`. The lane index is the first byte at `lane`, wrapped into
// range by WRAP_LANE (via INDIRECT_STORE_LANE). `res` is not written.
static void
interp_packedsimd_store8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint8);
}

// Writes one guint16 lane of the vector at `vec` to the address stored at
// `addr_of_addr`. The lane index is the first byte at `lane`, wrapped into
// range by WRAP_LANE (via INDIRECT_STORE_LANE). `res` is not written.
static void
interp_packedsimd_store16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint16);
}

// Writes one guint32 lane of the vector at `vec` to the address stored at
// `addr_of_addr`. The lane index is the first byte at `lane`, wrapped into
// range by WRAP_LANE (via INDIRECT_STORE_LANE). `res` is not written.
static void
interp_packedsimd_store32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint32);
}

// Writes one guint64 lane of the vector at `vec` to the address stored at
// `addr_of_addr`. The lane index is the first byte at `lane`, wrapped into
// range by WRAP_LANE (via INDIRECT_STORE_LANE). `res` is not written.
static void
interp_packedsimd_store64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint64);
}

// Function-body helper: expects `res`, `addr_of_addr`, `vec` and `lane` to be
// in scope. Writes to `res` a copy of the vector at `vec` in which the lane
// selected by the (wrapped) index stored at `lane` has been replaced by the
// `lane_type` value read from the address held in `*addr_of_addr`.
// The source is an arbitrary address, so it is read with memcpy instead of a
// typed load that would assume `lane_type` alignment; copy sizes are derived
// from the temporary buffer rather than hard-coded.
#define INDIRECT_LOAD_LANE(lane_type) \
	int _lane = WRAP_LANE(lane_type, lane); \
	/* we need temporary storage to do this since res may be the same as vec, addr_of_addr, or lane */ \
	lane_type lanes[LANE_COUNT(lane_type)]; \
	memcpy (lanes, vec, sizeof (lanes)); \
	memcpy (&lanes[_lane], *(void **)addr_of_addr, sizeof (lane_type)); \
	memcpy (res, lanes, sizeof (lanes));

// Writes to `res` a copy of the vector at `vec` in which one guint8 lane has
// been replaced by the guint8 read from the address stored at `addr_of_addr`.
// The lane index is the first byte at `lane`, wrapped into range by WRAP_LANE
// (via INDIRECT_LOAD_LANE, which stages the result in a temporary so `res` may
// alias the inputs).
static void
interp_packedsimd_load8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint8);
}

// Writes to `res` a copy of the vector at `vec` in which one guint16 lane has
// been replaced by the guint16 read from the address stored at `addr_of_addr`.
// The lane index is the first byte at `lane`, wrapped into range by WRAP_LANE
// (via INDIRECT_LOAD_LANE, which stages the result in a temporary so `res` may
// alias the inputs).
static void
interp_packedsimd_load16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint16);
}

// Writes to `res` a copy of the vector at `vec` in which one guint32 lane has
// been replaced by the guint32 read from the address stored at `addr_of_addr`.
// The lane index is the first byte at `lane`, wrapped into range by WRAP_LANE
// (via INDIRECT_LOAD_LANE, which stages the result in a temporary so `res` may
// alias the inputs).
static void
interp_packedsimd_load32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint32);
}

// Writes to `res` a copy of the vector at `vec` in which one guint64 lane has
// been replaced by the guint64 read from the address stored at `addr_of_addr`.
// The lane index is the first byte at `lane`, wrapped into range by WRAP_LANE
// (via INDIRECT_LOAD_LANE, which stages the result in a temporary so `res` may
// alias the inputs).
static void
interp_packedsimd_load64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint64);
}

// Table expander for intrinsics of shape "vector result, pointer argument".
// Generates `_mono_interp_simd_<c_intrinsic> (res, v1)`, which passes the `v1`
// pointer straight to `c_intrinsic` and stores the v128_t result to `res`.
// `name`, `arg1` and `wasm_opcode` are not used by this expansion.
// The generated function is named after `c_intrinsic`; `id` is no longer a
// parameter of this macro, so the stale `## id` name line has been dropped.
#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*((v128_t *)res) = c_intrinsic (v1); \
}

// Table expander for intrinsics of shape "vector result, one vector argument".
// Generates `_mono_interp_simd_<c_intrinsic> (res, v1)`, which reads a v128_t
// from `v1`, applies `c_intrinsic`, and stores the v128_t result to `res`.
// `name`, `arg1` and `wasm_opcode` are not used by this expansion.
// Only the four-parameter definition is kept; the stale three-parameter header
// and its `## id` name line made the definition ill-formed.
#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

// Table expander for intrinsics of shape "int32 result, one vector argument".
// Generates `_mono_interp_simd_<c_intrinsic> (res, v1)`, which reads a v128_t
// from `v1`, applies `c_intrinsic`, and stores the result to `res` as int32_t.
// `name`, `arg1` and `wasm_opcode` are not used by this expansion.
// Only the four-parameter definition is kept; the stale three-parameter header
// and its `## id` name line made the definition ill-formed.
#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*((int32_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

// Table expander for intrinsics of shape "vector result, two vector arguments".
// Generates `_mono_interp_simd_<c_intrinsic> (res, v1, v2)`, which reads a
// v128_t from each of `v1` and `v2`, applies `c_intrinsic`, and stores the
// v128_t result to `res`.
// `name`, `arg1` and `wasm_opcode` are not used by this expansion.
// Only the four-parameter definition is kept; the stale three-parameter header
// and its `## id` name line made the definition ill-formed.
#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2)); \
}

// Table expander for intrinsics of shape "vector result, vector + int arguments".
// Generates `_mono_interp_simd_<c_intrinsic> (res, v1, v2)`, which reads a
// v128_t from `v1` and an int from `v2`, applies `c_intrinsic`, and stores the
// v128_t result to `res`.
// `name`, `arg1` and `wasm_opcode` are not used by this expansion.
// Only the four-parameter definition is kept; the stale three-parameter header
// and its `## id` name line made the definition ill-formed.
#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((int *)v2)); \
}

// Table expander for intrinsics of shape "vector result, three vector arguments".
// Generates `_mono_interp_simd_<c_intrinsic> (res, v1, v2, v3)`, which reads a
// v128_t from each of `v1`, `v2` and `v3`, applies `c_intrinsic`, and stores
// the v128_t result to `res`.
// `name`, `arg1` and `wasm_opcode` are not used by this expansion.
// Only the four-parameter definition is kept; the stale three-parameter header
// and its `## id` name line made the definition ill-formed.
#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2), *((v128_t *)v3)); \
}

// The V_C1 / V_C2 / V_C3 table entries expand to nothing in this pass, so no
// _mono_interp_simd_* wrapper is generated for them when the .def file is
// included below.
// NOTE(review): presumably these entries name already-written C functions
// (note the `c_function` parameter) taking 1, 2 or 3 arguments, such as the
// interp_packedsimd_* helpers in this file — confirm against
// interp-simd-intrins.def.
#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, wasm_opcode)
#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode)
#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode)

#include "interp-simd-intrins.def"

#undef INTERP_WASM_SIMD_INTRINSIC_V_P
Expand All @@ -629,6 +863,9 @@ _mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) {
#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
#undef INTERP_WASM_SIMD_INTRINSIC_V_C3

// Now generate the wasm opcode tables for the intrinsics

Expand Down
Loading

0 comments on commit 0185099

Please sign in to comment.