Skip to content

Commit

Permalink
[wasm] Add Vector128 and PackedSimd support to the jiterpreter; add P…
Browse files Browse the repository at this point in the history
…ackedSimd to the interpreter (#82773)

* Add PackedSIMD support to the interpreter (off by default)
* Add SIMD support to the jiterpreter
* Add runtime options governing interpreter vector128 and packedsimd support
* Add some R4 vector128 operations to the interpreter
* Fix jiterpreter MINT_POPCNT_I8 implementation
* Enable compiling the runtime with wasm simd support so that intrinsics can be used
* Add browser-bench measurements for packing vector128
  • Loading branch information
kg authored May 7, 2023
1 parent d7c94a8 commit 7c75cbf
Show file tree
Hide file tree
Showing 23 changed files with 1,533 additions and 196 deletions.
1 change: 1 addition & 0 deletions src/mono/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ elseif(CLR_CMAKE_HOST_OS STREQUAL "emscripten")
add_compile_options(-Wno-strict-prototypes)
add_compile_options(-Wno-unused-but-set-variable)
add_compile_options(-Wno-single-bit-bitfield-constant-conversion)
add_compile_options(-msimd128)
set(DISABLE_EXECUTABLES 1)
# FIXME: Is there a cmake option for this ?
set(DISABLE_SHARED_LIBS 1)
Expand Down
8 changes: 7 additions & 1 deletion src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ typedef enum {

#define PROFILE_INTERP 0

#if !HOST_BROWSER && __GNUC__
#if __GNUC__
#define INTERP_ENABLE_SIMD
#endif

Expand Down Expand Up @@ -342,6 +342,12 @@ mono_jiterp_stackval_from_data (MonoType *type, stackval *result, const void *da
gpointer
mono_jiterp_frame_data_allocator_alloc (FrameDataAllocator *stack, InterpFrame *frame, int size);

// Returns entry `index` of the interpreter SIMD method table selected by `arity`
// (1 -> interp_simd_p_p_table, 2 -> interp_simd_p_pp_table, 3 -> interp_simd_p_ppp_table).
// Any other arity asserts.
gpointer
mono_jiterp_get_simd_intrinsic (int arity, int index);

// Returns entry `index` of the wasm opcode table selected by `arity`
// (1 -> interp_simd_p_p_wasm_opcode_table, 2 -> interp_simd_p_pp_wasm_opcode_table,
// 3 -> interp_simd_p_ppp_wasm_opcode_table). Any other arity asserts.
int
mono_jiterp_get_simd_opcode (int arity, int index);

#endif

static inline int
Expand Down
266 changes: 185 additions & 81 deletions src/mono/mono/mini/interp/interp-simd-intrins.def

Large diffs are not rendered by default.

137 changes: 128 additions & 9 deletions src/mono/mono/mini/interp/interp-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
#include "interp-internals.h"
#include "interp-simd.h"

#if HOST_BROWSER
#include <wasm_simd128.h>
#endif

#ifdef INTERP_ENABLE_SIMD

typedef gint64 v128_i8 __attribute__ ((vector_size (SIZEOF_V128)));
Expand All @@ -12,6 +16,7 @@ typedef gint16 v128_i2 __attribute__ ((vector_size (SIZEOF_V128)));
typedef guint16 v128_u2 __attribute__ ((vector_size (SIZEOF_V128)));
typedef gint8 v128_i1 __attribute__ ((vector_size (SIZEOF_V128)));
typedef guint8 v128_u1 __attribute__ ((vector_size (SIZEOF_V128)));
typedef float v128_r4 __attribute__ ((vector_size (SIZEOF_V128)));

// get_AllBitsSet
static void
Expand Down Expand Up @@ -39,6 +44,12 @@ interp_v128_i4_op_addition (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 + *(v128_i4*)v2;
}

// op_Addition for float vectors: adds the v128_r4 operands at v1 and v2
// element by element and writes the sum to res.
static void
interp_v128_r4_op_addition (gpointer res, gpointer v1, gpointer v2)
{
	v128_r4 *dest = (v128_r4*)res;
	const v128_r4 lhs = *(v128_r4*)v1;
	const v128_r4 rhs = *(v128_r4*)v2;

	*dest = lhs + rhs;
}

// op_Subtraction
static void
interp_v128_i1_op_subtraction (gpointer res, gpointer v1, gpointer v2)
Expand All @@ -58,6 +69,12 @@ interp_v128_i4_op_subtraction (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 - *(v128_i4*)v2;
}

// op_Subtraction for float vectors: subtracts the v128_r4 operand at v2 from
// the one at v1 element by element and writes the difference to res.
static void
interp_v128_r4_op_subtraction (gpointer res, gpointer v1, gpointer v2)
{
	v128_r4 *dest = (v128_r4*)res;
	const v128_r4 lhs = *(v128_r4*)v1;
	const v128_r4 rhs = *(v128_r4*)v2;

	*dest = lhs - rhs;
}

// op_BitwiseAnd
static void
interp_v128_op_bitwise_and (gpointer res, gpointer v1, gpointer v2)
Expand Down Expand Up @@ -124,6 +141,18 @@ interp_v128_i4_op_multiply (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 * *(v128_i4*)v2;
}

// op_Multiply for float vectors: multiplies the v128_r4 operands at v1 and v2
// element by element and writes the product to res.
static void
interp_v128_r4_op_multiply (gpointer res, gpointer v1, gpointer v2)
{
	v128_r4 *dest = (v128_r4*)res;
	const v128_r4 lhs = *(v128_r4*)v1;
	const v128_r4 rhs = *(v128_r4*)v2;

	*dest = lhs * rhs;
}

// op_Division for float vectors: divides the v128_r4 operand at v1 by the one
// at v2 element by element and writes the quotient to res.
static void
interp_v128_r4_op_division (gpointer res, gpointer v1, gpointer v2)
{
	v128_r4 *dest = (v128_r4*)res;
	const v128_r4 dividend = *(v128_r4*)v1;
	const v128_r4 divisor = *(v128_r4*)v2;

	*dest = dividend / divisor;
}

// op_UnaryNegation
static void
interp_v128_i1_op_negation (gpointer res, gpointer v1)
Expand Down Expand Up @@ -535,32 +564,122 @@ interp_v128_i8_shuffle (gpointer res, gpointer v1, gpointer v2)
V128_SHUFFLE (gint64, guint64);
}

#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

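// For the wasm packed simd intrinsics we want to automatically generate the C implementations from
// their corresponding clang intrinsics. See also:
// https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/wasm_simd128.h
// In the macro names below, the letters before the underscore describe the result and the letters
// after it describe the operands: V means Vector128 (v128_t), P means void* pointer, and
// I means an integer scalar (int / int32_t).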
#ifdef HOST_BROWSER

// Stand-in with the (v128_t, v128_t) -> v128_t intrinsic shape that never
// returns normally: it asserts as soon as it is called and ignores both operands.
// NOTE(review): presumably referenced from interp-simd-intrins.def for entries
// that have no C implementation -- the .def contents are not visible here.
static v128_t
_interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs)
{
	g_assert_not_reached ();
}

/*
 * V_P: emits `_mono_interp_simd_<id> (res, v1)`, which hands the pointer v1
 * to c_intrinsic unchanged and stores the v128_t result into *res.
 * The wasm_opcode argument is not used by this expansion.
 */
#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
*((v128_t *)res) = c_intrinsic (v1); \
}

/*
 * V_V: emits `_mono_interp_simd_<id> (res, v1)`, which loads a v128_t from v1,
 * passes it to c_intrinsic and stores the v128_t result into *res.
 * The wasm_opcode argument is not used by this expansion.
 */
#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

/*
 * I_V: emits `_mono_interp_simd_<id> (res, v1)`, which loads a v128_t from v1,
 * passes it to c_intrinsic and stores the result into *res as an int32_t
 * (only 4 bytes of the destination are written).
 * The wasm_opcode argument is not used by this expansion.
 */
#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
*((int32_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

/*
 * V_VV: emits `_mono_interp_simd_<id> (res, v1, v2)`, which loads a v128_t
 * from each of v1 and v2, passes them to c_intrinsic in that order and stores
 * the v128_t result into *res.
 * The wasm_opcode argument is not used by this expansion.
 */
#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2)); \
}

/*
 * V_VI: emits `_mono_interp_simd_<id> (res, v1, v2)`, which loads a v128_t
 * from v1 and an int from v2, passes them to c_intrinsic in that order and
 * stores the v128_t result into *res.
 * The wasm_opcode argument is not used by this expansion.
 */
#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((int *)v2)); \
}

/*
 * V_VVV: emits `_mono_interp_simd_<id> (res, v1, v2, v3)`, which loads a
 * v128_t from each of v1, v2 and v3, passes them to c_intrinsic in that order
 * and stores the v128_t result into *res.
 * The wasm_opcode argument is not used by this expansion.
 */
#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2), *((v128_t *)v3)); \
}

#include "interp-simd-intrins.def"

#undef INTERP_WASM_SIMD_INTRINSIC_V_P
#undef INTERP_WASM_SIMD_INTRINSIC_V_V
#undef INTERP_WASM_SIMD_INTRINSIC_I_V
#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV

// Now generate the wasm opcode tables for the intrinsics

// Make each INTERP_SIMD_INTRINSIC_P_P entry of the .def expand to its third
// argument followed by a comma, so the include below becomes the initializer list.
#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) c,

// Third-argument values of the P_P entries, in .def order.
int interp_simd_p_p_wasm_opcode_table [] = {
#include "interp-simd-intrins.def"
};

// Restore the empty expansion so later includes of the .def skip P_P entries.
#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)

// Make each INTERP_SIMD_INTRINSIC_P_PP entry of the .def expand to its third
// argument followed by a comma, so the include below becomes the initializer list.
#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) c,

// Third-argument values of the P_PP entries, in .def order.
int interp_simd_p_pp_wasm_opcode_table [] = {
#include "interp-simd-intrins.def"
};

// Restore the empty expansion so later includes of the .def skip P_PP entries.
#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)

// Make each INTERP_SIMD_INTRINSIC_P_PPP entry of the .def expand to its third
// argument followed by a comma, so the include below becomes the initializer list.
#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) c,

// Third-argument values of the P_PPP entries, in .def order.
int interp_simd_p_ppp_wasm_opcode_table [] = {
#include "interp-simd-intrins.def"
};

// Restore the empty expansion so later includes of the .def skip P_PPP entries.
#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#endif // HOST_BROWSER

#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b) b,
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) b,
PP_SIMD_Method interp_simd_p_p_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b) b,
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) b,
PPP_SIMD_Method interp_simd_p_pp_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b) b,
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) b,
PPPP_SIMD_Method interp_simd_p_ppp_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#endif // INTERP_ENABLE_SIMD
6 changes: 6 additions & 0 deletions src/mono/mono/mini/interp/interp-simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ extern PP_SIMD_Method interp_simd_p_p_table [];
extern PPP_SIMD_Method interp_simd_p_pp_table [];
extern PPPP_SIMD_Method interp_simd_p_ppp_table [];

// Browser-only tables holding the third argument of every INTERP_SIMD_INTRINSIC_*
// entry in interp-simd-intrins.def, one table per signature group. They are built
// from the same .def as the method tables above, so a given index refers to the
// same intrinsic in both.
#if HOST_BROWSER
extern int interp_simd_p_p_wasm_opcode_table [];
extern int interp_simd_p_pp_wasm_opcode_table [];
extern int interp_simd_p_ppp_wasm_opcode_table [];
#endif

#endif /* __MONO_MINI_INTERP_SIMD_H__ */


38 changes: 38 additions & 0 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -8907,4 +8907,42 @@ mono_jiterp_enum_hasflag (MonoClass *klass, gint32 *dest, stackval *sp1, stackva
*dest = mono_interp_enum_hasflag (sp1, sp2, klass);
}

/*
 * mono_jiterp_get_simd_intrinsic:
 *
 *   Fetch an entry of one of the interpreter SIMD method tables.
 *
 * ARITY selects the table: 1 -> interp_simd_p_p_table,
 * 2 -> interp_simd_p_pp_table, 3 -> interp_simd_p_ppp_table.
 * INDEX is the position inside the selected table.
 *
 * Returns the table entry as an untyped pointer. An unsupported arity, a
 * negative index, or a build without INTERP_ENABLE_SIMD triggers
 * g_assert_not_reached.
 */
EMSCRIPTEN_KEEPALIVE gpointer
mono_jiterp_get_simd_intrinsic (int arity, int index)
{
#ifdef INTERP_ENABLE_SIMD
	// The tables are declared as unsized extern arrays, so only the lower
	// bound can be validated here. Fail loudly rather than read memory that
	// precedes the table.
	if (index < 0) {
		g_assert_not_reached ();
	}

	switch (arity) {
	case 1:
		return interp_simd_p_p_table [index];
	case 2:
		return interp_simd_p_pp_table [index];
	case 3:
		return interp_simd_p_ppp_table [index];
	default:
		g_assert_not_reached ();
	}
#else
	g_assert_not_reached ();
#endif
}

/*
 * mono_jiterp_get_simd_opcode:
 *
 *   Fetch an entry of one of the wasm opcode tables that parallel the
 * interpreter SIMD method tables.
 *
 * ARITY selects the table: 1 -> interp_simd_p_p_wasm_opcode_table,
 * 2 -> interp_simd_p_pp_wasm_opcode_table,
 * 3 -> interp_simd_p_ppp_wasm_opcode_table.
 * INDEX is the position inside the selected table.
 *
 * Returns the stored table value. An unsupported arity, a negative index, or
 * a build without INTERP_ENABLE_SIMD triggers g_assert_not_reached.
 */
EMSCRIPTEN_KEEPALIVE int
mono_jiterp_get_simd_opcode (int arity, int index)
{
#ifdef INTERP_ENABLE_SIMD
	// The tables are declared as unsized extern arrays, so only the lower
	// bound can be validated here. Fail loudly rather than read memory that
	// precedes the table.
	if (index < 0) {
		g_assert_not_reached ();
	}

	switch (arity) {
	case 1:
		return interp_simd_p_p_wasm_opcode_table [index];
	case 2:
		return interp_simd_p_pp_wasm_opcode_table [index];
	case 3:
		return interp_simd_p_ppp_wasm_opcode_table [index];
	default:
		g_assert_not_reached ();
	}
#else
	g_assert_not_reached ();
#endif
}

#endif
18 changes: 9 additions & 9 deletions src/mono/mono/mini/interp/mintops.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,35 +41,35 @@ typedef enum {

/* SIMD opcodes, grouped by signature */

#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b) a,
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
} MintSIMDOpsPP;
#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b) a,
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
INTERP_SIMD_INTRINSIC_P_PP_LAST
} MintSIMDOpsPPP;
#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b) a,
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
INTERP_SIMD_INTRINSIC_P_PPP_LAST
} MintSIMDOpsPPPP;
#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#if NO_UNALIGNED_ACCESS
# if G_BYTE_ORDER == G_LITTLE_ENDIAN
Expand Down
20 changes: 20 additions & 0 deletions src/mono/mono/mini/interp/simd-methods.def
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
SIMD_METHOD(get_Count)
SIMD_METHOD(get_AllBitsSet)
SIMD_METHOD(get_IsHardwareAccelerated)
SIMD_METHOD(get_IsSupported)
SIMD_METHOD(get_Item)
SIMD_METHOD(get_One)
SIMD_METHOD(get_Zero)
SIMD_METHOD(op_Addition)
SIMD_METHOD(op_BitwiseAnd)
SIMD_METHOD(op_BitwiseOr)
SIMD_METHOD(op_Division)
SIMD_METHOD(op_Equality)
SIMD_METHOD(op_ExclusiveOr)
SIMD_METHOD(op_Explicit)
Expand All @@ -24,6 +26,7 @@ SIMD_METHOD(ConditionalSelect)
SIMD_METHOD(Create)
SIMD_METHOD(CreateScalar)
SIMD_METHOD(CreateScalarUnsafe)

SIMD_METHOD(Equals)
SIMD_METHOD(ExtractMostSignificantBits)
SIMD_METHOD(GreaterThan)
Expand All @@ -36,3 +39,20 @@ SIMD_METHOD(ShiftRightLogical)
SIMD_METHOD(Shuffle)
SIMD_METHOD(WidenLower)
SIMD_METHOD(WidenUpper)

// PackedSimd
SIMD_METHOD(Splat)
SIMD_METHOD(ExtractLane)
SIMD_METHOD(ReplaceLane)
SIMD_METHOD(Swizzle)
SIMD_METHOD(Add)
SIMD_METHOD(Subtract)
SIMD_METHOD(Multiply)
SIMD_METHOD(Dot)
SIMD_METHOD(Negate)
SIMD_METHOD(And)
SIMD_METHOD(Bitmask)
SIMD_METHOD(CompareEqual)
SIMD_METHOD(CompareNotEqual)
SIMD_METHOD(ConvertNarrowingSignedSaturate)
SIMD_METHOD(ConvertNarrowingUnsignedSaturate)
Loading

0 comments on commit 7c75cbf

Please sign in to comment.