wasmi_core: add support for the Wasm simd proposal
#1395
Conversation
Codecov Report

Attention: Patch coverage is …

Additional details and impacted files (Coverage Diff):

|          | main   | #1395  | +/-    |
|----------|--------|--------|--------|
| Coverage | 70.52% | 69.35% | -1.18% |
| Files    | 157    | 158    | +1     |
| Lines    | 14414  | 14695  | +281   |
| Hits     | 10165  | 10191  | +26    |
| Misses   | 4249   | 4504   | +255   |
This is the skeleton with which it is going to be possible to implement most of the `V128` API in an efficient way.
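For illustration, such a skeleton might boil down to a single generic lane-wise helper; the names and the byte-array representation below are assumptions made for this sketch, not the actual `wasmi_core` API:

```rust
/// Hypothetical 128-bit value as raw little-endian bytes (illustrative only).
type Bytes128 = [u8; 16];

/// Applies a scalar operation to each pair of corresponding `i32` lanes.
fn lanewise_i32x4(a: Bytes128, b: Bytes128, op: impl Fn(i32, i32) -> i32) -> Bytes128 {
    let mut out = [0u8; 16];
    for lane in 0..4 {
        let range = lane * 4..lane * 4 + 4;
        let x = i32::from_le_bytes(a[range.clone()].try_into().unwrap());
        let y = i32::from_le_bytes(b[range.clone()].try_into().unwrap());
        out[range].copy_from_slice(&op(x, y).to_le_bytes());
    }
    out
}

/// `i32x4.add` and `i32x4.sub` then become one-liners on top of the helper.
fn i32x4_add(a: Bytes128, b: Bytes128) -> Bytes128 {
    lanewise_i32x4(a, b, i32::wrapping_add)
}

fn i32x4_sub(a: Bytes128, b: Bytes128) -> Bytes128 {
    lanewise_i32x4(a, b, i32::wrapping_sub)
}
```

With a helper like this (one per lane shape), each of the many lane-wise instructions reduces to a one-line call with the matching scalar operation.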
Force-pushed from 0095b78 to 53e526f.
This will make it easier to remove the lint again later.
This now exposes `load`, `load_at`, `store`, and `store_at` functions in addition to the `load_extend` and `store_wrap` ones. This way we no longer require the weird `WrapInto` and `ExtendInto` impls for `T -> T`.
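A minimal sketch of the difference between the plain and the extending/wrapping accessors, assuming a flat `&[u8]` linear memory and hypothetical function names (not the actual `wasmi_core` signatures):

```rust
/// Hypothetical trap type for out-of-bounds accesses (illustrative only).
#[derive(Debug)]
pub struct TrapOutOfBounds;

/// Plain `load`: reads a little-endian `i32` at `addr + offset`.
fn load_i32(memory: &[u8], addr: usize, offset: usize) -> Result<i32, TrapOutOfBounds> {
    let start = addr.checked_add(offset).ok_or(TrapOutOfBounds)?;
    let end = start.checked_add(4).ok_or(TrapOutOfBounds)?;
    let bytes = memory.get(start..end).ok_or(TrapOutOfBounds)?;
    Ok(i32::from_le_bytes(bytes.try_into().unwrap()))
}

/// Extending load (`load_extend`-style): reads an `i8` and sign-extends it to `i32`.
fn load_i32_extend_i8(memory: &[u8], addr: usize, offset: usize) -> Result<i32, TrapOutOfBounds> {
    let start = addr.checked_add(offset).ok_or(TrapOutOfBounds)?;
    let byte = *memory.get(start).ok_or(TrapOutOfBounds)?;
    Ok(i32::from(byte as i8))
}

/// Wrapping store (`store_wrap`-style): stores only the low 8 bits of an `i32`.
fn store_i32_wrap_i8(memory: &mut [u8], addr: usize, offset: usize, value: i32) -> Result<(), TrapOutOfBounds> {
    let start = addr.checked_add(offset).ok_or(TrapOutOfBounds)?;
    *memory.get_mut(start).ok_or(TrapOutOfBounds)? = value as u8;
    Ok(())
}
```

With dedicated plain variants, the `T -> T` case no longer needs to be expressed as a degenerate extend/wrap conversion.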
This allows inferring the `LaneIdx` type from more types, such as `i8`, `u32`, `f64`, etc.
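A minimal sketch of how such inference could work (the trait and type names are illustrative assumptions, not the actual `wasmi_core` items): the lane element type determines the lane count and thereby the lane-index type.

```rust
/// Hypothetical lane-index newtypes, one per lane count (illustrative only).
#[derive(Copy, Clone, Debug)] pub struct LaneIdx16(pub u8); // for i8/u8 lanes
#[derive(Copy, Clone, Debug)] pub struct LaneIdx8(pub u8);  // for i16/u16 lanes
#[derive(Copy, Clone, Debug)] pub struct LaneIdx4(pub u8);  // for i32/u32/f32 lanes
#[derive(Copy, Clone, Debug)] pub struct LaneIdx2(pub u8);  // for i64/u64/f64 lanes

/// Hypothetical helper trait: the lane element type selects its lane-index type.
pub trait IntoLaneIdx {
    type LaneIdx;
}

impl IntoLaneIdx for i8  { type LaneIdx = LaneIdx16; }
impl IntoLaneIdx for u8  { type LaneIdx = LaneIdx16; }
impl IntoLaneIdx for i16 { type LaneIdx = LaneIdx8; }
impl IntoLaneIdx for u16 { type LaneIdx = LaneIdx8; }
impl IntoLaneIdx for i32 { type LaneIdx = LaneIdx4; }
impl IntoLaneIdx for u32 { type LaneIdx = LaneIdx4; }
impl IntoLaneIdx for f32 { type LaneIdx = LaneIdx4; }
impl IntoLaneIdx for i64 { type LaneIdx = LaneIdx2; }
impl IntoLaneIdx for u64 { type LaneIdx = LaneIdx2; }
impl IntoLaneIdx for f64 { type LaneIdx = LaneIdx2; }
```

Generic lane-access helpers can then name the index type as `<T as IntoLaneIdx>::LaneIdx` instead of asking callers to spell it out.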
The next step is to add Wasm `simd` proposal support.
The new `simd` crate feature is a full replacement for it.
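For context, a rough sketch of what such feature gating usually looks like; only the feature name `simd` comes from this PR, the module layout shown is an assumption:

```rust
// lib.rs-style sketch: the SIMD types exist only when the `simd`
// crate feature is enabled, replacing the old `value128` feature gate.
#[cfg(feature = "simd")]
mod simd {
    /// 128-bit SIMD value, available only with the `simd` crate feature.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct V128(pub [u8; 16]);
}

#[cfg(feature = "simd")]
pub use self::simd::V128;
```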
- Fixes an overflow issue in the `avgr_u` SIMD instructions (see the sketch below).
- Now uses `div_ceil` as suggested by clippy.
- Deduplicated logic via a macro.
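A scalar sketch of the overflow-free rounding average, assuming the widen-then-`div_ceil` approach described above (the function name is illustrative):

```rust
/// Rounding (round-half-up) unsigned average, as used per lane by `i8x16.avgr_u`.
/// Widening to `u16` first means the rounding bias cannot overflow the lane type.
fn avgr_u8(a: u8, b: u8) -> u8 {
    (u16::from(a) + u16::from(b)).div_ceil(2) as u8
}

fn main() {
    // A naive `(a + b + 1) / 2` computed in `u8` would overflow for these inputs.
    assert_eq!(avgr_u8(255, 255), 255);
    assert_eq!(avgr_u8(254, 255), 255);
    // Halfway cases round up: (1 + 2) / 2 = 1.5 -> 2.
    assert_eq!(avgr_u8(1, 2), 2);
}
```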
Implements part of #1364.

This PR implements the `simd` submodule in `wasmi_core`, which provides basic types and functionality for Wasm `simd` proposal support in Wasmi. This includes the `V128` type, several lane types, and the entire Wasm `simd` proposal API, which can then be used in Wasmi in const-evaluation, execution, and initializer expressions.

- Removes the `value128` crate feature from `wasmi_core`.
- Adds the `simd` crate feature to `wasmi_core`.
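To make this concrete, here is a rough sketch of the kind of type described above; the representation and method names are illustrative assumptions, not necessarily the actual `wasmi_core` API:

```rust
/// A 128-bit SIMD value, stored as 16 little-endian bytes.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct V128([u8; 16]);

impl V128 {
    /// `i32x4.splat`: broadcasts `x` into all four `i32` lanes.
    pub fn i32x4_splat(x: i32) -> Self {
        let mut bytes = [0u8; 16];
        for chunk in bytes.chunks_exact_mut(4) {
            chunk.copy_from_slice(&x.to_le_bytes());
        }
        Self(bytes)
    }

    /// `i32x4.extract_lane`: reads lane `idx` (0..4) as an `i32`.
    pub fn i32x4_extract_lane(self, idx: usize) -> i32 {
        let start = idx * 4;
        i32::from_le_bytes(self.0[start..start + 4].try_into().unwrap())
    }

    /// `i32x4.replace_lane`: returns a copy with lane `idx` set to `value`.
    pub fn i32x4_replace_lane(self, idx: usize, value: i32) -> Self {
        let mut bytes = self.0;
        bytes[idx * 4..idx * 4 + 4].copy_from_slice(&value.to_le_bytes());
        Self(bytes)
    }
}

fn main() {
    let v = V128::i32x4_splat(7).i32x4_replace_lane(2, 42);
    assert_eq!(v.i32x4_extract_lane(0), 7);
    assert_eq!(v.i32x4_extract_lane(2), 42);
}
```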
ToDo: Wasm `simd` instructions

- `v128.const(imm: ImmByte[16]) -> v128`
- `i8x16.splat(x: i32) -> v128`
- `i16x8.splat(x: i32) -> v128`
- `i32x4.splat(x: i32) -> v128`
- `i64x2.splat(x: i64) -> v128`
- `f32x4.splat(x: f32) -> v128`
- `f64x2.splat(x: f64) -> v128`
- `i8x16.extract_lane_s(a: v128, imm: ImmLaneIdx16) -> i32`
- `i8x16.extract_lane_u(a: v128, imm: ImmLaneIdx16) -> i32`
- `i16x8.extract_lane_s(a: v128, imm: ImmLaneIdx8) -> i32`
- `i16x8.extract_lane_u(a: v128, imm: ImmLaneIdx8) -> i32`
- `i32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> i32`
- `i64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> i64`
- `f32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> f32`
- `f64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> f64`
- `i8x16.replace_lane(a: v128, imm: ImmLaneIdx16, x: i32) -> v128`
- `i16x8.replace_lane(a: v128, imm: ImmLaneIdx8, x: i32) -> v128`
- `i32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: i32) -> v128`
- `i64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: i64) -> v128`
- `f32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: f32) -> v128`
- `f64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: f64) -> v128`
- `i8x16.shuffle(a: v128, b: v128, imm: ImmLaneIdx32[16]) -> v128`
- `i8x16.swizzle(a: v128, s: v128) -> v128`
- `i8x16.add(a: v128, b: v128) -> v128`
- `i16x8.add(a: v128, b: v128) -> v128`
- `i32x4.add(a: v128, b: v128) -> v128`
- `i64x2.add(a: v128, b: v128) -> v128`
- `i8x16.sub(a: v128, b: v128) -> v128`
- `i16x8.sub(a: v128, b: v128) -> v128`
- `i32x4.sub(a: v128, b: v128) -> v128`
- `i64x2.sub(a: v128, b: v128) -> v128`
- `i16x8.mul(a: v128, b: v128) -> v128`
- `i32x4.mul(a: v128, b: v128) -> v128`
- `i64x2.mul(a: v128, b: v128) -> v128`
- `i32x4.dot_i16x8_s(a: v128, b: v128) -> v128`
- `i8x16.neg(a: v128) -> v128`
- `i16x8.neg(a: v128) -> v128`
- `i32x4.neg(a: v128) -> v128`
- `i64x2.neg(a: v128) -> v128`
- `i16x8.extmul_low_i8x16_s(a: v128, b: v128) -> v128`
- `i16x8.extmul_high_i8x16_s(a: v128, b: v128) -> v128`
- `i16x8.extmul_low_i8x16_u(a: v128, b: v128) -> v128`
- `i16x8.extmul_high_i8x16_u(a: v128, b: v128) -> v128`
- `i32x4.extmul_low_i16x8_s(a: v128, b: v128) -> v128`
- `i32x4.extmul_high_i16x8_s(a: v128, b: v128) -> v128`
- `i32x4.extmul_low_i16x8_u(a: v128, b: v128) -> v128`
- `i32x4.extmul_high_i16x8_u(a: v128, b: v128) -> v128`
- `i64x2.extmul_low_i32x4_s(a: v128, b: v128) -> v128`
- `i64x2.extmul_high_i32x4_s(a: v128, b: v128) -> v128`
- `i64x2.extmul_low_i32x4_u(a: v128, b: v128) -> v128`
- `i64x2.extmul_high_i32x4_u(a: v128, b: v128) -> v128`
- `i16x8.extadd_pairwise_i8x16_s(a: v128) -> v128`
- `i16x8.extadd_pairwise_i8x16_u(a: v128) -> v128`
- `i32x4.extadd_pairwise_i16x8_s(a: v128) -> v128`
- `i32x4.extadd_pairwise_i16x8_u(a: v128) -> v128`
- `i8x16.add_sat_s(a: v128, b: v128) -> v128`
- `i8x16.add_sat_u(a: v128, b: v128) -> v128`
- `i16x8.add_sat_s(a: v128, b: v128) -> v128`
- `i16x8.add_sat_u(a: v128, b: v128) -> v128`
- `i8x16.sub_sat_s(a: v128, b: v128) -> v128`
- `i8x16.sub_sat_u(a: v128, b: v128) -> v128`
- `i16x8.sub_sat_s(a: v128, b: v128) -> v128`
- `i16x8.sub_sat_u(a: v128, b: v128) -> v128`
- `i16x8.q15mulr_sat_s(a: v128, b: v128) -> v128`
- `i8x16.min_s(a: v128, b: v128) -> v128`
- `i8x16.min_u(a: v128, b: v128) -> v128`
- `i16x8.min_s(a: v128, b: v128) -> v128`
- `i16x8.min_u(a: v128, b: v128) -> v128`
- `i32x4.min_s(a: v128, b: v128) -> v128`
- `i32x4.min_u(a: v128, b: v128) -> v128`
- `i8x16.max_s(a: v128, b: v128) -> v128`
- `i8x16.max_u(a: v128, b: v128) -> v128`
- `i16x8.max_s(a: v128, b: v128) -> v128`
- `i16x8.max_u(a: v128, b: v128) -> v128`
- `i32x4.max_s(a: v128, b: v128) -> v128`
- `i32x4.max_u(a: v128, b: v128) -> v128`
- `i8x16.avgr_u(a: v128, b: v128) -> v128`
- `i16x8.avgr_u(a: v128, b: v128) -> v128`
- `i8x16.abs(a: v128) -> v128`
- `i16x8.abs(a: v128) -> v128`
- `i32x4.abs(a: v128) -> v128`
- `i64x2.abs(a: v128) -> v128`
- `i8x16.shl(a: v128, y: i32) -> v128`
- `i16x8.shl(a: v128, y: i32) -> v128`
- `i32x4.shl(a: v128, y: i32) -> v128`
- `i64x2.shl(a: v128, y: i32) -> v128`
- `i8x16.shr_s(a: v128, y: i32) -> v128`
- `i8x16.shr_u(a: v128, y: i32) -> v128`
- `i16x8.shr_s(a: v128, y: i32) -> v128`
- `i16x8.shr_u(a: v128, y: i32) -> v128`
- `i32x4.shr_s(a: v128, y: i32) -> v128`
- `i32x4.shr_u(a: v128, y: i32) -> v128`
- `i64x2.shr_s(a: v128, y: i32) -> v128`
- `i64x2.shr_u(a: v128, y: i32) -> v128`
- `v128.and(a: v128, b: v128) -> v128`
- `v128.or(a: v128, b: v128) -> v128`
- `v128.xor(a: v128, b: v128) -> v128`
- `v128.not(a: v128) -> v128`
- `v128.andnot(a: v128, b: v128) -> v128`
- `v128.bitselect(v1: v128, v2: v128, c: v128) -> v128`
- `i8x16.popcnt(v: v128) -> v128`
- `v128.any_true(a: v128) -> i32`
- `i8x16.all_true(a: v128) -> i32`
- `i16x8.all_true(a: v128) -> i32`
- `i32x4.all_true(a: v128) -> i32`
- `i64x2.all_true(a: v128) -> i32`
- `i8x16.bitmask(a: v128) -> i32`
- `i16x8.bitmask(a: v128) -> i32`
- `i32x4.bitmask(a: v128) -> i32`
- `i64x2.bitmask(a: v128) -> i32`
- `i8x16.eq(a: v128, b: v128) -> v128`
- `i16x8.eq(a: v128, b: v128) -> v128`
- `i32x4.eq(a: v128, b: v128) -> v128`
- `i64x2.eq(a: v128, b: v128) -> v128`
- `f32x4.eq(a: v128, b: v128) -> v128`
- `f64x2.eq(a: v128, b: v128) -> v128`
- `i8x16.ne(a: v128, b: v128) -> v128`
- `i16x8.ne(a: v128, b: v128) -> v128`
- `i32x4.ne(a: v128, b: v128) -> v128`
- `i64x2.ne(a: v128, b: v128) -> v128`
- `f32x4.ne(a: v128, b: v128) -> v128`
- `f64x2.ne(a: v128, b: v128) -> v128`
- `i8x16.lt_s(a: v128, b: v128) -> v128`
- `i8x16.lt_u(a: v128, b: v128) -> v128`
- `i16x8.lt_s(a: v128, b: v128) -> v128`
- `i16x8.lt_u(a: v128, b: v128) -> v128`
- `i32x4.lt_s(a: v128, b: v128) -> v128`
- `i32x4.lt_u(a: v128, b: v128) -> v128`
- `i64x2.lt_s(a: v128, b: v128) -> v128`
- `f32x4.lt(a: v128, b: v128) -> v128`
- `f64x2.lt(a: v128, b: v128) -> v128`
- `i8x16.le_s(a: v128, b: v128) -> v128`
- `i8x16.le_u(a: v128, b: v128) -> v128`
- `i16x8.le_s(a: v128, b: v128) -> v128`
- `i16x8.le_u(a: v128, b: v128) -> v128`
- `i32x4.le_s(a: v128, b: v128) -> v128`
- `i32x4.le_u(a: v128, b: v128) -> v128`
- `i64x2.le_s(a: v128, b: v128) -> v128`
- `f32x4.le(a: v128, b: v128) -> v128`
- `f64x2.le(a: v128, b: v128) -> v128`
- `i8x16.gt_s(a: v128, b: v128) -> v128`
- `i8x16.gt_u(a: v128, b: v128) -> v128`
- `i16x8.gt_s(a: v128, b: v128) -> v128`
- `i16x8.gt_u(a: v128, b: v128) -> v128`
- `i32x4.gt_s(a: v128, b: v128) -> v128`
- `i32x4.gt_u(a: v128, b: v128) -> v128`
- `i64x2.gt_s(a: v128, b: v128) -> v128`
- `f32x4.gt(a: v128, b: v128) -> v128`
- `f64x2.gt(a: v128, b: v128) -> v128`
- `i8x16.ge_s(a: v128, b: v128) -> v128`
- `i8x16.ge_u(a: v128, b: v128) -> v128`
- `i16x8.ge_s(a: v128, b: v128) -> v128`
- `i16x8.ge_u(a: v128, b: v128) -> v128`
- `i32x4.ge_s(a: v128, b: v128) -> v128`
- `i32x4.ge_u(a: v128, b: v128) -> v128`
- `i64x2.ge_s(a: v128, b: v128) -> v128`
- `f32x4.ge(a: v128, b: v128) -> v128`
- `f64x2.ge(a: v128, b: v128) -> v128`
- `v128.load(m: memarg) -> v128`
- `v128.load32_zero(m: memarg) -> v128`
- `v128.load64_zero(m: memarg) -> v128`
- `v128.load8_splat(m: memarg) -> v128`
- `v128.load16_splat(m: memarg) -> v128`
- `v128.load32_splat(m: memarg) -> v128`
- `v128.load64_splat(m: memarg) -> v128`
- `v128.load8_lane(m: memarg, x: v128, imm: ImmLaneIdx16) -> v128`
- `v128.load16_lane(m: memarg, x: v128, imm: ImmLaneIdx8) -> v128`
- `v128.load32_lane(m: memarg, x: v128, imm: ImmLaneIdx4) -> v128`
- `v128.load64_lane(m: memarg, x: v128, imm: ImmLaneIdx2) -> v128`
- `v128.load8x8_s(m: memarg)`
- `v128.load8x8_u(m: memarg)`
- `v128.load16x4_s(m: memarg)`
- `v128.load16x4_u(m: memarg)`
- `v128.load32x2_s(m: memarg)`
- `v128.load32x2_u(m: memarg)`
- `v128.store(m: memarg, data: v128)`
- `v128.store8_lane(m: memarg, data: v128, imm: ImmLaneIdx16)`
- `v128.store16_lane(m: memarg, data: v128, imm: ImmLaneIdx8)`
- `v128.store32_lane(m: memarg, data: v128, imm: ImmLaneIdx4)`
- `v128.store64_lane(m: memarg, data: v128, imm: ImmLaneIdx2)`
- `f32x4.neg(a: v128) -> v128`
- `f64x2.neg(a: v128) -> v128`
- `f32x4.abs(a: v128) -> v128`
- `f64x2.abs(a: v128) -> v128`
- `f32x4.min(a: v128, b: v128) -> v128`
- `f64x2.min(a: v128, b: v128) -> v128`
- `f32x4.max(a: v128, b: v128) -> v128`
- `f64x2.max(a: v128, b: v128) -> v128`
- `f32x4.pmin(a: v128, b: v128) -> v128`
- `f64x2.pmin(a: v128, b: v128) -> v128`
- `f32x4.pmax(a: v128, b: v128) -> v128`
- `f64x2.pmax(a: v128, b: v128) -> v128`
- `f32x4.add(a: v128, b: v128) -> v128`
- `f64x2.add(a: v128, b: v128) -> v128`
- `f32x4.sub(a: v128, b: v128) -> v128`
- `f64x2.sub(a: v128, b: v128) -> v128`
- `f32x4.div(a: v128, b: v128) -> v128`
- `f64x2.div(a: v128, b: v128) -> v128`
- `f32x4.mul(a: v128, b: v128) -> v128`
- `f64x2.mul(a: v128, b: v128) -> v128`
- `f32x4.sqrt(a: v128) -> v128`
- `f64x2.sqrt(a: v128) -> v128`
- `f32x4.ceil(a: v128) -> v128`
- `f64x2.ceil(a: v128) -> v128`
- `f32x4.floor(a: v128) -> v128`
- `f64x2.floor(a: v128) -> v128`
- `f32x4.trunc(a: v128) -> v128`
- `f64x2.trunc(a: v128) -> v128`
- `f32x4.nearest(a: v128) -> v128`
- `f64x2.nearest(a: v128) -> v128`
- `f32x4.convert_i32x4_s(a: v128) -> v128`
- `f32x4.convert_i32x4_u(a: v128) -> v128`
- `f64x2.convert_low_i32x4_s(a: v128) -> v128`
- `f64x2.convert_low_i32x4_u(a: v128) -> v128`
- `i32x4.trunc_sat_f32x4_s(a: v128) -> v128`
- `i32x4.trunc_sat_f32x4_u(a: v128) -> v128`
- `i32x4.trunc_sat_f64x2_s_zero(a: v128) -> v128`
- `i32x4.trunc_sat_f64x2_u_zero(a: v128) -> v128`
- `f32x4.demote_f64x2_zero(a: v128) -> v128`
- `f64x2.promote_low_f32x4(a: v128) -> v128`
- `i8x16.narrow_i16x8_s(a: v128, b: v128) -> v128`
- `i8x16.narrow_i16x8_u(a: v128, b: v128) -> v128`
- `i16x8.narrow_i32x4_s(a: v128, b: v128) -> v128`
- `i16x8.narrow_i32x4_u(a: v128, b: v128) -> v128`
- `i16x8.extend_low_i8x16_s(a: v128) -> v128`
- `i16x8.extend_high_i8x16_s(a: v128) -> v128`
- `i16x8.extend_low_i8x16_u(a: v128) -> v128`
- `i16x8.extend_high_i8x16_u(a: v128) -> v128`
- `i32x4.extend_low_i16x8_s(a: v128) -> v128`
- `i32x4.extend_high_i16x8_s(a: v128) -> v128`
- `i32x4.extend_low_i16x8_u(a: v128) -> v128`
- `i32x4.extend_high_i16x8_u(a: v128) -> v128`
- `i64x2.extend_low_i32x4_s(a: v128) -> v128`
- `i64x2.extend_high_i32x4_s(a: v128) -> v128`
- `i64x2.extend_low_i32x4_u(a: v128) -> v128`
- `i64x2.extend_high_i32x4_u(a: v128) -> v128`
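As a small illustration of the semantics behind one of the entries above, `v128.bitselect(v1, v2, c)` selects each result bit from `v1` where the corresponding bit of the mask `c` is set and from `v2` otherwise. A scalar sketch over the raw 16 bytes (the function name is illustrative):

```rust
/// Scalar model of `v128.bitselect`: for every bit, take it from `v1` if the
/// corresponding bit in mask `c` is 1, otherwise take it from `v2`.
fn v128_bitselect(v1: [u8; 16], v2: [u8; 16], c: [u8; 16]) -> [u8; 16] {
    let mut out = [0u8; 16];
    for i in 0..16 {
        out[i] = (v1[i] & c[i]) | (v2[i] & !c[i]);
    }
    out
}

fn main() {
    let v1 = [0xFF; 16];
    let v2 = [0x00; 16];
    let mut mask = [0x00; 16];
    mask[0] = 0b1010_1010;
    let picked = v128_bitselect(v1, v2, mask);
    assert_eq!(picked[0], 0b1010_1010); // bits from `v1` where the mask is set
    assert_eq!(picked[1], 0x00); // all bits from `v2` where the mask is clear
}
```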