wasmi_ir: add support for the Wasm simd proposal
#1409
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Codecov Report

Attention: Patch coverage is …

Additional details and impacted files

@@            Coverage Diff             @@
##             main    #1409      +/-   ##
==========================================
- Coverage   69.36%   69.35%   -0.01%
==========================================
  Files         158      158
  Lines       14698    14702       +4
==========================================
+ Hits        10195    10197       +2
- Misses       4503     4505       +2

☔ View full report in Codecov by Sentry.
Changed the title from “wasmi_ir: add support for Wasm simd proposal” to “wasmi_ir: add support for the Wasm simd proposal”.
The PR now implements all of the Wasm `simd` instructions. Additionally, Wasmi may implement some lowerings in the future, for example lowerings from …
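As an illustration of what such a lowering could mean (a purely hypothetical sketch, not code from this PR): an `i32x4.splat` whose input is a compile-time constant can be folded into a `v128.const` immediate.

```rust
/// Hypothetical helper: folds `i32x4.splat` of a constant into the byte
/// pattern of an equivalent `v128.const` immediate.
fn lower_i32x4_splat_const(value: i32) -> [u8; 16] {
    let lane = value.to_le_bytes();
    let mut imm = [0u8; 16];
    for chunk in imm.chunks_exact_mut(4) {
        // Repeat the 4-byte lane pattern across all four lanes.
        chunk.copy_from_slice(&lane);
    }
    imm
}

fn main() {
    // `i32x4.splat(0x0102_0304)` folds to a `v128.const` with this
    // little-endian byte pattern repeated across all four lanes.
    assert_eq!(
        lower_i32x4_splat_const(0x0102_0304),
        [4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1]
    );
}
```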
Implements the `wasmi_ir` part of #1364.

- Adds a `simd` crate feature to `wasmi_ir`.
- Adds `Instruction` variants for all Wasm `simd` instructions when the `simd` crate feature is enabled (see the sketch below).

Link to Wasm 3.0 Spec: https://webassembly.github.io/spec/core/appendix/index-instructions.html
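For readers unfamiliar with feature-gated enum variants, here is a minimal sketch of how this could look. The `Reg` type and variant names below are illustrative placeholders, not the actual `wasmi_ir` definitions; downstream users would opt in by enabling the `simd` feature of their `wasmi_ir` dependency.

```rust
/// Illustrative operand index; stands in for whatever register/slot
/// encoding `wasmi_ir` actually uses.
#[derive(Copy, Clone, Debug)]
pub struct Reg(pub u16);

/// Sketch of an IR instruction enum whose SIMD variants only exist
/// when the crate is compiled with the `simd` feature.
#[derive(Debug)]
pub enum Instruction {
    /// Non-SIMD example variant.
    I32Add { result: Reg, lhs: Reg, rhs: Reg },
    /// `i8x16.splat`: broadcast an `i32` into all 16 lanes of a `v128`.
    #[cfg(feature = "simd")]
    I8x16Splat { result: Reg, value: Reg },
    /// `i32x4.add`: lane-wise addition of two `v128` values.
    #[cfg(feature = "simd")]
    I32x4Add { result: Reg, lhs: Reg, rhs: Reg },
}
```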
🟡 Means that the Wasm instruction has no Wasmi instruction counterpart by design.
Wasm `simd` instructions and their signatures, grouped by category:

- **Const & splat** (`-> v128`): `v128.const(imm: ImmByte[16])`, `i8x16.splat(x: i32)`, `i16x8.splat(x: i32)`, `i32x4.splat(x: i32)`, `i64x2.splat(x: i64)`, `f32x4.splat(x: f32)`, `f64x2.splat(x: f64)`
- **Extract lane**: `i8x16.extract_lane_s(a: v128, imm: ImmLaneIdx16) -> i32`, `i8x16.extract_lane_u(a: v128, imm: ImmLaneIdx16) -> i32`, `i16x8.extract_lane_s(a: v128, imm: ImmLaneIdx8) -> i32`, `i16x8.extract_lane_u(a: v128, imm: ImmLaneIdx8) -> i32`, `i32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> i32`, `i64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> i64`, `f32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> f32`, `f64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> f64`
- **Replace lane** (`-> v128`): `i8x16.replace_lane(a: v128, imm: ImmLaneIdx16, x: i32)`, `i16x8.replace_lane(a: v128, imm: ImmLaneIdx8, x: i32)`, `i32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: i32)`, `i64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: i64)`, `f32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: f32)`, `f64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: f64)`
- **Shuffle & swizzle** (`-> v128`): `i8x16.shuffle(a: v128, b: v128, imm: ImmLaneIdx32[16])`, `i8x16.swizzle(a: v128, s: v128)`
- **Integer add/sub/mul/dot** (`(a: v128, b: v128) -> v128`): `i8x16.add`, `i16x8.add`, `i32x4.add`, `i64x2.add`, `i8x16.sub`, `i16x8.sub`, `i32x4.sub`, `i64x2.sub`, `i16x8.mul`, `i32x4.mul`, `i64x2.mul`, `i32x4.dot_i16x8_s`
- **Integer neg/abs** (`(a: v128) -> v128`): `i8x16.neg`, `i16x8.neg`, `i32x4.neg`, `i64x2.neg`, `i8x16.abs`, `i16x8.abs`, `i32x4.abs`, `i64x2.abs`
- **Extended multiplication** (`(a: v128, b: v128) -> v128`): `i16x8.extmul_low_i8x16_s`, `i16x8.extmul_high_i8x16_s`, `i16x8.extmul_low_i8x16_u`, `i16x8.extmul_high_i8x16_u`, `i32x4.extmul_low_i16x8_s`, `i32x4.extmul_high_i16x8_s`, `i32x4.extmul_low_i16x8_u`, `i32x4.extmul_high_i16x8_u`, `i64x2.extmul_low_i32x4_s`, `i64x2.extmul_high_i32x4_s`, `i64x2.extmul_low_i32x4_u`, `i64x2.extmul_high_i32x4_u`
- **Extended pairwise addition** (`(a: v128) -> v128`): `i16x8.extadd_pairwise_i8x16_s`, `i16x8.extadd_pairwise_i8x16_u`, `i32x4.extadd_pairwise_i16x8_s`, `i32x4.extadd_pairwise_i16x8_u`
- **Saturating arithmetic** (`(a: v128, b: v128) -> v128`): `i8x16.add_sat_s`, `i8x16.add_sat_u`, `i16x8.add_sat_s`, `i16x8.add_sat_u`, `i8x16.sub_sat_s`, `i8x16.sub_sat_u`, `i16x8.sub_sat_s`, `i16x8.sub_sat_u`, `i16x8.q15mulr_sat_s`
- **Integer min/max/average** (`(a: v128, b: v128) -> v128`): `i8x16.min_s`, `i8x16.min_u`, `i16x8.min_s`, `i16x8.min_u`, `i32x4.min_s`, `i32x4.min_u`, `i8x16.max_s`, `i8x16.max_u`, `i16x8.max_s`, `i16x8.max_u`, `i32x4.max_s`, `i32x4.max_u`, `i8x16.avgr_u`, `i16x8.avgr_u`
- **Shifts** (`(a: v128, y: i32) -> v128`): `i8x16.shl`, `i16x8.shl`, `i32x4.shl`, `i64x2.shl`, `i8x16.shr_s`, `i8x16.shr_u`, `i16x8.shr_s`, `i16x8.shr_u`, `i32x4.shr_s`, `i32x4.shr_u`, `i64x2.shr_s`, `i64x2.shr_u`
- **Bitwise** (`-> v128`): `v128.and(a: v128, b: v128)`, `v128.or(a: v128, b: v128)`, `v128.xor(a: v128, b: v128)`, `v128.not(a: v128)`, `v128.andnot(a: v128, b: v128)`, `v128.bitselect(v1: v128, v2: v128, c: v128)`, `i8x16.popcnt(v: v128)`
- **Boolean tests & bitmask** (`(a: v128) -> i32`): `v128.any_true`, `i8x16.all_true`, `i16x8.all_true`, `i32x4.all_true`, `i64x2.all_true`, `i8x16.bitmask`, `i16x8.bitmask`, `i32x4.bitmask`, `i64x2.bitmask`
- **Comparisons** (`(a: v128, b: v128) -> v128`): `i8x16.eq`, `i16x8.eq`, `i32x4.eq`, `i64x2.eq`, `f32x4.eq`, `f64x2.eq`, `i8x16.ne`, `i16x8.ne`, `i32x4.ne`, `i64x2.ne`, `f32x4.ne`, `f64x2.ne`, `i8x16.lt_s`, `i8x16.lt_u`, `i16x8.lt_s`, `i16x8.lt_u`, `i32x4.lt_s`, `i32x4.lt_u`, `i64x2.lt_s`, `f32x4.lt`, `f64x2.lt`, `i8x16.le_s`, `i8x16.le_u`, `i16x8.le_s`, `i16x8.le_u`, `i32x4.le_s`, `i32x4.le_u`, `i64x2.le_s`, `f32x4.le`, `f64x2.le`, `i8x16.gt_s`, `i8x16.gt_u`, `i16x8.gt_s`, `i16x8.gt_u`, `i32x4.gt_s`, `i32x4.gt_u`, `i64x2.gt_s`, `f32x4.gt`, `f64x2.gt`, `i8x16.ge_s`, `i8x16.ge_u`, `i16x8.ge_s`, `i16x8.ge_u`, `i32x4.ge_s`, `i32x4.ge_u`, `i64x2.ge_s`, `f32x4.ge`, `f64x2.ge`
- **Loads** (`-> v128`): `v128.load(m: memarg)`, `v128.load32_zero(m: memarg)`, `v128.load64_zero(m: memarg)`, `v128.load8_splat(m: memarg)`, `v128.load16_splat(m: memarg)`, `v128.load32_splat(m: memarg)`, `v128.load64_splat(m: memarg)`, `v128.load8_lane(m: memarg, x: v128, imm: ImmLaneIdx16)`, `v128.load16_lane(m: memarg, x: v128, imm: ImmLaneIdx8)`, `v128.load32_lane(m: memarg, x: v128, imm: ImmLaneIdx4)`, `v128.load64_lane(m: memarg, x: v128, imm: ImmLaneIdx2)`
- **Extending loads**: `v128.load8x8_s(m: memarg)`, `v128.load8x8_u(m: memarg)`, `v128.load16x4_s(m: memarg)`, `v128.load16x4_u(m: memarg)`, `v128.load32x2_s(m: memarg)`, `v128.load32x2_u(m: memarg)`
- **Stores**: `v128.store(m: memarg, data: v128)`, `v128.store8_lane(m: memarg, data: v128, imm: ImmLaneIdx16)`, `v128.store16_lane(m: memarg, data: v128, imm: ImmLaneIdx8)`, `v128.store32_lane(m: memarg, data: v128, imm: ImmLaneIdx4)`, `v128.store64_lane(m: memarg, data: v128, imm: ImmLaneIdx2)`
- **Float unary** (`(a: v128) -> v128`): `f32x4.neg`, `f64x2.neg`, `f32x4.abs`, `f64x2.abs`, `f32x4.sqrt`, `f64x2.sqrt`, `f32x4.ceil`, `f64x2.ceil`, `f32x4.floor`, `f64x2.floor`, `f32x4.trunc`, `f64x2.trunc`, `f32x4.nearest`, `f64x2.nearest`
- **Float binary** (`(a: v128, b: v128) -> v128`): `f32x4.min`, `f64x2.min`, `f32x4.max`, `f64x2.max`, `f32x4.pmin`, `f64x2.pmin`, `f32x4.pmax`, `f64x2.pmax`, `f32x4.add`, `f64x2.add`, `f32x4.sub`, `f64x2.sub`, `f32x4.div`, `f64x2.div`, `f32x4.mul`, `f64x2.mul`
- **Conversions** (`(a: v128) -> v128`): `f32x4.convert_i32x4_s`, `f32x4.convert_i32x4_u`, `f64x2.convert_low_i32x4_s`, `f64x2.convert_low_i32x4_u`, `i32x4.trunc_sat_f32x4_s`, `i32x4.trunc_sat_f32x4_u`, `i32x4.trunc_sat_f64x2_s_zero`, `i32x4.trunc_sat_f64x2_u_zero`, `f32x4.demote_f64x2_zero`, `f64x2.promote_low_f32x4`
- **Narrowing** (`(a: v128, b: v128) -> v128`): `i8x16.narrow_i16x8_s`, `i8x16.narrow_i16x8_u`, `i16x8.narrow_i32x4_s`, `i16x8.narrow_i32x4_u`
- **Extension** (`(a: v128) -> v128`): `i16x8.extend_low_i8x16_s`, `i16x8.extend_high_i8x16_s`, `i16x8.extend_low_i8x16_u`, `i16x8.extend_high_i8x16_u`, `i32x4.extend_low_i16x8_s`, `i32x4.extend_high_i16x8_s`, `i32x4.extend_low_i16x8_u`, `i32x4.extend_high_i16x8_u`, `i64x2.extend_low_i32x4_s`, `i64x2.extend_high_i32x4_s`, `i64x2.extend_low_i32x4_u`, `i64x2.extend_high_i32x4_u`
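The `ImmLaneIdx*` immediates in the signatures above are lane indices whose valid range depends on the lane count (16, 8, 4, or 2). A rough sketch of how such an immediate could be represented and validated in Rust (illustrative only; not the actual `wasmi_ir` type):

```rust
/// Illustrative lane-index immediate for 16-lane shapes (e.g. `i8x16`);
/// valid lane indices are 0..=15, mirroring the spec's `ImmLaneIdx16`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct ImmLaneIdx16(u8);

impl ImmLaneIdx16 {
    /// Returns `None` for out-of-range lane indices.
    pub fn new(lane: u8) -> Option<Self> {
        (lane < 16).then_some(Self(lane))
    }

    /// Returns the validated lane index.
    pub fn get(self) -> u8 {
        self.0
    }
}

fn main() {
    assert_eq!(ImmLaneIdx16::new(15).map(ImmLaneIdx16::get), Some(15));
    assert!(ImmLaneIdx16::new(16).is_none());
}
```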