Conversation

@Robbepop Robbepop (Member) commented Mar 21, 2025

Implements the wasmi_ir part of #1364.

  • Adds the simd crate feature to wasmi_ir.
  • Adds Instruction variants for all Wasm simd instructions when the simd crate feature is enabled.

Link to Wasm 3.0 Spec: https://webassembly.github.io/spec/core/appendix/index-instructions.html
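
As a rough sketch of what the feature-gated variants could look like — `Reg` and the variant names below are illustrative placeholders, not Wasmi's actual definitions:

```rust
/// Illustrative register index; Wasmi's real type differs.
pub struct Reg(pub u16);

/// Sketch of `Instruction` with SIMD variants gated behind the `simd` feature.
pub enum Instruction {
    // ... scalar instructions ...
    I32Add { result: Reg, lhs: Reg, rhs: Reg },
    /// Wasm `i32x4.add`: lane-wise addition of two `v128` values.
    #[cfg(feature = "simd")]
    I32x4Add { result: Reg, lhs: Reg, rhs: Reg },
    /// Wasm `i8x16.splat`: broadcasts an `i32` into all 16 lanes.
    #[cfg(feature = "simd")]
    I8x16Splat { result: Reg, value: Reg },
}
```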

🟡 means that the Wasm instruction has no Wasmi instruction counterpart by design.

Status Wasm simd Instruction
🟡 v128.const(imm: ImmByte[16]) -> v128
i8x16.splat(x: i32) -> v128
i16x8.splat(x: i32) -> v128
i32x4.splat(x: i32) -> v128
i64x2.splat(x: i64) -> v128
f32x4.splat(x: f32) -> v128
f64x2.splat(x: f64) -> v128
i8x16.extract_lane_s(a: v128, imm: ImmLaneIdx16) -> i32
i8x16.extract_lane_u(a: v128, imm: ImmLaneIdx16) -> i32
i16x8.extract_lane_s(a: v128, imm: ImmLaneIdx8) -> i32
i16x8.extract_lane_u(a: v128, imm: ImmLaneIdx8) -> i32
i32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> i32
i64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> i64
f32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> f32
f64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> f64
i8x16.replace_lane(a: v128, imm: ImmLaneIdx16, x: i32) -> v128
i16x8.replace_lane(a: v128, imm: ImmLaneIdx8, x: i32) -> v128
i32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: i32) -> v128
i64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: i64) -> v128
f32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: f32) -> v128
f64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: f64) -> v128
i8x16.shuffle(a: v128, b: v128, imm: ImmLaneIdx32[16]) -> v128
i8x16.swizzle(a: v128, s: v128) -> v128
i8x16.add(a: v128, b: v128) -> v128
i16x8.add(a: v128, b: v128) -> v128
i32x4.add(a: v128, b: v128) -> v128
i64x2.add(a: v128, b: v128) -> v128
i8x16.sub(a: v128, b: v128) -> v128
i16x8.sub(a: v128, b: v128) -> v128
i32x4.sub(a: v128, b: v128) -> v128
i64x2.sub(a: v128, b: v128) -> v128
i16x8.mul(a: v128, b: v128) -> v128
i32x4.mul(a: v128, b: v128) -> v128
i64x2.mul(a: v128, b: v128) -> v128
i32x4.dot_i16x8_s(a: v128, b: v128) -> v128
i8x16.neg(a: v128) -> v128
i16x8.neg(a: v128) -> v128
i32x4.neg(a: v128) -> v128
i64x2.neg(a: v128) -> v128
i16x8.extmul_low_i8x16_s(a: v128, b: v128) -> v128
i16x8.extmul_high_i8x16_s(a: v128, b: v128) -> v128
i16x8.extmul_low_i8x16_u(a: v128, b: v128) -> v128
i16x8.extmul_high_i8x16_u(a: v128, b: v128) -> v128
i32x4.extmul_low_i16x8_s(a: v128, b: v128) -> v128
i32x4.extmul_high_i16x8_s(a: v128, b: v128) -> v128
i32x4.extmul_low_i16x8_u(a: v128, b: v128) -> v128
i32x4.extmul_high_i16x8_u(a: v128, b: v128) -> v128
i64x2.extmul_low_i32x4_s(a: v128, b: v128) -> v128
i64x2.extmul_high_i32x4_s(a: v128, b: v128) -> v128
i64x2.extmul_low_i32x4_u(a: v128, b: v128) -> v128
i64x2.extmul_high_i32x4_u(a: v128, b: v128) -> v128
i16x8.extadd_pairwise_i8x16_s(a: v128) -> v128
i16x8.extadd_pairwise_i8x16_u(a: v128) -> v128
i32x4.extadd_pairwise_i16x8_s(a: v128) -> v128
i32x4.extadd_pairwise_i16x8_u(a: v128) -> v128
i8x16.add_sat_s(a: v128, b: v128) -> v128
i8x16.add_sat_u(a: v128, b: v128) -> v128
i16x8.add_sat_s(a: v128, b: v128) -> v128
i16x8.add_sat_u(a: v128, b: v128) -> v128
i8x16.sub_sat_s(a: v128, b: v128) -> v128
i8x16.sub_sat_u(a: v128, b: v128) -> v128
i16x8.sub_sat_s(a: v128, b: v128) -> v128
i16x8.sub_sat_u(a: v128, b: v128) -> v128
i16x8.q15mulr_sat_s(a: v128, b: v128) -> v128
i8x16.min_s(a: v128, b: v128) -> v128
i8x16.min_u(a: v128, b: v128) -> v128
i16x8.min_s(a: v128, b: v128) -> v128
i16x8.min_u(a: v128, b: v128) -> v128
i32x4.min_s(a: v128, b: v128) -> v128
i32x4.min_u(a: v128, b: v128) -> v128
i8x16.max_s(a: v128, b: v128) -> v128
i8x16.max_u(a: v128, b: v128) -> v128
i16x8.max_s(a: v128, b: v128) -> v128
i16x8.max_u(a: v128, b: v128) -> v128
i32x4.max_s(a: v128, b: v128) -> v128
i32x4.max_u(a: v128, b: v128) -> v128
i8x16.avgr_u(a: v128, b: v128) -> v128
i16x8.avgr_u(a: v128, b: v128) -> v128
i8x16.abs(a: v128) -> v128
i16x8.abs(a: v128) -> v128
i32x4.abs(a: v128) -> v128
i64x2.abs(a: v128) -> v128
i8x16.shl(a: v128, y: i32) -> v128
i16x8.shl(a: v128, y: i32) -> v128
i32x4.shl(a: v128, y: i32) -> v128
i64x2.shl(a: v128, y: i32) -> v128
i8x16.shr_s(a: v128, y: i32) -> v128
i8x16.shr_u(a: v128, y: i32) -> v128
i16x8.shr_s(a: v128, y: i32) -> v128
i16x8.shr_u(a: v128, y: i32) -> v128
i32x4.shr_s(a: v128, y: i32) -> v128
i32x4.shr_u(a: v128, y: i32) -> v128
i64x2.shr_s(a: v128, y: i32) -> v128
i64x2.shr_u(a: v128, y: i32) -> v128
v128.and(a: v128, b: v128) -> v128
v128.or(a: v128, b: v128) -> v128
v128.xor(a: v128, b: v128) -> v128
v128.not(a: v128) -> v128
v128.andnot(a: v128, b: v128) -> v128
v128.bitselect(v1: v128, v2: v128, c: v128) -> v128
i8x16.popcnt(v: v128) -> v128
v128.any_true(a: v128) -> i32
i8x16.all_true(a: v128) -> i32
i16x8.all_true(a: v128) -> i32
i32x4.all_true(a: v128) -> i32
i64x2.all_true(a: v128) -> i32
i8x16.bitmask(a: v128) -> i32
i16x8.bitmask(a: v128) -> i32
i32x4.bitmask(a: v128) -> i32
i64x2.bitmask(a: v128) -> i32
i8x16.eq(a: v128, b: v128) -> v128
i16x8.eq(a: v128, b: v128) -> v128
i32x4.eq(a: v128, b: v128) -> v128
i64x2.eq(a: v128, b: v128) -> v128
f32x4.eq(a: v128, b: v128) -> v128
f64x2.eq(a: v128, b: v128) -> v128
i8x16.ne(a: v128, b: v128) -> v128
i16x8.ne(a: v128, b: v128) -> v128
i32x4.ne(a: v128, b: v128) -> v128
i64x2.ne(a: v128, b: v128) -> v128
f32x4.ne(a: v128, b: v128) -> v128
f64x2.ne(a: v128, b: v128) -> v128
i8x16.lt_s(a: v128, b: v128) -> v128
i8x16.lt_u(a: v128, b: v128) -> v128
i16x8.lt_s(a: v128, b: v128) -> v128
i16x8.lt_u(a: v128, b: v128) -> v128
i32x4.lt_s(a: v128, b: v128) -> v128
i32x4.lt_u(a: v128, b: v128) -> v128
i64x2.lt_s(a: v128, b: v128) -> v128
f32x4.lt(a: v128, b: v128) -> v128
f64x2.lt(a: v128, b: v128) -> v128
i8x16.le_s(a: v128, b: v128) -> v128
i8x16.le_u(a: v128, b: v128) -> v128
i16x8.le_s(a: v128, b: v128) -> v128
i16x8.le_u(a: v128, b: v128) -> v128
i32x4.le_s(a: v128, b: v128) -> v128
i32x4.le_u(a: v128, b: v128) -> v128
i64x2.le_s(a: v128, b: v128) -> v128
f32x4.le(a: v128, b: v128) -> v128
f64x2.le(a: v128, b: v128) -> v128
i8x16.gt_s(a: v128, b: v128) -> v128
i8x16.gt_u(a: v128, b: v128) -> v128
i16x8.gt_s(a: v128, b: v128) -> v128
i16x8.gt_u(a: v128, b: v128) -> v128
i32x4.gt_s(a: v128, b: v128) -> v128
i32x4.gt_u(a: v128, b: v128) -> v128
i64x2.gt_s(a: v128, b: v128) -> v128
f32x4.gt(a: v128, b: v128) -> v128
f64x2.gt(a: v128, b: v128) -> v128
i8x16.ge_s(a: v128, b: v128) -> v128
i8x16.ge_u(a: v128, b: v128) -> v128
i16x8.ge_s(a: v128, b: v128) -> v128
i16x8.ge_u(a: v128, b: v128) -> v128
i32x4.ge_s(a: v128, b: v128) -> v128
i32x4.ge_u(a: v128, b: v128) -> v128
i64x2.ge_s(a: v128, b: v128) -> v128
f32x4.ge(a: v128, b: v128) -> v128
f64x2.ge(a: v128, b: v128) -> v128
v128.load(m: memarg) -> v128
v128.load32_zero(m: memarg) -> v128
v128.load64_zero(m: memarg) -> v128
v128.load8_splat(m: memarg) -> v128
v128.load16_splat(m: memarg) -> v128
v128.load32_splat(m: memarg) -> v128
v128.load64_splat(m: memarg) -> v128
v128.load8_lane(m: memarg, x: v128, imm: ImmLaneIdx16) -> v128
v128.load16_lane(m: memarg, x: v128, imm: ImmLaneIdx8) -> v128
v128.load32_lane(m: memarg, x: v128, imm: ImmLaneIdx4) -> v128
v128.load64_lane(m: memarg, x: v128, imm: ImmLaneIdx2) -> v128
v128.load8x8_s(m: memarg) -> v128
v128.load8x8_u(m: memarg) -> v128
v128.load16x4_s(m: memarg) -> v128
v128.load16x4_u(m: memarg) -> v128
v128.load32x2_s(m: memarg) -> v128
v128.load32x2_u(m: memarg) -> v128
v128.store(m: memarg, data: v128)
v128.store8_lane(m: memarg, data: v128, imm: ImmLaneIdx16)
v128.store16_lane(m: memarg, data: v128, imm: ImmLaneIdx8)
v128.store32_lane(m: memarg, data: v128, imm: ImmLaneIdx4)
v128.store64_lane(m: memarg, data: v128, imm: ImmLaneIdx2)
f32x4.neg(a: v128) -> v128
f64x2.neg(a: v128) -> v128
f32x4.abs(a: v128) -> v128
f64x2.abs(a: v128) -> v128
f32x4.min(a: v128, b: v128) -> v128
f64x2.min(a: v128, b: v128) -> v128
f32x4.max(a: v128, b: v128) -> v128
f64x2.max(a: v128, b: v128) -> v128
f32x4.pmin(a: v128, b: v128) -> v128
f64x2.pmin(a: v128, b: v128) -> v128
f32x4.pmax(a: v128, b: v128) -> v128
f64x2.pmax(a: v128, b: v128) -> v128
f32x4.add(a: v128, b: v128) -> v128
f64x2.add(a: v128, b: v128) -> v128
f32x4.sub(a: v128, b: v128) -> v128
f64x2.sub(a: v128, b: v128) -> v128
f32x4.div(a: v128, b: v128) -> v128
f64x2.div(a: v128, b: v128) -> v128
f32x4.mul(a: v128, b: v128) -> v128
f64x2.mul(a: v128, b: v128) -> v128
f32x4.sqrt(a: v128) -> v128
f64x2.sqrt(a: v128) -> v128
f32x4.ceil(a: v128) -> v128
f64x2.ceil(a: v128) -> v128
f32x4.floor(a: v128) -> v128
f64x2.floor(a: v128) -> v128
f32x4.trunc(a: v128) -> v128
f64x2.trunc(a: v128) -> v128
f32x4.nearest(a: v128) -> v128
f64x2.nearest(a: v128) -> v128
f32x4.convert_i32x4_s(a: v128) -> v128
f32x4.convert_i32x4_u(a: v128) -> v128
f64x2.convert_low_i32x4_s(a: v128) -> v128
f64x2.convert_low_i32x4_u(a: v128) -> v128
i32x4.trunc_sat_f32x4_s(a: v128) -> v128
i32x4.trunc_sat_f32x4_u(a: v128) -> v128
i32x4.trunc_sat_f64x2_s_zero(a: v128) -> v128
i32x4.trunc_sat_f64x2_u_zero(a: v128) -> v128
f32x4.demote_f64x2_zero(a: v128) -> v128
f64x2.promote_low_f32x4(a: v128) -> v128
i8x16.narrow_i16x8_s(a: v128, b: v128) -> v128
i8x16.narrow_i16x8_u(a: v128, b: v128) -> v128
i16x8.narrow_i32x4_s(a: v128, b: v128) -> v128
i16x8.narrow_i32x4_u(a: v128, b: v128) -> v128
i16x8.extend_low_i8x16_s(a: v128) -> v128
i16x8.extend_high_i8x16_s(a: v128) -> v128
i16x8.extend_low_i8x16_u(a: v128) -> v128
i16x8.extend_high_i8x16_u(a: v128) -> v128
i32x4.extend_low_i16x8_s(a: v128) -> v128
i32x4.extend_high_i16x8_s(a: v128) -> v128
i32x4.extend_low_i16x8_u(a: v128) -> v128
i32x4.extend_high_i16x8_u(a: v128) -> v128
i64x2.extend_low_i32x4_s(a: v128) -> v128
i64x2.extend_high_i32x4_s(a: v128) -> v128
i64x2.extend_low_i32x4_u(a: v128) -> v128
i64x2.extend_high_i32x4_u(a: v128) -> v128

@codecov codecov bot commented Mar 21, 2025

Codecov Report

Attention: Patch coverage is 0% with 5 lines in your changes missing coverage. Please review.

Project coverage is 69.35%. Comparing base (dbcd4ba) to head (6c56e3a).
Report is 1 commit behind head on main.

| Files with missing lines | Patch % | Lines |
| --- | --- | --- |
| crates/ir/src/primitive.rs | 0.00% | 4 Missing ⚠️ |
| crates/wasmi/src/engine/executor/instrs.rs | 0.00% | 1 Missing ⚠️ |
Additional details and impacted files
@@            Coverage Diff             @@
##             main    #1409      +/-   ##
==========================================
- Coverage   69.36%   69.35%   -0.01%     
==========================================
  Files         158      158              
  Lines       14698    14702       +4     
==========================================
+ Hits        10195    10197       +2     
- Misses       4503     4505       +2     

@Robbepop Robbepop changed the title wasmi_ir: add support for Wasm simd proposal wasmi_ir: add support for the Wasm simd proposal Mar 21, 2025
@Robbepop Robbepop (Member, Author) commented Mar 22, 2025

The PR now implements all of the Wasm simd proposal instructions in Wasmi IR.
However, due to the way this PR adds the instructions, compile times have exploded even without --features simd.
This can be fixed by changing how the for_each_instruction macro works; doing so is required as a follow-up to this PR.
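
For context, a for_each_instruction-style macro typically hands the full instruction list to a caller-supplied macro, so every call site re-expands the whole list. The shape below is a minimal sketch with made-up variant names, not Wasmi's actual macro:

```rust
// Minimal sketch of a callback-style instruction-list macro.
// Every use of `for_each_instruction!` re-expands the entire list,
// which is why a large SIMD extension can inflate compile times.
macro_rules! for_each_instruction {
    ($mac:ident) => {
        $mac! {
            // scalar instructions ...
            I32Add { result: Reg, lhs: Reg, rhs: Reg };
            // SIMD instructions, only meaningful with the `simd` feature ...
            #[cfg(feature = "simd")]
            I32x4Add { result: Reg, lhs: Reg, rhs: Reg };
        }
    };
}
```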

Additionally, Wasmi may implement some lowerings in the future, for example lowering i32x4.ge_s a b to i32x4.le_s b a (swapping the operands), as has already been done for the scalar types. With these lowerings we would shrink the number of SIMD-related Wasmi IR instructions a bit.
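
A minimal sketch of such an operand-swap lowering, with illustrative type and variant names rather than Wasmi's actual translator API:

```rust
/// Illustrative register index; Wasmi's real type differs.
pub struct Reg(pub u16);

/// Illustrative instruction subset for this sketch.
pub enum Instruction {
    I32x4LeS { result: Reg, lhs: Reg, rhs: Reg },
}

/// Lowers `i32x4.ge_s lhs rhs` without a dedicated `ge_s` variant:
/// lane-wise, `lhs >= rhs` holds exactly when `rhs <= lhs`, so we
/// reuse the `le_s` instruction with swapped operands.
fn lower_i32x4_ge_s(result: Reg, lhs: Reg, rhs: Reg) -> Instruction {
    Instruction::I32x4LeS { result, lhs: rhs, rhs: lhs }
}
```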

@Robbepop Robbepop merged commit 82836a5 into main Mar 22, 2025
17 of 19 checks passed
@Robbepop Robbepop deleted the rf-wasmi_ir-add-simd-instructions branch March 22, 2025 13:52