Conversation

@Robbepop Robbepop (Member) commented Mar 21, 2025

Implements the wasmi_ir part of #1364.

  • Adds the simd crate feature to wasmi_ir.
  • Adds Instruction variants for all Wasm simd instructions when the simd crate feature is enabled.

Link to Wasm 3.0 Spec: https://webassembly.github.io/spec/core/appendix/index-instructions.html
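
As a rough sketch of what the feature-gated variants could look like — `Reg` and the variant names below are illustrative placeholders, not Wasmi's actual definitions:

```rust
/// Illustrative register index; Wasmi's real type differs.
pub struct Reg(pub u16);

/// Sketch of `Instruction` with SIMD variants gated behind the `simd` feature.
pub enum Instruction {
    // ... scalar instructions ...
    I32Add { result: Reg, lhs: Reg, rhs: Reg },
    /// Wasm `i32x4.add`: lane-wise addition of two `v128` values.
    #[cfg(feature = "simd")]
    I32x4Add { result: Reg, lhs: Reg, rhs: Reg },
    /// Wasm `i8x16.splat`: broadcasts an `i32` into all 16 lanes.
    #[cfg(feature = "simd")]
    I8x16Splat { result: Reg, value: Reg },
}
```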

🟡 means that the Wasm instruction has no Wasmi instruction counterpart by design.

Status Wasm simd Instruction
🟡 v128.const(imm: ImmByte[16]) -> v128
i8x16.splat(x: i32) -> v128
i16x8.splat(x: i32) -> v128
i32x4.splat(x: i32) -> v128
i64x2.splat(x: i64) -> v128
f32x4.splat(x: f32) -> v128
f64x2.splat(x: f64) -> v128
i8x16.extract_lane_s(a: v128, imm: ImmLaneIdx16) -> i32
i8x16.extract_lane_u(a: v128, imm: ImmLaneIdx16) -> i32
i16x8.extract_lane_s(a: v128, imm: ImmLaneIdx8) -> i32
i16x8.extract_lane_u(a: v128, imm: ImmLaneIdx8) -> i32
i32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> i32
i64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> i64
f32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> f32
f64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> f64
i8x16.replace_lane(a: v128, imm: ImmLaneIdx16, x: i32) -> v128
i16x8.replace_lane(a: v128, imm: ImmLaneIdx8, x: i32) -> v128
i32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: i32) -> v128
i64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: i64) -> v128
f32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: f32) -> v128
f64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: f64) -> v128
i8x16.shuffle(a: v128, b: v128, imm: ImmLaneIdx32[16]) -> v128
i8x16.swizzle(a: v128, s: v128) -> v128
i8x16.add(a: v128, b: v128) -> v128
i16x8.add(a: v128, b: v128) -> v128
i32x4.add(a: v128, b: v128) -> v128
i64x2.add(a: v128, b: v128) -> v128
i8x16.sub(a: v128, b: v128) -> v128
i16x8.sub(a: v128, b: v128) -> v128
i32x4.sub(a: v128, b: v128) -> v128
i64x2.sub(a: v128, b: v128) -> v128
i16x8.mul(a: v128, b: v128) -> v128
i32x4.mul(a: v128, b: v128) -> v128
i64x2.mul(a: v128, b: v128) -> v128
i32x4.dot_i16x8_s(a: v128, b: v128) -> v128
i8x16.neg(a: v128) -> v128
i16x8.neg(a: v128) -> v128
i32x4.neg(a: v128) -> v128
i64x2.neg(a: v128) -> v128
i16x8.extmul_low_i8x16_s(a: v128, b: v128) -> v128
i16x8.extmul_high_i8x16_s(a: v128, b: v128) -> v128
i16x8.extmul_low_i8x16_u(a: v128, b: v128) -> v128
i16x8.extmul_high_i8x16_u(a: v128, b: v128) -> v128
i32x4.extmul_low_i16x8_s(a: v128, b: v128) -> v128
i32x4.extmul_high_i16x8_s(a: v128, b: v128) -> v128
i32x4.extmul_low_i16x8_u(a: v128, b: v128) -> v128
i32x4.extmul_high_i16x8_u(a: v128, b: v128) -> v128
i64x2.extmul_low_i32x4_s(a: v128, b: v128) -> v128
i64x2.extmul_high_i32x4_s(a: v128, b: v128) -> v128
i64x2.extmul_low_i32x4_u(a: v128, b: v128) -> v128
i64x2.extmul_high_i32x4_u(a: v128, b: v128) -> v128
i16x8.extadd_pairwise_i8x16_s(a: v128) -> v128
i16x8.extadd_pairwise_i8x16_u(a: v128) -> v128
i32x4.extadd_pairwise_i16x8_s(a: v128) -> v128
i32x4.extadd_pairwise_i16x8_u(a: v128) -> v128
i8x16.add_sat_s(a: v128, b: v128) -> v128
i8x16.add_sat_u(a: v128, b: v128) -> v128
i16x8.add_sat_s(a: v128, b: v128) -> v128
i16x8.add_sat_u(a: v128, b: v128) -> v128
i8x16.sub_sat_s(a: v128, b: v128) -> v128
i8x16.sub_sat_u(a: v128, b: v128) -> v128
i16x8.sub_sat_s(a: v128, b: v128) -> v128
i16x8.sub_sat_u(a: v128, b: v128) -> v128
i16x8.q15mulr_sat_s(a: v128, b: v128) -> v128
i8x16.min_s(a: v128, b: v128) -> v128
i8x16.min_u(a: v128, b: v128) -> v128
i16x8.min_s(a: v128, b: v128) -> v128
i16x8.min_u(a: v128, b: v128) -> v128
i32x4.min_s(a: v128, b: v128) -> v128
i32x4.min_u(a: v128, b: v128) -> v128
i8x16.max_s(a: v128, b: v128) -> v128
i8x16.max_u(a: v128, b: v128) -> v128
i16x8.max_s(a: v128, b: v128) -> v128
i16x8.max_u(a: v128, b: v128) -> v128
i32x4.max_s(a: v128, b: v128) -> v128
i32x4.max_u(a: v128, b: v128) -> v128
i8x16.avgr_u(a: v128, b: v128) -> v128
i16x8.avgr_u(a: v128, b: v128) -> v128
i8x16.abs(a: v128) -> v128
i16x8.abs(a: v128) -> v128
i32x4.abs(a: v128) -> v128
i64x2.abs(a: v128) -> v128
i8x16.shl(a: v128, y: i32) -> v128
i16x8.shl(a: v128, y: i32) -> v128
i32x4.shl(a: v128, y: i32) -> v128
i64x2.shl(a: v128, y: i32) -> v128
i8x16.shr_s(a: v128, y: i32) -> v128
i8x16.shr_u(a: v128, y: i32) -> v128
i16x8.shr_s(a: v128, y: i32) -> v128
i16x8.shr_u(a: v128, y: i32) -> v128
i32x4.shr_s(a: v128, y: i32) -> v128
i32x4.shr_u(a: v128, y: i32) -> v128
i64x2.shr_s(a: v128, y: i32) -> v128
i64x2.shr_u(a: v128, y: i32) -> v128
v128.and(a: v128, b: v128) -> v128
v128.or(a: v128, b: v128) -> v128
v128.xor(a: v128, b: v128) -> v128
v128.not(a: v128) -> v128
v128.andnot(a: v128, b: v128) -> v128
v128.bitselect(v1: v128, v2: v128, c: v128) -> v128
i8x16.popcnt(v: v128) -> v128
v128.any_true(a: v128) -> i32
i8x16.all_true(a: v128) -> i32
i16x8.all_true(a: v128) -> i32
i32x4.all_true(a: v128) -> i32
i64x2.all_true(a: v128) -> i32
i8x16.bitmask(a: v128) -> i32
i16x8.bitmask(a: v128) -> i32
i32x4.bitmask(a: v128) -> i32
i64x2.bitmask(a: v128) -> i32
i8x16.eq(a: v128, b: v128) -> v128
i16x8.eq(a: v128, b: v128) -> v128
i32x4.eq(a: v128, b: v128) -> v128
i64x2.eq(a: v128, b: v128) -> v128
f32x4.eq(a: v128, b: v128) -> v128
f64x2.eq(a: v128, b: v128) -> v128
i8x16.ne(a: v128, b: v128) -> v128
i16x8.ne(a: v128, b: v128) -> v128
i32x4.ne(a: v128, b: v128) -> v128
i64x2.ne(a: v128, b: v128) -> v128
f32x4.ne(a: v128, b: v128) -> v128
f64x2.ne(a: v128, b: v128) -> v128
i8x16.lt_s(a: v128, b: v128) -> v128
i8x16.lt_u(a: v128, b: v128) -> v128
i16x8.lt_s(a: v128, b: v128) -> v128
i16x8.lt_u(a: v128, b: v128) -> v128
i32x4.lt_s(a: v128, b: v128) -> v128
i32x4.lt_u(a: v128, b: v128) -> v128
i64x2.lt_s(a: v128, b: v128) -> v128
f32x4.lt(a: v128, b: v128) -> v128
f64x2.lt(a: v128, b: v128) -> v128
i8x16.le_s(a: v128, b: v128) -> v128
i8x16.le_u(a: v128, b: v128) -> v128
i16x8.le_s(a: v128, b: v128) -> v128
i16x8.le_u(a: v128, b: v128) -> v128
i32x4.le_s(a: v128, b: v128) -> v128
i32x4.le_u(a: v128, b: v128) -> v128
i64x2.le_s(a: v128, b: v128) -> v128
f32x4.le(a: v128, b: v128) -> v128
f64x2.le(a: v128, b: v128) -> v128
i8x16.gt_s(a: v128, b: v128) -> v128
i8x16.gt_u(a: v128, b: v128) -> v128
i16x8.gt_s(a: v128, b: v128) -> v128
i16x8.gt_u(a: v128, b: v128) -> v128
i32x4.gt_s(a: v128, b: v128) -> v128
i32x4.gt_u(a: v128, b: v128) -> v128
i64x2.gt_s(a: v128, b: v128) -> v128
f32x4.gt(a: v128, b: v128) -> v128
f64x2.gt(a: v128, b: v128) -> v128
i8x16.ge_s(a: v128, b: v128) -> v128
i8x16.ge_u(a: v128, b: v128) -> v128
i16x8.ge_s(a: v128, b: v128) -> v128
i16x8.ge_u(a: v128, b: v128) -> v128
i32x4.ge_s(a: v128, b: v128) -> v128
i32x4.ge_u(a: v128, b: v128) -> v128
i64x2.ge_s(a: v128, b: v128) -> v128
f32x4.ge(a: v128, b: v128) -> v128
f64x2.ge(a: v128, b: v128) -> v128
v128.load(m: memarg) -> v128
v128.load32_zero(m: memarg) -> v128
v128.load64_zero(m: memarg) -> v128
v128.load8_splat(m: memarg) -> v128
v128.load16_splat(m: memarg) -> v128
v128.load32_splat(m: memarg) -> v128
v128.load64_splat(m: memarg) -> v128
v128.load8_lane(m: memarg, x: v128, imm: ImmLaneIdx16) -> v128
v128.load16_lane(m: memarg, x: v128, imm: ImmLaneIdx8) -> v128
v128.load32_lane(m: memarg, x: v128, imm: ImmLaneIdx4) -> v128
v128.load64_lane(m: memarg, x: v128, imm: ImmLaneIdx2) -> v128
v128.load8x8_s(m: memarg) -> v128
v128.load8x8_u(m: memarg) -> v128
v128.load16x4_s(m: memarg) -> v128
v128.load16x4_u(m: memarg) -> v128
v128.load32x2_s(m: memarg) -> v128
v128.load32x2_u(m: memarg) -> v128
v128.store(m: memarg, data: v128)
v128.store8_lane(m: memarg, data: v128, imm: ImmLaneIdx16)
v128.store16_lane(m: memarg, data: v128, imm: ImmLaneIdx8)
v128.store32_lane(m: memarg, data: v128, imm: ImmLaneIdx4)
v128.store64_lane(m: memarg, data: v128, imm: ImmLaneIdx2)
f32x4.neg(a: v128) -> v128
f64x2.neg(a: v128) -> v128
f32x4.abs(a: v128) -> v128
f64x2.abs(a: v128) -> v128
f32x4.min(a: v128, b: v128) -> v128
f64x2.min(a: v128, b: v128) -> v128
f32x4.max(a: v128, b: v128) -> v128
f64x2.max(a: v128, b: v128) -> v128
f32x4.pmin(a: v128, b: v128) -> v128
f64x2.pmin(a: v128, b: v128) -> v128
f32x4.pmax(a: v128, b: v128) -> v128
f64x2.pmax(a: v128, b: v128) -> v128
f32x4.add(a: v128, b: v128) -> v128
f64x2.add(a: v128, b: v128) -> v128
f32x4.sub(a: v128, b: v128) -> v128
f64x2.sub(a: v128, b: v128) -> v128
f32x4.div(a: v128, b: v128) -> v128
f64x2.div(a: v128, b: v128) -> v128
f32x4.mul(a: v128, b: v128) -> v128
f64x2.mul(a: v128, b: v128) -> v128
f32x4.sqrt(a: v128) -> v128
f64x2.sqrt(a: v128) -> v128
f32x4.ceil(a: v128) -> v128
f64x2.ceil(a: v128) -> v128
f32x4.floor(a: v128) -> v128
f64x2.floor(a: v128) -> v128
f32x4.trunc(a: v128) -> v128
f64x2.trunc(a: v128) -> v128
f32x4.nearest(a: v128) -> v128
f64x2.nearest(a: v128) -> v128
f32x4.convert_i32x4_s(a: v128) -> v128
f32x4.convert_i32x4_u(a: v128) -> v128
f64x2.convert_low_i32x4_s(a: v128) -> v128
f64x2.convert_low_i32x4_u(a: v128) -> v128
i32x4.trunc_sat_f32x4_s(a: v128) -> v128
i32x4.trunc_sat_f32x4_u(a: v128) -> v128
i32x4.trunc_sat_f64x2_s_zero(a: v128) -> v128
i32x4.trunc_sat_f64x2_u_zero(a: v128) -> v128
f32x4.demote_f64x2_zero(a: v128) -> v128
f64x2.promote_low_f32x4(a: v128) -> v128
i8x16.narrow_i16x8_s(a: v128, b: v128) -> v128
i8x16.narrow_i16x8_u(a: v128, b: v128) -> v128
i16x8.narrow_i32x4_s(a: v128, b: v128) -> v128
i16x8.narrow_i32x4_u(a: v128, b: v128) -> v128
i16x8.extend_low_i8x16_s(a: v128) -> v128
i16x8.extend_high_i8x16_s(a: v128) -> v128
i16x8.extend_low_i8x16_u(a: v128) -> v128
i16x8.extend_high_i8x16_u(a: v128) -> v128
i32x4.extend_low_i16x8_s(a: v128) -> v128
i32x4.extend_high_i16x8_s(a: v128) -> v128
i32x4.extend_low_i16x8_u(a: v128) -> v128
i32x4.extend_high_i16x8_u(a: v128) -> v128
i64x2.extend_low_i32x4_s(a: v128) -> v128
i64x2.extend_high_i32x4_s(a: v128) -> v128
i64x2.extend_low_i32x4_u(a: v128) -> v128
i64x2.extend_high_i32x4_u(a: v128) -> v128

@codecov codecov bot commented Mar 21, 2025

Codecov Report

Attention: Patch coverage is 0% with 5 lines in your changes missing coverage. Please review.

Project coverage is 69.35%. Comparing base (dbcd4ba) to head (6c56e3a).
Report is 1 commit behind head on main.

| Files with missing lines | Patch % | Lines |
| --- | --- | --- |
| crates/ir/src/primitive.rs | 0.00% | 4 Missing ⚠️ |
| crates/wasmi/src/engine/executor/instrs.rs | 0.00% | 1 Missing ⚠️ |
Additional details and impacted files
@@            Coverage Diff             @@
##             main    #1409      +/-   ##
==========================================
- Coverage   69.36%   69.35%   -0.01%     
==========================================
  Files         158      158              
  Lines       14698    14702       +4     
==========================================
+ Hits        10195    10197       +2     
- Misses       4503     4505       +2     

@Robbepop Robbepop changed the title wasmi_ir: add support for Wasm simd proposal wasmi_ir: add support for the Wasm simd proposal Mar 21, 2025
@Robbepop Robbepop (Member, Author) commented Mar 22, 2025

The PR now implements all of the Wasm simd proposal instructions in Wasmi IR.
However, due to the way this PR adds the instructions, compile times have exploded even without --features simd.
This can be fixed by changing how the for_each_instruction macro works; doing so is required as a follow-up to this PR.
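
For context, a for_each_instruction-style macro typically hands the full instruction list to a caller-supplied macro, so every call site re-expands the whole list. The shape below is a minimal sketch with made-up variant names, not Wasmi's actual macro:

```rust
// Minimal sketch of a callback-style instruction-list macro.
// Every use of `for_each_instruction!` re-expands the entire list,
// which is why a large SIMD extension can inflate compile times.
macro_rules! for_each_instruction {
    ($mac:ident) => {
        $mac! {
            // scalar instructions ...
            I32Add { result: Reg, lhs: Reg, rhs: Reg };
            // SIMD instructions, only meaningful with the `simd` feature ...
            #[cfg(feature = "simd")]
            I32x4Add { result: Reg, lhs: Reg, rhs: Reg };
        }
    };
}
```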

Additionally, Wasmi may implement some lowerings in the future, for example lowering i32x4.ge_s a b to i32x4.le_s b a (swapping the operands), as has already been done for the scalar types. With these lowerings we would shrink the number of SIMD-related Wasmi IR instructions a bit.
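
A minimal sketch of such an operand-swap lowering, with illustrative type and variant names rather than Wasmi's actual translator API:

```rust
/// Illustrative register index; Wasmi's real type differs.
pub struct Reg(pub u16);

/// Illustrative instruction subset for this sketch.
pub enum Instruction {
    I32x4LeS { result: Reg, lhs: Reg, rhs: Reg },
}

/// Lowers `i32x4.ge_s lhs rhs` without a dedicated `ge_s` variant:
/// lane-wise, `lhs >= rhs` holds exactly when `rhs <= lhs`, so we
/// reuse the `le_s` instruction with swapped operands.
fn lower_i32x4_ge_s(result: Reg, lhs: Reg, rhs: Reg) -> Instruction {
    Instruction::I32x4LeS { result, lhs: rhs, rhs: lhs }
}
```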

@Robbepop Robbepop merged commit 82836a5 into main Mar 22, 2025
17 of 19 checks passed
@Robbepop Robbepop deleted the rf-wasmi_ir-add-simd-instructions branch March 22, 2025 13:52