diff --git a/JSTests/wasm/ipint-tests/ipint-test-leb-decode.js b/JSTests/wasm/ipint-tests/ipint-test-leb-decode.js
deleted file mode 100644
index 20a9cbfb7655..000000000000
--- a/JSTests/wasm/ipint-tests/ipint-test-leb-decode.js
+++ /dev/null
@@ -1,296 +0,0 @@
-// Comprehensive LEB128 decode verification for IPInt
-// Tests i32.const (signed LEB128 i32) and i64.const (signed LEB128 i64)
-
-// Helper: build a wasm module that returns a constant
-function makeI32ConstModule(bytes) {
-    // (func (result i32) (i32.const <bytes>) )
-    let code = [0x00, 0x41, ...bytes, 0x0b]; // no locals, i32.const, end
-    let funcBody = [code.length, ...code];
-    let wasmBytes = new Uint8Array([
-        0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
-        0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7f,
-        0x03, 0x02, 0x01, 0x00,
-        0x07, 0x05, 0x01, 0x01, 0x66, 0x00, 0x00,
-        0x0a, funcBody.length + 1, 0x01, ...funcBody
-    ]);
-    return new WebAssembly.Instance(new WebAssembly.Module(wasmBytes.buffer));
-}
-
-function makeI64ConstModule(bytes) {
-    // (func (result i64) (i64.const <bytes>) )
-    let code = [0x00, 0x42, ...bytes, 0x0b]; // no locals, i64.const, end
-    let funcBody = [code.length, ...code];
-    let wasmBytes = new Uint8Array([
-        0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
-        0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7e,
-        0x03, 0x02, 0x01, 0x00,
-        0x07, 0x05, 0x01, 0x01, 0x66, 0x00, 0x00,
-        0x0a, funcBody.length + 1, 0x01, ...funcBody
-    ]);
-    return new WebAssembly.Instance(new WebAssembly.Module(wasmBytes.buffer));
-}
-
-// Encode signed LEB128 for i32
-function encodeSLEB128_i32(value) {
-    value = value | 0; // ensure i32
-    let bytes = [];
-    while (true) {
-        let b = value & 0x7f;
-        value >>= 7;
-        if ((value === 0 && (b & 0x40) === 0) || (value === -1 && (b & 0x40) !== 0)) {
-            bytes.push(b);
-            break;
-        }
-        bytes.push(b | 0x80);
-    }
-    return bytes;
-}
-
-// Encode signed LEB128 for i64 (using BigInt)
-function encodeSLEB128_i64(value) {
-    value = BigInt(value);
-    let bytes = [];
-    while (true) {
-        let b = Number(value & 0x7fn);
-        value >>= 7n;
-        if ((value === 0n && (b & 0x40) === 0) || (value === -1n && (b & 0x40) !== 0)) {
-            bytes.push(b);
-            break;
-        }
-        bytes.push(b | 0x80);
-    }
-    return bytes;
-}
-
-let failures = 0;
-
-function testI32(value, label) {
-    let bytes = encodeSLEB128_i32(value);
-    let m = makeI32ConstModule(bytes);
-    let result = m.exports.f();
-    let expected = value | 0;
-    if (result !== expected) {
-        print("FAIL i32 " + label + ": value=" + value + " bytes=[" + bytes.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result);
-        failures++;
-    }
-}
-
-function testI64(value, label) {
-    let bytes = encodeSLEB128_i64(value);
-    let m = makeI64ConstModule(bytes);
-    let result = m.exports.f();
-    let expected = BigInt(value);
-    if (result !== expected) {
-        print("FAIL i64 " + label + ": value=" + value + " bytes=[" + bytes.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result);
-        failures++;
-    }
-}
-
-// === i32.const tests ===
-
-// Single byte (1 byte LEB128): values -64 to 63
-testI32(0, "zero");
-testI32(1, "one");
-testI32(-1, "neg_one");
-testI32(63, "max_single_pos");      // 0x3F — last positive single-byte
-testI32(-64, "min_single_neg");     // 0x40 — last negative single-byte
-testI32(42, "42");
-
-// Two bytes: values -8192 to 8191
-testI32(64, "first_two_byte_pos");  // 0xC0 0x00
-testI32(-65, "first_two_byte_neg"); // 0xBF 0x7F
-testI32(127, "127");
-testI32(-128, "-128");
-testI32(128, "128");
-testI32(8191, "max_two_byte_pos");
-testI32(-8192, "min_two_byte_neg");
-
-// Three bytes
-testI32(8192, "first_three_byte");
-testI32(-8193, "first_three_byte_neg");
-testI32(65535, "65535");
-testI32(-65535, "-65535");
-
-// Four bytes
-testI32(1048576, "1M");
-testI32(-1048576, "-1M");
-testI32(16777215, "16M-1");
-
-// Five bytes (max for i32)
-testI32(2147483647, "INT32_MAX");
-testI32(-2147483648, "INT32_MIN");
-testI32(1717661556, "1717661556");   // The value that was failing before
-testI32(-1923807898, "-1923807898");
-testI32(219737259, "219737259");
-testI32(2371159398 | 0, "2371159398_as_i32"); // wraps to negative
-
-// === i64.const tests ===
-
-// Single byte
-testI64(0n, "zero");
-testI64(1n, "one");
-testI64(-1n, "neg_one");
-testI64(63n, "max_single_pos");
-testI64(-64n, "min_single_neg");
-
-// Multi-byte
-testI64(64n, "first_two_byte");
-testI64(-65n, "first_two_byte_neg");
-testI64(128n, "128");
-testI64(-128n, "-128");
-
-// Large positive
-testI64(2147483647n, "INT32_MAX");
-testI64(2147483648n, "INT32_MAX+1");
-testI64(4294967295n, "UINT32_MAX");
-testI64(4294967296n, "UINT32_MAX+1");
-
-// Large negative
-testI64(-2147483648n, "INT32_MIN");
-testI64(-2147483649n, "INT32_MIN-1");
-
-// 64-bit range
-testI64(9223372036854775807n, "INT64_MAX");
-testI64(-9223372036854775808n, "INT64_MIN");
-testI64(18231657398634828518n - (1n << 64n), "large_neg_from_test"); // The value from the failing test
-testI64(5825195283807165538n, "large_pos_from_test");
-
-// Values near sign extension boundaries (shift = 7, 14, 21, 28, 35, 42, 49, 56, 63)
-testI64(0x40n, "sign_bit_shift7");         // bit 6 set at shift 0
-testI64(0x2000n, "sign_bit_shift14");      // bit 13 set
-testI64(0x100000n, "sign_bit_shift21");
-testI64(0x8000000n, "sign_bit_shift28");
-testI64(0x400000000n, "sign_bit_shift35");
-testI64(-0x40n, "neg_sign_bit_shift7");
-testI64(-0x2000n, "neg_sign_bit_shift14");
-testI64(-0x100000n, "neg_sign_bit_shift21");
-testI64(-0x8000000n, "neg_sign_bit_shift28");
-testI64(-0x400000000n, "neg_sign_bit_shift35");
-
-// === Non-canonical (padded) LEB128 tests ===
-// Non-canonical LEBs use more bytes than necessary by adding redundant
-// continuation bytes with sign-extension padding. These are valid per the
-// WebAssembly spec and must decode to the same value.
-
-// Pad a canonical signed LEB128 encoding to targetLength bytes.
-// For non-negative values (bit 6 of last byte clear), pad with 0x00.
-// For negative values (bit 6 of last byte set), pad with 0x7f.
-function padSLEB128(canonicalBytes, targetLength) {
-    if (canonicalBytes.length >= targetLength)
-        return canonicalBytes;
-    let padded = [...canonicalBytes];
-    while (padded.length < targetLength) {
-        let lastByte = padded[padded.length - 1];
-        let signExtByte = (lastByte & 0x40) ? 0x7f : 0x00;
-        padded[padded.length - 1] = lastByte | 0x80; // set continuation bit
-        padded.push(signExtByte); // sign-extension terminator
-    }
-    return padded;
-}
-
-function testI32Padded(value, padLen, label) {
-    let canonical = encodeSLEB128_i32(value);
-    let padded = padSLEB128(canonical, padLen);
-    let m = makeI32ConstModule(padded);
-    let result = m.exports.f();
-    let expected = value | 0;
-    if (result !== expected) {
-        print("FAIL i32 padded " + label + ": value=" + value + " canonical=[" + canonical.map(b=>"0x"+b.toString(16)).join(",") + "] padded=[" + padded.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result);
-        failures++;
-    }
-}
-
-function testI64Padded(value, padLen, label) {
-    let canonical = encodeSLEB128_i64(value);
-    let padded = padSLEB128(canonical, padLen);
-    let m = makeI64ConstModule(padded);
-    let result = m.exports.f();
-    let expected = BigInt(value);
-    if (result !== expected) {
-        print("FAIL i64 padded " + label + ": value=" + value + " canonical=[" + canonical.map(b=>"0x"+b.toString(16)).join(",") + "] padded=[" + padded.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result);
-        failures++;
-    }
-}
-
-// i32 non-canonical: pad up to 5 bytes (max for i32 signed LEB128)
-
-// Zero with all possible padding lengths
-testI32Padded(0, 2, "zero_2byte");    // [0x80, 0x00] instead of [0x00]
-testI32Padded(0, 3, "zero_3byte");    // [0x80, 0x80, 0x00]
-testI32Padded(0, 4, "zero_4byte");    // [0x80, 0x80, 0x80, 0x00]
-testI32Padded(0, 5, "zero_5byte");    // [0x80, 0x80, 0x80, 0x80, 0x00]
-
-// -1 with all possible padding lengths
-testI32Padded(-1, 2, "neg1_2byte");   // [0xff, 0x7f] instead of [0x7f]
-testI32Padded(-1, 3, "neg1_3byte");   // [0xff, 0xff, 0x7f]
-testI32Padded(-1, 4, "neg1_4byte");   // [0xff, 0xff, 0xff, 0x7f]
-testI32Padded(-1, 5, "neg1_5byte");   // [0xff, 0xff, 0xff, 0xff, 0x7f]
-
-// Small positive values padded to max
-testI32Padded(1, 5, "one_5byte");
-testI32Padded(42, 5, "42_5byte");
-testI32Padded(63, 5, "63_5byte");      // max single-byte positive, padded
-
-// Small negative values padded to max
-testI32Padded(-2, 5, "neg2_5byte");
-testI32Padded(-64, 5, "neg64_5byte");  // max single-byte negative, padded
-
-// Two-byte canonical values padded to various lengths
-testI32Padded(64, 3, "64_3byte");
-testI32Padded(64, 5, "64_5byte");
-testI32Padded(-65, 3, "neg65_3byte");
-testI32Padded(-65, 5, "neg65_5byte");
-testI32Padded(127, 5, "127_5byte");
-testI32Padded(-128, 5, "neg128_5byte");
-testI32Padded(8191, 5, "8191_5byte");
-testI32Padded(-8192, 5, "neg8192_5byte");
-
-// Three-byte canonical values padded
-testI32Padded(8192, 4, "8192_4byte");
-testI32Padded(8192, 5, "8192_5byte");
-testI32Padded(-8193, 5, "neg8193_5byte");
-testI32Padded(65535, 5, "65535_5byte");
-
-// Four-byte canonical values padded to 5
-testI32Padded(1048576, 5, "1M_5byte");
-testI32Padded(-1048576, 5, "neg1M_5byte");
-
-// i64 non-canonical: pad up to 10 bytes (max for i64 signed LEB128)
-
-// Zero with various padding lengths
-testI64Padded(0n, 2, "zero_2byte");
-testI64Padded(0n, 5, "zero_5byte");
-testI64Padded(0n, 10, "zero_10byte");
-
-// -1 with various padding lengths
-testI64Padded(-1n, 2, "neg1_2byte");
-testI64Padded(-1n, 5, "neg1_5byte");
-testI64Padded(-1n, 10, "neg1_10byte");
-
-// Small values padded to max
-testI64Padded(1n, 10, "one_10byte");
-testI64Padded(-2n, 10, "neg2_10byte");
-testI64Padded(63n, 10, "63_10byte");
-testI64Padded(-64n, 10, "neg64_10byte");
-
-// Values that cross byte boundaries, padded
-testI64Padded(64n, 10, "64_10byte");
-testI64Padded(-65n, 10, "neg65_10byte");
-testI64Padded(8192n, 10, "8192_10byte");
-testI64Padded(-8193n, 10, "neg8193_10byte");
-
-// 32-bit range values padded in 64-bit encoding
-testI64Padded(2147483647n, 10, "INT32_MAX_10byte");
-testI64Padded(-2147483648n, 10, "INT32_MIN_10byte");
-testI64Padded(4294967295n, 10, "UINT32_MAX_10byte");
-
-// Large 64-bit values padded (these are already 9-10 byte canonical, but test padding where possible)
-testI64Padded(0x400000000n, 10, "shift35_10byte");
-testI64Padded(-0x400000000n, 10, "neg_shift35_10byte");
-testI64Padded(0x20000000000n, 10, "shift42_10byte");
-testI64Padded(-0x20000000000n, 10, "neg_shift42_10byte");
-
-if (failures !== 0) {
-    print(failures + " tests FAILED");
-    throw new Error(failures + " LEB128 decode failures");
-}
diff --git a/JSTests/wasm/ipint-tests/ipint-test-simd-memory.js b/JSTests/wasm/ipint-tests/ipint-test-simd-memory.js
deleted file mode 100644
index e16f161e5fb8..000000000000
--- a/JSTests/wasm/ipint-tests/ipint-test-simd-memory.js
+++ /dev/null
@@ -1,141 +0,0 @@
-import { instantiate } from "../wabt-wrapper.js"
-import * as assert from "../assert.js"
-
-// Test SIMD memory operations through the fast/slow path memarg parsing.
-
-let wat = `
-(module
-    (memory 1)
-
-    ;; Initialize memory with known pattern
-    (data (i32.const 0) "\\01\\02\\03\\04\\05\\06\\07\\08\\09\\0a\\0b\\0c\\0d\\0e\\0f\\10\\11\\12\\13\\14\\15\\16\\17\\18\\19\\1a\\1b\\1c\\1d\\1e\\1f\\20")
-
-    ;; v128.load + v128.store round-trip
-    (func (export "test_load_store") (result i32)
-        ;; Load v128 from offset 0, store to offset 16
-        (v128.store (i32.const 16) (v128.load (i32.const 0)))
-        ;; Verify by extracting lane 0 from reloaded value
-        (i32x4.extract_lane 0 (v128.load (i32.const 16)))
-    )
-
-    ;; v128.load8_splat
-    (func (export "test_load8_splat") (result i32)
-        ;; Load byte at offset 4 (value 0x05) and splat
-        (i8x16.extract_lane_u 7 (v128.load8_splat (i32.const 4)))
-    )
-
-    ;; v128.load16_splat
-    (func (export "test_load16_splat") (result i32)
-        ;; Load halfword at offset 0 (value 0x0201) and splat
-        (i16x8.extract_lane_u 3 (v128.load16_splat (i32.const 0)))
-    )
-
-    ;; v128.load32_splat
-    (func (export "test_load32_splat") (result i32)
-        ;; Load word at offset 0 (value 0x04030201) and splat
-        (i32x4.extract_lane 2 (v128.load32_splat (i32.const 0)))
-    )
-
-    ;; v128.load64_splat
-    (func (export "test_load64_splat") (result i64)
-        ;; Load doubleword at offset 0 and splat
-        (i64x2.extract_lane 1 (v128.load64_splat (i32.const 0)))
-    )
-
-    ;; v128.load8x8_s (sign extend 8->16)
-    (func (export "test_load8x8s") (result i32)
-        ;; Load 8 bytes from offset 0, sign-extend to i16x8
-        (i16x8.extract_lane_u 0 (v128.load8x8_s (i32.const 0)))
-    )
-
-    ;; v128.load32_zero
-    (func (export "test_load32_zero") (result i32)
-        ;; Load 32-bit from offset 0, zero upper
-        (i32x4.extract_lane 1 (v128.load32_zero (i32.const 0)))
-    )
-
-    ;; v128.load8_lane
-    (func (export "test_load8_lane") (result i32)
-        (v128.load8_lane 3 (i32.const 5) (v128.const i32x4 0 0 0 0))  ;; Load byte at addr 5 (value 0x06), replace lane 3
-        (i8x16.extract_lane_u 3)
-    )
-
-    ;; v128.store8_lane
-    (func (export "test_store8_lane") (result i32)
-        ;; Store lane 2 of a known vector to memory address 31
-        (v128.store8_lane 2 (i32.const 31) (v128.const i8x16 0x41 0x42 0x43 0x44 0 0 0 0 0 0 0 0 0 0 0 0))
-        ;; Read back
-        (i32.load8_u (i32.const 31))
-    )
-
-    ;; v128.load16_lane
-    (func (export "test_load16_lane") (result i32)
-        (v128.load16_lane 1 (i32.const 2) (v128.const i32x4 0 0 0 0))  ;; Load halfword at addr 2 (value 0x0403), replace lane 1
-        (i16x8.extract_lane_u 1)
-    )
-
-    ;; v128.store16_lane
-    (func (export "test_store16_lane") (result i32)
-        (v128.store16_lane 1 (i32.const 30) (v128.const i16x8 0x1234 0x5678 0 0 0 0 0 0))
-        (i32.load16_u (i32.const 30))
-    )
-
-    ;; v128.load32_lane
-    (func (export "test_load32_lane") (result i32)
-        (v128.load32_lane 2 (i32.const 0) (v128.const i32x4 0 0 0 0))  ;; Load word at addr 0 (0x04030201), replace lane 2
-        (i32x4.extract_lane 2)
-    )
-
-    ;; v128.store32_lane
-    (func (export "test_store32_lane") (result i32)
-        (v128.store32_lane 0 (i32.const 28) (v128.const i32x4 0xDEADBEEF 0 0 0))
-        (i32.load (i32.const 28))
-    )
-)
-`
-
-async function test() {
-    const instance = await instantiate(wat, {});
-    const e = instance.exports
-
-    // v128.load + v128.store: bytes 0-3 are 0x04030201 in little-endian
-    assert.eq(e.test_load_store(), 0x04030201)
-
-    // v128.load8_splat: byte at offset 4 = 0x05
-    assert.eq(e.test_load8_splat(), 5)
-
-    // v128.load16_splat: halfword at offset 0 = 0x0201
-    assert.eq(e.test_load16_splat(), 0x0201)
-
-    // v128.load32_splat: word at offset 0 = 0x04030201
-    assert.eq(e.test_load32_splat(), 0x04030201)
-
-    // v128.load64_splat: dword at offset 0 = 0x0807060504030201
-    assert.eq(e.test_load64_splat(), 0x0807060504030201n)
-
-    // v128.load8x8_s: byte 0x01 sign-extended to i16 = 1
-    assert.eq(e.test_load8x8s(), 1)
-
-    // v128.load32_zero: upper lane should be 0
-    assert.eq(e.test_load32_zero(), 0)
-
-    // v128.load8_lane: byte at addr 5 = 0x06
-    assert.eq(e.test_load8_lane(), 6)
-
-    // v128.store8_lane: lane 2 = 0x43
-    assert.eq(e.test_store8_lane(), 0x43)
-
-    // v128.load16_lane: halfword at addr 2 = 0x0403
-    assert.eq(e.test_load16_lane(), 0x0403)
-
-    // v128.store16_lane: lane 1 = 0x5678
-    assert.eq(e.test_store16_lane(), 0x5678)
-
-    // v128.load32_lane: word at addr 0 = 0x04030201
-    assert.eq(e.test_load32_lane(), 0x04030201)
-
-    // v128.store32_lane: lane 0 = 0xDEADBEEF
-    assert.eq(e.test_store32_lane(), 0xDEADBEEF | 0)
-}
-
-await assert.asyncTest(test())
diff --git a/JSTests/wasm/ipint-tests/ipint-test-simd-multi-byte-leb.js b/JSTests/wasm/ipint-tests/ipint-test-simd-multi-byte-leb.js
deleted file mode 100644
index f491bcea9a81..000000000000
--- a/JSTests/wasm/ipint-tests/ipint-test-simd-multi-byte-leb.js
+++ /dev/null
@@ -1,160 +0,0 @@
-import * as assert from "../assert.js"
-
-// Test SIMD instructions with padded (non-minimal) LEB128 opcode encoding.
-// The simd_prefix handler decodes the opcode LEB128 into t4, so t4 correctly
-// points past however many bytes the opcode takes. Before this fix, the code
-// used a hardcoded offset from PC (ImmLaneIdxOffset = 2), which assumed
-// a 1-byte opcode. With padded LEB128, the opcode is 2+ bytes, making the
-// hardcoded offset wrong. This test verifies the t4-based approach works.
-
-function buildModule(codeBody) {
-    // Minimal wasm module: 1 type (() -> i32), 1 func, 1 export "f"
-    const header = [0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00];
-    const typeSection = [0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7f]; // () -> i32
-    const funcSection = [0x03, 0x02, 0x01, 0x00];
-    const exportSection = [0x07, 0x05, 0x01, 0x01, 0x66, 0x00, 0x00]; // "f"
-
-    const bodyWithLocals = [0x00, ...codeBody, 0x0b]; // 0 locals, body, end
-    const bodyLen = bodyWithLocals.length;
-    const codeSection = [0x0a, ...uleb128(bodyLen + 1 + uleb128(bodyLen).length), 0x01, ...uleb128(bodyLen), ...bodyWithLocals];
-
-    return new Uint8Array([...header, ...typeSection, ...funcSection, ...exportSection, ...codeSection]);
-}
-
-function uleb128(value) {
-    const bytes = [];
-    do {
-        let byte = value & 0x7f;
-        value >>>= 7;
-        if (value !== 0) byte |= 0x80;
-        bytes.push(byte);
-    } while (value !== 0);
-    return bytes;
-}
-
-// Encode a SIMD opcode with padded LEB128 (add continuation byte).
-// Opcode 0x16 -> [0xfd, 0x96, 0x00] instead of [0xfd, 0x16]
-// Opcode 0xAE (already 2-byte: 0xAE,0x01) -> [0xfd, 0xAE, 0x81, 0x00] (3-byte padded)
-function paddedSIMDOp(opcode) {
-    // Normal LEB128 encoding, then pad with one extra continuation byte
-    const lebBytes = uleb128(opcode);
-    // Set continuation bit on the last byte, then add 0x00
-    lebBytes[lebBytes.length - 1] |= 0x80;
-    lebBytes.push(0x00);
-    return [0xfd, ...lebBytes];
-}
-
-// v128.const i32x4 with specified values
-function v128ConstI32x4(a, b, c, d) {
-    const buf = new ArrayBuffer(16);
-    const view = new DataView(buf);
-    view.setInt32(0, a, true);
-    view.setInt32(4, b, true);
-    view.setInt32(8, c, true);
-    view.setInt32(12, d, true);
-    return [0xfd, 0x0c, ...new Uint8Array(buf)];
-}
-
-// v128.const with padded opcode
-function v128ConstI32x4Padded(a, b, c, d) {
-    const buf = new ArrayBuffer(16);
-    const view = new DataView(buf);
-    view.setInt32(0, a, true);
-    view.setInt32(4, b, true);
-    view.setInt32(8, c, true);
-    view.setInt32(12, d, true);
-    return [...paddedSIMDOp(0x0c), ...new Uint8Array(buf)];
-}
-
-// i32.const with correct signed LEB128 encoding
-function i32Const(value) {
-    const bytes = [];
-    let more = true;
-    while (more) {
-        let byte = value & 0x7f;
-        value >>= 7;
-        if ((value === 0 && (byte & 0x40) === 0) || (value === -1 && (byte & 0x40) !== 0))
-            more = false;
-        else
-            byte |= 0x80;
-        bytes.push(byte);
-    }
-    return [0x41, ...bytes];
-}
-
-function instantiateAndRun(bytes) {
-    const mod = new WebAssembly.Module(bytes);
-    const inst = new WebAssembly.Instance(mod);
-    return inst.exports.f();
-}
-
-// Test 1: i8x16.extract_lane_u (opcode 0x16) with padded LEB128
-// Extracts byte at lane 4 from i32x4 [0x04030201, ...]
-// Lane 4 = first byte of second i32 = 0x05
-{
-    const bytes = buildModule([
-        ...v128ConstI32x4(0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d),
-        ...paddedSIMDOp(0x16), 0x04, // i8x16.extract_lane_u 4 (padded opcode)
-    ]);
-    assert.eq(instantiateAndRun(bytes), 5);
-}
-
-// Test 2: i32x4.extract_lane (opcode 0x1b) with padded LEB128
-{
-    const bytes = buildModule([
-        ...v128ConstI32x4(10, 20, 30, 40),
-        ...paddedSIMDOp(0x1b), 0x02, // i32x4.extract_lane 2 (padded opcode)
-    ]);
-    assert.eq(instantiateAndRun(bytes), 30);
-}
-
-// Test 3: v128.const (opcode 0x0c) with padded LEB128
-{
-    const bytes = buildModule([
-        ...v128ConstI32x4Padded(42, 0, 0, 0), // v128.const with padded opcode
-        0xfd, 0x1b, 0x00, // i32x4.extract_lane 0 (normal encoding)
-    ]);
-    assert.eq(instantiateAndRun(bytes), 42);
-}
-
-// Test 4: i8x16.replace_lane (opcode 0x17) with padded LEB128
-{
-    const bytes = buildModule([
-        ...v128ConstI32x4(0, 0, 0, 0),
-        ...i32Const(99),             // i32.const 99 (proper signed LEB128)
-        ...paddedSIMDOp(0x17), 0x02, // i8x16.replace_lane 2 (padded opcode)
-        ...paddedSIMDOp(0x16), 0x02, // i8x16.extract_lane_u 2 (padded opcode)
-    ]);
-    assert.eq(instantiateAndRun(bytes), 99);
-}
-
-// Test 5: Chain of padded ops — v128.const (padded) + i32x4.add (padded, opcode 0xAE)
-// + i32x4.extract_lane (padded)
-{
-    const bytes = buildModule([
-        ...v128ConstI32x4Padded(10, 20, 30, 40),
-        ...v128ConstI32x4Padded(1, 2, 3, 4),
-        ...paddedSIMDOp(0xAE),       // i32x4.add (padded)
-        ...paddedSIMDOp(0x1b), 0x01, // i32x4.extract_lane 1 (padded opcode)
-    ]);
-    assert.eq(instantiateAndRun(bytes), 22);
-}
-
-// Test 6: i8x16.shuffle (opcode 0x0d) with padded LEB128
-// Shuffle: take first 4 bytes from second vector, then first 12 from first.
-{
-    const shuffleImm = [
-        0x10, 0x11, 0x12, 0x13,  // lanes 16-19 (from second vector)
-        0x00, 0x01, 0x02, 0x03,  // lanes 0-3 (from first vector)
-        0x04, 0x05, 0x06, 0x07,  // lanes 4-7 (from first vector)
-        0x08, 0x09, 0x0a, 0x0b,  // lanes 8-11 (from first vector)
-    ];
-    const bytes = buildModule([
-        ...v128ConstI32x4(0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d),
-        ...v128ConstI32x4(0x44434241, 0x48474645, 0x4c4b4a49, 0x504f4e4d),
-        ...paddedSIMDOp(0x0d), ...shuffleImm,  // i8x16.shuffle (padded opcode)
-        0xfd, 0x1b, 0x00, // i32x4.extract_lane 0 (normal encoding)
-    ]);
-    // First 4 bytes of result = bytes 16-19 = first 4 bytes of second vector = 0x44434241
-    assert.eq(instantiateAndRun(bytes), 0x44434241);
-}
diff --git a/JSTests/wasm/stress/atomic-cmpxchg-large-offset.js b/JSTests/wasm/stress/atomic-cmpxchg-large-offset.js
deleted file mode 100644
index 3fe46252c1c2..000000000000
--- a/JSTests/wasm/stress/atomic-cmpxchg-large-offset.js
+++ /dev/null
@@ -1,116 +0,0 @@
-//@ skip if $architecture != "arm64" && $architecture != "x86_64"
-
-import * as assert from "../assert.js";
-import { instantiate } from "../wabt-wrapper.js";
-
-// Test atomic cmpxchg with offset >= 128 to exercise the IPInt slow path.
-// When offset >= 128 the LEB128 encoding is multi-byte (first byte >= 0x80),
-// causing loadStoreMakePointerFast to fall through to the slow path.
-// This specifically tests that expected and replacement are not swapped
-// in the slow path's push/pop save/restore sequence.
-
-let wat = `
-(module
-  (memory 1 1 shared)
-  (export "memory" (memory 0))
-
-  (func (export "i32_cmpxchg") (param i32 i32 i32) (result i32)
-    (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw.cmpxchg offset=128))
-  (func (export "i64_cmpxchg") (param i32 i64 i64) (result i64)
-    (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw.cmpxchg offset=128))
-  (func (export "i32_cmpxchg8") (param i32 i32 i32) (result i32)
-    (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw8.cmpxchg_u offset=128))
-  (func (export "i32_cmpxchg16") (param i32 i32 i32) (result i32)
-    (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw16.cmpxchg_u offset=128))
-  (func (export "i64_cmpxchg8") (param i32 i64 i64) (result i64)
-    (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw8.cmpxchg_u offset=128))
-  (func (export "i64_cmpxchg16") (param i32 i64 i64) (result i64)
-    (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw16.cmpxchg_u offset=128))
-  (func (export "i64_cmpxchg32") (param i32 i64 i64) (result i64)
-    (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw32.cmpxchg_u offset=128))
-)
-`;
-
-async function test() {
-    const instance = await instantiate(wat, {}, { threads: true });
-    const e = instance.exports;
-    const mem = new DataView(e.memory.buffer);
-
-    // The effective address is param0 + 128. We use param0=0 so effective addr=128.
-    const addr = 0;
-    const effectiveAddr = 128;
-
-    for (let i = 0; i < wasmTestLoopCount; i++) {
-        // i32.atomic.rmw.cmpxchg - success: expected matches memory
-        mem.setInt32(effectiveAddr, 10, true);
-        assert.eq(e.i32_cmpxchg(addr, 10, 42), 10);    // returns old value
-        assert.eq(mem.getInt32(effectiveAddr, true), 42); // memory updated to replacement
-
-        // i32.atomic.rmw.cmpxchg - failure: expected doesn't match memory
-        mem.setInt32(effectiveAddr, 10, true);
-        assert.eq(e.i32_cmpxchg(addr, 99, 42), 10);     // returns old value (no match)
-        assert.eq(mem.getInt32(effectiveAddr, true), 10); // memory unchanged
-
-        // i64.atomic.rmw.cmpxchg - success
-        mem.setBigInt64(effectiveAddr, 10n, true);
-        assert.eq(e.i64_cmpxchg(addr, 10n, 42n), 10n);
-        assert.eq(mem.getBigInt64(effectiveAddr, true), 42n);
-
-        // i64.atomic.rmw.cmpxchg - failure
-        mem.setBigInt64(effectiveAddr, 10n, true);
-        assert.eq(e.i64_cmpxchg(addr, 99n, 42n), 10n);
-        assert.eq(mem.getBigInt64(effectiveAddr, true), 10n);
-
-        // i32.atomic.rmw8.cmpxchg_u - success
-        mem.setUint8(effectiveAddr, 10);
-        assert.eq(e.i32_cmpxchg8(addr, 10, 42), 10);
-        assert.eq(mem.getUint8(effectiveAddr), 42);
-
-        // i32.atomic.rmw8.cmpxchg_u - failure
-        mem.setUint8(effectiveAddr, 10);
-        assert.eq(e.i32_cmpxchg8(addr, 99, 42), 10);
-        assert.eq(mem.getUint8(effectiveAddr), 10);
-
-        // i32.atomic.rmw16.cmpxchg_u - success
-        mem.setUint16(effectiveAddr, 10, true);
-        assert.eq(e.i32_cmpxchg16(addr, 10, 42), 10);
-        assert.eq(mem.getUint16(effectiveAddr, true), 42);
-
-        // i32.atomic.rmw16.cmpxchg_u - failure
-        mem.setUint16(effectiveAddr, 10, true);
-        assert.eq(e.i32_cmpxchg16(addr, 99, 42), 10);
-        assert.eq(mem.getUint16(effectiveAddr, true), 10);
-
-        // i64.atomic.rmw8.cmpxchg_u - success
-        mem.setUint8(effectiveAddr, 10);
-        assert.eq(e.i64_cmpxchg8(addr, 10n, 42n), 10n);
-        assert.eq(mem.getUint8(effectiveAddr), 42);
-
-        // i64.atomic.rmw8.cmpxchg_u - failure
-        mem.setUint8(effectiveAddr, 10);
-        assert.eq(e.i64_cmpxchg8(addr, 99n, 42n), 10n);
-        assert.eq(mem.getUint8(effectiveAddr), 10);
-
-        // i64.atomic.rmw16.cmpxchg_u - success
-        mem.setUint16(effectiveAddr, 10, true);
-        assert.eq(e.i64_cmpxchg16(addr, 10n, 42n), 10n);
-        assert.eq(mem.getUint16(effectiveAddr, true), 42);
-
-        // i64.atomic.rmw16.cmpxchg_u - failure
-        mem.setUint16(effectiveAddr, 10, true);
-        assert.eq(e.i64_cmpxchg16(addr, 99n, 42n), 10n);
-        assert.eq(mem.getUint16(effectiveAddr, true), 10);
-
-        // i64.atomic.rmw32.cmpxchg_u - success
-        mem.setUint32(effectiveAddr, 10, true);
-        assert.eq(e.i64_cmpxchg32(addr, 10n, 42n), 10n);
-        assert.eq(mem.getUint32(effectiveAddr, true), 42);
-
-        // i64.atomic.rmw32.cmpxchg_u - failure
-        mem.setUint32(effectiveAddr, 10, true);
-        assert.eq(e.i64_cmpxchg32(addr, 99n, 42n), 10n);
-        assert.eq(mem.getUint32(effectiveAddr, true), 10);
-    }
-}
-
-await assert.asyncTest(test());
diff --git a/JSTests/wasm/stress/memory64-atomics.js b/JSTests/wasm/stress/memory64-atomics.js
deleted file mode 100644
index 7b43297dc453..000000000000
--- a/JSTests/wasm/stress/memory64-atomics.js
+++ /dev/null
@@ -1,169 +0,0 @@
-//@ skip if $addressBits <= 32
-//@ runDefaultWasm("-m", "--useWasmMemory64=1", "--useOMGJIT=0")
-import { instantiate } from "../wabt-wrapper.js";
-import * as assert from "../assert.js";
-
-// Test atomic operations with memory64 and a constant offset.
-// Uses memory64 (i64 addresses) with shared memory and offset=256 to exercise
-// the 64-bit offset metadata path in IPInt.
-
-// First, test non-atomic load/store with memory64 to verify basic memory64 works.
-{
-    let wat = `
-    (module
-      (memory (export "memory") i64 1)
-
-      (func (export "i32_store") (param i64 i32) (local.get 0) (local.get 1) (i32.store offset=256))
-      (func (export "i32_load") (param i64) (result i32) (local.get 0) (i32.load offset=256))
-      (func (export "i64_store") (param i64 i64) (local.get 0) (local.get 1) (i64.store offset=256))
-      (func (export "i64_load") (param i64) (result i64) (local.get 0) (i64.load offset=256))
-    )
-    `;
-
-    const instance = await instantiate(wat, {}, { memory64: true });
-    const e = instance.exports;
-
-    for (let i = 0; i < wasmTestLoopCount; i++) {
-        e.i32_store(0n, 0x12345678);
-        assert.eq(e.i32_load(0n), 0x12345678);
-
-        e.i64_store(0n, 0x123456789ABCDEF0n);
-        assert.eq(e.i64_load(0n), 0x123456789ABCDEF0n);
-
-        e.i32_store(8n, 42);
-        assert.eq(e.i32_load(8n), 42);
-
-        // Test with a larger base address
-        e.i32_store(128n, 0xDEADBEEF);
-        assert.eq(e.i32_load(128n), 0xDEADBEEF | 0);
-    }
-}
-
-// Test atomics with shared memory (memory32) and a large offset to exercise
-// the offset metadata path.
-{
-    const offset = 256;
-
-    let wat = `
-    (module
-      (memory (export "memory") 1 1 shared)
-
-      ;; Non-atomic helpers (for setup/verification)
-      (func (export "i32_store") (param i32 i32) (local.get 0) (local.get 1) (i32.store offset=${offset}))
-      (func (export "i32_load") (param i32) (result i32) (local.get 0) (i32.load offset=${offset}))
-      (func (export "i64_store") (param i32 i64) (local.get 0) (local.get 1) (i64.store offset=${offset}))
-      (func (export "i64_load") (param i32) (result i64) (local.get 0) (i64.load offset=${offset}))
-
-      ;; Atomic loads with offset
-      (func (export "test_i32_atomic_load") (param i32) (result i32) (local.get 0) (i32.atomic.load offset=${offset}))
-      (func (export "test_i64_atomic_load") (param i32) (result i64) (local.get 0) (i64.atomic.load offset=${offset}))
-      (func (export "test_i32_atomic_load8_u") (param i32) (result i32) (local.get 0) (i32.atomic.load8_u offset=${offset}))
-      (func (export "test_i32_atomic_load16_u") (param i32) (result i32) (local.get 0) (i32.atomic.load16_u offset=${offset}))
-      (func (export "test_i64_atomic_load8_u") (param i32) (result i64) (local.get 0) (i64.atomic.load8_u offset=${offset}))
-      (func (export "test_i64_atomic_load16_u") (param i32) (result i64) (local.get 0) (i64.atomic.load16_u offset=${offset}))
-      (func (export "test_i64_atomic_load32_u") (param i32) (result i64) (local.get 0) (i64.atomic.load32_u offset=${offset}))
-
-      ;; Atomic stores with offset
-      (func (export "test_i32_atomic_store") (param i32 i32) (local.get 0) (local.get 1) (i32.atomic.store offset=${offset}))
-      (func (export "test_i64_atomic_store") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store offset=${offset}))
-      (func (export "test_i32_atomic_store8") (param i32 i32) (local.get 0) (local.get 1) (i32.atomic.store8 offset=${offset}))
-      (func (export "test_i32_atomic_store16") (param i32 i32) (local.get 0) (local.get 1) (i32.atomic.store16 offset=${offset}))
-      (func (export "test_i64_atomic_store8") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store8 offset=${offset}))
-      (func (export "test_i64_atomic_store16") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store16 offset=${offset}))
-      (func (export "test_i64_atomic_store32") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store32 offset=${offset}))
-
-      ;; Atomic RMW add with offset
-      (func (export "test_i32_atomic_rmw_add") (param i32 i32) (result i32) (local.get 0) (local.get 1) (i32.atomic.rmw.add offset=${offset}))
-      (func (export "test_i64_atomic_rmw_add") (param i32 i64) (result i64) (local.get 0) (local.get 1) (i64.atomic.rmw.add offset=${offset}))
-
-      ;; Atomic RMW cmpxchg with offset
-      (func (export "test_i32_atomic_rmw_cmpxchg") (param i32 i32 i32) (result i32) (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw.cmpxchg offset=${offset}))
-      (func (export "test_i64_atomic_rmw_cmpxchg") (param i32 i64 i64) (result i64) (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw.cmpxchg offset=${offset}))
-
-      ;; Atomic notify with offset
-      (func (export "test_memory_atomic_notify") (param i32 i32) (result i32) (local.get 0) (local.get 1) (memory.atomic.notify offset=${offset}))
-    )
-    `;
-
-    const instance = await instantiate(wat, {}, { threads: true });
-    const e = instance.exports;
-
-    function clear() {
-        e.i64_store(0, 0n);
-        e.i64_store(8, 0n);
-    }
-
-    for (let i = 0; i < wasmTestLoopCount; i++) {
-        // === Atomic Loads ===
-        clear();
-        e.i64_store(0, 0x7766554433221142n);
-
-        assert.eq(e.test_i32_atomic_load(0), 0x33221142);
-        assert.eq(e.test_i64_atomic_load(0), 0x7766554433221142n);
-        assert.eq(e.test_i32_atomic_load8_u(0), 0x42);
-        assert.eq(e.test_i32_atomic_load16_u(0), 0x1142);
-        assert.eq(e.test_i64_atomic_load8_u(0), 0x42n);
-        assert.eq(e.test_i64_atomic_load16_u(0), 0x1142n);
-        assert.eq(e.test_i64_atomic_load32_u(0), 0x33221142n);
-
-        // === Atomic Stores ===
-        clear();
-        e.test_i32_atomic_store(0, 0x12345678);
-        assert.eq(e.i32_load(0), 0x12345678);
-
-        clear();
-        e.test_i64_atomic_store(0, 0x123456789ABCDEF0n);
-        assert.eq(e.i64_load(0), 0x123456789ABCDEF0n);
-
-        clear();
-        e.test_i32_atomic_store8(0, 0x42);
-        assert.eq(e.i32_load(0), 0x42);
-
-        clear();
-        e.test_i32_atomic_store16(0, 0x1234);
-        assert.eq(e.i32_load(0), 0x1234);
-
-        clear();
-        e.test_i64_atomic_store8(0, 0x42n);
-        assert.eq(e.i64_load(0), 0x42n);
-
-        clear();
-        e.test_i64_atomic_store16(0, 0x1234n);
-        assert.eq(e.i64_load(0), 0x1234n);
-
-        clear();
-        e.test_i64_atomic_store32(0, 0x12345678n);
-        assert.eq(e.i64_load(0), 0x12345678n);
-
-        // === Atomic RMW add ===
-        clear();
-        e.i32_store(0, 10);
-        assert.eq(e.test_i32_atomic_rmw_add(0, 5), 10); // returns old value
-        assert.eq(e.i32_load(0), 15);
-
-        clear();
-        e.i64_store(0, 100n);
-        assert.eq(e.test_i64_atomic_rmw_add(0, 50n), 100n);
-        assert.eq(e.i64_load(0), 150n);
-
-        // === Atomic RMW cmpxchg ===
-        clear();
-        e.i32_store(0, 42);
-        assert.eq(e.test_i32_atomic_rmw_cmpxchg(0, 42, 99), 42); // match: swap
-        assert.eq(e.i32_load(0), 99);
-
-        clear();
-        e.i32_store(0, 42);
-        assert.eq(e.test_i32_atomic_rmw_cmpxchg(0, 0, 99), 42); // no match: no swap
-        assert.eq(e.i32_load(0), 42);
-
-        clear();
-        e.i64_store(0, 42n);
-        assert.eq(e.test_i64_atomic_rmw_cmpxchg(0, 42n, 99n), 42n);
-        assert.eq(e.i64_load(0), 99n);
-
-        // === memory.atomic.notify ===
-        clear();
-        assert.eq(e.test_memory_atomic_notify(0, 1), 0); // no waiters
-    }
-}
diff --git a/JSTests/wasm/stress/wide-arithmetic.js b/JSTests/wasm/stress/wide-arithmetic.js
deleted file mode 100644
index 10cd88f54f1d..000000000000
--- a/JSTests/wasm/stress/wide-arithmetic.js
+++ /dev/null
@@ -1,684 +0,0 @@
-//@ requireOptions("--useWasmWideArithmetic=1")
-import * as assert from '../assert.js';
-
-// Helper: wasm i64 values are returned as signed BigInt in JS.
-// u() converts to BigInt. Use string-form for large values to avoid Number precision loss.
-const u = (v) => BigInt(v);
-
-// ============================================================================
-// Main module (binary format, since wabt doesn't know about wide arithmetic)
-// ============================================================================
-
-// Module with 4 exported functions:
-//   i64.add128(i64,i64,i64,i64) -> (i64,i64)
-//   i64.sub128(i64,i64,i64,i64) -> (i64,i64)
-//   i64.mul_wide_s(i64,i64) -> (i64,i64)
-//   i64.mul_wide_u(i64,i64) -> (i64,i64)
-const mainBytes = new Uint8Array([
-    0x00, 0x61, 0x73, 0x6d,   // magic
-    0x01, 0x00, 0x00, 0x00,   // version
-
-    // type section
-    0x01, 0x11,
-    0x02,                     // 2 types
-    0x60,                     // type0 = func
-    0x04, 0x7e, 0x7e, 0x7e, 0x7e,  // 4 i64 params
-    0x02, 0x7e, 0x7e,              // 2 i64 results
-    0x60,                     // type1 = func
-    0x02, 0x7e, 0x7e,              // 2 i64 params
-    0x02, 0x7e, 0x7e,              // 2 i64 results
-
-    // function section
-    0x03, 0x05,
-    0x04,                     // 4 functions
-    0x00, 0x00, 0x01, 0x01,   // types: 0, 0, 1, 1
-
-    // export section
-    0x07, 0x3d,
-    0x04,                     // 4 exports
-    0x0a, 0x69, 0x36, 0x34, 0x2e, 0x61, 0x64, 0x64, 0x31, 0x32, 0x38, 0x00, 0x00, // "i64.add128" func 0
-    0x0a, 0x69, 0x36, 0x34, 0x2e, 0x73, 0x75, 0x62, 0x31, 0x32, 0x38, 0x00, 0x01, // "i64.sub128" func 1
-    0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x73, 0x00, 0x02, // "i64.mul_wide_s" func 2
-    0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x75, 0x00, 0x03, // "i64.mul_wide_u" func 3
-
-    // code section
-    0x0a, 0x2d,
-    0x04,                     // 4 functions
-
-    // function 0: i64.add128
-    0x0c,                     // byte length = 12
-    0x00,                     // no locals
-    0x20, 0x00,               // local.get 0
-    0x20, 0x01,               // local.get 1
-    0x20, 0x02,               // local.get 2
-    0x20, 0x03,               // local.get 3
-    0xfc, 0x13,               // i64.add128
-    0x0b,                     // end
-
-    // function 1: i64.sub128
-    0x0c,                     // byte length
-    0x00,                     // no locals
-    0x20, 0x00,               // local.get 0
-    0x20, 0x01,               // local.get 1
-    0x20, 0x02,               // local.get 2
-    0x20, 0x03,               // local.get 3
-    0xfc, 0x14,               // i64.sub128
-    0x0b,                     // end
-
-    // function 2: i64.mul_wide_s
-    0x08,                     // byte length
-    0x00,                     // no locals
-    0x20, 0x00,               // local.get 0
-    0x20, 0x01,               // local.get 1
-    0xfc, 0x15,               // i64.mul_wide_s
-    0x0b,                     // end
-
-    // function 3: i64.mul_wide_u
-    0x08,                     // byte length
-    0x00,                     // no locals
-    0x20, 0x00,               // local.get 0
-    0x20, 0x01,               // local.get 1
-    0xfc, 0x16,               // i64.mul_wide_u
-    0x0b,                     // end
-]);
-
-function testMain() {
-    const module = new WebAssembly.Module(mainBytes);
-    const instance = new WebAssembly.Instance(module);
-    const add128 = instance.exports["i64.add128"];
-    const sub128 = instance.exports["i64.sub128"];
-    const mul_wide_s = instance.exports["i64.mul_wide_s"];
-    const mul_wide_u = instance.exports["i64.mul_wide_u"];
-
-    let r;
-
-    for (let iteration = 0; iteration < wasmTestLoopCount; ++iteration) {
-
-        // ====================================================================
-        // Simple addition tests
-        // ====================================================================
-
-        r = add128(0n, 0n, 0n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = add128(0n, 1n, 1n, 0n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 1n);
-
-        r = add128(1n, 0n, -1n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 1n);
-
-        r = add128(1n, 1n, -1n, -1n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 1n);
-
-        // ====================================================================
-        // Simple subtraction tests
-        // ====================================================================
-
-        r = sub128(0n, 0n, 0n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = sub128(0n, 0n, 1n, 0n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -1n);
-
-        r = sub128(0n, 1n, 1n, 1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -1n);
-
-        r = sub128(0n, 0n, 1n, 1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -2n);
-
-        // ====================================================================
-        // Simple mul_wide tests
-        // ====================================================================
-
-        r = mul_wide_s(0n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(0n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(1n, 1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1n, 1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(-1n, -1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(-1n, 1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -1n);
-
-        r = mul_wide_u(-1n, 1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], 0n);
-
-        // ====================================================================
-        // 20 randomly generated test cases for i64.add128
-        // ====================================================================
-
-        r = add128(-2418420703207364752n, -1n, -1n, -1n);
-        assert.eq(r[0], -2418420703207364753n);
-        assert.eq(r[1], -1n);
-
-        r = add128(0n, 0n, -4579433644172935106n, -1n);
-        assert.eq(r[0], -4579433644172935106n);
-        assert.eq(r[1], -1n);
-
-        r = add128(0n, 0n, 1n, -1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], -1n);
-
-        r = add128(1n, 0n, 1n, 0n);
-        assert.eq(r[0], 2n);
-        assert.eq(r[1], 0n);
-
-        r = add128(-1n, -1n, -1n, -1n);
-        assert.eq(r[0], -2n);
-        assert.eq(r[1], -1n);
-
-        r = add128(0n, -1n, 1n, 0n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], -1n);
-
-        r = add128(0n, 0n, 0n, -1n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], -1n);
-
-        r = add128(1n, 0n, -1n, -1n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = add128(0n, 6184727276166606191n, 0n, 1n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 6184727276166606192n);
-
-        r = add128(-8434911321912688222n, -1n, 1n, -1n);
-        assert.eq(r[0], -8434911321912688221n);
-        assert.eq(r[1], -2n);
-
-        r = add128(1n, -1n, 0n, -1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], -2n);
-
-        r = add128(1n, -5148941131328838092n, 0n, 0n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], -5148941131328838092n);
-
-        r = add128(1n, 1n, 1n, 0n);
-        assert.eq(r[0], 2n);
-        assert.eq(r[1], 1n);
-
-        r = add128(-1n, -1n, -3636740005180858631n, -1n);
-        assert.eq(r[0], -3636740005180858632n);
-        assert.eq(r[1], -1n);
-
-        r = add128(-5529682780229988275n, -1n, 0n, 0n);
-        assert.eq(r[0], -5529682780229988275n);
-        assert.eq(r[1], -1n);
-
-        r = add128(1n, -5381447440966559717n, 1020031372481336745n, 1n);
-        assert.eq(r[0], 1020031372481336746n);
-        assert.eq(r[1], -5381447440966559716n);
-
-        r = add128(1n, 1n, 0n, 0n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 1n);
-
-        r = add128(-9133888546939907356n, -1n, 1n, 1n);
-        assert.eq(r[0], -9133888546939907355n);
-        assert.eq(r[1], 0n);
-
-        r = add128(-4612047512704241719n, -1n, 0n, -1n);
-        assert.eq(r[0], -4612047512704241719n);
-        assert.eq(r[1], -2n);
-
-        r = add128(414720966820876428n, -1n, 1n, 0n);
-        assert.eq(r[0], 414720966820876429n);
-        assert.eq(r[1], -1n);
-
-        // ====================================================================
-        // 20 randomly generated test cases for i64.sub128
-        // ====================================================================
-
-        r = sub128(0n, -2459085471354756766n, -9151153060221070927n, -1n);
-        assert.eq(r[0], 9151153060221070927n);
-        assert.eq(r[1], -2459085471354756766n);
-
-        r = sub128(4566502638724063423n, -4282658540409485563n, -6884077310018979971n, -1n);
-        assert.eq(r[0], -6996164124966508222n);
-        assert.eq(r[1], -4282658540409485563n);
-
-        r = sub128(1n, 3118380319444903041n, 0n, 3283115686417695443n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], -164735366972792402n);
-
-        r = sub128(-7208415241680161810n, -1n, 1n, 0n);
-        assert.eq(r[0], -7208415241680161811n);
-        assert.eq(r[1], -1n);
-
-        r = sub128(0n, 3944850126731328706n, 1n, 1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], 3944850126731328704n);
-
-        r = sub128(1n, -1n, -1n, -1n);
-        assert.eq(r[0], 2n);
-        assert.eq(r[1], -1n);
-
-        r = sub128(-1n, -1n, 4855833073346115923n, -6826437637438999645n);
-        assert.eq(r[0], -4855833073346115924n);
-        assert.eq(r[1], 6826437637438999644n);
-
-        r = sub128(1n, 0n, -1n, -1n);
-        assert.eq(r[0], 2n);
-        assert.eq(r[1], 0n);
-
-        r = sub128(1n, 0n, 1n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = sub128(-1n, -1n, 0n, 0n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -1n);
-
-        r = sub128(1n, -1n, -6365475388498096428n, -1n);
-        assert.eq(r[0], 6365475388498096429n);
-        assert.eq(r[1], -1n);
-
-        r = sub128(6804238617560992346n, -1n, 0n, -1n);
-        assert.eq(r[0], 6804238617560992346n);
-        assert.eq(r[1], 0n);
-
-        r = sub128(0n, 1n, 1n, -7756145513466453619n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], 7756145513466453619n);
-
-        r = sub128(1n, -1n, 1n, 1n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], -2n);
-
-        r = sub128(0n, 1n, 1n, 0n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], 0n);
-
-        r = sub128(1n, 5602881641763648953n, -2110589244314239080n, -1n);
-        assert.eq(r[0], 2110589244314239081n);
-        assert.eq(r[1], 5602881641763648953n);
-
-        r = sub128(0n, 1n, -1n, -1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 1n);
-
-        r = sub128(0n, -1n, 3553816990259121806n, -2105235417856431622n);
-        assert.eq(r[0], -3553816990259121806n);
-        assert.eq(r[1], 2105235417856431620n);
-
-        r = sub128(1861102705894987245n, 1n, 3713781778534059871n, 1n);
-        assert.eq(r[0], -1852679072639072626n);
-        assert.eq(r[1], -1n);
-
-        r = sub128(0n, -1n, 1n, 1832524486821761762n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -1832524486821761764n);
-
-        // ====================================================================
-        // 20 randomly generated test cases for i64.mul_wide_s
-        // ====================================================================
-
-        r = mul_wide_s(1n, 1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(0n, 6287758211025156705n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(-6643537319803451357n, 1n);
-        assert.eq(r[0], -6643537319803451357n);
-        assert.eq(r[1], -1n);
-
-        r = mul_wide_s(-2483565146858803428n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(1n, 1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(-3838951433439430085n, 3471602925362676030n);
-        assert.eq(r[0], 5186941893001237834n);
-        assert.eq(r[1], -722475195264825124n);
-
-        r = mul_wide_s(-8262495286814853129n, 7883241869666573970n);
-        assert.eq(r[0], -8557189786755031842n);
-        assert.eq(r[1], -3530988912334554469n);
-
-        r = mul_wide_s(4278371902407959701n, 1n);
-        assert.eq(r[0], 4278371902407959701n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(-8852706149487089182n, -1n);
-        assert.eq(r[0], 8852706149487089182n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(1n, -1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -1n);
-
-        r = mul_wide_s(-1n, -4329244561838653387n);
-        assert.eq(r[0], 4329244561838653387n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(-1n, -1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(697896157315764057n, 1n);
-        assert.eq(r[0], 697896157315764057n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(1n, 1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(-1n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(0n, -3769664482072947073n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(1n, 8414291037346403854n);
-        assert.eq(r[0], 8414291037346403854n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_s(1n, -1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], -1n);
-
-        r = mul_wide_s(5014655679779318485n, -5080037812563681985n);
-        assert.eq(r[0], 2842857627777395563n);
-        assert.eq(r[1], -1380983027057486843n);
-
-        r = mul_wide_s(0n, 1n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        // ====================================================================
-        // 20 randomly generated test cases for i64.mul_wide_u
-        // ====================================================================
-
-        r = mul_wide_u(-4734436040338162711n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(3270597527173764279n, 6636648075495406358n);
-        assert.eq(r[0], -5430303818902260550n);
-        assert.eq(r[1], 1176674035141685826n);
-
-        r = mul_wide_u(-7771814344630108151n, 1n);
-        assert.eq(r[0], -7771814344630108151n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1n, -7864138787704962081n);
-        assert.eq(r[0], -7864138787704962081n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1n, 518555141550256010n);
-        assert.eq(r[0], 518555141550256010n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1n, -1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1118900477321231571n, -1n);
-        assert.eq(r[0], -1118900477321231571n);
-        assert.eq(r[1], 1118900477321231570n);
-
-        r = mul_wide_u(-1n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(-5586890671027490027n, 1n);
-        assert.eq(r[0], -5586890671027490027n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(0n, 3603850799751152505n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(-1n, -1n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], -2n);
-
-        r = mul_wide_u(0n, 1n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(-7344082851774441644n, 3896439839137544024n);
-        assert.eq(r[0], 5738542512914895072n);
-        assert.eq(r[1], 2345175459296971666n);
-
-        r = mul_wide_u(0n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(616395976148874061n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(2810729703362889816n, -1n);
-        assert.eq(r[0], -2810729703362889816n);
-        assert.eq(r[1], 2810729703362889815n);
-
-        r = mul_wide_u(1n, -1n);
-        assert.eq(r[0], -1n);
-        assert.eq(r[1], 0n);
-
-        r = mul_wide_u(1n, 0n);
-        assert.eq(r[0], 0n);
-        assert.eq(r[1], 0n);
-
-    } // end wasmTestLoopCount loop
-}
-
-testMain();
-
-// ============================================================================
-// Overlong binary encoding module
-// ============================================================================
-
-function testOverlongEncoding() {
-    // This module uses overlong LEB128 encodings for each wide arithmetic
-    // instruction's opcode, which must be accepted per the spec.
-    const bytes = new Uint8Array([
-        0x00, 0x61, 0x73, 0x6d,   // magic: \0asm
-        0x01, 0x00, 0x00, 0x00,   // version: 1
-
-        // type section, 17 bytes
-        0x01, 0x11,
-        0x02,                     // 2 types
-        0x60,                     // type0 = function
-        0x04, 0x7e, 0x7e, 0x7e, 0x7e,  // 4 params - all i64
-        0x02, 0x7e, 0x7e,              // 2 results - both i64
-        0x60,                     // type1 = function
-        0x02, 0x7e, 0x7e,              // 2 params - both i64
-        0x02, 0x7e, 0x7e,              // 2 results - both i64
-
-        // function section, 5 bytes
-        0x03, 0x05,
-        0x04,                     // 4 functions
-        0x00, 0x00, 0x01, 0x01,   // types: 0, 0, 1, 1
-
-        // export section, 0x3d bytes
-        0x07, 0x3d,
-        0x04,                     // 4 exports
-        0x0a, 0x69, 0x36, 0x34, 0x2e, 0x61, 0x64, 0x64, 0x31, 0x32, 0x38, 0x00, 0x00, // "i64.add128" func 0
-        0x0a, 0x69, 0x36, 0x34, 0x2e, 0x73, 0x75, 0x62, 0x31, 0x32, 0x38, 0x00, 0x01, // "i64.sub128" func 1
-        0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x73, 0x00, 0x02, // "i64.mul_wide_s" func 2
-        0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x75, 0x00, 0x03, // "i64.mul_wide_u" func 3
-
-        // code section
-        0x0a, 0x37,
-        0x04,                     // 4 functions
-
-        // function 0: i64.add128 with overlong encoding (0xfc 0x93 0x80 0x00)
-        0x0e,                     // byte length
-        0x00,                     // no locals
-        0x20, 0x00,               // local.get 0
-        0x20, 0x01,               // local.get 1
-        0x20, 0x02,               // local.get 2
-        0x20, 0x03,               // local.get 3
-        0xfc, 0x93, 0x80, 0x00,   // i64.add128 (overlong)
-        0x0b,                     // end
-
-        // function 1: i64.sub128 with overlong encoding (0xfc 0x94 0x00)
-        0x0d,                     // byte length
-        0x00,                     // no locals
-        0x20, 0x00,               // local.get 0
-        0x20, 0x01,               // local.get 1
-        0x20, 0x02,               // local.get 2
-        0x20, 0x03,               // local.get 3
-        0xfc, 0x94, 0x00,         // i64.sub128 (overlong)
-        0x0b,                     // end
-
-        // function 2: i64.mul_wide_s with overlong encoding (0xfc 0x95 0x80 0x80 0x80 0x00)
-        0x0c,                     // byte length
-        0x00,                     // no locals
-        0x20, 0x00,               // local.get 0
-        0x20, 0x01,               // local.get 1
-        0xfc, 0x95, 0x80, 0x80, 0x80, 0x00,  // i64.mul_wide_s (overlong)
-        0x0b,                     // end
-
-        // function 3: i64.mul_wide_u with overlong encoding (0xfc 0x96 0x80 0x80 0x00)
-        0x0b,                     // byte length
-        0x00,                     // no locals
-        0x20, 0x00,               // local.get 0
-        0x20, 0x01,               // local.get 1
-        0xfc, 0x96, 0x80, 0x80, 0x00,  // i64.mul_wide_u (overlong)
-        0x0b,                     // end
-    ]);
-
-    const module = new WebAssembly.Module(bytes);
-    const instance = new WebAssembly.Instance(module);
-    const add128 = instance.exports["i64.add128"];
-    const sub128 = instance.exports["i64.sub128"];
-    const mul_wide_s = instance.exports["i64.mul_wide_s"];
-    const mul_wide_u = instance.exports["i64.mul_wide_u"];
-
-    let r;
-
-    for (let iteration = 0; iteration < wasmTestLoopCount; ++iteration) {
-        r = add128(1n, 2n, 3n, 4n);
-        assert.eq(r[0], 4n);
-        assert.eq(r[1], 6n);
-
-        r = sub128(2n, 5n, 1n, 2n);
-        assert.eq(r[0], 1n);
-        assert.eq(r[1], 3n);
-
-        r = mul_wide_s(1n, -2n);
-        assert.eq(r[0], -2n);
-        assert.eq(r[1], -1n);
-
-        r = mul_wide_u(3n, 2n);
-        assert.eq(r[0], 6n);
-        assert.eq(r[1], 0n);
-    } // end wasmTestLoopCount loop
-}
-
-testOverlongEncoding();
-
-// ============================================================================
-// assert_invalid tests: type mismatches (binary format)
-// ============================================================================
-
-// Helper: build a minimal wasm module with one function
-// typeParams/typeResults are arrays of wasm valtype bytes (0x7e = i64)
-// bodyLocals is the local.get sequence, opcode is the wide arith opcode
-function makeInvalidModule(typeParams, typeResults, bodyGetCount, opcodeBytes) {
-    // Type section
-    const funcType = [0x60, typeParams.length, ...typeParams, typeResults.length, ...typeResults];
-    const typeSection = [0x01, funcType.length + 1, 0x01, ...funcType];
-
-    // Function section
-    const funcSection = [0x03, 0x02, 0x01, 0x00];
-
-    // Code section
-    const bodyGets = [];
-    for (let i = 0; i < bodyGetCount; i++)
-        bodyGets.push(0x20, i);
-    const bodyContent = [0x00, ...bodyGets, ...opcodeBytes, 0x0b];
-    const codeSection = [0x0a, bodyContent.length + 2, 0x01, bodyContent.length, ...bodyContent];
-
-    return new Uint8Array([
-        0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
-        ...typeSection, ...funcSection, ...codeSection
-    ]);
-}
-
-const i64 = 0x7e;
-
-// i64.add128: too few results (1 instead of 2)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64, i64, i64, i64], [i64], 4, [0xfc, 0x13])
-), WebAssembly.CompileError, "");
-
-// i64.add128: too few params (3 instead of 4)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64, i64, i64], [i64, i64], 3, [0xfc, 0x13])
-), WebAssembly.CompileError, "");
-
-// i64.sub128: too few results (1 instead of 2)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64, i64, i64, i64], [i64], 4, [0xfc, 0x14])
-), WebAssembly.CompileError, "");
-
-// i64.sub128: too few params (3 instead of 4)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64, i64, i64], [i64, i64], 3, [0xfc, 0x14])
-), WebAssembly.CompileError, "");
-
-// i64.mul_wide_s: too few results (1 instead of 2)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64, i64], [i64], 2, [0xfc, 0x15])
-), WebAssembly.CompileError, "");
-
-// i64.mul_wide_s: too few params (1 instead of 2)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64], [i64, i64], 1, [0xfc, 0x15])
-), WebAssembly.CompileError, "");
-
-// i64.mul_wide_u: too few results (1 instead of 2)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64, i64], [i64], 2, [0xfc, 0x16])
-), WebAssembly.CompileError, "");
-
-// i64.mul_wide_u: too few params (1 instead of 2)
-assert.throws(() => new WebAssembly.Module(
-    makeInvalidModule([i64], [i64, i64], 1, [0xfc, 0x16])
-), WebAssembly.CompileError, "");
-
diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
index 0edcc51ae075..35ec179dfb35 100644
--- a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
@@ -299,16 +299,6 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
         add64AndSetFlags(imm, dest, dest);
     }
 
-    void add64AndSetFlags(RegisterID a, RegisterID b, RegisterID dest)
-    {
-        m_assembler.add<64, S>(dest, a, b);
-    }
-
-    void addCarry64(RegisterID a, RegisterID b, RegisterID dest)
-    {
-        m_assembler.adc<64>(dest, a, b);
-    }
-
     void add64(TrustedImm64 imm, RegisterID dest)
     {
         add64(imm, dest, dest);
@@ -1607,16 +1597,6 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
         }
     }
 
-    void sub64AndSetFlags(RegisterID a, RegisterID b, RegisterID dest)
-    {
-        m_assembler.sub<64, S>(dest, a, b);
-    }
-
-    void subBorrow64(RegisterID a, RegisterID b, RegisterID dest)
-    {
-        m_assembler.sbc<64>(dest, a, b);
-    }
-
     void urshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest)
     {
         m_assembler.lsr<32>(dest, src, shiftAmount);
diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h b/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
index 84750f015d59..a20d0e9a4cd3 100644
--- a/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
@@ -5067,17 +5067,7 @@ class MacroAssemblerX86_64 : public AbstractMacroAssembler<Assembler> {
     {
         m_assembler.addq_rr(src, dest);
     }
-
-    void addCarry64(RegisterID src, RegisterID dest)
-    {
-        m_assembler.adcq_rr(src, dest);
-    }
-
-    void subBorrow64(RegisterID src, RegisterID dest)
-    {
-        m_assembler.sbbq_rr(src, dest);
-    }
-
+    
     void add64(Address src, RegisterID dest)
     {
         m_assembler.addq_mr(src.offset, src.base, dest);
diff --git a/Source/JavaScriptCore/assembler/X86Assembler.h b/Source/JavaScriptCore/assembler/X86Assembler.h
index ac8c02ee61d9..4c4d0e2f2412 100644
--- a/Source/JavaScriptCore/assembler/X86Assembler.h
+++ b/Source/JavaScriptCore/assembler/X86Assembler.h
@@ -175,8 +175,6 @@ class X86Assembler {
         OP_OR_GvEv                      = 0x0B,
         OP_OR_EAXIv                     = 0x0D,
         OP_2BYTE_ESCAPE                 = 0x0F,
-        OP_ADC_EvGv                     = 0x11,
-        OP_SBB_EvGv                     = 0x19,
         OP_AND_EvGb                     = 0x20,
         OP_AND_EvGv                     = 0x21,
         OP_AND_GvEv                     = 0x23,
@@ -507,7 +505,6 @@ class X86Assembler {
         GROUP1_OP_ADD = 0,
         GROUP1_OP_OR  = 1,
         GROUP1_OP_ADC = 2,
-        GROUP1_OP_SBB = 3,
         GROUP1_OP_AND = 4,
         GROUP1_OP_SUB = 5,
         GROUP1_OP_XOR = 6,
@@ -1400,16 +1397,6 @@ class X86Assembler {
         m_formatter.oneByteOp64(OP_SUB_EvGv, src, dst);
     }
 
-    void adcq_rr(RegisterID src, RegisterID dst)
-    {
-        m_formatter.oneByteOp64(OP_ADC_EvGv, src, dst);
-    }
-
-    void sbbq_rr(RegisterID src, RegisterID dst)
-    {
-        m_formatter.oneByteOp64(OP_SBB_EvGv, src, dst);
-    }
-
     void subq_mr(int offset, RegisterID base, RegisterID dst)
     {
         m_formatter.oneByteOp64(OP_SUB_GvEv, dst, base, offset);
diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm
index a4307e69f32c..74697f1525a1 100644
--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm
+++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm
@@ -50,12 +50,11 @@
 # - MC: (Metadata Counter) IPInt's metadata pointer. This records the corresponding position in generated metadata.
 # - WI: (Wasm Instance) pointer to the current JSWebAssemblyInstance object. This is used for accessing
 #       function-specific data (callee-save).
+# - PL: (Pointer to Locals) pointer to the address of local 0 in the current function. This is used for accessing
+#       locals quickly.
 # - MB: (Memory Base) pointer to the current Wasm memory base address (callee-save).
 # - BC: (Bounds Check) the size of the current Wasm memory region, for bounds checking (callee-save).
 #
-# Locals are accessed at a constant offset from CFR:
-#   local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize
-#
 # Finally, we provide four "sc" (safe for call) registers which are guaranteed to not overlap with argument
 # registers (sc0, sc1, sc2, sc3)
 
@@ -68,6 +67,7 @@ const alignMInt = constexpr JSC::IPInt::alignMInt
 if ARM64 or ARM64E
     const PC = csr7
     const MC = csr6
+    const PL = t6
 
     # Wasm Pinned Registers
     const WI = csr0
@@ -81,6 +81,7 @@ if ARM64 or ARM64E
 elsif X86_64
     const PC = csr2
     const MC = csr1
+    const PL = t5
 
     # Wasm Pinned Registers
     const WI = csr0
@@ -94,6 +95,7 @@ elsif X86_64
 elsif RISCV64
     const PC = csr7
     const MC = csr6
+    const PL = csr10
 
     # Wasm Pinned Registers
     const WI = csr0
@@ -107,6 +109,7 @@ elsif RISCV64
 elsif ARMv7
     const PC = csr1
     const MC = t6
+    const PL = t7
 
     # Wasm Pinned Registers
     const WI = csr0
@@ -120,6 +123,7 @@ elsif ARMv7
 else
     const PC = invalidGPR
     const MC = invalidGPR
+    const PL = invalidGPR
 
     # Wasm Pinned Registers
     const WI = invalidGPR
@@ -167,9 +171,6 @@ const WasmToJSIPIntReturnPCSlot = constexpr Wasm::WasmToJSIPIntReturnPCSlot
 const IPIntCalleeSaveSpaceAsVirtualRegisters = constexpr Wasm::numberOfIPIntCalleeSaveRegisters + constexpr Wasm::numberOfIPIntInternalRegisters
 const IPIntCalleeSaveSpaceStackAligned = (IPIntCalleeSaveSpaceAsVirtualRegisters * SlotSize + StackAlignment - 1) & ~StackAlignmentMask
 
-# Offset from CFR to local[0]: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize
-const IPIntLocalsBaseOffset = IPIntCalleeSaveSpaceStackAligned + LocalSize
-
 # Must match GPRInfo.h
 if X86_64
     const NumberOfWasmArgumentGPRs = 6
@@ -235,112 +236,32 @@ macro advanceMCByReg(amount)
     addp amount, MC
 end
 
-macro decodeLEBVarUInt(dst, cursor, scratch1, scratch2)
-    loadb [cursor], dst
-    addp 1, cursor
-    bbb dst, 0x80, .done
-    andq 0x7f, dst
-    move 7, scratch1
-    validateOpcodeConfig(scratch2)
-.loop:
-    loadb [cursor], scratch2
-    addp 1, cursor
-    bbb scratch2, 0x80, .lastByte
-    andq 0x7f, scratch2
-    lshiftq scratch1, scratch2
-    orq scratch2, dst
-    addq 7, scratch1
-    jmp .loop
-.lastByte:
-    # bit 7 already 0, no AND needed
-    lshiftq scratch1, scratch2
-    orq scratch2, dst
-.done:
-end
-
-macro decodeLEBVarSInt32(dst, cursor, scratch1, scratch2)
-    loadb [cursor], dst
-    addp 1, cursor
-    bbb dst, 0x80, .singleByte
-    andq 0x7f, dst
-    move 7, scratch1
-    validateOpcodeConfig(scratch2)
-.loop:
-    loadb [cursor], scratch2
-    addp 1, cursor
-    bbb scratch2, 0x80, .lastByte
-    andq 0x7f, scratch2
-    lshiftq scratch1, scratch2
-    orq scratch2, dst
-    addq 7, scratch1
-    jmp .loop
-.lastByte:
-    # bit 7 already 0, no AND needed
-    # Check sign bit (0x40) BEFORE shifting
-    btiz scratch2, 0x40, .noSignExtend
-    lshiftq scratch1, scratch2
-    ori scratch2, dst # Ensure output is always upper zero-cleared.
-    addq 7, scratch1
-    # sign extend if shift < 32
-    bigteq scratch1, 32, .done
-    move -1, scratch2
-    lshiftq scratch1, scratch2
-    ori scratch2, dst # Ensure output is always upper zero-cleared.
-    jmp .done
-.noSignExtend:
-    lshiftq scratch1, scratch2
-    ori scratch2, dst # Ensure output is always upper zero-cleared.
-    jmp .done
-.singleByte:
-    lshifti 25, dst
-    rshifti 25, dst
-.done:
-end
-
-macro decodeLEBVarSInt64(dst, cursor, scratch1, scratch2)
-    loadb [cursor], dst
-    addp 1, cursor
-    bbb dst, 0x80, .singleByte
-    andq 0x7f, dst
+macro decodeLEBVarUInt32(offset, dst, scratch1, scratch2, scratch3, scratch4)
+    # if it's a single byte, fastpath it
+    const tempPC = scratch4
+    leap offset[PC], tempPC
+    loadb [tempPC], dst
+
+    bbb dst, 0x80, .fastpath
+    # otherwise, set up for second iteration
+    # next shift is 7
     move 7, scratch1
+    # take off high bit
+    subi 0x80, dst
     validateOpcodeConfig(scratch2)
 .loop:
-    loadb [cursor], scratch2
-    addp 1, cursor
-    bbb scratch2, 0x80, .lastByte
-    andq 0x7f, scratch2
-    lshiftq scratch1, scratch2
-    orq scratch2, dst
-    addq 7, scratch1
-    jmp .loop
-.lastByte:
-    # bit 7 already 0, no AND needed
-    # Check sign bit (0x40) BEFORE shifting
-    btiz scratch2, 0x40, .noSignExtend
-    lshiftq scratch1, scratch2
-    orq scratch2, dst
-    addq 7, scratch1
-    # sign extend if shift < 64
-    bigteq scratch1, 64, .done
-    move -1, scratch2
-    lshiftq scratch1, scratch2
-    orq scratch2, dst
-    jmp .done
-.noSignExtend:
-    lshiftq scratch1, scratch2
-    orq scratch2, dst
-    jmp .done
-.singleByte:
-    lshiftq 57, dst
-    rshiftq 57, dst
-.done:
-end
-
-macro skipLEB128(cursor, scratch)
-.loop:
-    loadb [cursor], scratch
-    addp 1, cursor
-    bbaeq scratch, 0x80, .loop
+    addp 1, tempPC
+    loadb [tempPC], scratch2
+    # scratch3 = high bit 7
+    # leave scratch2 with low bits 6-0
+    move 0x80, scratch3
+    andi scratch2, scratch3
+    xori scratch3, scratch2
+    lshifti scratch1, scratch2
+    addi 7, scratch1
+    ori scratch2, dst
+    bbneq scratch3, 0, .loop
+.fastpath:
 end
 
 macro checkStackOverflow(callee, scratch)
@@ -386,6 +307,12 @@ macro instructionLabel(instrname)
     _ipint%instrname%:
 end
 
+macro slowPathLabel(instrname)
+    aligned _ipint%instrname%_slow_path_validate alignIPInt
+    _ipint%instrname%_slow_path_validate:
+    _ipint%instrname%_slow_path:
+end
+
 macro unimplementedInstruction(instrname)
     instructionLabel(instrname)
     validateOpcodeConfig(a0)
@@ -458,14 +385,18 @@ macro operationCall(fn)
     move wasmInstance, a0
     push PC, MC
     if ARM64 or ARM64E
-        # Save ws0 with padding for 16-byte alignment (PC+MC=16, ws0+pad=16, total=32)
-        subp MachineRegisterSize * 2, sp
-        storep ws0, [sp]
+        push PL, ws0
+    elsif X86_64
+        push PL
+        # preserve 16 byte alignment.
+        subq MachineRegisterSize, sp
     end
     fn()
     if ARM64 or ARM64E
-        loadp [sp], ws0
-        addp MachineRegisterSize * 2, sp
+        pop ws0, PL
+    elsif X86_64
+        addq MachineRegisterSize, sp
+        pop PL
     end
     pop MC, PC
 end
@@ -477,8 +408,11 @@ macro operationCallMayThrowImpl(fn, sizeOfExtraRegistersPreserved)
     move wasmInstance, a0
     push PC, MC
     if ARM64 or ARM64E
-        # Save ws0 with padding for 16-byte alignment (PC+MC=16, ws0+ws0=16, total=32)
-        push ws0, ws0
+        push PL, ws0
+    elsif X86_64
+        push PL
+        # preserve 16 byte alignment.
+        subq MachineRegisterSize, sp
     end
     fn()
     bpneq r1, (constexpr JSC::IPInt::SlowPathExceptionTag), .continuation
@@ -488,15 +422,15 @@ macro operationCallMayThrowImpl(fn, sizeOfExtraRegistersPreserved)
         move cfr, a1
         move sp, a2
         operationCall(macro() cCall3(_ipint_extern_handle_debugger_trap_if_needed) end)
-        addp sizeOfExtraRegistersPreserved + (4 * MachineRegisterSize), sp
-    elsif X86_64
-        addp sizeOfExtraRegistersPreserved + (2 * MachineRegisterSize), sp
     end
+    addp sizeOfExtraRegistersPreserved + (4 * MachineRegisterSize), sp
     jmp _wasm_throw_from_slow_path_trampoline
 .continuation:
     if ARM64 or ARM64E
-        loadp [sp], ws0
-        addp MachineRegisterSize * 2, sp
+        pop ws0, PL
+    elsif X86_64
+        addq MachineRegisterSize, sp
+        pop PL
     end
     pop MC, PC
 end
@@ -576,7 +510,7 @@ if JIT and not ARMv7
     move PC, a2
     # Add 1 to the index due to WTF::UncheckedKeyHashMap not supporting 0 as a key
     addq 1, a2
-    move sp, a3
+    move PL, a3
     operationCall(macro() cCall4(_ipint_extern_loop_osr) end)
     btpz r1, .recover
     restoreIPIntRegisters()
@@ -1269,9 +1203,9 @@ end
 
 macro handleDebuggerTrapIfNeeded()
     push PC, MC
-    push ws0, ws0   # sp[0]=ws0 (unused), sp[1]=ws0 (IPIntCallee*), sp[2]=PC, sp[3]=MC
+    push PL, ws0    # sp[0]=PL, sp[1]=ws0 (IPIntCallee*), sp[2]=PC, sp[3]=MC
     move cfr, a1
-    move sp, a2     # a2 = pointer to saved [ws0, ws0, PC, MC]
+    move sp, a2     # a2 = pointer to saved [PL, ws0, PC, MC]
     operationCall(macro() cCall3(_ipint_extern_handle_debugger_trap_if_needed) end)
     addp 4 * MachineRegisterSize, sp
 end
@@ -1320,7 +1254,7 @@ end)
 op(wasm_throw_from_fault_handler_trampoline_reg_instance, macro ()
     # enableWasmDebugger disables BBQ/OMG, so this trampoline is only
     # reached from IPInt when the debugger is active. The signal handler only patches
-    # the machine PC, so IPInt registers (PC, MC, ws0, cfr) are still live.
+    # the machine PC, so IPInt registers (PC, MC, PL, ws0, cfr) are still live.
     # Exception type comes from instance->m_exception; copy to CFR slot for handle_debugger_trap_if_needed.
     loadi JSWebAssemblyInstance::m_exception[wasmInstance], t0
     storei t0, ArgumentCountIncludingThis + PayloadOffset[cfr]
@@ -1375,6 +1309,7 @@ end
     operationCall(macro() cCall2(_ipint_extern_prepare_function_body) end)
     move r0, ws0
 
+    move sp, PL
     loadp Wasm::IPIntCallee::m_bytecode[ws0], PC
     loadp Wasm::IPIntCallee::m_metadata + VectorBufferOffset[ws0], MC
 
@@ -1423,9 +1358,18 @@ end
     loadp Wasm::IPIntCallee::m_metadata + VectorBufferOffset[ws0], t1
     addp t1, MC
 
-    # Recompute SP from catch metadata. [MC] contains localSizeToAlloc + stackValues.
-    # Add rethrowSlots to get the total frame size below callee-save space.
-    loadi Wasm::IPIntCallee::m_numRethrowSlotsToAlloc[ws0], t1
+    # Recompute PL
+    if ARM64 or ARM64E
+        loadpairi Wasm::IPIntCallee::m_localSizeToAlloc[ws0], t0, t1
+    else
+        loadi Wasm::IPIntCallee::m_numRethrowSlotsToAlloc[ws0], t1
+        loadi Wasm::IPIntCallee::m_localSizeToAlloc[ws0], t0
+    end
+    addp t1, t0
+    mulp LocalSize, t0
+    addp IPIntCalleeSaveSpaceStackAligned, t0
+    subp cfr, t0, PL
+
     loadi [MC], t0
     addp t1, t0
     mulp StackValueSize, t0
@@ -1448,7 +1392,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64)
 
     move cfr, a1
     move sp, a2
-    operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end)
+    move PL, a3
+    operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end)
 
     ipintReloadMemory()
     advanceMC(4)
@@ -1464,7 +1409,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64)
 
     move cfr, a1
     move 0, a2
-    operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end)
+    move PL, a3
+    operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end)
 
     ipintReloadMemory()
     advanceMC(4)
@@ -1482,7 +1428,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7)
 
     move cfr, a1
     move sp, a2
-    operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end)
+    move PL, a3
+    operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end)
 
     ipintReloadMemory()
     advanceMC(4)
@@ -1500,7 +1447,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7)
 
     move cfr, a1
     move sp, a2
-    operationCall(macro() cCall3(_ipint_extern_retrieve_clear_and_push_exception_and_arguments) end)
+    move PL, a3
+    operationCall(macro() cCall4(_ipint_extern_retrieve_clear_and_push_exception_and_arguments) end)
 
     ipintReloadMemory()
     advanceMC(4)
@@ -1518,7 +1466,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7)
 
     move cfr, a1
     move 0, a2
-    operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end)
+    move PL, a3
+    operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end)
 
     ipintReloadMemory()
     advanceMC(4)
@@ -1536,7 +1485,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7)
 
     move cfr, a1
     move sp, a2
-    operationCall(macro() cCall3(_ipint_extern_retrieve_clear_and_push_exception) end)
+    move PL, a3
+    operationCall(macro() cCall4(_ipint_extern_retrieve_clear_and_push_exception) end)
 
     ipintReloadMemory()
     advanceMC(4)
diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp b/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp
index e513fe49d189..a14c01138994 100644
--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp
+++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp
@@ -63,6 +63,7 @@ do { \
 #define VALIDATE_IPINT_SIMD_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_simd_v128_load_mem_validate, alignIPInt, opcode, name)
 #define VALIDATE_IPINT_ATOMIC_OPCODE(opcode, name) VALIDATE_IPINT_ATOMIC_OPCODE_FROM_BASE(ipint_memory_atomic_notify_atomic_validate, alignAtomicIPInt, opcode, name)
 #define VALIDATE_IPINT_ARGUMINT_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_argumINT_a0_validate, alignArgumInt, opcode, name)
+#define VALIDATE_IPINT_SLOW_PATH(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_local_get_slow_path_validate, alignIPInt, opcode, name)
 #define VALIDATE_IPINT_MINT_CALL_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_mint_a0_validate, alignMInt, opcode, name)
 #define VALIDATE_IPINT_MINT_RETURN_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_mint_r0_validate, alignMInt, opcode, name)
 #define VALIDATE_IPINT_UINT_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_uint_r0_validate, alignUInt, opcode, name)
@@ -91,6 +92,7 @@ void initialize()
     FOR_EACH_IPINT_ATOMIC_OPCODE(VALIDATE_IPINT_ATOMIC_OPCODE);
 
     FOR_EACH_IPINT_ARGUMINT_OPCODE(VALIDATE_IPINT_ARGUMINT_OPCODE);
+    FOR_EACH_IPINT_SLOW_PATH(VALIDATE_IPINT_SLOW_PATH);
     FOR_EACH_IPINT_MINT_CALL_OPCODE(VALIDATE_IPINT_MINT_CALL_OPCODE);
     FOR_EACH_IPINT_MINT_RETURN_OPCODE(VALIDATE_IPINT_MINT_RETURN_OPCODE);
     FOR_EACH_IPINT_UINT_OPCODE(VALIDATE_IPINT_UINT_OPCODE);
diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter.h b/Source/JavaScriptCore/llint/InPlaceInterpreter.h
index 7e3e2793710b..6d6d95024df9 100644
--- a/Source/JavaScriptCore/llint/InPlaceInterpreter.h
+++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.h
@@ -711,6 +711,11 @@ extern "C" void SYSV_ABI ipint_entry();
     m(0x11, argumINT_stack_vector) \
     m(0x12, argumINT_end) \
 
+#define FOR_EACH_IPINT_SLOW_PATH(m) \
+    m(0x00, local_get_slow_path) \
+    m(0x01, local_set_slow_path) \
+    m(0x02, local_tee_slow_path) \
+
 #define FOR_EACH_IPINT_MINT_CALL_OPCODE(m) \
     m(0x00, mint_a0) \
     m(0x01, mint_a1) \
@@ -790,6 +795,7 @@ FOR_EACH_IPINT_CONVERSION_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION);
 FOR_EACH_IPINT_SIMD_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION);
 FOR_EACH_IPINT_ATOMIC_OPCODE(IPINT_ATOMIC_VALIDATE_DEFINE_FUNCTION);
 FOR_EACH_IPINT_ARGUMINT_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION);
+FOR_EACH_IPINT_SLOW_PATH(IPINT_VALIDATE_DEFINE_FUNCTION);
 FOR_EACH_IPINT_MINT_CALL_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION);
 FOR_EACH_IPINT_MINT_RETURN_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION);
 FOR_EACH_IPINT_UINT_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION);
diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm b/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm
index 0d2a20cd338e..653b6a343537 100644
--- a/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm
+++ b/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm
@@ -180,21 +180,15 @@ macro ipintEntry()
     end
     mulp LocalSize, argumINTEnd
     mulp LocalSize, argumINTTmp
-    # Allocate locals first (closest to CFR)
+    subp argumINTEnd, sp
+    move sp, argumINTEnd
     subp argumINTTmp, sp
     move sp, argumINTDsp
-    # Allocate rethrow slots below locals
-    subp argumINTEnd, sp
-    # argumINTEnd = boundary for zero-init loop. Handlers write [argumINTDst] then subp,
-    # so after localSizeToAlloc handlers, argumINTDst = argumINTDsp - LocalSize.
-    move argumINTDsp, argumINTEnd
-    subp LocalSize, argumINTEnd
     loadp Wasm::IPIntCallee::m_argumINTBytecode + VectorBufferOffset[ws0], MC
 
     push argumINTTmp, argumINTDst, argumINTSrc, argumINTEnd
 
-    # Start writing at local[0] = CFR - IPIntLocalsBaseOffset, going downward
-    leap -IPIntLocalsBaseOffset[cfr], argumINTDst
+    move argumINTDsp, argumINTDst
     leap FirstArgumentOffset[cfr], argumINTSrc
 
     validateOpcodeConfig(argumINTTmp)
@@ -216,7 +210,7 @@ end
 end
 
 macro argumINTInitializeDefaultLocals()
-    # zero out remaining locals (argumINTDst moves downward toward argumINTEnd)
+    # zero out remaining locals
     bpeq argumINTDst, argumINTEnd, .ipint_entry_finish_zero
     loadb [MC], argumINTTmp
     addp 1, MC
@@ -229,7 +223,7 @@ elsif X86_64
     storep argumINTTmp, [argumINTDst]
     storep 0, 8[argumINTDst]
 end
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
 end
 
 macro argumINTFinish()
@@ -241,7 +235,7 @@ end
     #############################
 
 ipintOp(_unreachable, macro()
-    jmp _ipint_throw_Unreachable
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(Unreachable)
 end)
 
 ipintOp(_nop, macro()
@@ -362,14 +356,15 @@ ipintOp(_rethrow, macro()
     copyCalleeSavesToEntryFrameCalleeSavesBuffer(t0)
 
     move cfr, a1
-    loadi IPInt::RethrowMetadata::tryDepth[MC], a2
-    operationCall(macro() cCall3(_ipint_extern_rethrow_exception) end)
+    move PL, a2
+    loadi IPInt::RethrowMetadata::tryDepth[MC], a3
+    operationCall(macro() cCall4(_ipint_extern_rethrow_exception) end)
     jumpToException()
 end)
 
 ipintOp(_throw_ref, macro()
     popQuad(a2)
-    bieq a2, ValueNull, _ipint_throw_NullExnrefReference
+    bieq a2, ValueNull, .throw_null_ref
 
     saveCallSiteIndex()
 
@@ -380,6 +375,9 @@ ipintOp(_throw_ref, macro()
     move cfr, a1
     operationCall(macro() cCall3(_ipint_extern_throw_ref) end)
     jumpToException()
+
+.throw_null_ref:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullExnrefReference)
 end)
 
 macro uintDispatch()
@@ -745,10 +743,9 @@ end)
     ###################################
 
 macro localGetPostDecode()
-    # Index into locals: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize
+    # Index into locals
     mulq LocalSize, t0
-    subp cfr, t0, t0
-    loadv -IPIntLocalsBaseOffset[t0], v0
+    loadv [PL, t0], v0
     # Push to stack
     pushVec(v0)
     nextIPIntInstruction()
@@ -758,17 +755,16 @@ ipintOp(_local_get, macro()
     # local.get
     loadb 1[PC], t0
     advancePC(2)
-    bbaeq t0, 0x80, .ipint_local_get_slow_path
+    bbaeq t0, 128, _ipint_local_get_slow_path
     localGetPostDecode()
 end)
 
 macro localSetPostDecode()
     # Pop from stack
     popVec(v0)
-    # Store to locals: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize
+    # Store to locals
     mulq LocalSize, t0
-    subp cfr, t0, t0
-    storev v0, -IPIntLocalsBaseOffset[t0]
+    storev v0, [PL, t0]
     nextIPIntInstruction()
 end
 
@@ -776,17 +772,16 @@ ipintOp(_local_set, macro()
     # local.set
     loadb 1[PC], t0
     advancePC(2)
-    bbaeq t0, 0x80, .ipint_local_set_slow_path
+    bbaeq t0, 128, _ipint_local_set_slow_path
     localSetPostDecode()
 end)
 
 macro localTeePostDecode()
     # Load from stack
     loadv [sp], v0
-    # Store to locals: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize
+    # Store to locals
     mulq LocalSize, t0
-    subp cfr, t0, t0
-    storev v0, -IPIntLocalsBaseOffset[t0]
+    storev v0, [PL, t0]
     nextIPIntInstruction()
 end
 
@@ -794,7 +789,7 @@ ipintOp(_local_tee, macro()
     # local.tee
     loadb 1[PC], t0
     advancePC(2)
-    bbaeq t0, 0x80, .ipint_local_tee_slow_path
+    bbaeq t0, 128, _ipint_local_tee_slow_path
     localTeePostDecode()
 end)
 
@@ -867,40 +862,44 @@ end)
 
 ipintOp(_table_get, macro()
     # Load pre-computed index from metadata
-    loadi IPInt::TableAccessMetadata::index[MC], a1
+    loadi IPInt::Const32Metadata::value[MC], a1
     popInt32(a2)
 
     operationCallMayThrow(macro() cCall3(_ipint_extern_table_get) end)
 
     pushQuad(r0)
 
-    loadb IPInt::TableAccessMetadata::instructionLength[MC], t0
+    loadb IPInt::Const32Metadata::instructionLength[MC], t0
 
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::TableAccessMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata)))
     nextIPIntInstruction()
 end)
 
 ipintOp(_table_set, macro()
     # Load pre-computed index from metadata
-    loadi IPInt::TableAccessMetadata::index[MC], a1
+    loadi IPInt::Const32Metadata::value[MC], a1
     popQuad(a3)
     popInt32(a2)
     operationCallMayThrow(macro() cCall4(_ipint_extern_table_set) end)
 
-    loadb IPInt::TableAccessMetadata::instructionLength[MC], t0
+    loadb IPInt::Const32Metadata::instructionLength[MC], t0
 
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::TableAccessMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata)))
     nextIPIntInstruction()
 end)
 
 reservedOpcode(0x27)
 
-macro popMemoryIndex(reg)
-    popInt64(reg) # Note that popInt32 and popInt64 are same implementation.
-    btbnz JSWebAssemblyInstance::m_cachedIsMemory64[wasmInstance], .done
-    zxi2q reg, reg
+macro popMemoryIndex(reg, tmp)
+    loadb JSWebAssemblyInstance::m_cachedIsMemory64[wasmInstance], tmp
+    btiz tmp, .memory32
+    popInt64(reg)
+    jmp .done
+.memory32:
+    popInt32(reg)
+    ori 0, reg
 .done:
 end
 
@@ -910,52 +909,33 @@ macro baddpc(src, dst, label)
     bpb dst, src, label # unsigned overflow check
 end
 
+macro memoryOpAdvanceMCAndMakePointer(instrLenReg, wasmAddrReg, size, scratch, scratch2)
+    # overwrites wasmAddrReg with computed pointer
 
-macro loadStoreMakePointerFast(alignAccess, offsetAccess, wasmAddrReg, size, scratch, scratch2, slowLabel)
-    # overwrites wasmAddrReg with computed pointer.
-    # Fast path: alignment byte < 0x40 (single-byte, no multi-memory),
-    # and offset byte < 0x80 (single-byte). Memory index is 0.
-    # alignAccess/offsetAccess are memory access patterns for the memarg bytes.
-    # For non-SIMD: pass (1[PC], 2[PC]). For SIMD: pass ([t4], 1[t4]).
-
-    # Check alignment byte: if >= 0x40, it's multi-memory or unusual alignment
-    loadb alignAccess, scratch2          # alignment/flags byte
-    bbaeq scratch2, 0x40, slowLabel
-    loadb offsetAccess, scratch          # offset byte
-    bbaeq scratch, 0x80, slowLabel
-
-    # Both single-byte, memory index = 0. scratch = offset value.
-    baddpc(scratch, wasmAddrReg, _ipint_throw_OutOfBoundsMemoryAccess)
-    move size - 1, scratch2
-    baddpc(wasmAddrReg, scratch2, _ipint_throw_OutOfBoundsMemoryAccess)
-
-    bpaeq scratch2, boundsCheckingSize, _ipint_throw_OutOfBoundsMemoryAccess # scratch2 contains wasm address + size - 1
-    addp memoryBase, wasmAddrReg
-end
+    loadb JSWebAssemblyInstance::m_cachedIsMemory64[wasmInstance], scratch
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata
 
-# Note: wasmAddrReg (t0) is set by the handler's popMemoryIndex before branching here.
-# For store ops, the data register (t3 for int, ft0 for float) is also set by the handler.
-macro loadStoreMakePointerSlow(cursor, wasmAddrReg, size, scratch, scratch2, decodeScratch1, decodeScratch2)
-    # 1. Decode flags/alignment, check multi-memory bit
-    decodeLEBVarUInt(scratch, cursor, decodeScratch1, decodeScratch2)
+    btiz scratch, .memory32
+    loadq memoryIndexSize + IPInt::Const64Metadata::value[MC], instrLenReg # reuse instrLenReg to store offset
+    baddpc(instrLenReg, wasmAddrReg, .outOfBounds) # wasmAddrReg contains address + offset
+    loadb memoryIndexSize + IPInt::Const64Metadata::instructionLength[MC], instrLenReg
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], scratch # scratch contains memory index now
+    advanceMC(memoryIndexSize + sizeof IPInt::Const64Metadata)
+    jmp .commonMemoryCalculations
 
-    # 2. If multi-memory, decode memory index; otherwise 0
-    btiz scratch, 0x40, .memoryIndex0
-    decodeLEBVarUInt(scratch, cursor, decodeScratch1, decodeScratch2)
-    jmp .decodeOffset
-.memoryIndex0:
-    move 0, scratch
+.memory32:
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], instrLenReg # reuse instrLenReg to store offset
+    baddpc(instrLenReg, wasmAddrReg, .outOfBounds) # wasmAddrReg contains address + offset
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], instrLenReg
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], scratch # scratch contains memory index now
+    advanceMC(memoryIndexSize + sizeof IPInt::Const32Metadata)
 
-.decodeOffset:
-    # 3. Decode offset
-    decodeLEBVarUInt(scratch2, cursor, decodeScratch1, decodeScratch2)
-
-    baddpc(scratch2, wasmAddrReg, _ipint_throw_OutOfBoundsMemoryAccess)
+.commonMemoryCalculations:
     move size - 1, scratch2
-    baddpc(wasmAddrReg, scratch2, _ipint_throw_OutOfBoundsMemoryAccess)
+    baddpc(wasmAddrReg, scratch2, .outOfBounds)
 
     btinz scratch, .memoryIsNotZero
-    bpaeq scratch2, boundsCheckingSize, _ipint_throw_OutOfBoundsMemoryAccess # scratch2 contains wasm address + size - 1
+    bpaeq scratch2, boundsCheckingSize, .outOfBounds # scratch2 contains wasm address + size - 1
     addp memoryBase, wasmAddrReg
     jmp .done
 
@@ -963,183 +943,199 @@ macro loadStoreMakePointerSlow(cursor, wasmAddrReg, size, scratch, scratch2, dec
     mulp constexpr (sizeof(JSWebAssemblyInstance::WasmMemoryBaseAndSize)), scratch
     # FIXME: it's probably worth trying to use a loadpair here, but that requires a separate x86 codepath
     loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0) + sizeof(void*))) [wasmInstance, scratch], scratch2 # bounds checking size
-    subp size - 1, scratch2 # wasmAddrReg + (size-1) >= scratch2 is equivalent to wasmAddrReg >= scratch2 - (size-1)
-    bpaeq wasmAddrReg, scratch2, _ipint_throw_OutOfBoundsMemoryAccess
+    bpaeq wasmAddrReg, scratch2, .outOfBounds
     loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0))) [wasmInstance, scratch], scratch2 # memory base
     addp scratch2, wasmAddrReg
+    jmp .done
+
+.outOfBounds:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess)
 .done:
 end
 
 ipintOp(_i32_load_mem, macro()
     # i32.load
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i32_load_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
     # load memory location
     loadi [t0], t1
     pushInt32(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_load_mem, macro()
     # i32.load
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_i64_load_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)
     # load memory location
     loadq [t0], t1
     pushInt64(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_f32_load_mem, macro()
     # f32.load
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_f32_load_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
     # load memory location
     loadf [t0], ft0
     pushFloat32(ft0)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_f64_load_mem, macro()
     # f64.load
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_f64_load_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)
     # load memory location
     loadd [t0], ft0
     pushFloat64(ft0)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i32_load8s_mem, macro()
     # i32.load8_s
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i32_load8s_mem_slow_path)
-    loadbsi [t0], t1
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    loadb [t0], t1
+    sxb2i t1, t1
     pushInt32(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i32_load8u_mem, macro()
     # i32.load8_u
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i32_load8u_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    # load memory location
     loadb [t0], t1
     pushInt32(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i32_load16s_mem, macro()
     # i32.load16_s
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i32_load16s_mem_slow_path)
-    loadhsi [t0], t1
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    # load memory location
+    loadh [t0], t1
+    sxh2i t1, t1
     pushInt32(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i32_load16u_mem, macro()
     # i32.load16_u
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i32_load16u_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    # load memory location
     loadh [t0], t1
     pushInt32(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_load8s_mem, macro()
     # i64.load8_s
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i64_load8s_mem_slow_path)
-    loadbsq [t0], t1
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    # load memory location
+    loadb [t0], t1
+    sxb2q t1, t1
     pushInt64(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_load8u_mem, macro()
     # i64.load8_u
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i64_load8u_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    # load memory location
     loadb [t0], t1
     pushInt64(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_load16s_mem, macro()
     # i64.load16_s
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i64_load16s_mem_slow_path)
-    loadhsq [t0], t1
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    # load memory location
+    loadh [t0], t1
+    sxh2q t1, t1
     pushInt64(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_load16u_mem, macro()
     # i64.load16_u
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i64_load16u_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    # load memory location
     loadh [t0], t1
     pushInt64(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_load32s_mem, macro()
     # i64.load32_s
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i64_load32s_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    # load memory location
     loadi [t0], t1
     sxi2q t1, t1
     pushInt64(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_load32u_mem, macro()
     # i64.load8_s
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i64_load32u_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    # load memory location
     loadi [t0], t1
     pushInt64(t1)
 
-    advancePC(3)
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1148,10 +1144,12 @@ ipintOp(_i32_store_mem, macro()
     # pop data
     popInt32(t3)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i32_store_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    # store to memory location
     storei t3, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1160,10 +1158,12 @@ ipintOp(_i64_store_mem, macro()
     # pop data
     popInt64(t3)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_i64_store_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)
+    # store to memory location
     storeq t3, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1172,10 +1172,12 @@ ipintOp(_f32_store_mem, macro()
     # pop data
     popFloat32(ft0)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_f32_store_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    # store to memory location
     storef ft0, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1184,10 +1186,12 @@ ipintOp(_f64_store_mem, macro()
     # pop data
     popFloat64(ft0)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_f64_store_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)
+    # store to memory location
     stored ft0, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1196,10 +1200,12 @@ ipintOp(_i32_store8_mem, macro()
     # pop data
     popInt32(t3)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i32_store8_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    # store to memory location
     storeb t3, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1208,10 +1214,12 @@ ipintOp(_i32_store16_mem, macro()
     # pop data
     popInt32(t3)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i32_store16_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    # store to memory location
     storeh t3, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1220,10 +1228,12 @@ ipintOp(_i64_store8_mem, macro()
     # pop data
     popInt64(t3)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i64_store8_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    # store to memory location
     storeb t3, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1232,10 +1242,12 @@ ipintOp(_i64_store16_mem, macro()
     # pop data
     popInt64(t3)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i64_store16_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    # store to memory location
     storeh t3, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
@@ -1244,16 +1256,18 @@ ipintOp(_i64_store32_mem, macro()
     # pop data
     popInt64(t3)
     # pop index
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i64_store32_mem_slow_path)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    # store to memory location
     storei t3, [t0]
-    advancePC(3)
+
+    advancePCByReg(t4)
     nextIPIntInstruction()
 end)
 
 ipintOp(_memory_size, macro()
-    loadb IPInt::MemorySizeMetadata::memoryIndex[MC], t0
-    advanceMC(constexpr (sizeof(IPInt::MemorySizeMetadata)))
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata)))
     btinz t0, .callMemorySize
     loadp constexpr (JSWebAssemblyInstance::offsetOfCachedMemory0Size())[wasmInstance], t0 # size of memory 0
     jmp .doneLoadingMemorySize
@@ -1271,8 +1285,8 @@ end)
 
 ipintOp(_memory_grow, macro()
     popInt32(a1)
-    loadb IPInt::MemoryGrowMetadata::memoryIndex[MC], a2
-    advanceMC(constexpr (sizeof(IPInt::MemoryGrowMetadata)))
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a2
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata)))
     operationCall(macro() cCall3(_ipint_extern_memory_grow) end)
     pushInt32(r0)
     ipintReloadMemory()
@@ -1285,26 +1299,38 @@ end)
     ################################
 
 ipintOp(_i32_const, macro()
-    # i32.const - decode signed LEB128 from bytecode
+    # i32.const
+    loadb IPInt::InstructionLengthMetadata::length[MC], t1
+    bigteq t1, 2, .ipint_i32_const_slowpath
     loadb 1[PC], t0
-    bbaeq t0, 0x80, .ipint_i32_const_slow_path
-    # single byte: sign extend from 7 bits
-    lshifti 25, t0
-    rshifti 25, t0
+    lshiftq 7, t1
+    orq t1, t0
+    sxb2i t0, t0
     pushInt32(t0)
     advancePC(2)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
+    nextIPIntInstruction()
+.ipint_i32_const_slowpath:
+    # Load pre-computed value from metadata
+    loadi IPInt::Const32Metadata::value[MC], t0
+    # Push to stack
+    pushInt32(t0)
+
+    advancePCByReg(t1)
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata)))
     nextIPIntInstruction()
 end)
 
 ipintOp(_i64_const, macro()
-    # i64.const - decode signed LEB128 from bytecode
-    loadb 1[PC], t0
-    bbaeq t0, 0x80, .ipint_i64_const_slow_path
-    # single byte: sign extend from 7 bits
-    lshiftq 57, t0
-    rshiftq 57, t0
+    # i64.const
+    # Load pre-computed value from metadata
+    loadq IPInt::Const64Metadata::value[MC], t0
+    # Push to stack
     pushInt64(t0)
-    advancePC(2)
+    loadb IPInt::Const64Metadata::instructionLength[MC], t0
+
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::Const64Metadata)))
     nextIPIntInstruction()
 end)
 
@@ -1753,10 +1779,10 @@ ipintOp(_i32_div_s, macro()
     # i32.div_s
     popInt32(t1)
     popInt32(t0)
-    btiz t1, _ipint_throw_DivisionByZero
+    btiz t1, .ipint_i32_div_s_throwDivisionByZero
 
     bineq t1, -1, .ipint_i32_div_s_safe
-    bieq t0, constexpr INT32_MIN, _ipint_throw_IntegerOverflow
+    bieq t0, constexpr INT32_MIN, .ipint_i32_div_s_throwIntegerOverflow
 
 .ipint_i32_div_s_safe:
     if X86_64
@@ -1772,13 +1798,19 @@ ipintOp(_i32_div_s, macro()
     pushInt32(t0)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i32_div_s_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
+
+.ipint_i32_div_s_throwIntegerOverflow:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(IntegerOverflow)
 end)
 
 ipintOp(_i32_div_u, macro()
     # i32.div_u
     popInt32(t1)
     popInt32(t0)
-    btiz t1, _ipint_throw_DivisionByZero
+    btiz t1, .ipint_i32_div_u_throwDivisionByZero
 
     if X86_64
         xori t2, t2
@@ -1791,6 +1823,9 @@ ipintOp(_i32_div_u, macro()
     pushInt32(t0)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i32_div_u_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
 end)
 
 ipintOp(_i32_rem_s, macro()
@@ -1798,7 +1833,7 @@ ipintOp(_i32_rem_s, macro()
     popInt32(t1)
     popInt32(t0)
 
-    btiz t1, _ipint_throw_DivisionByZero
+    btiz t1, .ipint_i32_rem_s_throwDivisionByZero
 
     bineq t1, -1, .ipint_i32_rem_s_safe
     bineq t0, constexpr INT32_MIN, .ipint_i32_rem_s_safe
@@ -1826,13 +1861,16 @@ ipintOp(_i32_rem_s, macro()
     pushInt32(t2)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i32_rem_s_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
 end)
 
 ipintOp(_i32_rem_u, macro()
     # i32.rem_u
     popInt32(t1)
     popInt32(t0)
-    btiz t1, _ipint_throw_DivisionByZero
+    btiz t1, .ipint_i32_rem_u_throwDivisionByZero
 
     if X86_64
         xori t2, t2
@@ -1849,6 +1887,9 @@ ipintOp(_i32_rem_u, macro()
     pushInt32(t2)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i32_rem_u_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
 end)
 
 ipintOp(_i32_and, macro()
@@ -2010,10 +2051,10 @@ ipintOp(_i64_div_s, macro()
     # i64.div_s
     popInt64(t1)
     popInt64(t0)
-    btqz t1, _ipint_throw_DivisionByZero
+    btqz t1, .ipint_i64_div_s_throwDivisionByZero
 
     bqneq t1, -1, .ipint_i64_div_s_safe
-    bqeq t0, constexpr INT64_MIN, _ipint_throw_IntegerOverflow
+    bqeq t0, constexpr INT64_MIN, .ipint_i64_div_s_throwIntegerOverflow
 
 .ipint_i64_div_s_safe:
     if X86_64
@@ -2029,13 +2070,19 @@ ipintOp(_i64_div_s, macro()
     pushInt64(t0)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i64_div_s_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
+
+.ipint_i64_div_s_throwIntegerOverflow:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(IntegerOverflow)
 end)
 
 ipintOp(_i64_div_u, macro()
     # i64.div_u
     popInt64(t1)
     popInt64(t0)
-    btqz t1, _ipint_throw_DivisionByZero
+    btqz t1, .ipint_i64_div_u_throwDivisionByZero
 
     if X86_64
         xorq t2, t2
@@ -2048,6 +2095,9 @@ ipintOp(_i64_div_u, macro()
     pushInt64(t0)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i64_div_u_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
 end)
 
 ipintOp(_i64_rem_s, macro()
@@ -2055,7 +2105,7 @@ ipintOp(_i64_rem_s, macro()
     popInt64(t1)
     popInt64(t0)
 
-    btqz t1, _ipint_throw_DivisionByZero
+    btqz t1, .ipint_i64_rem_s_throwDivisionByZero
 
     bqneq t1, -1, .ipint_i64_rem_s_safe
     bqneq t0, constexpr INT64_MIN, .ipint_i64_rem_s_safe
@@ -2083,13 +2133,16 @@ ipintOp(_i64_rem_s, macro()
     pushInt64(t2)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i64_rem_s_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
 end)
 
 ipintOp(_i64_rem_u, macro()
     # i64.rem_u
     popInt64(t1)
     popInt64(t0)
-    btqz t1, _ipint_throw_DivisionByZero
+    btqz t1, .ipint_i64_rem_u_throwDivisionByZero
 
     if X86_64
         xorq t2, t2
@@ -2106,6 +2159,9 @@ ipintOp(_i64_rem_u, macro()
     pushInt64(t2)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_i64_rem_u_throwDivisionByZero:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
 end)
 
 ipintOp(_i64_and, macro()
@@ -2621,67 +2677,76 @@ ipintOp(_i32_trunc_f32_s, macro()
     popFloat32(ft0)
     move 0xcf000000, t0 # INT32_MIN (Note that INT32_MIN - 1.0 in float is the same as INT32_MIN in float).
     fi2f t0, ft1
-    bfltun ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfltun ft0, ft1, .ipint_trunc_i32_f32_s_outOfBoundsTrunc
 
     move 0x4f000000, t0 # -INT32_MIN
     fi2f t0, ft1
-    bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfgtequn ft0, ft1, .ipint_trunc_i32_f32_s_outOfBoundsTrunc
 
     truncatef2is ft0, t0
     pushInt32(t0)
     advancePC(1)
     nextIPIntInstruction()
 
+.ipint_trunc_i32_f32_s_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_i32_trunc_f32_u, macro()
     popFloat32(ft0)
     move 0xbf800000, t0 # -1.0
     fi2f t0, ft1
-    bfltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfltequn ft0, ft1, .ipint_trunc_i32_f32_u_outOfBoundsTrunc
 
     move 0x4f800000, t0 # INT32_MIN * -2.0
     fi2f t0, ft1
-    bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfgtequn ft0, ft1, .ipint_trunc_i32_f32_u_outOfBoundsTrunc
 
     truncatef2i ft0, t0
     pushInt32(t0)
     advancePC(1)
     nextIPIntInstruction()
 
+.ipint_trunc_i32_f32_u_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_i32_trunc_f64_s, macro()
     popFloat64(ft0)
     move 0xc1e0000000200000, t0 # INT32_MIN - 1.0
     fq2d t0, ft1
-    bdltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdltequn ft0, ft1, .ipint_trunc_i32_f64_s_outOfBoundsTrunc
 
     move 0x41e0000000000000, t0 # -INT32_MIN
     fq2d t0, ft1
-    bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdgtequn ft0, ft1, .ipint_trunc_i32_f64_s_outOfBoundsTrunc
 
     truncated2is ft0, t0
     pushInt32(t0)
     advancePC(1)
     nextIPIntInstruction()
 
+.ipint_trunc_i32_f64_s_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_i32_trunc_f64_u, macro()
     popFloat64(ft0)
     move 0xbff0000000000000, t0 # -1.0
     fq2d t0, ft1
-    bdltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdltequn ft0, ft1, .ipint_trunc_i32_f64_u_outOfBoundsTrunc
 
     move 0x41f0000000000000, t0 # INT32_MIN * -2.0
     fq2d t0, ft1
-    bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdgtequn ft0, ft1, .ipint_trunc_i32_f64_u_outOfBoundsTrunc
 
     truncated2i ft0, t0
     pushInt32(t0)
     advancePC(1)
     nextIPIntInstruction()
+
+.ipint_trunc_i32_f64_u_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_i64_extend_i32_s, macro()
@@ -2706,68 +2771,76 @@ ipintOp(_i64_trunc_f32_s, macro()
     popFloat32(ft0)
     move 0xdf000000, t0 # INT64_MIN
     fi2f t0, ft1
-    bfltun ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfltun ft0, ft1, .ipint_trunc_i64_f32_s_outOfBoundsTrunc
 
     move 0x5f000000, t0 # -INT64_MIN
     fi2f t0, ft1
-    bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfgtequn ft0, ft1, .ipint_trunc_i64_f32_s_outOfBoundsTrunc
 
     truncatef2qs ft0, t0
     pushInt64(t0)
     advancePC(1)
     nextIPIntInstruction()
 
+.ipint_trunc_i64_f32_s_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_i64_trunc_f32_u, macro()
     popFloat32(ft0)
     move 0xbf800000, t0 # -1.0
     fi2f t0, ft1
-    bfltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfltequn ft0, ft1, .ipint_i64_f32_u_outOfBoundsTrunc
 
     move 0x5f800000, t0 # INT64_MIN * -2.0
     fi2f t0, ft1
-    bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bfgtequn ft0, ft1, .ipint_i64_f32_u_outOfBoundsTrunc
 
     truncatef2q ft0, t0
     pushInt64(t0)
     advancePC(1)
     nextIPIntInstruction()
 
+.ipint_i64_f32_u_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_i64_trunc_f64_s, macro()
     popFloat64(ft0)
     move 0xc3e0000000000000, t0 # INT64_MIN
     fq2d t0, ft1
-    bdltun ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdltun ft0, ft1, .ipint_i64_f64_s_outOfBoundsTrunc
 
     move 0x43e0000000000000, t0 # -INT64_MIN
     fq2d t0, ft1
-    bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdgtequn ft0, ft1, .ipint_i64_f64_s_outOfBoundsTrunc
 
     truncated2qs ft0, t0
     pushInt64(t0)
     advancePC(1)
     nextIPIntInstruction()
 
+.ipint_i64_f64_s_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_i64_trunc_f64_u, macro()
     popFloat64(ft0)
     move 0xbff0000000000000, t0 # -1.0
     fq2d t0, ft1
-    bdltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdltequn ft0, ft1, .ipint_i64_f64_u_outOfBoundsTrunc
 
     move 0x43f0000000000000, t0 # INT64_MIN * -2.0
     fq2d t0, ft1
-    bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc
+    bdgtequn ft0, ft1, .ipint_i64_f64_u_outOfBoundsTrunc
 
     truncated2q ft0, t0
     pushInt64(t0)
     advancePC(1)
     nextIPIntInstruction()
 
+.ipint_i64_f64_u_outOfBoundsTrunc:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
 end)
 
 ipintOp(_f32_convert_i32_s, macro()
@@ -2952,11 +3025,11 @@ reservedOpcode(0xcf)
     #####################
 
 ipintOp(_ref_null_t, macro()
-    # Push null value, skip heap type LEB128 in bytecode
-    move ValueNull, t0
+    loadi IPInt::Const32Metadata::value[MC], t0
     pushQuad(t0)
-    leap 1[PC], PC
-    skipLEB128(PC, t0)
+    loadb IPInt::Const32Metadata::instructionLength[MC], t0
+    advancePC(t0)
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata)))
     nextIPIntInstruction()
 end)
 
@@ -2969,12 +3042,12 @@ ipintOp(_ref_is_null, macro()
 end)
 
 ipintOp(_ref_func, macro()
-    loadi IPInt::RefFuncMetadata::index[MC], a1
+    loadi IPInt::Const32Metadata::value[MC], a1
     operationCall(macro() cCall2(_ipint_extern_ref_func) end)
     pushQuad(r0)
-    loadb IPInt::RefFuncMetadata::instructionLength[MC], t0
+    loadb IPInt::Const32Metadata::instructionLength[MC], t0
     advancePC(t0)
-    advanceMC(constexpr (sizeof(IPInt::RefFuncMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata)))
     nextIPIntInstruction()
 end)
 
@@ -2989,9 +3062,11 @@ end)
 
 ipintOp(_ref_as_non_null, macro()
     loadq [sp], t0
-    bqeq t0, ValueNull, _ipint_throw_NullRefAsNonNull
+    bqeq t0, ValueNull, .ref_as_non_null_nullRef
     advancePC(1)
     nextIPIntInstruction()
+.ref_as_non_null_nullRef:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullRefAsNonNull)
 end)
 
 ipintOp(_br_on_null, macro()
@@ -3061,17 +3136,16 @@ reservedOpcode(0xfa)
 # the changes should be matched in IPINT_INSTRUCTIONS in Tools/lldb/debug_ipint.py
 
 ipintOp(_gc_prefix, macro()
-    leap 1[PC], t4
-    decodeLEBVarUInt(t0, t4, t1, t2)
+    decodeLEBVarUInt32(1, t0, t1, t2, t3, t4)
     # Security guarantee: always less than 30 (0x00 -> 0x1e)
     biaeq t0, 0x1f, .ipint_gc_nonexistent
     leap _os_script_config_storage, t1
     loadp JSC::LLInt::OpcodeConfig::ipint_gc_dispatch_base[t1], t1
     if ARM64 or ARM64E
-        addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0
+        addlshiftp t1, t0, 8, t0
         jmp t0
     elsif X86_64
-        lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0
+        lshiftq 8, t0
         addq t1, t0
         jmp t0
     end
@@ -3081,17 +3155,16 @@ ipintOp(_gc_prefix, macro()
 end)
 
 ipintOp(_conversion_prefix, macro()
-    leap 1[PC], t4
-    decodeLEBVarUInt(t0, t4, t1, t2)
-    # Security guarantee: always less than 23 (0x00 -> 0x16)
-    biaeq t0, 0x17, .ipint_conversion_nonexistent
+    decodeLEBVarUInt32(1, t0, t1, t2, t3, t4)
+    # Security guarantee: always less than 18 (0x00 -> 0x11)
+    biaeq t0, 0x12, .ipint_conversion_nonexistent
     leap _os_script_config_storage, t1
     loadp JSC::LLInt::OpcodeConfig::ipint_conversion_dispatch_base[t1], t1
     if ARM64 or ARM64E
-        addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0
+        addlshiftp t1, t0, 8, t0
         jmp t0
     elsif X86_64
-        lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0
+        lshiftq 8, t0
         addq t1, t0
         jmp t0
     end
@@ -3101,17 +3174,16 @@ ipintOp(_conversion_prefix, macro()
 end)
 
 ipintOp(_simd_prefix, macro()
-    leap 1[PC], t4
-    decodeLEBVarUInt(t0, t4, t1, t2)
+    decodeLEBVarUInt32(1, t0, t1, t2, t3, t4)
     # Security guarantee: always less than 256 (0x00 -> 0xff)
     biaeq t0, 0x100, .ipint_simd_nonexistent
     leap _os_script_config_storage, t1
     loadp JSC::LLInt::OpcodeConfig::ipint_simd_dispatch_base[t1], t1
     if ARM64 or ARM64E
-        addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0
+        addlshiftp t1, t0, 8, t0
         jmp t0
     elsif X86_64
-        lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0
+        lshiftq 8, t0
         addq t1, t0
         jmp t0
     end
@@ -3121,17 +3193,16 @@ ipintOp(_simd_prefix, macro()
 end)
 
 ipintOp(_atomic_prefix, macro()
-    leap 1[PC], t4
-    decodeLEBVarUInt(t0, t4, t1, t2)
+    decodeLEBVarUInt32(1, t0, t1, t2, t3, t4)
     # Security guarantee: always less than 78 (0x00 -> 0x4e)
     biaeq t0, 0x4f, .ipint_atomic_nonexistent
     leap _os_script_config_storage, t1
     loadp JSC::LLInt::OpcodeConfig::ipint_atomic_dispatch_base[t1], t1
     if ARM64 or ARM64E
-        addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt))), t0
+        addlshiftp t1, t0, constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt)), t0
         jmp t0
     elsif X86_64
-        lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt))), t0
+        lshiftq constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt)), t0
         addq t1, t0
         jmp t0
     end
@@ -3352,13 +3423,16 @@ end)
 
 ipintOp(_array_len, macro()
     popQuad(t0)  # array into t0
-    bqeq t0, ValueNull, _ipint_throw_NullAccess
+    bqeq t0, ValueNull, .nullArray
     loadi JSWebAssemblyArray::m_size[t0], t0
     pushInt32(t0)
     loadb IPInt::InstructionLengthMetadata::length[MC], t0
     advancePCByReg(t0)
     advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
+
+.nullArray:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullAccess)
 end)
 
 ipintOp(_array_fill, macro()
@@ -3536,18 +3610,20 @@ end)
 
 ipintOp(_i31_get_s, macro()
     popQuad(t0)
-    bqeq t0, ValueNull, _ipint_throw_NullI31Get
+    bqeq t0, ValueNull, .i31_get_throw
     pushInt32(t0)
 
     loadb IPInt::InstructionLengthMetadata::length[MC], t0
     advancePCByReg(t0)
     advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
+.i31_get_throw:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullI31Get)
 end)
 
 ipintOp(_i31_get_u, macro()
     popQuad(t0)
-    bqeq t0, ValueNull, _ipint_throw_NullI31Get
+    bqeq t0, ValueNull, .i31_get_throw
     andq 0x7fffffff, t0
     pushInt32(t0)
 
@@ -3555,6 +3631,8 @@ ipintOp(_i31_get_u, macro()
     advancePCByReg(t0)
     advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
+.i31_get_throw:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullI31Get)
 end)
 
     #############################
@@ -3834,56 +3912,60 @@ end)
 
 ipintOp(_memory_init, macro()
     # memory.init
-    loadb IPInt::MemoryInitMetadata::memoryIndex[MC], a3
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a3
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata)))
     move sp, a2
-    loadi IPInt::MemoryInitMetadata::dataIndex[MC], a1
+    loadi 1[MC], a1
     operationCallMayThrow(macro() cCall4(_ipint_extern_memory_init) end)
     addq 3 * StackValueSize, sp
-    loadb IPInt::MemoryInitMetadata::instructionLength[MC], t0
+    loadb [MC], t0
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::MemoryInitMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) # FIXME: confirm this size matches the raw [MC] / 1[MC] reads above
     nextIPIntInstruction()
 end)
 
 ipintOp(_data_drop, macro()
     # data.drop
-    loadi IPInt::DataAccessMetadata::index[MC], a1
+    loadi 1[MC], a1
     operationCall(macro() cCall2(_ipint_extern_data_drop) end)
-    loadb IPInt::DataAccessMetadata::instructionLength[MC], t0
+    loadb [MC], t0
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::DataAccessMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) # FIXME: confirm this size matches the raw [MC] / 1[MC] reads above
     nextIPIntInstruction()
 end)
 
 ipintOp(_memory_copy, macro()
     # memory.copy
-    loadb IPInt::MemoryCopyMetadata::dstMemoryIndex[MC], a1
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a1
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata)))
     pushQuad(a1)
-    loadb IPInt::MemoryCopyMetadata::srcMemoryIndex[MC], a1
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a1
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata)))
     pushQuad(a1)
     move sp, a1
     # starting at top of stack: src memory index, dst memory index, n, s, d
     operationCallMayThrow(macro() cCall2(_ipint_extern_memory_copy) end)
     addq 5 * StackValueSize, sp
 
-    loadb IPInt::MemoryCopyMetadata::instructionLength[MC], t0
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::MemoryCopyMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
 ipintOp(_memory_fill, macro()
     # memory.fill
-    loadb IPInt::MemoryFillMetadata::memoryIndex[MC], a1
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a1
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata)))
     pushQuad(a1)
     move sp, a1
     # starting at top of stack: memory index, n, val, d
     operationCallMayThrow(macro() cCall2(_ipint_extern_memory_fill) end)
     addq 4 * StackValueSize, sp
 
-    loadb IPInt::MemoryFillMetadata::instructionLength[MC], t0
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::MemoryFillMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -3901,11 +3983,11 @@ end)
 
 ipintOp(_elem_drop, macro()
     # elem.drop
-    loadi IPInt::ElemDropMetadata::index[MC], a1
+    loadi IPInt::Const32Metadata::value[MC], a1 # a1 = the elem.drop index, held in the record's 32-bit value field
     operationCall(macro() cCall2(_ipint_extern_elem_drop) end)
-    loadb IPInt::ElemDropMetadata::instructionLength[MC], t0
+    loadb IPInt::Const32Metadata::instructionLength[MC], t0 # length is read from the same Const32Metadata record
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::ElemDropMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) # consume the whole record
     nextIPIntInstruction()
 end)
 
@@ -3936,12 +4018,12 @@ end)
 
 ipintOp(_table_size, macro()
     # table.size
-    loadi IPInt::TableAccessMetadata::index[MC], a1
+    loadi IPInt::Const32Metadata::value[MC], a1 # a1 = the table index, held in the record's 32-bit value field
     operationCall(macro() cCall2(_ipint_extern_table_size) end)
     pushQuad(r0)
-    loadb IPInt::TableAccessMetadata::instructionLength[MC], t0
+    loadb IPInt::Const32Metadata::instructionLength[MC], t0 # length is read from the same Const32Metadata record
     advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::TableAccessMetadata)))
+    advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) # consume the whole record
     nextIPIntInstruction()
 end)
 
@@ -3957,347 +4039,232 @@ ipintOp(_table_fill, macro()
     nextIPIntInstruction()
 end)
 
-reservedOpcode(misc_0x12)
-    break
-
-##################################
-## Wide Arithmetic Instructions ##
-##################################
-
-ipintOp(_i64_add128, macro()
-    # i64.add128: [lhsLo lhsHi rhsLo rhsHi] -> [resultLo resultHi]
-    # Stack layout (top first): sp[0]=rhsHi, sp[1]=rhsLo, sp[2]=lhsHi, sp[3]=lhsLo
-    popQuad(t3) # rhsHi
-    popQuad(t2) # rhsLo
-    popQuad(t1) # lhsHi
-    popQuad(t0) # lhsLo
-    if ARM64 or ARM64E
-        addqs t0, t2, t0   # resultLo = lhsLo + rhsLo, sets carry flag
-        adcq t1, t3, t1    # resultHi = lhsHi + rhsHi + carry flag
-    elsif X86_64
-        addq t2, t0        # resultLo = lhsLo + rhsLo, sets carry flag
-        adcq t3, t1        # resultHi = lhsHi + rhsHi + carry flag
-    end
-    pushQuad(t0)
-    pushQuad(t1)
-    move t4, PC
-    nextIPIntInstruction()
-end)
-
-ipintOp(_i64_sub128, macro()
-    # i64.sub128: [lhsLo lhsHi rhsLo rhsHi] -> [resultLo resultHi]
-    # Stack layout (top first): sp[0]=rhsHi, sp[1]=rhsLo, sp[2]=lhsHi, sp[3]=lhsLo
-    popQuad(t3) # rhsHi
-    popQuad(t2) # rhsLo
-    popQuad(t1) # lhsHi
-    popQuad(t0) # lhsLo
-    if ARM64 or ARM64E
-        subqs t0, t2, t0   # resultLo = lhsLo - rhsLo, sets carry flag (borrow)
-        sbcq t1, t3, t1    # resultHi = lhsHi - rhsHi - carry flag
-    elsif X86_64
-        subq t2, t0        # resultLo = lhsLo - rhsLo, sets carry flag (borrow)
-        sbcq t3, t1        # resultHi = lhsHi - rhsHi - carry flag
-    end
-    pushQuad(t0)
-    pushQuad(t1)
-    move t4, PC
-    nextIPIntInstruction()
-end)
-
-ipintOp(_i64_mul_wide_s, macro()
-    # i64.mul_wide_s: [lhs rhs] -> [resultLo resultHi]
-    # Stack layout (top first): sp[0]=rhs, sp[1]=lhs
-    popQuad(t1) # rhs
-    popQuad(t0) # lhs
-    if ARM64 or ARM64E
-        smulhq t0, t1, t2  # resultHi = smulh(lhs, rhs) - must precede mulq
-        mulq t1, t0        # resultLo = lhs * rhs
-    elsif X86_64
-        # t0 = rax
-        # t2 = rdx
-        smulhq t1          # imulq %rsi: rdx:rax = rax * rsi -> t0=resultLo, t2=resultHi
-    end
-    pushQuad(t0)
-    pushQuad(t2)
-    move t4, PC
-    nextIPIntInstruction()
-end)
-
-ipintOp(_i64_mul_wide_u, macro()
-    # i64.mul_wide_u: [lhs rhs] -> [resultLo resultHi]
-    # Stack layout (top first): sp[0]=rhs, sp[1]=lhs
-    popQuad(t1) # rhs
-    popQuad(t0) # lhs
-    if ARM64 or ARM64E
-        umulhq t0, t1, t2  # resultHi = umulh(lhs, rhs) - must precede mulq
-        mulq t1, t0        # resultLo = lhs * rhs
-    elsif X86_64
-        # t0 = rax
-        # t2 = rdx
-        umulhq t1          # mulq %rsi: rdx:rax = rax * rsi -> t0=resultLo, t2=resultHi
-    end
-    pushQuad(t0)
-    pushQuad(t2)
-    move t4, PC
-    nextIPIntInstruction()
-end)
-
     #######################
     ## SIMD Instructions ##
     #######################
 
-const ImmLaneIdxOffset = 0 # Offset from t4 (points past the decoded SIMD opcode)
+const ImmLaneIdxOffset = 2 # Byte offset from PC to the instruction's immediate (lane index, or the i8x16.shuffle lane bytes)
 const ImmLaneIdx16Mask = 0xf
 const ImmLaneIdx8Mask = 0x7
 const ImmLaneIdx4Mask = 0x3
 const ImmLaneIdx2Mask = 0x1
 
-# Platform-specific SIMD load macros (shared between fast and slow paths).
-# Input: t0 = host pointer (rax on x86_64). Output: v0 = loaded vector.
-# Clobbers: ft0 (ARM64), t1 (splat ops on ARM64).
-
-macro simdLoad8x8s()
-    if ARM64 or ARM64E
-        loadd [t0], ft0
-        emit "sxtl v16.8h, v0.8b"
-    elsif X86_64
-        emit "pmovsxbw (%rax), %xmm0"
-    else
-        break
-    end
-end
-
-macro simdLoad8x8u()
-    if ARM64 or ARM64E
-        loadd [t0], ft0
-        emit "uxtl v16.8h, v0.8b"
-    elsif X86_64
-        emit "pmovzxbw (%rax), %xmm0"
-    else
-        break
-    end
-end
-
-macro simdLoad16x4s()
-    if ARM64 or ARM64E
-        loadd [t0], ft0
-        emit "sxtl v16.4s, v0.4h"
-    elsif X86_64
-        emit "pmovsxwd (%rax), %xmm0"
-    else
-        break
-    end
-end
-
-macro simdLoad16x4u()
-    if ARM64 or ARM64E
-        loadd [t0], ft0
-        emit "uxtl v16.4s, v0.4h"
-    elsif X86_64
-        emit "pmovzxwd (%rax), %xmm0"
-    else
-        break
-    end
-end
-
-macro simdLoad32x2s()
-    if ARM64 or ARM64E
-        loadd [t0], ft0
-        emit "sxtl v16.2d, v0.2s"
-    elsif X86_64
-        emit "pmovsxdq (%rax), %xmm0"
-    else
-        break
-    end
-end
-
-macro simdLoad32x2u()
-    if ARM64 or ARM64E
-        loadd [t0], ft0
-        emit "uxtl v16.2d, v0.2s"
-    elsif X86_64
-        emit "pmovzxdq (%rax), %xmm0"
-    else
-        break
-    end
-end
-
-macro simdLoadSplat8()
-    if ARM64 or ARM64E
-        loadb [t0], t1
-        emit "dup v16.16b, w1"
-    elsif X86_64
-        emit "vpinsrb $0, (%rax), %xmm0, %xmm0"
-        emit "vpxor %xmm1, %xmm1, %xmm1"
-        emit "vpshufb %xmm1, %xmm0, %xmm0"
-    else
-        break
-    end
-end
+# 0xFD 0x00 - 0xFD 0x0B: memory
 
-macro simdLoadSplat16()
-    if ARM64 or ARM64E
-        loadh [t0], t1
-        emit "dup v16.8h, w1"
-    elsif X86_64
-        emit "vpinsrw $0, (%rax), %xmm0, %xmm0"
-        emit "vpshuflw $0, %xmm0, %xmm0"
-        emit "vpunpcklqdq %xmm0, %xmm0, %xmm0"
-    else
-        break
-    end
-end
+# Wrapper for SIMD load/store operations. Places linear address in t0 for memOp(), runs memOp(), then advances PC by t4 and dispatches.
+macro simdMemoryOp(accessSize, memOp)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, accessSize, t1, t2) # presumably leaves the instruction length in t4 (it is what advancePCByReg receives below) - confirm against the helper
 
-macro simdLoadSplat32()
-    if ARM64 or ARM64E
-        loadi [t0], t1
-        emit "dup v16.4s, w1"
-    elsif X86_64
-        emit "vbroadcastss (%rax), %xmm0"
-    else
-        break
-    end
-end
+    # memOp must not clobber t4: it is still needed by advancePCByReg(t4) after memOp() returns
+    memOp()
 
-macro simdLoadSplat64()
-    if ARM64 or ARM64E
-        loadq [t0], t1
-        emit "dup v16.2d, x1"
-    elsif X86_64
-        emit "vmovddup (%rax), %xmm0"
-    else
-        break
-    end
+    advancePCByReg(t4)
+    nextIPIntInstruction()
 end
 
-# 0xFD 0x00 - 0xFD 0x0B: memory
-
 ipintOp(_simd_v128_load_mem, macro()
     # v128.load
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 16, t1, t2, .simd_v128_load_slow_path)
-    loadv [t0], v0
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    simdMemoryOp(16, macro()
+        loadv [t0], v0 # t0 = address prepared by simdMemoryOp for a 16-byte access
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load_8x8s_mem, macro()
-    # v128.load8x8_s
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_8x8s_slow_path)
-    simdLoad8x8s()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load8x8_s - load 8 8-bit values, sign-extend each to i16
+    simdMemoryOp(8, macro()
+        if ARM64 or ARM64E
+            loadd [t0], ft0 # fetch the 64-bit source into ft0 before widening
+            # offlineasm ft0 = ARM v0
+            # offlineasm v0 = ARM v16
+            emit "sxtl v16.8h, v0.8b"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "pmovsxbw (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load_8x8u_mem, macro()
-    # v128.load8x8_u
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_8x8u_slow_path)
-    simdLoad8x8u()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load8x8_u - load 8 8-bit values, zero-extend each to i16
+    simdMemoryOp(8, macro()
+        if ARM64 or ARM64E
+            loadd [t0], ft0 # fetch the 64-bit source into ft0 before widening
+            # offlineasm ft0 = ARM v0
+            # offlineasm v0 = ARM v16
+            emit "uxtl v16.8h, v0.8b"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "pmovzxbw (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load_16x4s_mem, macro()
-    # v128.load16x4_s
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_16x4s_slow_path)
-    simdLoad16x4s()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load16x4_s - load 4 16-bit values, sign-extend each to i32
+    simdMemoryOp(8, macro()
+        if ARM64 or ARM64E
+            loadd [t0], ft0 # fetch the 64-bit source into ft0 before widening
+            # offlineasm ft0 = ARM v0
+            # offlineasm v0 = ARM v16
+            emit "sxtl v16.4s, v0.4h"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "pmovsxwd (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load_16x4u_mem, macro()
-    # v128.load16x4_u
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_16x4u_slow_path)
-    simdLoad16x4u()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load16x4_u - load 4 16-bit values, zero-extend each to i32
+    simdMemoryOp(8, macro()
+        if ARM64 or ARM64E
+            loadd [t0], ft0 # fetch the 64-bit source into ft0 before widening
+            # offlineasm ft0 = ARM v0
+            # offlineasm v0 = ARM v16
+            emit "uxtl v16.4s, v0.4h"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "pmovzxwd (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load_32x2s_mem, macro()
-    # v128.load32x2_s
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_32x2s_slow_path)
-    simdLoad32x2s()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load32x2_s - load 2 32-bit values, sign-extend each to i64
+    simdMemoryOp(8, macro()
+        if ARM64 or ARM64E
+            loadd [t0], ft0 # fetch the 64-bit source into ft0 before widening
+            # offlineasm ft0 = ARM v0
+            # offlineasm v0 = ARM v16
+            emit "sxtl v16.2d, v0.2s"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "pmovsxdq (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load_32x2u_mem, macro()
-    # v128.load32x2_u
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_32x2u_slow_path)
-    simdLoad32x2u()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load32x2_u - load 2 32-bit values, zero-extend each to i64
+    simdMemoryOp(8, macro()
+        if ARM64 or ARM64E
+            loadd [t0], ft0 # fetch the 64-bit source into ft0 before widening
+            # offlineasm ft0 = ARM v0
+            # offlineasm v0 = ARM v16
+            emit "uxtl v16.2d, v0.2s"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "pmovzxdq (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load8_splat_mem, macro()
-    # v128.load8_splat
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .simd_v128_load8_splat_slow_path)
-    simdLoadSplat8()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load8_splat - load 1 8-bit value and splat to all 16 lanes
+    simdMemoryOp(1, macro()
+        if ARM64 or ARM64E
+            loadb [t0], t1 # t1 is consumed by the dup below as w1
+            emit "dup v16.16b, w1"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "vpinsrb $0, (%rax), %xmm0, %xmm0" # place the loaded byte in lane 0
+            emit "vpxor %xmm1, %xmm1, %xmm1" # xmm1 = all-zero shuffle control
+            emit "vpshufb %xmm1, %xmm0, %xmm0" # zero control selects byte 0 for every lane
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load16_splat_mem, macro()
-    # v128.load16_splat
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .simd_v128_load16_splat_slow_path)
-    simdLoadSplat16()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load16_splat - load 1 16-bit value and splat to all 8 lanes
+    simdMemoryOp(2, macro()
+        if ARM64 or ARM64E
+            loadh [t0], t1 # t1 is consumed by the dup below as w1
+            emit "dup v16.8h, w1"
+        elsif X86_64
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "vpinsrw $0, (%rax), %xmm0, %xmm0" # place the loaded word in lane 0
+            emit "vpshuflw $0, %xmm0, %xmm0" # copy word 0 across the low four words
+            emit "vpunpcklqdq %xmm0, %xmm0, %xmm0" # duplicate the low quadword into the high half
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load32_splat_mem, macro()
-    # v128.load32_splat
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_load32_splat_slow_path)
-    simdLoadSplat32()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load32_splat - load 1 32-bit value and splat to all 4 lanes
+    simdMemoryOp(4, macro()
+        if ARM64 or ARM64E
+            loadi [t0], t1 # t1 is consumed by the dup below as w1
+            emit "dup v16.4s, w1"
+        elsif X86_64
+            # Load and broadcast 32-bit value directly from memory to all 4 dwords
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "vbroadcastss (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_load64_splat_mem, macro()
-    # v128.load64_splat
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load64_splat_slow_path)
-    simdLoadSplat64()
-    pushVec(v0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    # v128.load64_splat - load 1 64-bit value and splat to all 2 lanes
+    simdMemoryOp(8, macro()
+        if ARM64 or ARM64E
+            loadq [t0], t1 # t1 is consumed by the dup below as x1
+            emit "dup v16.2d, x1"
+        elsif X86_64
+            # Load and broadcast 64-bit value directly from memory to both qwords
+            # offlineasm t0 = x86_64 rax; (%rax) is the address set up by simdMemoryOp
+            emit "vmovddup (%rax), %xmm0"
+        else
+            break # Not implemented
+        end
+        pushVec(v0)
+    end)
 end)
 
 ipintOp(_simd_v128_store_mem, macro()
     # v128.store
     popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 16, t1, t2, .simd_v128_store_slow_path)
-    storev v0, [t0]
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    simdMemoryOp(16, macro()
+        storev v0, [t0] # v0 was popped above, before simdMemoryOp pops the memory index; t0 = address for the 16-byte store
+    end)
 end)
 
 # 0xFD 0x0C: v128.const
 ipintOp(_simd_v128_const, macro()
     # v128.const
-    loadv [t4], v0
+    loadv 2[PC], v0 # the vector immediate is read from 2 bytes past PC
     pushVec(v0)
-    leap 16[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 = instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
@@ -4308,7 +4275,7 @@ ipintOp(_simd_i8x16_shuffle, macro()
     if ARM64 or ARM64E
         popVec(v1)
         popVec(v0)
-        loadv [t4], v2
+        loadv ImmLaneIdxOffset[PC], v2
         emit "tbl v16.16b, {v16.16b, v17.16b}, v18.16b"
         pushVec(v0)
     else
@@ -4319,7 +4286,7 @@ ipintOp(_simd_i8x16_shuffle, macro()
         move 0, t0
 
     .shuffleLoop:
-        loadb [t4, t0, 1], t1
+        loadb ImmLaneIdxOffset[PC, t0, 1], t1
 
         bigt t1, 31, .outOfBounds
         bigt t1, 15, .useRightVector
@@ -4350,7 +4317,9 @@ ipintOp(_simd_i8x16_shuffle, macro()
         addp 2 * V128ISize, sp            # Pop temp result and right vector
     end
 
-    leap 16[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4377,7 +4346,9 @@ ipintOp(_simd_i8x16_swizzle, macro()
     end
 
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4398,7 +4369,9 @@ ipintOp(_simd_i8x16_splat, macro()
     end
 
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4418,9 +4391,11 @@ ipintOp(_simd_i16x8_splat, macro()
     end
 
     pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-end)
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
+    nextIPIntInstruction()
+end)
 
 ipintOp(_simd_i32x4_splat, macro()
     # i32x4.splat - splat i32 value to all 4 32-bit lanes
@@ -4437,7 +4412,9 @@ ipintOp(_simd_i32x4_splat, macro()
     end
 
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4456,7 +4433,9 @@ ipintOp(_simd_i64x2_splat, macro()
     end
 
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4474,7 +4453,9 @@ ipintOp(_simd_f32x4_splat, macro()
     end
 
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4492,156 +4473,186 @@ ipintOp(_simd_f64x2_splat, macro()
     end
 
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
 # 0xFD 0x15 - 0xFD 0x22: extract and replace lanes
 ipintOp(_simd_i8x16_extract_lane_s, macro()
     # i8x16.extract_lane_s (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx16Mask, t0
     loadbsi [sp, t0], t0
     addp V128ISize, sp
     pushInt32(t0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i8x16_extract_lane_u, macro()
     # i8x16.extract_lane_u (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx16Mask, t0
     loadb [sp, t0], t0
     addp V128ISize, sp
     pushInt32(t0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i8x16_replace_lane, macro()
     # i8x16.replace_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx16Mask, t0
     popInt32(t1)  # value to replace with
     storeb t1, [sp, t0]  # replace the byte at lane index
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i16x8_extract_lane_s, macro()
     # i16x8.extract_lane_s (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx8Mask, t0
     loadhsi [sp, t0, 2], t0
     addp V128ISize, sp
     pushInt32(t0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i16x8_extract_lane_u, macro()
     # i16x8.extract_lane_u (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx8Mask, t0
     loadh [sp, t0, 2], t0
     addp V128ISize, sp
     pushInt32(t0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i16x8_replace_lane, macro()
     # i16x8.replace_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx8Mask, t0
     popInt32(t1)  # value to replace with
     storeh t1, [sp, t0, 2]  # replace the 16-bit value at lane index
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i32x4_extract_lane, macro()
     # i32x4.extract_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx4Mask, t0
     loadi [sp, t0, 4], t0
     addp V128ISize, sp
     pushInt32(t0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i32x4_replace_lane, macro()
     # i32x4.replace_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx4Mask, t0
     popInt32(t1)  # value to replace with
     storei t1, [sp, t0, 4]  # replace the 32-bit value at lane index
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i64x2_extract_lane, macro()
     # i64x2.extract_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx2Mask, t0
     loadq [sp, t0, 8], t0
     addp V128ISize, sp
     pushInt64(t0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_i64x2_replace_lane, macro()
     # i64x2.replace_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx2Mask, t0
     popInt64(t1)  # value to replace with
     storeq t1, [sp, t0, 8]  # replace the 64-bit value at lane index
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_f32x4_extract_lane, macro()
     # f32x4.extract_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx4Mask, t0
     loadf [sp, t0, 4], ft0
     addp V128ISize, sp
     pushFloat32(ft0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_f32x4_replace_lane, macro()
     # f32x4.replace_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx4Mask, t0
     popFloat32(ft0)  # value to replace with
     storef ft0, [sp, t0, 4]  # replace the 32-bit float at lane index
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_f64x2_extract_lane, macro()
     # f64x2.extract_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx2Mask, t0
     loadd [sp, t0, 8], ft0
     addp V128ISize, sp
     pushFloat64(ft0)
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_f64x2_replace_lane, macro()
     # f64x2.replace_lane (lane)
-    loadb ImmLaneIdxOffset[t4], t0
+    loadb ImmLaneIdxOffset[PC], t0 # lane index immediate, read from the bytecode at PC + ImmLaneIdxOffset
     andi ImmLaneIdx2Mask, t0
     popFloat64(ft0)  # value to replace with
     stored ft0, [sp, t0, 8]  # replace the 64-bit float at lane index
-    leap 1[t4], PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # t0 reused: instruction length byte stored in metadata
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # consume the length record
     nextIPIntInstruction()
 end)
 
@@ -4658,7 +4669,9 @@ ipintOp(_simd_i8x16_eq, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4679,7 +4692,9 @@ ipintOp(_simd_i8x16_ne, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4697,7 +4712,9 @@ ipintOp(_simd_i8x16_lt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4718,7 +4735,9 @@ ipintOp(_simd_i8x16_lt_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4734,7 +4753,9 @@ ipintOp(_simd_i8x16_gt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4754,7 +4775,9 @@ ipintOp(_simd_i8x16_gt_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4774,7 +4797,9 @@ ipintOp(_simd_i8x16_le_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4793,7 +4818,9 @@ ipintOp(_simd_i8x16_le_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4812,7 +4839,9 @@ ipintOp(_simd_i8x16_ge_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4830,7 +4859,9 @@ ipintOp(_simd_i8x16_ge_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4848,7 +4879,9 @@ ipintOp(_simd_i16x8_eq, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4868,7 +4901,9 @@ ipintOp(_simd_i16x8_ne, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4886,7 +4921,9 @@ ipintOp(_simd_i16x8_lt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4907,7 +4944,9 @@ ipintOp(_simd_i16x8_lt_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4923,7 +4962,9 @@ ipintOp(_simd_i16x8_gt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4943,7 +4984,9 @@ ipintOp(_simd_i16x8_gt_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4963,7 +5006,9 @@ ipintOp(_simd_i16x8_le_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -4982,7 +5027,9 @@ ipintOp(_simd_i16x8_le_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5001,7 +5048,9 @@ ipintOp(_simd_i16x8_ge_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5019,7 +5068,9 @@ ipintOp(_simd_i16x8_ge_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5036,7 +5087,9 @@ ipintOp(_simd_i32x4_eq, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5056,7 +5109,9 @@ ipintOp(_simd_i32x4_ne, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5074,7 +5129,9 @@ ipintOp(_simd_i32x4_lt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5095,7 +5152,9 @@ ipintOp(_simd_i32x4_lt_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5111,7 +5170,9 @@ ipintOp(_simd_i32x4_gt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5131,7 +5192,9 @@ ipintOp(_simd_i32x4_gt_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5151,7 +5214,9 @@ ipintOp(_simd_i32x4_le_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5170,7 +5235,9 @@ ipintOp(_simd_i32x4_le_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5189,7 +5256,9 @@ ipintOp(_simd_i32x4_ge_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5207,7 +5276,9 @@ ipintOp(_simd_i32x4_ge_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5224,7 +5295,9 @@ ipintOp(_simd_f32x4_eq, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5241,7 +5314,9 @@ ipintOp(_simd_f32x4_ne, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5258,7 +5333,9 @@ ipintOp(_simd_f32x4_lt, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5274,7 +5351,9 @@ ipintOp(_simd_f32x4_gt, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5291,7 +5370,9 @@ ipintOp(_simd_f32x4_le, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5307,7 +5388,9 @@ ipintOp(_simd_f32x4_ge, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5324,7 +5407,9 @@ ipintOp(_simd_f64x2_eq, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5341,7 +5426,9 @@ ipintOp(_simd_f64x2_ne, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5358,7 +5445,9 @@ ipintOp(_simd_f64x2_lt, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5374,7 +5463,9 @@ ipintOp(_simd_f64x2_gt, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5391,7 +5482,9 @@ ipintOp(_simd_f64x2_le, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5407,7 +5500,9 @@ ipintOp(_simd_f64x2_ge, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5425,7 +5520,9 @@ ipintOp(_simd_v128_not, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5441,7 +5538,9 @@ ipintOp(_simd_v128_and, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5457,7 +5556,9 @@ ipintOp(_simd_v128_andnot, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5473,7 +5574,9 @@ ipintOp(_simd_v128_or, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5489,7 +5592,9 @@ ipintOp(_simd_v128_xor, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5514,7 +5619,9 @@ ipintOp(_simd_v128_bitselect, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5537,150 +5644,290 @@ ipintOp(_simd_v128_any_true, macro()
         break # Not implemented
     end
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
 # 0xFD 0x54 - 0xFD 0x5D: v128 load/store lane
-# For load_lane: stack is [v128, i32_addr]. Pop addr, do memarg, load from memory,
-# read lane index, replace lane in the v128 still on stack.
-# For store_lane: stack is [v128, i32_addr]. Pop addr, do memarg, read lane index,
-# extract value from v128 on stack, pop v128, store to memory.
-# Lane index is the last byte of the instruction, right after the memarg.
+
+# If these SIMD ops used memoryOpAdvanceMCAndMakePointer, that macro would read
+# the memory index and advance MC, and then the handler would read the constant
+# and advance MC again. As a performance optimization, the lane load/store
+# handlers below read both metadata entries themselves and advance MC only once.
+
+macro ipintCheckMemoryBoundAndMakePointer(whichMemory, mem, scratch, size)
+    # Bounds-checks a size-byte access at offset mem of memory number whichMemory and overwrites mem with the computed pointer (memoryBase + mem); throws OutOfBoundsMemoryAccess when the check fails. Clobbers scratch.
+    btiz whichMemory, .checkBounds  # memory number 0: memoryBase / boundsCheckingSize are used as they are
+    # Nonzero memory number: overwrites whichMemory with the address of that memory's cached base/size pair, then loads memoryBase and boundsCheckingSize from it
+    mulp (constexpr (sizeof(JSWebAssemblyInstance::WasmMemoryBaseAndSize))), whichMemory
+    addp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0))), whichMemory
+    addp wasmInstance, whichMemory
+    loadp [whichMemory], memoryBase
+    loadp (constexpr (sizeof(void*)))[whichMemory], boundsCheckingSize
+    move 1, whichMemory # nonzero flag: restore base and size registers afterward if using nonzero memory
+.checkBounds:
+    # Memory indices are 32 bit. scratch = mem + size - 1 (offset of the last byte accessed); the access is allowed only when scratch is below boundsCheckingSize.
+    leap size - 1[mem], scratch
+    bpb scratch, boundsCheckingSize, .continuation
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess)
+.continuation:
+    addp memoryBase, mem
+    btiz whichMemory, .done  # flag is zero: the registers were never replaced, nothing to restore
+    loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0))) [wasmInstance], memoryBase
+    loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0) + sizeof(void*))) [wasmInstance], boundsCheckingSize
+.done:
+end
 
 ipintOp(_simd_v128_load8_lane_mem, macro()
+    # v128.load8_lane - load 8-bit value from memory and replace lane in existing vector
+
     popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .simd_v128_load8_lane_slow_path)
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 1)  # bounds-check a 1-byte access; t0 becomes the pointer
     loadb [t0], t0
-    loadb 2[t4], t1
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1  # t1 = instruction length from the metadata
+    advancePCByReg(t1)
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx16Mask, t1
+
+    # Push the vector back, then overwrite the selected lane in place on the stack with the loaded value
     pushVec(v0)
     storeb t0, [sp, t1]
-    leap 3[t4], PC
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_v128_load16_lane_mem, macro()
+    # v128.load16_lane - load 16-bit value from memory and replace lane in existing vector
+
     popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .simd_v128_load16_lane_slow_path)
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 2)  # bounds-check a 2-byte access; t0 becomes the pointer
     loadh [t0], t0
-    loadb 2[t4], t1
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1  # t1 = instruction length from the metadata
+    advancePCByReg(t1)
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx8Mask, t1
+
+    # Push the vector back, then overwrite the selected lane in place on the stack with the loaded value
     pushVec(v0)
     storeh t0, [sp, t1, 2]
-    leap 3[t4], PC
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_v128_load32_lane_mem, macro()
+    # v128.load32_lane - load 32-bit value from memory and replace lane in existing vector
+
     popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_load32_lane_slow_path)
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 4)  # bounds-check a 4-byte access; t0 becomes the pointer
     loadi [t0], t0
-    loadb 2[t4], t1
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1  # t1 = instruction length from the metadata
+    advancePCByReg(t1)
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx4Mask, t1
+
+    # Push the vector back, then overwrite the selected lane in place on the stack with the loaded value
     pushVec(v0)
     storei t0, [sp, t1, 4]
-    leap 3[t4], PC
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_v128_load64_lane_mem, macro()
+    # v128.load64_lane - load 64-bit value from memory and replace lane in existing vector
+
     popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load64_lane_slow_path)
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 8)  # bounds-check an 8-byte access; t0 becomes the pointer
     loadq [t0], t0
-    loadb 2[t4], t1
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1  # t1 = instruction length from the metadata
+    advancePCByReg(t1)
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx2Mask, t1
+
+    # Push the vector back, then overwrite the selected lane in place on the stack with the loaded value
     pushVec(v0)
     storeq t0, [sp, t1, 8]
-    leap 3[t4], PC
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_v128_store8_lane_mem, macro()
-    # Stack: [addr, v128] with v128 on top. Pop both, parse memarg, extract lane, store.
-    popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .simd_v128_store8_lane_slow_path)
-    loadb 2[t4], t1
+    # v128.store8_lane - extract 8-bit value from lane and store to memory
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0  # t0 = instruction length from the metadata
+    advancePCByReg(t0)  # NOTE(review): PC moves before the bounds check below can trap - confirm the trap path does not need the old PC
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx16Mask, t1
-    # Extract byte from v0 via temp push
-    pushVec(v0)
-    loadb [sp, t1], t1
-    addp V128ISize, sp
+
+    loadb [sp, t1], t1  # Load value from lane in vector on stack
+    addp V128ISize, sp  # Pop the vector
+
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 1)  # bounds-check a 1-byte access; t0 becomes the pointer
+
     storeb t1, [t0]
-    leap 3[t4], PC
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_v128_store16_lane_mem, macro()
-    popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .simd_v128_store16_lane_slow_path)
-    loadb 2[t4], t1
+    # v128.store16_lane - extract 16-bit value from lane and store to memory
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0  # t0 = instruction length from the metadata
+    advancePCByReg(t0)  # NOTE(review): PC moves before the bounds check below can trap - confirm the trap path does not need the old PC
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx8Mask, t1
-    pushVec(v0)
-    loadh [sp, t1, 2], t1
-    addp V128ISize, sp
-    storeh t1, [t0]
-    leap 3[t4], PC
-    nextIPIntInstruction()
-end)
 
-ipintOp(_simd_v128_store32_lane_mem, macro()
-    popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_store32_lane_slow_path)
-    loadb 2[t4], t1
+    loadh [sp, t1, 2], t1   # Load value from lane in vector on stack
+    addp V128ISize, sp      # Pop the vector
+
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 2)  # bounds-check a 2-byte access; t0 becomes the pointer
+
+    storeh t1, [t0]
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
+    nextIPIntInstruction()
+end)
+
+ipintOp(_simd_v128_store32_lane_mem, macro()
+    # v128.store32_lane - extract 32-bit value from lane and store to memory
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0  # t0 = instruction length from the metadata
+    advancePCByReg(t0)  # NOTE(review): PC moves before the bounds check below can trap - confirm the trap path does not need the old PC
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx4Mask, t1
-    pushVec(v0)
-    loadi [sp, t1, 4], t1
-    addp V128ISize, sp
+
+    loadi [sp, t1, 4], t1   # Load value from lane in vector on stack
+    addp V128ISize, sp      # Pop the vector
+
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 4)  # bounds-check a 4-byte access; t0 becomes the pointer
+
     storei t1, [t0]
-    leap 3[t4], PC
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_v128_store64_lane_mem, macro()
-    popVec(v0)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_store64_lane_slow_path)
-    loadb 2[t4], t1
+    # v128.store64_lane - extract 64-bit value from lane and store to memory
+
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3  # t3 = memory number from the metadata at MC
+    const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata  # the Const32Metadata entry is read at this offset from MC
+
+    # The lane index comes after the variable length memory offset, so find it by
+    # advancing the PC and loading the byte before the next instruction.
+    loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0  # t0 = instruction length from the metadata
+    advancePCByReg(t0)  # NOTE(review): PC moves before the bounds check below can trap - confirm the trap path does not need the old PC
+    loadb -1[PC], t1  # t1 = last byte of this instruction
     andi ImmLaneIdx2Mask, t1
-    pushVec(v0)
-    loadq [sp, t1, 8], t1
-    addp V128ISize, sp
+
+    loadq [sp, t1, 8], t1   # Load value from lane in vector on stack
+    addp V128ISize, sp      # Pop the vector
+
+    popMemoryIndex(t0, t2)  # presumably pops the memory index operand into t0 (macro defined outside this view)
+    loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2  # t2 = 32-bit constant from the metadata
+    addp t2, t0  # add the constant to the value popped above
+    ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 8)  # bounds-check an 8-byte access; t0 becomes the pointer
+
     storeq t1, [t0]
-    leap 3[t4], PC
+
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata)))  # step over both metadata entries at once
     nextIPIntInstruction()
 end)
 
 ipintOp(_simd_v128_load32_zero_mem, macro()
     # v128.load32_zero - load 32-bit value from memory and zero-pad to 128 bits
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_load32_zero_slow_path)
-    loadi [t0], t0
-    subp V128ISize, sp
-    storei t0, [sp]
-    storei 0, 4[sp]
-    storeq 0, 8[sp]
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    simdMemoryOp(4, macro()  # simdMemoryOp is defined outside this view; presumably it leaves the checked pointer for a 4-byte access in t0 and handles dispatch - confirm
+        loadi [t0], t0  # t0 = 32-bit value read through the pointer in t0
+
+        subp V128ISize, sp  # make room on the stack for the result vector
+        storei t0, [sp]  # bytes 0-3 = loaded value
+        storei 0, 4[sp]  # bytes 4-7 = 0
+        storeq 0, 8[sp]  # bytes 8-15 = 0
+    end)
 end)
 
 ipintOp(_simd_v128_load64_zero_mem, macro()
     # v128.load64_zero - load 64-bit value from memory and zero-pad to 128 bits
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load64_zero_slow_path)
-    loadq [t0], t0
-    subp V128ISize, sp
-    storeq t0, [sp]
-    storeq 0, 8[sp]
-    leap 2[t4], PC
-    nextIPIntInstruction()
+    simdMemoryOp(8, macro()  # simdMemoryOp is defined outside this view; presumably it leaves the checked pointer for an 8-byte access in t0 and handles dispatch - confirm
+        loadq [t0], t0  # t0 = 64-bit value read through the pointer in t0
+
+        subp V128ISize, sp  # make room on the stack for the result vector
+        storeq t0, [sp]  # bytes 0-7 = loaded value
+        storeq 0, 8[sp]  # bytes 8-15 = 0
+    end)
 end)
 
 # 0xFD 0x5E - 0xFD 0x5F: f32x4/f64x2 conversion
@@ -5699,7 +5946,9 @@ ipintOp(_simd_f32x4_demote_f64x2_zero, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5714,7 +5963,9 @@ ipintOp(_simd_f64x2_promote_low_f32x4, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5731,7 +5982,9 @@ ipintOp(_simd_i8x16_abs, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5748,7 +6001,9 @@ ipintOp(_simd_i8x16_neg, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5790,7 +6045,9 @@ ipintOp(_simd_i8x16_popcnt, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5815,7 +6072,9 @@ ipintOp(_simd_i8x16_all_true, macro()
         break # Not implemented
     end
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5843,7 +6102,9 @@ ipintOp(_simd_i8x16_bitmask, macro()
 
     addp V128ISize, sp  # Pop the vector
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5861,7 +6122,9 @@ ipintOp(_simd_i8x16_narrow_i16x8_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5879,7 +6142,9 @@ ipintOp(_simd_i8x16_narrow_i16x8_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5896,7 +6161,9 @@ ipintOp(_simd_f32x4_ceil, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5911,7 +6178,9 @@ ipintOp(_simd_f32x4_floor, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5926,7 +6195,9 @@ ipintOp(_simd_f32x4_trunc, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5941,7 +6212,9 @@ ipintOp(_simd_f32x4_nearest, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -5989,7 +6262,9 @@ ipintOp(_simd_i8x16_shl, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6031,7 +6306,9 @@ ipintOp(_simd_i8x16_shr_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6073,7 +6350,9 @@ ipintOp(_simd_i8x16_shr_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6089,7 +6368,9 @@ ipintOp(_simd_i8x16_add, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6105,7 +6386,9 @@ ipintOp(_simd_i8x16_add_sat_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6121,7 +6404,9 @@ ipintOp(_simd_i8x16_add_sat_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6137,7 +6422,9 @@ ipintOp(_simd_i8x16_sub, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6153,7 +6440,9 @@ ipintOp(_simd_i8x16_sub_sat_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6169,7 +6458,9 @@ ipintOp(_simd_i8x16_sub_sat_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6186,7 +6477,9 @@ ipintOp(_simd_f64x2_ceil, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6201,7 +6494,9 @@ ipintOp(_simd_f64x2_floor, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6218,7 +6513,9 @@ ipintOp(_simd_i8x16_min_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6234,7 +6531,9 @@ ipintOp(_simd_i8x16_min_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6250,7 +6549,9 @@ ipintOp(_simd_i8x16_max_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6266,7 +6567,9 @@ ipintOp(_simd_i8x16_max_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6283,7 +6586,9 @@ ipintOp(_simd_f64x2_trunc, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6301,7 +6606,9 @@ ipintOp(_simd_i8x16_avgr_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6321,7 +6628,9 @@ ipintOp(_simd_i16x8_extadd_pairwise_i8x16_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6339,7 +6648,9 @@ ipintOp(_simd_i16x8_extadd_pairwise_i8x16_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6357,7 +6668,9 @@ ipintOp(_simd_i32x4_extadd_pairwise_i16x8_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6374,7 +6687,9 @@ ipintOp(_simd_i32x4_extadd_pairwise_i16x8_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6391,7 +6706,9 @@ ipintOp(_simd_i16x8_abs, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6408,7 +6725,9 @@ ipintOp(_simd_i16x8_neg, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6432,7 +6751,9 @@ ipintOp(_simd_i16x8_q15mulr_sat_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6459,7 +6780,9 @@ ipintOp(_simd_i16x8_all_true, macro()
         break # Not implemented
     end
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6487,7 +6810,9 @@ ipintOp(_simd_i16x8_bitmask, macro()
 
     addp V128ISize, sp  # Pop the vector
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6505,7 +6830,9 @@ ipintOp(_simd_i16x8_narrow_i32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6523,7 +6850,9 @@ ipintOp(_simd_i16x8_narrow_i32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6538,7 +6867,9 @@ ipintOp(_simd_i16x8_extend_low_i8x16_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6555,7 +6886,9 @@ ipintOp(_simd_i16x8_extend_high_i8x16_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6570,7 +6903,9 @@ ipintOp(_simd_i16x8_extend_low_i8x16_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6587,7 +6922,9 @@ ipintOp(_simd_i16x8_extend_high_i8x16_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6612,7 +6949,9 @@ ipintOp(_simd_i16x8_shl, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6639,7 +6978,9 @@ ipintOp(_simd_i16x8_shr_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6664,7 +7005,9 @@ ipintOp(_simd_i16x8_shr_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6680,7 +7023,9 @@ ipintOp(_simd_i16x8_add, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6696,7 +7041,9 @@ ipintOp(_simd_i16x8_add_sat_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6712,7 +7059,9 @@ ipintOp(_simd_i16x8_add_sat_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6728,7 +7077,9 @@ ipintOp(_simd_i16x8_sub, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6744,7 +7095,9 @@ ipintOp(_simd_i16x8_sub_sat_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6760,7 +7113,9 @@ ipintOp(_simd_i16x8_sub_sat_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6777,7 +7132,9 @@ ipintOp(_simd_f64x2_nearest, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6795,7 +7152,9 @@ ipintOp(_simd_i16x8_mul, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6811,7 +7170,9 @@ ipintOp(_simd_i16x8_min_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6827,7 +7188,9 @@ ipintOp(_simd_i16x8_min_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6843,7 +7206,9 @@ ipintOp(_simd_i16x8_max_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6859,12 +7224,13 @@ ipintOp(_simd_i16x8_max_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
 reservedOpcode(0xfd9a01)
-
 ipintOp(_simd_i16x8_avgr_u, macro()
     # i16x8.avgr_u - average of 8 16-bit unsigned integers with rounding
     popVec(v1)
@@ -6877,7 +7243,9 @@ ipintOp(_simd_i16x8_avgr_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6896,7 +7264,9 @@ ipintOp(_simd_i16x8_extmul_low_i8x16_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6917,7 +7287,9 @@ ipintOp(_simd_i16x8_extmul_high_i8x16_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6936,7 +7308,9 @@ ipintOp(_simd_i16x8_extmul_low_i8x16_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6956,7 +7330,9 @@ ipintOp(_simd_i16x8_extmul_high_i8x16_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6973,7 +7349,9 @@ ipintOp(_simd_i32x4_abs, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -6990,7 +7368,9 @@ ipintOp(_simd_i32x4_neg, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7019,7 +7399,9 @@ ipintOp(_simd_i32x4_all_true, macro()
         break # Not implemented
     end
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7047,7 +7429,9 @@ ipintOp(_simd_i32x4_bitmask, macro()
 
     addp V128ISize, sp  # Pop the vector
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7065,7 +7449,9 @@ ipintOp(_simd_i32x4_extend_low_i16x8_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7082,7 +7468,9 @@ ipintOp(_simd_i32x4_extend_high_i16x8_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7097,7 +7485,9 @@ ipintOp(_simd_i32x4_extend_low_i16x8_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7114,7 +7504,9 @@ ipintOp(_simd_i32x4_extend_high_i16x8_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7137,7 +7529,9 @@ ipintOp(_simd_i32x4_shl, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7162,7 +7556,9 @@ ipintOp(_simd_i32x4_shr_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7187,7 +7583,9 @@ ipintOp(_simd_i32x4_shr_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7203,7 +7601,9 @@ ipintOp(_simd_i32x4_add, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7222,7 +7622,9 @@ ipintOp(_simd_i32x4_sub, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7242,7 +7644,9 @@ ipintOp(_simd_i32x4_mul, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7258,7 +7662,9 @@ ipintOp(_simd_i32x4_min_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7274,7 +7680,9 @@ ipintOp(_simd_i32x4_min_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7290,7 +7698,9 @@ ipintOp(_simd_i32x4_max_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7306,7 +7716,9 @@ ipintOp(_simd_i32x4_max_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7327,7 +7739,9 @@ ipintOp(_simd_i32x4_dot_i16x8_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 reservedOpcode(0xfdbb01)
@@ -7347,7 +7761,9 @@ ipintOp(_simd_i32x4_extmul_low_i16x8_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7366,7 +7782,9 @@ ipintOp(_simd_i32x4_extmul_high_i16x8_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7385,7 +7803,9 @@ ipintOp(_simd_i32x4_extmul_low_i16x8_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7404,7 +7824,9 @@ ipintOp(_simd_i32x4_extmul_high_i16x8_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7426,7 +7848,9 @@ ipintOp(_simd_i64x2_abs, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7443,7 +7867,9 @@ ipintOp(_simd_i64x2_neg, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7472,7 +7898,9 @@ ipintOp(_simd_i64x2_all_true, macro()
         break # Not implemented
     end
     pushInt32(t0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7502,7 +7930,9 @@ ipintOp(_simd_i64x2_bitmask, macro()
 
 .bitmask_i64x2_done:
     pushInt32(t2)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7520,7 +7950,9 @@ ipintOp(_simd_i64x2_extend_low_i32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7537,7 +7969,9 @@ ipintOp(_simd_i64x2_extend_high_i32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7552,7 +7986,9 @@ ipintOp(_simd_i64x2_extend_low_i32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7569,7 +8005,9 @@ ipintOp(_simd_i64x2_extend_high_i32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7592,7 +8030,9 @@ ipintOp(_simd_i64x2_shl, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7610,7 +8050,9 @@ ipintOp(_simd_i64x2_shr_s, macro()
     rshiftq t0, t1
     storeq t1, [sp]
 
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7635,7 +8077,9 @@ ipintOp(_simd_i64x2_shr_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7651,7 +8095,9 @@ ipintOp(_simd_i64x2_add, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7670,7 +8116,9 @@ ipintOp(_simd_i64x2_sub, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7695,7 +8143,9 @@ ipintOp(_simd_i64x2_mul, macro()
 
     # Pop vector1, result in vector0
     addp V128ISize, sp        # Remove first vector from stack, leaving result
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7711,7 +8161,9 @@ ipintOp(_simd_i64x2_eq, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7731,7 +8183,9 @@ ipintOp(_simd_i64x2_ne, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7749,7 +8203,9 @@ ipintOp(_simd_i64x2_lt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7765,7 +8221,9 @@ ipintOp(_simd_i64x2_gt_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7785,7 +8243,9 @@ ipintOp(_simd_i64x2_le_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7804,7 +8264,9 @@ ipintOp(_simd_i64x2_ge_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7823,7 +8285,9 @@ ipintOp(_simd_i64x2_extmul_low_i32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7842,7 +8306,9 @@ ipintOp(_simd_i64x2_extmul_high_i32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7861,7 +8327,9 @@ ipintOp(_simd_i64x2_extmul_low_i32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7880,7 +8348,9 @@ ipintOp(_simd_i64x2_extmul_high_i32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7901,7 +8371,9 @@ ipintOp(_simd_f32x4_abs, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7920,7 +8392,9 @@ ipintOp(_simd_f32x4_neg, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7937,7 +8411,9 @@ ipintOp(_simd_f32x4_sqrt, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7953,7 +8429,9 @@ ipintOp(_simd_f32x4_add, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7969,7 +8447,9 @@ ipintOp(_simd_f32x4_sub, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -7985,7 +8465,9 @@ ipintOp(_simd_f32x4_mul, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8001,7 +8483,9 @@ ipintOp(_simd_f32x4_div, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8030,7 +8514,9 @@ ipintOp(_simd_f32x4_min, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8064,7 +8550,9 @@ ipintOp(_simd_f32x4_max, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8084,7 +8572,9 @@ ipintOp(_simd_f32x4_pmin, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8104,7 +8594,9 @@ ipintOp(_simd_f32x4_pmax, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8125,7 +8617,9 @@ ipintOp(_simd_f64x2_abs, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8144,7 +8638,9 @@ ipintOp(_simd_f64x2_neg, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8161,7 +8657,9 @@ ipintOp(_simd_f64x2_sqrt, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8177,7 +8675,9 @@ ipintOp(_simd_f64x2_add, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8193,7 +8693,9 @@ ipintOp(_simd_f64x2_sub, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8209,7 +8711,9 @@ ipintOp(_simd_f64x2_mul, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8225,7 +8729,9 @@ ipintOp(_simd_f64x2_div, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8254,7 +8760,9 @@ ipintOp(_simd_f64x2_min, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8288,7 +8796,9 @@ ipintOp(_simd_f64x2_max, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8308,7 +8818,9 @@ ipintOp(_simd_f64x2_pmin, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8328,7 +8840,9 @@ ipintOp(_simd_f64x2_pmax, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8357,7 +8871,9 @@ ipintOp(_simd_i32x4_trunc_sat_f32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8391,7 +8907,9 @@ ipintOp(_simd_i32x4_trunc_sat_f32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8406,7 +8924,9 @@ ipintOp(_simd_f32x4_convert_i32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8429,7 +8949,9 @@ ipintOp(_simd_f32x4_convert_i32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8458,7 +8980,9 @@ ipintOp(_simd_i32x4_trunc_sat_f64x2_s_zero, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8493,7 +9017,9 @@ ipintOp(_simd_i32x4_trunc_sat_f64x2_u_zero, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8510,7 +9036,9 @@ ipintOp(_simd_f64x2_convert_low_i32x4_s, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8542,7 +9070,9 @@ ipintOp(_simd_f64x2_convert_low_i32x4_u, macro()
         break # Not implemented
     end
     pushVec(v0)
-    move t4, PC
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata)))
     nextIPIntInstruction()
 end)
 
@@ -8565,2785 +9095,1842 @@ macro checkAlignment8(mem, label)
     btpnz mem, 7, label
 end
 
-macro weakCASLoopByte(mem, value, scratch1AndOldValue, scratch2, fn)
-    validateOpcodeConfig(scratch1AndOldValue)
-    if X86_64
-        loadb [mem], scratch1AndOldValue
-    .loop:
-        move scratch1AndOldValue, scratch2
-        fn(value, scratch2)
-        batomicweakcasb scratch1AndOldValue, scratch2, [mem], .loop
-    else
-    .loop:
-        loadlinkacqb [mem], scratch1AndOldValue
-        fn(value, scratch1AndOldValue, scratch2)
-        storecondrelb ws2, scratch2, [mem]
-        bineq ws2, 0, .loop
-    end
-end
+ipintAtomicOp(_memory_atomic_notify, macro() # handler that marshals operands on the stack and calls _ipint_extern_memory_atomic_notify
+    # starting at sp: count, pointer
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0 # one-byte memory index read from the metadata at MC
+    pushInt32(t0) # memory index
+    const miMetaSize = sizeof IPInt::MemoryIndexMetadata
+    loadi miMetaSize + IPInt::Const32Metadata::value[MC], t0 # Const32Metadata is read right after the MemoryIndexMetadata record
+    pushInt32(t0) # offset
 
-macro weakCASLoopHalf(mem, value, scratch1AndOldValue, scratch2, fn)
-    validateOpcodeConfig(scratch1AndOldValue)
-    if X86_64
-        loadh [mem], scratch1AndOldValue
-    .loop:
-        move scratch1AndOldValue, scratch2
-        fn(value, scratch2)
-        batomicweakcash scratch1AndOldValue, scratch2, [mem], .loop
-    else
-    .loop:
-        loadlinkacqh [mem], scratch1AndOldValue
-        fn(value, scratch1AndOldValue, scratch2)
-        storecondrelh ws2, scratch2, [mem]
-        bineq ws2, 0, .loop
-    end
-end
+    move sp, a1 # a1 = address of the four stacked operands (presumably the callee's second argument)
 
-macro weakCASLoopInt(mem, value, scratch1AndOldValue, scratch2, fn)
-    validateOpcodeConfig(scratch1AndOldValue)
-    if X86_64
-        loadi [mem], scratch1AndOldValue
-    .loop:
-        move scratch1AndOldValue, scratch2
-        fn(value, scratch2)
-        batomicweakcasi scratch1AndOldValue, scratch2, [mem], .loop
-    else
-    .loop:
-        loadlinkacqi [mem], scratch1AndOldValue
-        fn(value, scratch1AndOldValue, scratch2)
-        storecondreli ws2, scratch2, [mem]
-        bineq ws2, 0, .loop
-    end
-end
+    operationCall(macro() cCall2(_ipint_extern_memory_atomic_notify) end)
+    bilt r0, 0, .atomic_notify_throw # a negative 32-bit result means trap
 
-macro weakCASLoopQuad(mem, value, scratch1AndOldValue, scratch2, fn)
-    validateOpcodeConfig(scratch1AndOldValue)
-    if X86_64
-        loadq [mem], scratch1AndOldValue
-    .loop:
-        move scratch1AndOldValue, scratch2
-        fn(value, scratch2)
-        batomicweakcasq scratch1AndOldValue, scratch2, [mem], .loop
+    addq (StackValueSize * 4), sp # drop four slots: offset, memory index, count, pointer
+
+    pushInt32(r0) # push the call's result
+    loadb miMetaSize + IPInt::Const32Metadata::instructionLength[MC], t0 # instruction length used to advance PC
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) # step MC past both metadata records
+    nextIPIntInstruction()
+
+.atomic_notify_throw:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess)
+end)
+
+ipintAtomicOp(_memory_atomic_wait32, macro() # handler that marshals operands on the stack and calls _ipint_extern_memory_atomic_wait32
+    # starting at sp: timeout, value, pointer
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0 # one-byte memory index read from the metadata at MC
+    pushInt32(t0) # memory index; the pointer operand is now at slot 3
+    loadq (StackValueSize * 3)[sp], t0 # t0 = pointer operand
+    const miMetaSize = sizeof IPInt::MemoryIndexMetadata
+    loadi miMetaSize + IPInt::Const32Metadata::value[MC], t1 # t1 = 32-bit offset stored after the MemoryIndexMetadata record
+    addq t1, t0
+    storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset
+
+    move sp, a1 # a1 = address of the four stacked operands (presumably the callee's second argument)
+
+    operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait32) end)
+    bilt r0, 0, .atomic_wait32_throw # a negative 32-bit result means trap
+
+    addq (StackValueSize * 4), sp # drop four slots: memory index, timeout, value, pointer
+
+    pushInt32(r0) # push the call's result
+    loadb miMetaSize + IPInt::Const32Metadata::instructionLength[MC], t0 # instruction length used to advance PC
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) # step MC past both metadata records
+    nextIPIntInstruction()
+
+.atomic_wait32_throw:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess)
+end)
+
+ipintAtomicOp(_memory_atomic_wait64, macro() # handler that marshals operands on the stack and calls _ipint_extern_memory_atomic_wait64
+    # starting at sp: timeout, value, pointer
+    loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0 # one-byte memory index read from the metadata at MC
+    pushInt32(t0) # memory index; the pointer operand is now at slot 3
+    loadq (StackValueSize * 3)[sp], t0 # t0 = pointer operand
+    const miMetaSize = sizeof IPInt::MemoryIndexMetadata
+    loadi miMetaSize + IPInt::Const32Metadata::value[MC], t1 # t1 = 32-bit offset stored after the MemoryIndexMetadata record
+    addq t1, t0
+    storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset
+
+    move sp, a1 # a1 = address of the four stacked operands (presumably the callee's second argument)
+
+    operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait64) end)
+    bilt r0, 0, .atomic_wait64_throw # a negative 32-bit result means trap
+
+    addq (StackValueSize * 4), sp # drop four slots: memory index, timeout, value, pointer
+
+    pushInt32(r0) # push the call's result (pushed as a 32-bit value even for the 64-bit wait)
+    loadb miMetaSize + IPInt::Const32Metadata::instructionLength[MC], t0 # instruction length used to advance PC
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) # step MC past both metadata records
+    nextIPIntInstruction()
+
+.atomic_wait64_throw:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess)
+end)
+
+ipintAtomicOp(_atomic_fence, macro() # handler for the atomic fence opcode: no stack operands are touched
+    fence # emit the offlineasm fence instruction
+
+    loadb IPInt::InstructionLengthMetadata::length[MC], t0 # instruction length used to advance PC
+    advancePCByReg(t0)
+    advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) # step MC past this instruction's metadata record
+    nextIPIntInstruction()
+end)
+
+reservedAtomicOpcode(atomic_0x4) # 0x4 through 0xf: presumably placeholder entries for unassigned atomic opcodes; confirm in reservedAtomicOpcode
+reservedAtomicOpcode(atomic_0x5)
+reservedAtomicOpcode(atomic_0x6)
+reservedAtomicOpcode(atomic_0x7)
+reservedAtomicOpcode(atomic_0x8)
+reservedAtomicOpcode(atomic_0x9)
+reservedAtomicOpcode(atomic_0xa)
+reservedAtomicOpcode(atomic_0xb)
+reservedAtomicOpcode(atomic_0xc)
+reservedAtomicOpcode(atomic_0xd)
+reservedAtomicOpcode(atomic_0xe)
+reservedAtomicOpcode(atomic_0xf)
+
+ipintAtomicOp(_i32_atomic_load, macro() # atomic 32-bit load; result is pushed with pushInt32
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # afterwards t0 is used as the access address and t4 as the PC advance
+    checkAlignment4(t0, .throwUnaligned) # misaligned address goes to the trap below
+    if ARM64 or ARM64E or X86_64
+        atomicloadi [t0], t2 # t2 = loaded value
     else
-    .loop:
-        loadlinkacqq [mem], scratch1AndOldValue
-        fn(value, scratch1AndOldValue, scratch2)
-        storecondrelq ws2, scratch2, [mem]
-        bineq ws2, 0, .loop
+        error
     end
-end
+    pushInt32(t2)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicLoad(mem, dst)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
+ipintAtomicOp(_i64_atomic_load, macro() # atomic 64-bit load; result is pushed with pushInt64
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) # afterwards t0 is used as the access address and t4 as the PC advance
+    checkAlignment8(t0, .throwUnaligned) # misaligned address goes to the trap below
     if ARM64 or ARM64E or X86_64
-        atomicloadi [mem], dst
+        atomicloadq [t0], t2 # t2 = loaded value
     else
         error
     end
-end
+    pushInt64(t2)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicLoad(mem, dst)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
+ipintAtomicOp(_i32_atomic_load8_u, macro() # atomic 8-bit load; result is pushed with pushInt32
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # afterwards t0 is used as the access address and t4 as the PC advance
+    noAlignmentCheck(t0, .throwUnaligned) # byte access: the macro name suggests no check is emitted; confirm
     if ARM64 or ARM64E or X86_64
-        atomicloadq [mem], dst
+        atomicloadb [t0], t2 # t2 = loaded value
     else
         error
     end
-end
+    pushInt32(t2)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicLoad8(mem, dst)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
+ipintAtomicOp(_i32_atomic_load16_u, macro() # atomic 16-bit load; result is pushed with pushInt32
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) # afterwards t0 is used as the access address and t4 as the PC advance
+    checkAlignment2(t0, .throwUnaligned) # misaligned address goes to the trap below
     if ARM64 or ARM64E or X86_64
-        atomicloadb [mem], dst
+        atomicloadh [t0], t2 # t2 = loaded value
     else
         error
     end
-end
+    pushInt32(t2)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicLoad16(mem, dst)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
+ipintAtomicOp(_i64_atomic_load8_u, macro() # atomic 8-bit load; result is pushed with pushInt64
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # afterwards t0 is used as the access address and t4 as the PC advance
+    noAlignmentCheck(t0, .throwUnaligned) # byte access: the macro name suggests no check is emitted; confirm
     if ARM64 or ARM64E or X86_64
-        atomicloadh [mem], dst
+        atomicloadb [t0], t2 # t2 = loaded value
     else
         error
     end
-end
+    pushInt64(t2)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicLoad8(mem, dst)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
+ipintAtomicOp(_i64_atomic_load16_u, macro() # atomic 16-bit load; result is pushed with pushInt64
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) # afterwards t0 is used as the access address and t4 as the PC advance
+    checkAlignment2(t0, .throwUnaligned) # misaligned address goes to the trap below
     if ARM64 or ARM64E or X86_64
-        atomicloadb [mem], dst
+        atomicloadh [t0], t2 # t2 = loaded value
     else
         error
     end
-end
+    pushInt64(t2)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicLoad16(mem, dst)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
+ipintAtomicOp(_i64_atomic_load32_u, macro() # atomic 32-bit load; result is pushed with pushInt64
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # afterwards t0 is used as the access address and t4 as the PC advance
+    checkAlignment4(t0, .throwUnaligned) # misaligned address goes to the trap below
     if ARM64 or ARM64E or X86_64
-        atomicloadh [mem], dst
+        atomicloadi [t0], t2 # t2 = loaded value
     else
         error
     end
+    pushInt64(t2)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
+
+macro weakCASLoopByte(mem, value, scratch1AndOldValue, scratch2, fn) # 8-bit atomic read-modify-write retry loop; fn supplies the new value
+    validateOpcodeConfig(scratch1AndOldValue)
+    if X86_64
+        loadb [mem], scratch1AndOldValue # plain load as the first expected value
+    .loop:
+        move scratch1AndOldValue, scratch2
+        fn(value, scratch2) # two-argument form here: scratch2 starts as the old value and is then used as the new one
+        batomicweakcasb scratch1AndOldValue, scratch2, [mem], .loop # CAS expected -> new; branches back to .loop (presumably on failure)
+    else
+    .loop:
+        loadlinkacqb [mem], scratch1AndOldValue # load-linked, acquire
+        fn(value, scratch1AndOldValue, scratch2) # three-argument form here: (value, oldValue, newValue)
+        storecondrelb ws2, scratch2, [mem] # store-conditional, release; status goes to ws2
+        bineq ws2, 0, .loop # nonzero status: retry
+    end
 end
 
-macro doI64AtomicLoad32(mem, dst)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    if ARM64 or ARM64E or X86_64
-        atomicloadi [mem], dst
+macro weakCASLoopHalf(mem, value, scratch1AndOldValue, scratch2, fn) # 16-bit atomic read-modify-write retry loop; fn supplies the new value
+    validateOpcodeConfig(scratch1AndOldValue)
+    if X86_64
+        loadh [mem], scratch1AndOldValue # plain load as the first expected value
+    .loop:
+        move scratch1AndOldValue, scratch2
+        fn(value, scratch2) # two-argument form here: scratch2 starts as the old value and is then used as the new one
+        batomicweakcash scratch1AndOldValue, scratch2, [mem], .loop # CAS expected -> new; branches back to .loop (presumably on failure)
     else
-        error
+    .loop:
+        loadlinkacqh [mem], scratch1AndOldValue # load-linked, acquire
+        fn(value, scratch1AndOldValue, scratch2) # three-argument form here: (value, oldValue, newValue)
+        storecondrelh ws2, scratch2, [mem] # store-conditional, release; status goes to ws2
+        bineq ws2, 0, .loop # nonzero status: retry
     end
 end
 
-macro doI32AtomicStore(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    if ARM64E
-        atomicxchgi val, [memCopy], val
-    elsif X86_64
-        atomicxchgi val, [memCopy]
-    elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
-            move value, newValue
-        end)
+macro weakCASLoopInt(mem, value, scratch1AndOldValue, scratch2, fn) # 32-bit atomic read-modify-write retry loop; fn supplies the new value
+    validateOpcodeConfig(scratch1AndOldValue)
+    if X86_64
+        loadi [mem], scratch1AndOldValue # plain load as the first expected value
+    .loop:
+        move scratch1AndOldValue, scratch2
+        fn(value, scratch2) # two-argument form here: scratch2 starts as the old value and is then used as the new one
+        batomicweakcasi scratch1AndOldValue, scratch2, [mem], .loop # CAS expected -> new; branches back to .loop (presumably on failure)
     else
-        error
+    .loop:
+        loadlinkacqi [mem], scratch1AndOldValue # load-linked, acquire
+        fn(value, scratch1AndOldValue, scratch2) # three-argument form here: (value, oldValue, newValue)
+        storecondreli ws2, scratch2, [mem] # store-conditional, release; status goes to ws2
+        bineq ws2, 0, .loop # nonzero status: retry
+    end
+end
+
+macro weakCASLoopQuad(mem, value, scratch1AndOldValue, scratch2, fn) # 64-bit atomic read-modify-write retry loop; fn supplies the new value
+    validateOpcodeConfig(scratch1AndOldValue)
+    if X86_64
+        loadq [mem], scratch1AndOldValue # plain load as the first expected value
+    .loop:
+        move scratch1AndOldValue, scratch2
+        fn(value, scratch2) # two-argument form here: scratch2 starts as the old value and is then used as the new one
+        batomicweakcasq scratch1AndOldValue, scratch2, [mem], .loop # CAS expected -> new; branches back to .loop (presumably on failure)
+    else
+    .loop:
+        loadlinkacqq [mem], scratch1AndOldValue # load-linked, acquire
+        fn(value, scratch1AndOldValue, scratch2) # three-argument form here: (value, oldValue, newValue)
+        storecondrelq ws2, scratch2, [mem] # store-conditional, release; status goes to ws2
+        bineq ws2, 0, .loop # nonzero status: retry
     end
 end
 
-macro doI64AtomicStore(mem, val, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+
+ipintAtomicOp(_i32_atomic_store, macro() # atomic 32-bit store, implemented as an exchange whose old value is discarded
+    popInt64(t3) # t3 = value operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment4(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 is free to act as the old-value register on ARM64
     if ARM64E
-        atomicxchgq val, [memCopy], val
+        atomicxchgi t3, [t2], t3 # the old value lands in t3 and is not used
     elsif X86_64
-        atomicxchgq val, [memCopy]
+        atomicxchgi t3, [t2]
     elsif ARM64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: new value is simply the operand
             move value, newValue
         end)
     else
         error
     end
-end
+    advancePCByReg(t4) # nothing is pushed for a store
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicStore8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_store, macro() # atomic 64-bit store, implemented as an exchange whose old value is discarded
+    popInt64(t3) # t3 = value operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment8(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 is free to act as the old-value register on ARM64
     if ARM64E
-        atomicxchgb val, [memCopy], val
+        atomicxchgq t3, [t2], t3 # the old value lands in t3 and is not used
     elsif X86_64
-        atomicxchgb val, [memCopy]
+        atomicxchgq t3, [t2]
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: new value is simply the operand
             move value, newValue
         end)
     else
         error
     end
-end
+    advancePCByReg(t4) # nothing is pushed for a store
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicStore16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_store8_u, macro() # atomic 8-bit store, implemented as an exchange whose old value is discarded
+    popInt64(t3) # t3 = value operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    noAlignmentCheck(t0, .throwUnaligned) # byte access: the macro name suggests no check is emitted; confirm
+    move t0, t2 # t2 = address; t0 is free to act as the old-value register on ARM64
     if ARM64E
-        atomicxchgh val, [memCopy], val
+        atomicxchgb t3, [t2], t3 # the old value lands in t3 and is not used
     elsif X86_64
-        atomicxchgh val, [memCopy]
+        atomicxchgb t3, [t2]
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: new value is simply the operand
             move value, newValue
         end)
     else
         error
     end
-end
+    advancePCByReg(t4) # nothing is pushed for a store
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicStore8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_store16_u, macro() # atomic 16-bit store, implemented as an exchange whose old value is discarded
+    popInt64(t3) # t3 = value operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment2(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 is free to act as the old-value register on ARM64
     if ARM64E
-        atomicxchgb val, [memCopy], val
+        atomicxchgh t3, [t2], t3 # the old value lands in t3 and is not used
     elsif X86_64
-        atomicxchgb val, [memCopy]
+        atomicxchgh t3, [t2]
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: new value is simply the operand
             move value, newValue
         end)
     else
         error
     end
-end
+    advancePCByReg(t4) # nothing is pushed for a store
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicStore16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_store8_u, macro() # atomic 8-bit store, implemented as an exchange whose old value is discarded
+    popInt64(t3) # t3 = value operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    noAlignmentCheck(t0, .throwUnaligned) # byte access: the macro name suggests no check is emitted; confirm
+    move t0, t2 # t2 = address; t0 is free to act as the old-value register on ARM64
     if ARM64E
-        atomicxchgh val, [memCopy], val
+        atomicxchgb t3, [t2], t3 # the old value lands in t3 and is not used
     elsif X86_64
-        atomicxchgh val, [memCopy]
+        atomicxchgb t3, [t2]
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: new value is simply the operand
             move value, newValue
         end)
     else
         error
     end
-end
+    advancePCByReg(t4) # nothing is pushed for a store
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicStore32(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_store16_u, macro() # atomic 16-bit store, implemented as an exchange whose old value is discarded
+    popInt64(t3) # t3 = value operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment2(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 is free to act as the old-value register on ARM64
     if ARM64E
-        atomicxchgi val, [memCopy], val
+        atomicxchgh t3, [t2], t3 # the old value lands in t3 and is not used
     elsif X86_64
-        atomicxchgi val, [memCopy]
+        atomicxchgh t3, [t2]
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: new value is simply the operand
             move value, newValue
         end)
     else
         error
     end
-end
+    advancePCByReg(t4) # nothing is pushed for a store
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwAdd(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_store32_u, macro() # atomic 32-bit store, implemented as an exchange whose old value is discarded
+    popInt64(t3) # t3 = value operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment4(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 is free to act as the old-value register on ARM64
     if ARM64E
-        atomicxchgaddi val, [memCopy], mem
+        atomicxchgi t3, [t2], t3 # the old value lands in t3 and is not used
     elsif X86_64
-        atomicxchgaddi val, [memCopy]
-        move val, mem
+        atomicxchgi t3, [t2]
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
-            addi value, oldValue, newValue
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: new value is simply the operand
+            move value, newValue
         end)
     else
         error
     end
-end
+    advancePCByReg(t4) # nothing is pushed for a store
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAdd(mem, val, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw_add, macro() # atomic 32-bit fetch-and-add; the previous memory value is pushed with pushInt32
+    popInt64(t3) # t3 = addend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment4(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        atomicxchgaddq val, [memCopy], mem
+        atomicxchgaddi t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        atomicxchgaddq val, [memCopy]
-        move val, mem
+        atomicxchgaddi t3, [t2] # presumably leaves the previous memory value in t3
+        move t3, t0
     elsif ARM64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
-            addq value, oldValue, newValue
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
+            addi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
+
+ipintAtomicOp(_i64_atomic_rmw_add, macro() # atomic 64-bit fetch-and-add; the previous memory value is pushed with pushInt64
+    popInt64(t3) # t3 = addend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment8(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 becomes the result register
+    if ARM64E
+        atomicxchgaddq t3, [t2], t0 # t0 receives the instruction's result
+    elsif X86_64
+        atomicxchgaddq t3, [t2] # presumably leaves the previous memory value in t3
+        move t3, t0
+    elsif ARM64
+        weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
+            addq value, oldValue, newValue
+        end)
+    else
+        error
+    end
+    pushInt64(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwAdd8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw8_add_u, macro() # atomic 8-bit fetch-and-add; the previous memory value is pushed with pushInt32
+    popInt64(t3) # t3 = addend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    noAlignmentCheck(t0, .throwUnaligned) # byte access: the macro name suggests no check is emitted; confirm
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        atomicxchgaddb val, [memCopy], mem
+        atomicxchgaddb t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        atomicxchgaddb val, [memCopy]
-        move val, mem
-        andi 0xff, mem
+        atomicxchgaddb t3, [t2] # presumably leaves the previous memory value in the low byte of t3
+        move t3, t0
+        andi 0xff, t0 # keep only the low 8 bits
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
             addi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwAdd16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw16_add_u, macro() # atomic 16-bit fetch-and-add; the previous memory value is pushed with pushInt32
+    popInt64(t3) # t3 = addend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment2(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        atomicxchgaddh val, [memCopy], mem
+        atomicxchgaddh t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        atomicxchgaddh val, [memCopy]
-        move val, mem
-        andi 0xffff, mem
+        atomicxchgaddh t3, [t2] # presumably leaves the previous memory value in the low 16 bits of t3
+        move t3, t0
+        andi 0xffff, t0 # keep only the low 16 bits
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
             addi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAdd8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw8_add_u, macro() # atomic 8-bit fetch-and-add; the previous memory value is pushed with pushInt64
+    popInt64(t3) # t3 = addend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    noAlignmentCheck(t0, .throwUnaligned) # byte access: the macro name suggests no check is emitted; confirm
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        atomicxchgaddb val, [memCopy], mem
+        atomicxchgaddb t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        atomicxchgaddb val, [memCopy]
-        move val, mem
-        andi 0xff, mem
+        atomicxchgaddb t3, [t2] # presumably leaves the previous memory value in the low byte of t3
+        move t3, t0
+        andi 0xff, t0 # keep only the low 8 bits
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
             addi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAdd16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw16_add_u, macro() # atomic 16-bit fetch-and-add; the previous memory value is pushed with pushInt64
+    popInt64(t3) # t3 = addend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment2(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        atomicxchgaddh val, [memCopy], mem
+        atomicxchgaddh t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        atomicxchgaddh val, [memCopy]
-        move val, mem
-        andi 0xffff, mem
+        atomicxchgaddh t3, [t2] # presumably leaves the previous memory value in the low 16 bits of t3
+        move t3, t0
+        andi 0xffff, t0 # keep only the low 16 bits
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
             addi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAdd32(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw32_add_u, macro() # atomic 32-bit fetch-and-add; the previous memory value is pushed with pushInt64
+    popInt64(t3) # t3 = addend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment4(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        atomicxchgaddi val, [memCopy], mem
+        atomicxchgaddi t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        atomicxchgaddi val, [memCopy]
-        move val, mem
-        ori 0, mem
+        atomicxchgaddi t3, [t2] # presumably leaves the previous memory value in the low 32 bits of t3
+        move t3, t0
+        ori 0, t0 # 32-bit no-op OR; presumably used to clear the upper 32 bits of t0 - confirm
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
             addi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwSub(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw_sub, macro() # atomic 32-bit fetch-and-subtract; the previous memory value is pushed with pushInt32
+    popInt64(t3) # t3 = subtrahend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment4(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        negi val
-        atomicxchgaddi val, [memCopy], mem
+        negi t3 # subtract by adding the negated operand
+        atomicxchgaddi t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        negi val
-        atomicxchgaddi val, [memCopy]
-        move val, mem
+        negi t3 # subtract by adding the negated operand
+        atomicxchgaddi t3, [t2] # presumably leaves the previous memory value in t3
+        move t3, t0
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
             subi oldValue, value, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwSub(mem, val, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw_sub, macro() # atomic 64-bit fetch-and-subtract; the previous memory value is pushed with pushInt64
+    popInt64(t3) # t3 = subtrahend operand
+    popMemoryIndex(t0, t2) # presumably pops the address operand into t0 with t2 as scratch; confirm in macro
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) # afterwards t0 is used as the address and t4 as the PC advance
+    checkAlignment8(t0, .throwUnaligned) # misaligned address goes to the trap below
+    move t0, t2 # t2 = address; t0 becomes the result register
     if ARM64E
-        negq val
-        atomicxchgaddq val, [memCopy], mem
+        negq t3 # subtract by adding the negated operand
+        atomicxchgaddq t3, [t2], t0 # t0 receives the instruction's result
     elsif X86_64
-        negq val
-        atomicxchgaddq val, [memCopy]
-        move val, mem
+        negq t3 # subtract by adding the negated operand
+        atomicxchgaddq t3, [t2] # presumably leaves the previous memory value in t3
+        move t3, t0
     elsif ARM64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) # retry loop: old value ends in t0, t1 is scratch
             subq oldValue, value, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwSub8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw8_sub_u, macro()  # i32 atomic RMW sub on a 1-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        negi val
-        atomicxchgaddb val, [memCopy], mem
+        negi t3  # negate so the atomic add below subtracts
+        atomicxchgaddb t3, [t2], t0  # 8-bit atomic add at [t2]; t0 is the output register
     elsif X86_64
-        negi val
-        atomicxchgaddb val, [memCopy]
-        move val, mem
-        andi 0xff, mem
+        negi t3  # negate so the atomic add below subtracts
+        atomicxchgaddb t3, [t2]  # two-operand form: the output comes back in t3
+        move t3, t0  # move the output into t0 for the push below
+        andi 0xff, t0  # keep only the low 8 bits of the result
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             subi oldValue, value, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwSub16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw16_sub_u, macro()  # i32 atomic RMW sub on a 2-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        negi val
-        atomicxchgaddh val, [memCopy], mem
+        negi t3  # negate so the atomic add below subtracts
+        atomicxchgaddh t3, [t2], t0  # 16-bit atomic add at [t2]; t0 is the output register
     elsif X86_64
-        negi val
-        atomicxchgaddh val, [memCopy]
-        move val, mem
-        andi 0xffff, mem
+        negi t3  # negate so the atomic add below subtracts
+        atomicxchgaddh t3, [t2]  # two-operand form: the output comes back in t3
+        move t3, t0  # move the output into t0 for the push below
+        andi 0xffff, t0  # keep only the low 16 bits of the result
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             subi oldValue, value, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwSub8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw8_sub_u, macro()  # i64 atomic RMW sub on a 1-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        negq val
-        atomicxchgaddb val, [memCopy], mem
+        negq t3  # negate so the atomic add below subtracts
+        atomicxchgaddb t3, [t2], t0  # 8-bit atomic add at [t2]; t0 is the output register
     elsif X86_64
-        negq val
-        atomicxchgaddb val, [memCopy]
-        move val, mem
-        andi 0xff, mem
+        negq t3  # negate so the atomic add below subtracts
+        atomicxchgaddb t3, [t2]  # two-operand form: the output comes back in t3
+        move t3, t0  # move the output into t0 for the push below
+        andi 0xff, t0  # keep only the low 8 bits of the result
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             subi oldValue, value, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwSub16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw16_sub_u, macro()  # i64 atomic RMW sub on a 2-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        negq val
-        atomicxchgaddh val, [memCopy], mem
+        negq t3  # negate so the atomic add below subtracts
+        atomicxchgaddh t3, [t2], t0  # 16-bit atomic add at [t2]; t0 is the output register
     elsif X86_64
-        negq val
-        atomicxchgaddh val, [memCopy]
-        move val, mem
-        andi 0xffff, mem
+        negq t3  # negate so the atomic add below subtracts
+        atomicxchgaddh t3, [t2]  # two-operand form: the output comes back in t3
+        move t3, t0  # move the output into t0 for the push below
+        andi 0xffff, t0  # keep only the low 16 bits of the result
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             subi oldValue, value, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwSub32(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw32_sub_u, macro()  # i64 atomic RMW sub on a 4-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        negq val
-        atomicxchgaddi val, [memCopy], mem
+        negq t3  # negate so the atomic add below subtracts
+        atomicxchgaddi t3, [t2], t0  # 32-bit atomic add at [t2]; t0 is the output register
     elsif X86_64
-        negq val
-        atomicxchgaddi val, [memCopy]
-        move val, mem
-        ori 0, mem
+        negq t3  # negate so the atomic add below subtracts
+        atomicxchgaddi t3, [t2]  # two-operand form: the output comes back in t3
+        move t3, t0  # move the output into t0 for the push below
+        ori 0, t0  # NOTE(review): OR with 0 keeps the low bits; presumably used so the 32-bit op clears the upper half - confirm
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             subi oldValue, value, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwAnd(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw_and, macro()  # i32 atomic RMW and on a 4-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        noti val
-        atomicxchgcleari val, [memCopy], mem
+        noti t3  # invert the operand; presumably the atomic clear below clears the bits set in t3, giving an AND - confirm
+        atomicxchgcleari t3, [t2], t0  # 32-bit atomic op at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             andq value, dst
         end)
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             andi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAnd(mem, val, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw_and, macro()  # i64 atomic RMW and on an 8-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)  # presumably turns t0 into the access pointer (8-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment8(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        notq val
-        atomicxchgclearq val, [memCopy], mem
+        notq t3  # invert the operand; presumably the atomic clear below clears the bits set in t3, giving an AND - confirm
+        atomicxchgclearq t3, [t2], t0  # 64-bit atomic op at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             andq value, dst
         end)
     elsif ARM64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             andq value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwAnd8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw8_and_u, macro()  # i32 atomic RMW and on a 1-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        noti val
-        atomicxchgclearb val, [memCopy], mem
+        noti t3  # invert the operand; presumably the atomic clear below clears the bits set in t3, giving an AND - confirm
+        atomicxchgclearb t3, [t2], t0  # 8-bit atomic op at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             andq value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             andi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwAnd16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw16_and_u, macro()  # i32 atomic RMW and on a 2-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        noti val
-        atomicxchgclearh val, [memCopy], mem
+        noti t3  # invert the operand; presumably the atomic clear below clears the bits set in t3, giving an AND - confirm
+        atomicxchgclearh t3, [t2], t0  # 16-bit atomic op at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             andq value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             andi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAnd8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw8_and_u, macro()  # i64 atomic RMW and on a 1-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        notq val
-        atomicxchgclearb val, [memCopy], mem
+        notq t3  # invert the operand; presumably the atomic clear below clears the bits set in t3, giving an AND - confirm
+        atomicxchgclearb t3, [t2], t0  # 8-bit atomic op at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             andq value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             andi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAnd16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw16_and_u, macro()  # i64 atomic RMW and on a 2-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        notq val
-        atomicxchgclearh val, [memCopy], mem
+        notq t3  # invert the operand; presumably the atomic clear below clears the bits set in t3, giving an AND - confirm
+        atomicxchgclearh t3, [t2], t0  # 16-bit atomic op at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             andq value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             andi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwAnd32(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw32_and_u, macro()  # i64 atomic RMW and on a 4-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        notq val
-        atomicxchgcleari val, [memCopy], mem
+        notq t3  # invert the operand; presumably the atomic clear below clears the bits set in t3, giving an AND - confirm
+        atomicxchgcleari t3, [t2], t0  # 32-bit atomic op at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             andq value, dst
         end)
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             andi value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwOr(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw_or, macro()  # i32 atomic RMW or on a 4-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgori val, [memCopy], mem
+        atomicxchgori t3, [t2], t0  # 32-bit atomic OR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             ori value, dst
         end)
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             ori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwOr(mem, val, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw_or, macro()  # i64 atomic RMW or on an 8-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)  # presumably turns t0 into the access pointer (8-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment8(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgorq val, [memCopy], mem
+        atomicxchgorq t3, [t2], t0  # 64-bit atomic OR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             orq value, dst
         end)
     elsif ARM64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             orq value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwOr8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw8_or_u, macro()  # i32 atomic RMW or on a 1-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgorb val, [memCopy], mem
+        atomicxchgorb t3, [t2], t0  # 8-bit atomic OR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             orq value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             ori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwOr16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw16_or_u, macro()  # i32 atomic RMW or on a 2-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgorh val, [memCopy], mem
+        atomicxchgorh t3, [t2], t0  # 16-bit atomic OR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             orq value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             ori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwOr8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw8_or_u, macro()  # i64 atomic RMW or on a 1-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgorb val, [memCopy], mem
+        atomicxchgorb t3, [t2], t0  # 8-bit atomic OR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             orq value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             ori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwOr16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw16_or_u, macro()  # i64 atomic RMW or on a 2-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgorh val, [memCopy], mem
+        atomicxchgorh t3, [t2], t0  # 16-bit atomic OR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             orq value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             ori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwOr32(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw32_or_u, macro()  # i64 atomic RMW or on a 4-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgori val, [memCopy], mem
+        atomicxchgori t3, [t2], t0  # 32-bit atomic OR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             orq value, dst
         end)
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             ori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwXor(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw_xor, macro()  # i32 atomic RMW xor on a 4-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgxori val, [memCopy], mem
+        atomicxchgxori t3, [t2], t0  # 32-bit atomic XOR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             xorq value, dst
         end)
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             xori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwXor(mem, val, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw_xor, macro()  # i64 atomic RMW xor on an 8-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)  # presumably turns t0 into the access pointer (8-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment8(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgxorq val, [memCopy], mem
+        atomicxchgxorq t3, [t2], t0  # 64-bit atomic XOR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             xorq value, dst
         end)
     elsif ARM64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             xorq value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwXor8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw8_xor_u, macro()  # i32 atomic RMW xor on a 1-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgxorb val, [memCopy], mem
+        atomicxchgxorb t3, [t2], t0  # 8-bit atomic XOR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             xorq value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             xori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwXor16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw16_xor_u, macro()  # i32 atomic RMW xor on a 2-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgxorh val, [memCopy], mem
+        atomicxchgxorh t3, [t2], t0  # 16-bit atomic XOR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             xorq value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             xori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwXor8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw8_xor_u, macro()  # i64 atomic RMW xor on a 1-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)  # presumably turns t0 into the access pointer (1-byte access) and sets t4 for advancePCByReg below - confirm
+    noAlignmentCheck(t0, .throwUnaligned)  # label is passed, but a 1-byte access presumably needs no check - confirm
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgxorb val, [memCopy], mem
+        atomicxchgxorb t3, [t2], t0  # 8-bit atomic XOR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             xorq value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             xori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # label handed to noAlignmentCheck above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwXor16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw16_xor_u, macro()  # i64 atomic RMW xor on a 2-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)  # presumably turns t0 into the access pointer (2-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment2(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgxorh val, [memCopy], mem
+        atomicxchgxorh t3, [t2], t0  # 16-bit atomic XOR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             xorq value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             xori value, oldValue, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwXor32(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw32_xor_u, macro()  # i64 atomic RMW xor on a 4-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgxori val, [memCopy], mem
+        atomicxchgxori t3, [t2], t0  # 32-bit atomic XOR at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             xorq value, dst
         end)
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             xori value, oldValue, newValue
         end)
     else
         error
     end
-end
-
-macro doI32AtomicRmwXchg(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    if ARM64E
-        atomicxchgi val, [memCopy], mem
-    elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
-            move value, dst
-        end)
-    elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
-            move value, newValue
-        end)
-    else
-        error
-    end
-end
-
-macro doI64AtomicRmwXchg(mem, val, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    if ARM64E
-        atomicxchgq val, [memCopy], mem
-    elsif X86_64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst)
-            move value, dst
-        end)
-    elsif ARM64
-        weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
-            move value, newValue
-        end)
-    else
-        error
-    end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwXchg8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw_xchg, macro()  # i32 atomic RMW xchg on a 4-byte access; the t0 result is pushed as Int32
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)  # presumably turns t0 into the access pointer (4-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment4(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgb val, [memCopy], mem
+        atomicxchgi t3, [t2], t0  # 32-bit atomic exchange at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             move value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             move value, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI32AtomicRmwXchg16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw_xchg, macro()  # i64 atomic RMW xchg on an 8-byte access; the t0 result is pushed as Int64
+    popInt64(t3)  # t3 = value operand
+    popMemoryIndex(t0, t2)  # t0 = memory index operand
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)  # presumably turns t0 into the access pointer (8-byte access) and sets t4 for advancePCByReg below - confirm
+    checkAlignment8(t0, .throwUnaligned)  # branch target on failure is the local .throwUnaligned label
+    move t0, t2  # keep the address in t2 so t0 can take the result
     if ARM64E
-        atomicxchgh val, [memCopy], mem
+        atomicxchgq t3, [t2], t0  # 64-bit atomic exchange at [t2]; t0 is the output register
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst)  # address t2, operand t3, result t0, scratch t1
             move value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue)  # address t2, operand t3, result t0, scratch t1
             move value, newValue
         end)
     else
         error
     end
-end
+    pushInt64(t0)  # push the result held in t0
+    advancePCByReg(t4)
+    nextIPIntInstruction()
+.throwUnaligned:  # reached from the alignment check above
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-macro doI64AtomicRmwXchg8(mem, val, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw8_xchg_u, macro() # 8-bit atomic exchange handler with an i32 result: exchanges t3 with memory, pushes t0
+    popInt64(t3) # t3 = value operand for the exchange
+    popMemoryIndex(t0, t2) # t0 = memory operand; t2 looks like a scratch register here - TODO confirm
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # presumably turns t0 into a pointer for a 1-byte access and leaves the PC advance in t4 - TODO confirm
+    noAlignmentCheck(t0, .throwUnaligned) # label is still passed; by its name this macro performs no check - TODO confirm
+    move t0, t2 # copy the pointer into t2 so t0 is free to receive the result
     if ARM64E
-        atomicxchgb val, [memCopy], mem
+        atomicxchgb t3, [t2], t0 # operands: value, [pointer copy], result
     elsif X86_64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) # args: pointer copy, value, result, scratch, update macro
             move value, dst
         end)
     elsif ARM64
-        weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) # same register roles as the X86_64 call
             move value, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0) # push the exchange result as an i32
+    advancePCByReg(t4) # t4 was the first argument to memoryOpAdvanceMCAndMakePointer; presumably the instruction length - TODO confirm
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) # target label handed to noAlignmentCheck above
+end)
 
-macro doI64AtomicRmwXchg16(mem, val, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i32_atomic_rmw16_xchg_u, macro() # 16-bit atomic exchange handler with an i32 result: exchanges t3 with memory, pushes t0
+    popInt64(t3) # t3 = value operand for the exchange
+    popMemoryIndex(t0, t2) # t0 = memory operand; t2 looks like a scratch register here - TODO confirm
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) # presumably turns t0 into a pointer for a 2-byte access and leaves the PC advance in t4 - TODO confirm
+    checkAlignment2(t0, .throwUnaligned) # on failure control goes to .throwUnaligned at the end of this handler
+    move t0, t2 # copy the pointer into t2 so t0 is free to receive the result
     if ARM64E
-        atomicxchgh val, [memCopy], mem
+        atomicxchgh t3, [t2], t0 # operands: value, [pointer copy], result
     elsif X86_64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) # args: pointer copy, value, result, scratch, update macro
             move value, dst
         end)
     elsif ARM64
-        weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) # same register roles as the X86_64 call
             move value, newValue
         end)
     else
         error
     end
-end
+    pushInt32(t0) # push the exchange result as an i32
+    advancePCByReg(t4) # t4 was the first argument to memoryOpAdvanceMCAndMakePointer; presumably the instruction length - TODO confirm
+    nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) # target of the alignment check above
+end)
 
-macro doI64AtomicRmwXchg32(mem, val, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
+ipintAtomicOp(_i64_atomic_rmw8_xchg_u, macro() # 8-bit atomic exchange handler with an i64 result: exchanges t3 with memory, pushes t0
+    popInt64(t3) # t3 = value operand for the exchange
+    popMemoryIndex(t0, t2) # t0 = memory operand; t2 looks like a scratch register here - TODO confirm
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) # presumably turns t0 into a pointer for a 1-byte access and leaves the PC advance in t4 - TODO confirm
+    noAlignmentCheck(t0, .throwUnaligned) # label is still passed; by its name this macro performs no check - TODO confirm
+    move t0, t2 # copy the pointer into t2 so t0 is free to receive the result
     if ARM64E
-        atomicxchgi val, [memCopy], mem
+        atomicxchgb t3, [t2], t0 # operands: value, [pointer copy], result
     elsif X86_64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst)
+        weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) # args: pointer copy, value, result, scratch, update macro
             move value, dst
         end)
     elsif ARM64
-        weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue)
+        weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) # same register roles as the X86_64 call
             move value, newValue
         end)
     else
         error
     end
-end
-
-macro doI32AtomicCmpxchg(mem, expected, newVal, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    move expected, mem
-    andq 0xffffffff, mem
-    if ARM64E or X86_64
-        atomicweakcasi mem, newVal, [memCopy]
-    elsif ARM64
-        weakCASExchangeInt(memCopy, newVal, mem, scratch, expected)
-    else
-        error
-    end
-end
-
-macro doI64AtomicCmpxchg(mem, expected, newVal, memCopy, scratch)
-    checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    move expected, mem
-    if ARM64E or X86_64
-        atomicweakcasq mem, newVal, [memCopy]
-    elsif ARM64
-        weakCASExchangeQuad(memCopy, newVal, mem, scratch, expected)
-    else
-        error
-    end
-end
-
-macro doI32AtomicCmpxchg8(mem, expected, newVal, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    move expected, mem
-    andq 0xff, mem
-    if ARM64E or X86_64
-        atomicweakcasb mem, newVal, [memCopy]
-    elsif ARM64
-        weakCASExchangeByte(memCopy, newVal, mem, scratch, expected)
-    else
-        error
-    end
-end
-
-macro doI32AtomicCmpxchg16(mem, expected, newVal, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    move expected, mem
-    andq 0xffff, mem
-    if ARM64E or X86_64
-        atomicweakcash mem, newVal, [memCopy]
-    elsif ARM64
-        weakCASExchangeHalf(memCopy, newVal, mem, scratch, expected)
-    else
-        error
-    end
-end
-
-macro doI64AtomicCmpxchg8(mem, expected, newVal, memCopy, scratch)
-    noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    move expected, mem
-    andq 0xff, mem
-    if ARM64E or X86_64
-        atomicweakcasb mem, newVal, [memCopy]
-    elsif ARM64
-        weakCASExchangeByte(memCopy, newVal, mem, scratch, expected)
-    else
-        error
-    end
-end
-
-macro doI64AtomicCmpxchg16(mem, expected, newVal, memCopy, scratch)
-    checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    move expected, mem
-    andq 0xffff, mem
-    if ARM64E or X86_64
-        atomicweakcash mem, newVal, [memCopy]
-    elsif ARM64
-        weakCASExchangeHalf(memCopy, newVal, mem, scratch, expected)
-    else
-        error
-    end
-end
-
-macro doI64AtomicCmpxchg32(mem, expected, newVal, memCopy, scratch)
-    checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess)
-    move mem, memCopy
-    move expected, mem
-    andq 0xffffffff, mem
-    if ARM64E or X86_64
-        atomicweakcasi mem, newVal, [memCopy]
-    elsif ARM64
-        weakCASExchangeInt(memCopy, newVal, mem, scratch, expected)
-    else
-        error
-    end
-end
-
-ipintAtomicOp(_memory_atomic_notify, macro()
-    # starting at sp: count, pointer
-    loadb IPInt::AtomicMemoryAccessMetadata::memoryIndex[MC], t0
-    pushInt32(t0)
-    loadq IPInt::AtomicMemoryAccessMetadata::offset[MC], t0
-    pushInt32(t0) # offset
-
-    move sp, a1
-
-    operationCall(macro() cCall2(_ipint_extern_memory_atomic_notify) end)
-    bilt r0, 0, _ipint_throw_OutOfBoundsMemoryAccess
-
-    addq (StackValueSize * 4), sp
-
-    pushInt32(r0)
-    loadb IPInt::AtomicMemoryAccessMetadata::instructionLength[MC], t0
-    advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::AtomicMemoryAccessMetadata)))
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_memory_atomic_wait32, macro()
-    # starting at sp: timeout, value, pointer
-    loadb IPInt::AtomicMemoryAccessMetadata::memoryIndex[MC], t0
-    pushInt32(t0)
-    loadq (StackValueSize * 3)[sp], t0
-    loadq IPInt::AtomicMemoryAccessMetadata::offset[MC], t1
-    addq t1, t0
-    storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset
-
-    move sp, a1
-
-    operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait32) end)
-    bilt r0, 0, _ipint_throw_OutOfBoundsMemoryAccess
-
-    addq (StackValueSize * 4), sp
-
-    pushInt32(r0)
-    loadb IPInt::AtomicMemoryAccessMetadata::instructionLength[MC], t0
-    advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::AtomicMemoryAccessMetadata)))
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_memory_atomic_wait64, macro()
-    # starting at sp: timeout, value, pointer
-    loadb IPInt::AtomicMemoryAccessMetadata::memoryIndex[MC], t0
-    pushInt32(t0)
-    loadq (StackValueSize * 3)[sp], t0
-    loadq IPInt::AtomicMemoryAccessMetadata::offset[MC], t1
-    addq t1, t0
-    storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset
-
-    move sp, a1
-
-    operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait64) end)
-    bilt r0, 0, _ipint_throw_OutOfBoundsMemoryAccess
-
-    addq (StackValueSize * 4), sp
-
-    pushInt32(r0)
-    loadb IPInt::AtomicMemoryAccessMetadata::instructionLength[MC], t0
-    advancePCByReg(t0)
-    advanceMC(constexpr (sizeof(IPInt::AtomicMemoryAccessMetadata)))
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_atomic_fence, macro()
-    fence
-    leap 1[t4], PC
-    nextIPIntInstruction()
-end)
-
-reservedAtomicOpcode(atomic_0x4)
-reservedAtomicOpcode(atomic_0x5)
-reservedAtomicOpcode(atomic_0x6)
-reservedAtomicOpcode(atomic_0x7)
-reservedAtomicOpcode(atomic_0x8)
-reservedAtomicOpcode(atomic_0x9)
-reservedAtomicOpcode(atomic_0xa)
-reservedAtomicOpcode(atomic_0xb)
-reservedAtomicOpcode(atomic_0xc)
-reservedAtomicOpcode(atomic_0xd)
-reservedAtomicOpcode(atomic_0xe)
-reservedAtomicOpcode(atomic_0xf)
-
-ipintAtomicOp(_i32_atomic_load, macro()
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_load_slow_path)
-    doI32AtomicLoad(t0, t2)
-    pushInt32(t2)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_load, macro()
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_load_slow_path)
-    doI64AtomicLoad(t0, t2)
-    pushInt64(t2)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_load8_u, macro()
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_load8_u_slow_path)
-    doI32AtomicLoad8(t0, t2)
-    pushInt32(t2)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_load16_u, macro()
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_load16_u_slow_path)
-    doI32AtomicLoad16(t0, t2)
-    pushInt32(t2)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_load8_u, macro()
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_load8_u_slow_path)
-    doI64AtomicLoad8(t0, t2)
-    pushInt64(t2)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_load16_u, macro()
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_load16_u_slow_path)
-    doI64AtomicLoad16(t0, t2)
-    pushInt64(t2)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_load32_u, macro()
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_load32_u_slow_path)
-    doI64AtomicLoad32(t0, t2)
-    pushInt64(t2)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_store, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_store_slow_path)
-    doI32AtomicStore(t0, t3, t2, t1)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_store, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_store_slow_path)
-    doI64AtomicStore(t0, t3, t2, t1)
-    leap 2[t4], PC
+    pushInt64(t0) # push the exchange result as an i64
+    advancePCByReg(t4) # t4 was the first argument to memoryOpAdvanceMCAndMakePointer; presumably the instruction length - TODO confirm
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) # target label handed to noAlignmentCheck above
 end)
 
-ipintAtomicOp(_i32_atomic_store8_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_store8_u_slow_path)
-    doI32AtomicStore8(t0, t3, t2, t1)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_store16_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_store16_u_slow_path)
-    doI32AtomicStore16(t0, t3, t2, t1)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_store8_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_store8_u_slow_path)
-    doI64AtomicStore8(t0, t3, t2, t1)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_store16_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_store16_u_slow_path)
-    doI64AtomicStore16(t0, t3, t2, t1)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_store32_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_store32_u_slow_path)
-    doI64AtomicStore32(t0, t3, t2, t1)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw_add, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_add_slow_path)
-    doI32AtomicRmwAdd(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw_add, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_add_slow_path)
-    doI64AtomicRmwAdd(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw8_add_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_add_u_slow_path)
-    doI32AtomicRmwAdd8(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw16_add_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_add_u_slow_path)
-    doI32AtomicRmwAdd16(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw8_add_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_add_u_slow_path)
-    doI64AtomicRmwAdd8(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw16_add_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_add_u_slow_path)
-    doI64AtomicRmwAdd16(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw32_add_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_add_u_slow_path)
-    doI64AtomicRmwAdd32(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw_sub, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_sub_slow_path)
-    doI32AtomicRmwSub(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw_sub, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_sub_slow_path)
-    doI64AtomicRmwSub(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw8_sub_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_sub_u_slow_path)
-    doI32AtomicRmwSub8(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw16_sub_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_sub_u_slow_path)
-    doI32AtomicRmwSub16(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw8_sub_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_sub_u_slow_path)
-    doI64AtomicRmwSub8(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw16_sub_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_sub_u_slow_path)
-    doI64AtomicRmwSub16(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw32_sub_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_sub_u_slow_path)
-    doI64AtomicRmwSub32(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw_and, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_and_slow_path)
-    doI32AtomicRmwAnd(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw_and, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_and_slow_path)
-    doI64AtomicRmwAnd(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw8_and_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_and_u_slow_path)
-    doI32AtomicRmwAnd8(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw16_and_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_and_u_slow_path)
-    doI32AtomicRmwAnd16(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw8_and_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_and_u_slow_path)
-    doI64AtomicRmwAnd8(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw16_and_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_and_u_slow_path)
-    doI64AtomicRmwAnd16(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw32_and_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_and_u_slow_path)
-    doI64AtomicRmwAnd32(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw_or, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_or_slow_path)
-    doI32AtomicRmwOr(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw_or, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_or_slow_path)
-    doI64AtomicRmwOr(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw8_or_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_or_u_slow_path)
-    doI32AtomicRmwOr8(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw16_or_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_or_u_slow_path)
-    doI32AtomicRmwOr16(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw8_or_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_or_u_slow_path)
-    doI64AtomicRmwOr8(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw16_or_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_or_u_slow_path)
-    doI64AtomicRmwOr16(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw32_or_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_or_u_slow_path)
-    doI64AtomicRmwOr32(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw_xor, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_xor_slow_path)
-    doI32AtomicRmwXor(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw_xor, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_xor_slow_path)
-    doI64AtomicRmwXor(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw8_xor_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_xor_u_slow_path)
-    doI32AtomicRmwXor8(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw16_xor_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_xor_u_slow_path)
-    doI32AtomicRmwXor16(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw8_xor_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_xor_u_slow_path)
-    doI64AtomicRmwXor8(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw16_xor_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_xor_u_slow_path)
-    doI64AtomicRmwXor16(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw32_xor_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_xor_u_slow_path)
-    doI64AtomicRmwXor32(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw_xchg, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_xchg_slow_path)
-    doI32AtomicRmwXchg(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw_xchg, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_xchg_slow_path)
-    doI64AtomicRmwXchg(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw8_xchg_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_xchg_u_slow_path)
-    doI32AtomicRmwXchg8(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw16_xchg_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_xchg_u_slow_path)
-    doI32AtomicRmwXchg16(t0, t3, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw8_xchg_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_xchg_u_slow_path)
-    doI64AtomicRmwXchg8(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw16_xchg_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_xchg_u_slow_path)
-    doI64AtomicRmwXchg16(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw32_xchg_u, macro()
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_xchg_u_slow_path)
-    doI64AtomicRmwXchg32(t0, t3, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-macro weakCASExchangeByte(mem, value, expected, scratch, scratch2)
-    if ARM64
-    validateOpcodeConfig(scratch2)
-    .loop:
-        loadlinkacqb [mem], scratch2
-        bqneq expected, scratch2, .fail
-        storecondrelb scratch, value, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .fail:
-        storecondrelb scratch, scratch2, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .done:
-        move scratch2, expected
-    else
-        error
-    end
-end
-
-macro weakCASExchangeHalf(mem, value, expected, scratch, scratch2)
-    if ARM64
-    validateOpcodeConfig(scratch2)
-    .loop:
-        loadlinkacqh [mem], scratch2
-        bqneq expected, scratch2, .fail
-        storecondrelh scratch, value, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .fail:
-        storecondrelh scratch, scratch2, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .done:
-        move scratch2, expected
-    else
-        error
-    end
-end
-
-macro weakCASExchangeInt(mem, value, expected, scratch, scratch2)
-    if ARM64
-    validateOpcodeConfig(scratch2)
-    .loop:
-        loadlinkacqi [mem], scratch2
-        bqneq expected, scratch2, .fail
-        storecondreli scratch, value, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .fail:
-        storecondreli scratch, scratch2, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .done:
-        move scratch2, expected
-    else
-        error
-    end
-end
-
-macro weakCASExchangeQuad(mem, value, expected, scratch, scratch2)
-    if ARM64
-    validateOpcodeConfig(scratch2)
-    .loop:
-        loadlinkacqq [mem], scratch2
-        bqneq expected, scratch2, .fail
-        storecondrelq scratch, value, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .fail:
-        storecondrelq scratch, scratch2, [mem]
-        bieq scratch, 0, .done
-        jmp .loop
-    .done:
-        move scratch2, expected
-    else
-        error
-    end
-end
-
-ipintAtomicOp(_i32_atomic_rmw_cmpxchg, macro()
-    popInt64(t7)
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_cmpxchg_slow_path)
-    doI32AtomicCmpxchg(t0, t3, t7, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw_cmpxchg, macro()
-    popInt64(t7)
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_cmpxchg_slow_path)
-    doI64AtomicCmpxchg(t0, t3, t7, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw8_cmpxchg_u, macro()
-    popInt64(t7)
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_cmpxchg_u_slow_path)
-    doI32AtomicCmpxchg8(t0, t3, t7, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i32_atomic_rmw16_cmpxchg_u, macro()
-    popInt64(t7)
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_cmpxchg_u_slow_path)
-    doI32AtomicCmpxchg16(t0, t3, t7, t2, t1)
-    pushInt32(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw8_cmpxchg_u, macro()
-    popInt64(t7)
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_cmpxchg_u_slow_path)
-    doI64AtomicCmpxchg8(t0, t3, t7, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw16_cmpxchg_u, macro()
-    popInt64(t7)
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_cmpxchg_u_slow_path)
-    doI64AtomicCmpxchg16(t0, t3, t7, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-ipintAtomicOp(_i64_atomic_rmw32_cmpxchg_u, macro()
-    popInt64(t7)
-    popInt64(t3)
-    popMemoryIndex(t0)
-    loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_cmpxchg_u_slow_path)
-    doI64AtomicCmpxchg32(t0, t3, t7, t2, t1)
-    pushInt64(t0)
-    leap 2[t4], PC
-    nextIPIntInstruction()
-end)
-
-#######################################
-## ULEB128 decoding logic for locals ##
-#######################################
-
-macro decodeULEB128(result)
-    # result should already be the first byte.
-    andq 0x7f, result
-    move 7, t2 # t1 holds the shift.
-    validateOpcodeConfig(t3)
-.loop:
-    loadb [PC], t3
-    andq t3, 0x7f, t1
-    lshiftq t2, t1
-    orq t1, result
-    addq 7, t2
-    advancePC(1)
-    bbaeq t3, 128, .loop
-end
-
-.ipint_local_get_slow_path:
-    decodeULEB128(t0)
-    localGetPostDecode()
-
-.ipint_local_set_slow_path:
-    decodeULEB128(t0)
-    localSetPostDecode()
-
-.ipint_local_tee_slow_path:
-    decodeULEB128(t0)
-    localTeePostDecode()
-
-##########################################
-## Out-of-line LEB128 decode slow paths ##
-##########################################
-
-.ipint_i32_const_slow_path:
-    leap 1[PC], t4
-    decodeLEBVarSInt32(t0, t4, t1, t2)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_const_slow_path:
-    leap 1[PC], t4
-    decodeLEBVarSInt64(t0, t4, t1, t2)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-##################################################
-## Out-of-line slow paths for memory load/store ##
-##################################################
-
-# The handler's fast path pops values and branches here on multi-byte memarg.
-# t0 = wasm address (from popMemoryIndex), t3 = data value (for int stores),
-# ft0 = data value (for float stores). These must survive loadStoreMakePointerSlow.
-# For int stores, t3 is saved/restored around the macro since t3 is used as scratch.
-
-.ipint_i32_load_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    loadi [t0], t1
-    pushInt32(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_load_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    loadq [t0], t1
-    pushInt64(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_f32_load_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    loadf [t0], ft0
-    pushFloat32(ft0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_f64_load_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    loadd [t0], ft0
-    pushFloat64(ft0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_load8s_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    loadbsi [t0], t1
-    pushInt32(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_load8u_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    loadb [t0], t1
-    pushInt32(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_load16s_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    loadhsi [t0], t1
-    pushInt32(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_load16u_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    loadh [t0], t1
-    pushInt32(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_load8s_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    loadbsq [t0], t1
-    pushInt64(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_load8u_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    loadb [t0], t1
-    pushInt64(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_load16s_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    loadhsq [t0], t1
-    pushInt64(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_load16u_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    loadh [t0], t1
-    pushInt64(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_load32s_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    loadi [t0], t1
-    sxi2q t1, t1
-    pushInt64(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_load32u_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    loadi [t0], t1
-    pushInt64(t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_store_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    storei t3, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_store_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    storeq t3, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_f32_store_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    storef ft0, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_f64_store_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    stored ft0, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_store8_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    storeb t3, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_store16_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    storeh t3, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_store8_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    storeb t3, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_store16_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    storeh t3, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_store32_mem_slow_path:
-    leap 1[PC], t4
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    storei t3, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-###################################################
-## Out-of-line slow paths for SIMD memory access ##
-###################################################
-
-# t0 = wasm address (from popMemoryIndex before branching).
-# t4 = cursor pointing to start of memarg (past SIMD opcode, set by simd_prefix).
-# After loadStoreMakePointerSlow, t4 points past the memarg.
-
-.simd_v128_load_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 16, t1, t2, t5, t6)
-    loadv [t0], v0
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load_8x8s_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    simdLoad8x8s()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load_8x8u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    simdLoad8x8u()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load_16x4s_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    simdLoad16x4s()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load_16x4u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    simdLoad16x4u()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load_32x2s_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    simdLoad32x2s()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load_32x2u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    simdLoad32x2u()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load8_splat_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    simdLoadSplat8()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load16_splat_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    simdLoadSplat16()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load32_splat_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    simdLoadSplat32()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load64_splat_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    simdLoadSplat64()
-    pushVec(v0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_store_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 16, t1, t2, t5, t6)
-    storev v0, [t0]
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load32_zero_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    loadi [t0], t0
-    subp V128ISize, sp
-    storei t0, [sp]
-    storei 0, 4[sp]
-    storeq 0, 8[sp]
-    move t4, PC
-    nextIPIntInstruction()
-
-.simd_v128_load64_zero_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    loadq [t0], t0
-    subp V128ISize, sp
-    storeq t0, [sp]
-    storeq 0, 8[sp]
-    move t4, PC
-    nextIPIntInstruction()
-
-# Load lane slow paths: v0 = vector (already popped), t0 = wasm addr.
-# t4 points past memarg after loadStoreMakePointerSlow. Lane index is at [t4].
-
-.simd_v128_load8_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    loadb [t0], t0
-    loadb [t4], t1
-    andi ImmLaneIdx16Mask, t1
-    pushVec(v0)
-    storeb t0, [sp, t1]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-.simd_v128_load16_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    loadh [t0], t0
-    loadb [t4], t1
-    andi ImmLaneIdx8Mask, t1
-    pushVec(v0)
-    storeh t0, [sp, t1, 2]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-.simd_v128_load32_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    loadi [t0], t0
-    loadb [t4], t1
-    andi ImmLaneIdx4Mask, t1
-    pushVec(v0)
-    storei t0, [sp, t1, 4]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-.simd_v128_load64_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    loadq [t0], t0
-    loadb [t4], t1
-    andi ImmLaneIdx2Mask, t1
-    pushVec(v0)
-    storeq t0, [sp, t1, 8]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-# Store lane slow paths: v0 = vector (already popped), t0 = wasm addr.
-# t4 points past memarg. Lane index is at [t4].
-
-.simd_v128_store8_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    loadb [t4], t1
-    andi ImmLaneIdx16Mask, t1
-    pushVec(v0)
-    loadb [sp, t1], t1
-    addp V128ISize, sp
-    storeb t1, [t0]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-.simd_v128_store16_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    loadb [t4], t1
-    andi ImmLaneIdx8Mask, t1
-    pushVec(v0)
-    loadh [sp, t1, 2], t1
-    addp V128ISize, sp
-    storeh t1, [t0]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-.simd_v128_store32_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    loadb [t4], t1
-    andi ImmLaneIdx4Mask, t1
-    pushVec(v0)
-    loadi [sp, t1, 4], t1
-    addp V128ISize, sp
-    storei t1, [t0]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-.simd_v128_store64_lane_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    loadb [t4], t1
-    andi ImmLaneIdx2Mask, t1
-    pushVec(v0)
-    loadq [sp, t1, 8], t1
-    addp V128ISize, sp
-    storeq t1, [t0]
-    leap 1[t4], PC
-    nextIPIntInstruction()
-
-#########################################################
-## Out-of-line slow paths for atomic memory operations ##
-#########################################################
-
-# t0 = wasm address (from popMemoryIndex before branching).
-# t4 = cursor pointing to start of memarg (past atomic sub-opcode, set by atomic_prefix).
-# t3 = data value (for store/RMW ops, survives loadStoreMakePointerSlow).
-# t7 = new value for CAS (must be push/popped around loadStoreMakePointerSlow).
-# After loadStoreMakePointerSlow, t4 points past the memarg.
-
-.ipint_i32_atomic_load_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicLoad(t0, t2)
-    pushInt32(t2)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_load_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicLoad(t0, t2)
-    pushInt64(t2)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_load8_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicLoad8(t0, t2)
-    pushInt32(t2)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_load16_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicLoad16(t0, t2)
-    pushInt32(t2)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_load8_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicLoad8(t0, t2)
-    pushInt64(t2)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_load16_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicLoad16(t0, t2)
-    pushInt64(t2)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_load32_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicLoad32(t0, t2)
-    pushInt64(t2)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_store_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicStore(t0, t3, t2, t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_store_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicStore(t0, t3, t2, t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_store8_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicStore8(t0, t3, t2, t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_store16_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicStore16(t0, t3, t2, t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_store8_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicStore8(t0, t3, t2, t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_store16_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicStore16(t0, t3, t2, t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_store32_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicStore32(t0, t3, t2, t1)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw_add_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicRmwAdd(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw_add_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicRmwAdd(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw8_add_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicRmwAdd8(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw16_add_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicRmwAdd16(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw8_add_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicRmwAdd8(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw16_add_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicRmwAdd16(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw32_add_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicRmwAdd32(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw_sub_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicRmwSub(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw_sub_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicRmwSub(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw8_sub_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicRmwSub8(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw16_sub_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicRmwSub16(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw8_sub_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicRmwSub8(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw16_sub_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicRmwSub16(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw32_sub_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicRmwSub32(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw_and_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicRmwAnd(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw_and_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicRmwAnd(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw8_and_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicRmwAnd8(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw16_and_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicRmwAnd16(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw8_and_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicRmwAnd8(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw16_and_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicRmwAnd16(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw32_and_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicRmwAnd32(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw_or_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicRmwOr(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw_or_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicRmwOr(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw8_or_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicRmwOr8(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw16_or_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicRmwOr16(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw8_or_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicRmwOr8(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw16_or_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicRmwOr16(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw32_or_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicRmwOr32(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw_xor_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicRmwXor(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw_xor_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicRmwXor(t0, t3, t2, t1)
+ipintAtomicOp(_i64_atomic_rmw16_xchg_u, macro()
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    checkAlignment2(t0, .throwUnaligned)
+    move t0, t2
+    if ARM64E
+        atomicxchgh t3, [t2], t0
+    elsif X86_64
+        weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst)
+            move value, dst
+        end)
+    elsif ARM64
+        weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue)
+            move value, newValue
+        end)
+    else
+        error
+    end
     pushInt64(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i32_atomic_rmw8_xor_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicRmwXor8(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
+ipintAtomicOp(_i64_atomic_rmw32_xchg_u, macro()
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    checkAlignment4(t0, .throwUnaligned)
+    move t0, t2
+    if ARM64E
+        atomicxchgi t3, [t2], t0
+    elsif X86_64
+        weakCASLoopInt(t2, t3, t0, t1, macro (value, dst)
+            move value, dst
+        end)
+    elsif ARM64
+        weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue)
+            move value, newValue
+        end)
+    else
+        error
+    end
+    pushInt64(t0)
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i32_atomic_rmw16_xor_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicRmwXor16(t0, t3, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
+macro weakCASExchangeByte(mem, value, expected, scratch, scratch2) # Byte-wide compare-exchange loop; on exit `expected` holds the byte loaded from [mem].
+    if ARM64
+    validateOpcodeConfig(scratch2)
+    .loop:
+        loadlinkacqb [mem], scratch2 # scratch2 = current byte at [mem].
+        bqneq expected, scratch2, .fail # Mismatch: skip storing `value`.
+        storecondrelb scratch, value, [mem] # scratch is tested below as the store's status.
+        bieq scratch, 0, .done # Status 0 leaves the loop; anything else retries.
+        jmp .loop
+    .fail:
+        storecondrelb scratch, scratch2, [mem] # Write back the byte just loaded (memory contents unchanged).
+        bieq scratch, 0, .done
+        jmp .loop
+    .done:
+        move scratch2, expected # Report the loaded value through `expected`.
+    else
+        error
+    end
+end
 
-.ipint_i64_atomic_rmw8_xor_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicRmwXor8(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
+macro weakCASExchangeHalf(mem, value, expected, scratch, scratch2) # Halfword-wide compare-exchange loop; on exit `expected` holds the halfword loaded from [mem].
+    if ARM64
+    validateOpcodeConfig(scratch2)
+    .loop:
+        loadlinkacqh [mem], scratch2 # scratch2 = current halfword at [mem].
+        bqneq expected, scratch2, .fail # Mismatch: skip storing `value`.
+        storecondrelh scratch, value, [mem] # scratch is tested below as the store's status.
+        bieq scratch, 0, .done # Status 0 leaves the loop; anything else retries.
+        jmp .loop
+    .fail:
+        storecondrelh scratch, scratch2, [mem] # Write back the halfword just loaded (memory contents unchanged).
+        bieq scratch, 0, .done
+        jmp .loop
+    .done:
+        move scratch2, expected # Report the loaded value through `expected`.
+    else
+        error
+    end
+end
 
-.ipint_i64_atomic_rmw16_xor_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicRmwXor16(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
+macro weakCASExchangeInt(mem, value, expected, scratch, scratch2) # 32-bit compare-exchange loop; on exit `expected` holds the word loaded from [mem].
+    if ARM64
+    validateOpcodeConfig(scratch2)
+    .loop:
+        loadlinkacqi [mem], scratch2 # scratch2 = current 32-bit value at [mem].
+        bqneq expected, scratch2, .fail # Mismatch: skip storing `value`.
+        storecondreli scratch, value, [mem] # scratch is tested below as the store's status.
+        bieq scratch, 0, .done # Status 0 leaves the loop; anything else retries.
+        jmp .loop
+    .fail:
+        storecondreli scratch, scratch2, [mem] # Write back the value just loaded (memory contents unchanged).
+        bieq scratch, 0, .done
+        jmp .loop
+    .done:
+        move scratch2, expected # Report the loaded value through `expected`.
+    else
+        error
+    end
+end
 
-.ipint_i64_atomic_rmw32_xor_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicRmwXor32(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
+macro weakCASExchangeQuad(mem, value, expected, scratch, scratch2) # 64-bit compare-exchange loop; on exit `expected` holds the quadword loaded from [mem].
+    if ARM64
+    validateOpcodeConfig(scratch2)
+    .loop:
+        loadlinkacqq [mem], scratch2 # scratch2 = current 64-bit value at [mem].
+        bqneq expected, scratch2, .fail # Mismatch: skip storing `value`.
+        storecondrelq scratch, value, [mem] # scratch is tested below as the store's status.
+        bieq scratch, 0, .done # Status 0 leaves the loop; anything else retries.
+        jmp .loop
+    .fail:
+        storecondrelq scratch, scratch2, [mem] # Write back the value just loaded (memory contents unchanged).
+        bieq scratch, 0, .done
+        jmp .loop
+    .done:
+        move scratch2, expected # Report the loaded value through `expected`.
+    else
+        error
+    end
+end
 
-.ipint_i32_atomic_rmw_xchg_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicRmwXchg(t0, t3, t2, t1)
+ipintAtomicOp(_i32_atomic_rmw_cmpxchg, macro()
+    # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64.
+    # ARMv7 (where PL=t7) does not run 64-bit atomic instructions.
+    popInt64(t7)
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    checkAlignment4(t0, .throwUnaligned)
+    move t0, t2
+    move t3, t0
+    andq 0xffffffff, t0
+    if ARM64E or X86_64
+        atomicweakcasi t0, t7, [t2]
+    elsif ARM64
+        weakCASExchangeInt(t2, t7, t0, t1, t3)
+    else
+        error
+    end
     pushInt32(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i64_atomic_rmw_xchg_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicRmwXchg(t0, t3, t2, t1)
+ipintAtomicOp(_i64_atomic_rmw_cmpxchg, macro()
+    # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64.
+    # ARMv7 (where PL=t7) does not run 64-bit atomic instructions.
+    popInt64(t7)
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2)
+    checkAlignment8(t0, .throwUnaligned)
+    move t0, t2
+    move t3, t0
+    if ARM64E or X86_64
+        atomicweakcasq t0, t7, [t2]
+    elsif ARM64
+        weakCASExchangeQuad(t2, t7, t0, t1, t3)
+    else
+        error
+    end
     pushInt64(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i32_atomic_rmw8_xchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicRmwXchg8(t0, t3, t2, t1)
+ipintAtomicOp(_i32_atomic_rmw8_cmpxchg_u, macro()
+    # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64.
+    # ARMv7 (where PL=t7) does not run 64-bit atomic instructions.
+    popInt64(t7)
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    noAlignmentCheck(t0, .throwUnaligned)
+    move t0, t2
+    move t3, t0
+    andq 0xff, t0
+    if ARM64E or X86_64
+        atomicweakcasb t0, t7, [t2]
+    elsif ARM64
+        weakCASExchangeByte(t2, t7, t0, t1, t3)
+    else
+        error
+    end
     pushInt32(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i32_atomic_rmw16_xchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicRmwXchg16(t0, t3, t2, t1)
+ipintAtomicOp(_i32_atomic_rmw16_cmpxchg_u, macro()
+    # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64.
+    # ARMv7 (where PL=t7) does not run 64-bit atomic instructions.
+    popInt64(t7)
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    checkAlignment2(t0, .throwUnaligned)
+    move t0, t2
+    move t3, t0
+    andq 0xffff, t0
+    if ARM64E or X86_64
+        atomicweakcash t0, t7, [t2]
+    elsif ARM64
+        weakCASExchangeHalf(t2, t7, t0, t1, t3)
+    else
+        error
+    end
     pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i64_atomic_rmw8_xchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicRmwXchg8(t0, t3, t2, t1)
-    pushInt64(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i64_atomic_rmw16_xchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicRmwXchg16(t0, t3, t2, t1)
+ipintAtomicOp(_i64_atomic_rmw8_cmpxchg_u, macro()
+    # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64.
+    # ARMv7 (where PL=t7) does not run 64-bit atomic instructions.
+    popInt64(t7)
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2)
+    noAlignmentCheck(t0, .throwUnaligned)
+    move t0, t2
+    move t3, t0
+    andq 0xff, t0
+    if ARM64E or X86_64
+        atomicweakcasb t0, t7, [t2]
+    elsif ARM64
+        weakCASExchangeByte(t2, t7, t0, t1, t3)
+    else
+        error
+    end
     pushInt64(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i64_atomic_rmw32_xchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicRmwXchg32(t0, t3, t2, t1)
+ipintAtomicOp(_i64_atomic_rmw16_cmpxchg_u, macro()
+    # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64.
+    # ARMv7 (where PL=t7) does not run 64-bit atomic instructions.
+    popInt64(t7)
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2)
+    checkAlignment2(t0, .throwUnaligned)
+    move t0, t2
+    move t3, t0
+    andq 0xffff, t0
+    if ARM64E or X86_64
+        atomicweakcash t0, t7, [t2]
+    elsif ARM64
+        weakCASExchangeHalf(t2, t7, t0, t1, t3)
+    else
+        error
+    end
     pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
-
-.ipint_i32_atomic_rmw_cmpxchg_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI32AtomicCmpxchg(t0, t3, t7, t2, t1)
-    pushInt32(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i64_atomic_rmw_cmpxchg_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6)
-    doI64AtomicCmpxchg(t0, t3, t7, t2, t1)
+ipintAtomicOp(_i64_atomic_rmw32_cmpxchg_u, macro()
+    # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64.
+    # ARMv7 (where PL=t7) does not run 64-bit atomic instructions.
+    popInt64(t7)
+    popInt64(t3)
+    popMemoryIndex(t0, t2)
+    memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2)
+    checkAlignment4(t0, .throwUnaligned)
+    move t0, t2
+    move t3, t0
+    andq 0xffffffff, t0
+    if ARM64E or X86_64
+        atomicweakcasi t0, t7, [t2]
+    elsif ARM64
+        weakCASExchangeInt(t2, t7, t0, t1, t3)
+    else
+        error
+    end
     pushInt64(t0)
-    move t4, PC
+    advancePCByReg(t4)
     nextIPIntInstruction()
+.throwUnaligned:
+    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
+end)
 
-.ipint_i32_atomic_rmw8_cmpxchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI32AtomicCmpxchg8(t0, t3, t7, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
+#######################################
+## ULEB128 decoding logic for locals ##
+#######################################
 
-.ipint_i32_atomic_rmw16_cmpxchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI32AtomicCmpxchg16(t0, t3, t7, t2, t1)
-    pushInt32(t0)
-    move t4, PC
-    nextIPIntInstruction()
+macro decodeULEB128(result)
+    # result should already be the first byte; PC must point at the next byte to decode. Uses t1, t2, t3 as scratch.
+    andq 0x7f, result
+    move 7, t2 # t2 holds the shift.
+    validateOpcodeConfig(t3)
+.loop:
+    loadb [PC], t3
+    andq t3, 0x7f, t1 # t1 holds the low 7 payload bits of this byte.
+    lshiftq t2, t1
+    orq t1, result
+    addq 7, t2
+    advancePC(1)
+    bbaeq t3, 128, .loop # Byte >= 128 (high bit set): keep decoding.
+end
 
-.ipint_i64_atomic_rmw8_cmpxchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6)
-    doI64AtomicCmpxchg8(t0, t3, t7, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
+slowPathLabel(_local_get)
+    decodeULEB128(t0)
+    localGetPostDecode()
 
-.ipint_i64_atomic_rmw16_cmpxchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6)
-    doI64AtomicCmpxchg16(t0, t3, t7, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
+slowPathLabel(_local_set)
+    decodeULEB128(t0)
+    localSetPostDecode()
 
-.ipint_i64_atomic_rmw32_cmpxchg_u_slow_path:
-    loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6)
-    doI64AtomicCmpxchg32(t0, t3, t7, t2, t1)
-    pushInt64(t0)
-    move t4, PC
-    nextIPIntInstruction()
+slowPathLabel(_local_tee)
+    decodeULEB128(t0)
+    localTeePostDecode()
 
 ##################################
 ## "Out of line" logic for call ##
@@ -11456,6 +11043,8 @@ end
     # t3 is not used after this
     subp cfr, t3
     push t3, PC
+    # ditto for PL, t3 is okay to use as scratch
+    subp PL, cfr, t3
     push t3, wasmInstance
 
     # set up the call frame
@@ -11470,7 +11059,7 @@ end
     # reserved
     # reserved
     # (first_non_arg_addr - cfr), PC
-    # unused, wasmInstance <- t2 = native argument stack (pushed by mINT)
+    # (PL - cfr), wasmInstance <- t2 = native argument stack (pushed by mINT)
     # call frame
     # call frame
     # call frame
@@ -11786,6 +11375,7 @@ mintAlign(_tail_call)
 # CallArgumentBytecode::Call (0x1b)
 mintAlign(_call)
     pop wasmInstance, ws0
+    # pop targetInstance, targetEntrypoint
 
     # Save stack pointer, if we tail call someone who changes the frame above's stack argument size.
     # Store its value relative to cfp so stack frames can be easily relocated for JSPI.
@@ -11793,11 +11383,16 @@ mintAlign(_call)
     subp cfr, sc1
     storep sc1, ThisArgumentOffset[cfr]
 
+    # Swap instances
+    # move targetInstance, wasmInstance
+
     # Set up memory
     push t2, t3
     ipintReloadMemory()
     pop t3, t2
 
+    # move targetEntrypoint, ws0
+
     # Make the call
 if ARM64E
     leap _g_config, ws1
@@ -11973,7 +11568,7 @@ mintAlign(_end)
     # return result
     # return result     <- mintRetDst => new SP
     # (first_non_arg_addr - cfr), PC
-    # unused, wasmInstance  <- sc3
+    # (PL - cfr), wasmInstance  <- sc3
     # call frame
     # call frame
     # call frame
@@ -11993,6 +11588,7 @@ end
     loadp Callee[cfr], ws0
     unboxWasmCallee(ws0, ws1)
     storep ws0, UnboxedWasmCalleeStackSlot[cfr]
+    addp t3, cfr, PL
 
     # Restore memory
     ipintReloadMemory()
@@ -12139,36 +11735,6 @@ _ipint_mint_ret_dispatch_err:
     move 0x88, a0
     break
 
-_ipint_throw_Unreachable:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(Unreachable)
-
-_ipint_throw_NullExnrefReference:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullExnrefReference)
-
-_ipint_throw_OutOfBoundsMemoryAccess:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess)
-
-_ipint_throw_DivisionByZero:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero)
-
-_ipint_throw_IntegerOverflow:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(IntegerOverflow)
-
-_ipint_throw_OutOfBoundsTrunc:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc)
-
-_ipint_throw_NullRefAsNonNull:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullRefAsNonNull)
-
-_ipint_throw_NullAccess:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullAccess)
-
-_ipint_throw_NullI31Get:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(NullI31Get)
-
-_ipint_throw_UnalignedMemoryAccess:
-    handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess)
-
 ###########################################
 # uINT: function return value interpreter #
 ###########################################
@@ -12279,18 +11845,18 @@ uintAlign(_ret)
 argumINTAlign(_a0)
 _argumINT_begin:
     storeq wa0, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # move the destination cursor up by LocalSize so the next store lands one slot higher
     argumINTDispatch()
 
 argumINTAlign(_a1)
     storeq wa1, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # move the destination cursor up by LocalSize so the next store lands one slot higher
     argumINTDispatch()
 
 argumINTAlign(_a2)
 if ARM64 or ARM64E or X86_64
     storeq wa2, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
     argumINTDispatch()
 else
     break
@@ -12300,7 +11866,7 @@ end
 argumINTAlign(_a3)
 if ARM64 or ARM64E or X86_64
     storeq wa3, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
     argumINTDispatch()
 else
     break
@@ -12309,7 +11875,7 @@ end
 argumINTAlign(_a4)
 if ARM64 or ARM64E or X86_64
     storeq wa4, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
     argumINTDispatch()
 else
     break
@@ -12318,7 +11884,7 @@ end
 argumINTAlign(_a5)
 if ARM64 or ARM64E or X86_64
     storeq wa5, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
     argumINTDispatch()
 else
     break
@@ -12327,7 +11893,7 @@ end
 argumINTAlign(_a6)
 if ARM64 or ARM64E
     storeq wa6, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
     argumINTDispatch()
 else
     break
@@ -12336,7 +11902,7 @@ end
 argumINTAlign(_a7)
 if ARM64 or ARM64E
     storeq wa7, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
     argumINTDispatch()
 else
     break
@@ -12344,49 +11910,49 @@ end
 
 argumINTAlign(_fa0)
     storev wfa0, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # move the destination cursor up by LocalSize so the next store lands one slot higher
     argumINTDispatch()
 
 argumINTAlign(_fa1)
     storev wfa1, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # advance argumINTDst by LocalSize (cursor grows upward)
     argumINTDispatch()
 
 argumINTAlign(_fa2)
     storev wfa2, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # advance argumINTDst by LocalSize (cursor grows upward)
     argumINTDispatch()
 
 argumINTAlign(_fa3)
     storev wfa3, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # advance argumINTDst by LocalSize (cursor grows upward)
     argumINTDispatch()
 
 argumINTAlign(_fa4)
     storev wfa4, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # advance argumINTDst by LocalSize (cursor grows upward)
     argumINTDispatch()
 
 argumINTAlign(_fa5)
     storev wfa5, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # advance argumINTDst by LocalSize (cursor grows upward)
     argumINTDispatch()
 
 argumINTAlign(_fa6)
     storev wfa6, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # advance argumINTDst by LocalSize (cursor grows upward)
     argumINTDispatch()
 
 argumINTAlign(_fa7)
     storev wfa7, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # advance argumINTDst by LocalSize (cursor grows upward)
     argumINTDispatch()
 
 argumINTAlign(_stack)
     loadq [argumINTSrc], csr0
     addp SlotSize, argumINTSrc
     storeq csr0, [argumINTDst]
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst # destination advances by LocalSize while the source above advanced by SlotSize
     argumINTDispatch()
 
 argumINTAlign(_stack_vector)
@@ -12395,7 +11961,7 @@ argumINTAlign(_stack_vector)
     loadq 8[argumINTSrc], csr0
     storeq csr0, 8[argumINTDst]
     addp 2 * SlotSize, argumINTSrc
-    subp LocalSize, argumINTDst
+    addp LocalSize, argumINTDst
     argumINTDispatch()
 
 argumINTAlign(_end)
diff --git a/Source/JavaScriptCore/offlineasm/arm64.rb b/Source/JavaScriptCore/offlineasm/arm64.rb
index e792bb0d52b6..7f776a73487f 100644
--- a/Source/JavaScriptCore/offlineasm/arm64.rb
+++ b/Source/JavaScriptCore/offlineasm/arm64.rb
@@ -958,18 +958,6 @@ def lowerARM64
             emitARM64Add("add", operands, :quad)
         when 'addlshiftp'
             emitARM64AddShift("add", operands, :quad)
-        when 'addqs'
-            emitARM64Add("adds", operands, :quad)
-        when 'subqs'
-            emitARM64Sub("subs", operands, :quad)
-        when "adcq"
-            emitARM64TAC("adc", operands, :quad)
-        when "sbcq"
-            emitARM64TAC("sbc", operands, :quad)
-        when "smulhq"
-            emitARM64TAC("smulh", operands, :quad)
-        when "umulhq"
-            emitARM64TAC("umulh", operands, :quad)
         when "andi"
             emitARM64TAC("and", operands, :word)
         when "andp"
diff --git a/Source/JavaScriptCore/offlineasm/instructions.rb b/Source/JavaScriptCore/offlineasm/instructions.rb
index db7a2c899591..da4d19ab59f0 100644
--- a/Source/JavaScriptCore/offlineasm/instructions.rb
+++ b/Source/JavaScriptCore/offlineasm/instructions.rb
@@ -383,10 +383,6 @@
      "atomicloadi",
      "atomicloadq",
      "fence",
-     "adcq",
-     "sbcq",
-     "umulhq",
-     "smulhq",
     ]
 
 X86_SIMD_INSTRUCTIONS =
@@ -468,13 +464,7 @@
      "storepaird",
      "loadpairv",
      "storepairv",
-     "addlshiftp",
-     "addqs",
-     "subqs",
-     "adcq",
-     "sbcq",
-     "smulhq",
-     "umulhq"
+     "addlshiftp"
     ]
 
 ARM64_SIMD_INSTRUCTIONS =
diff --git a/Source/JavaScriptCore/offlineasm/x86.rb b/Source/JavaScriptCore/offlineasm/x86.rb
index efaf5448c88b..50ae09c235d9 100644
--- a/Source/JavaScriptCore/offlineasm/x86.rb
+++ b/Source/JavaScriptCore/offlineasm/x86.rb
@@ -1654,14 +1654,6 @@ def lowerX86Common
             $asm.puts "idiv#{x86Suffix(:quad)} #{operands[0].x86Operand(:quad)}"
         when "udivq"
             $asm.puts "div#{x86Suffix(:quad)} #{operands[0].x86Operand(:quad)}"
-        when "adcq"
-            $asm.puts "adcq #{x86Operands(:quad, :quad)}"
-        when "sbcq"
-            $asm.puts "sbbq #{x86Operands(:quad, :quad)}"
-        when "umulhq"
-            $asm.puts "mulq #{operands[0].x86Operand(:quad)}"
-        when "smulhq"
-            $asm.puts "imulq #{operands[0].x86Operand(:quad)}"
         when "popcnti"
             $asm.puts "popcnt#{x86Suffix(:int)} #{x86Operands(:int, :int)}"
         when "popcntq"
diff --git a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
index 42009e9b0ecb..7f0496e69eea 100644
--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
+++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
@@ -1031,9 +1031,9 @@ PartialResult BBQJIT::addLocal(Type type, uint32_t numberOfLocals)
 
 // Globals
 
-Value BBQJIT::topValue(TypeKind type, unsigned offset)
+Value BBQJIT::topValue(TypeKind type) // Builds the temp Value of `type` whose index is the slot just past the current expression stack.
 {
-    return Value::fromTemp(type, currentControlData().enclosedHeight() + currentControlData().implicitSlots() + m_parser->expressionStack().size() + offset);
+    return Value::fromTemp(type, currentControlData().enclosedHeight() + currentControlData().implicitSlots() + m_parser->expressionStack().size()); // index = enclosed height + implicit slots + current expression-stack size
 }
 
 Value BBQJIT::exception(const ControlData& control)
@@ -4330,7 +4330,7 @@ template<size_t N>
 void BBQJIT::returnValuesFromCall(Vector<Value, N>& results, const FunctionSignature& functionType, const CallInformation& callInfo)
 {
     for (size_t i = 0; i < callInfo.results.size(); i ++) {
-        Value result = topValue(functionType.returnType(i).kind, i);
+        Value result = Value::fromTemp(functionType.returnType(i).kind, currentControlData().enclosedHeight() + currentControlData().implicitSlots() + m_parser->expressionStack().size() + i);
         Location returnLocation = Location::fromArgumentLocation(callInfo.results[i], result.type());
         if (returnLocation.isRegister()) {
             RegisterBinding currentBinding;
diff --git a/Source/JavaScriptCore/wasm/WasmBBQJIT.h b/Source/JavaScriptCore/wasm/WasmBBQJIT.h
index 15a502d297b8..0d1b4c63da60 100644
--- a/Source/JavaScriptCore/wasm/WasmBBQJIT.h
+++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.h
@@ -1133,7 +1133,7 @@ class BBQJIT {
 
     // Globals
 
-    Value NODELETE topValue(TypeKind type, unsigned offset = 0);
+    Value NODELETE topValue(TypeKind type); // temp Value for the slot just past the current expression stack; callers needing slot +i compute the index themselves
 
     Value NODELETE exception(const ControlData& control);
 
@@ -1405,11 +1405,6 @@ class BBQJIT {
     [[nodiscard]] PartialResult truncTrapping(OpType truncationOp, Value operand, Value& result, Type returnType, Type operandType);
     [[nodiscard]] PartialResult truncSaturated(Ext1OpType truncationOp, Value operand, Value& result, Type returnType, Type operandType);
 
-    // Wide arithmetic
-    [[nodiscard]] PartialResult addI64Add128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi);
-    [[nodiscard]] PartialResult addI64Sub128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi);
-    [[nodiscard]] PartialResult addI64MulWideS(Value lhs, Value rhs, Value& resultLo, Value& resultHi);
-    [[nodiscard]] PartialResult addI64MulWideU(Value lhs, Value rhs, Value& resultLo, Value& resultHi);
 
     // GC
     [[nodiscard]] PartialResult addRefI31(ExpressionType value, ExpressionType& result);
diff --git a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp
index 6ed7a0895d38..31c1d9106e59 100644
--- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp
+++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp
@@ -2461,202 +2461,6 @@ void BBQJIT::emitRefTestOrCast(CastKind castKind, const TypedExpression& typedVa
     );
 }
 
-// Wide Arithmetic
-
-[[nodiscard]] PartialResult BBQJIT::addI64Add128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi)
-{
-    Location lhsLoLocation = loadIfNecessary(lhsLo);
-    Location lhsHiLocation = loadIfNecessary(lhsHi);
-    Location rhsLoLocation = loadIfNecessary(rhsLo);
-    Location rhsHiLocation = loadIfNecessary(rhsHi);
-    consume(lhsLo);
-    consume(lhsHi);
-    consume(rhsLo);
-    consume(rhsHi);
-
-    resultLo = topValue(TypeKind::I64);
-    resultHi = topValue(TypeKind::I64, 1);
-    Location resultLoLocation = allocate(resultLo);
-    Location resultHiLocation = allocate(resultHi);
-
-    LOG_INSTRUCTION("I64Add128", lhsLo, lhsLoLocation, lhsHi, lhsHiLocation, rhsLo, rhsLoLocation, rhsHi, rhsHiLocation, RESULT(resultLo), RESULT(resultHi));
-
-    if (resultLoLocation.asGPR() == lhsHiLocation.asGPR()) {
-        m_jit.move(lhsHiLocation.asGPR(), wasmScratchGPR);
-        lhsHiLocation = Location::fromGPR(wasmScratchGPR);
-    } else if (resultLoLocation.asGPR() == rhsHiLocation.asGPR()) {
-        m_jit.move(rhsHiLocation.asGPR(), wasmScratchGPR);
-        rhsHiLocation = Location::fromGPR(wasmScratchGPR);
-    }
-
-#if CPU(X86_64)
-    if (resultLoLocation.asGPR() == rhsLoLocation.asGPR())
-        m_jit.add64(lhsLoLocation.asGPR(), resultLoLocation.asGPR());
-    else {
-        m_jit.move(lhsLoLocation.asGPR(), resultLoLocation.asGPR());
-        m_jit.add64(rhsLoLocation.asGPR(), resultLoLocation.asGPR());
-    }
-    if (resultHiLocation.asGPR() == rhsHiLocation.asGPR())
-        m_jit.addCarry64(lhsHiLocation.asGPR(), resultHiLocation.asGPR());
-    else {
-        m_jit.move(lhsHiLocation.asGPR(), resultHiLocation.asGPR());
-        m_jit.addCarry64(rhsHiLocation.asGPR(), resultHiLocation.asGPR());
-    }
-#elif CPU(ARM64)
-    m_jit.add64AndSetFlags(lhsLoLocation.asGPR(), rhsLoLocation.asGPR(), resultLoLocation.asGPR());
-    m_jit.addCarry64(lhsHiLocation.asGPR(), rhsHiLocation.asGPR(), resultHiLocation.asGPR());
-#endif
-
-    return { };
-}
-
-[[nodiscard]] PartialResult BBQJIT::addI64Sub128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi)
-{
-    Location lhsLoLocation = loadIfNecessary(lhsLo);
-    Location lhsHiLocation = loadIfNecessary(lhsHi);
-    Location rhsLoLocation = loadIfNecessary(rhsLo);
-    Location rhsHiLocation = loadIfNecessary(rhsHi);
-    consume(lhsLo);
-    consume(lhsHi);
-    consume(rhsLo);
-    consume(rhsHi);
-
-    resultLo = topValue(TypeKind::I64);
-    resultHi = topValue(TypeKind::I64, 1);
-    Location resultLoLocation = allocate(resultLo);
-    Location resultHiLocation = allocate(resultHi);
-
-    LOG_INSTRUCTION("I64Sub128", lhsLo, lhsLoLocation, lhsHi, lhsHiLocation, rhsLo, rhsLoLocation, rhsHi, rhsHiLocation, RESULT(resultLo), RESULT(resultHi));
-
-    if (resultLoLocation.asGPR() == lhsHiLocation.asGPR()) {
-        m_jit.move(lhsHiLocation.asGPR(), wasmScratchGPR);
-        lhsHiLocation = Location::fromGPR(wasmScratchGPR);
-    } else if (resultLoLocation.asGPR() == rhsHiLocation.asGPR()) {
-        m_jit.move(rhsHiLocation.asGPR(), wasmScratchGPR);
-        rhsHiLocation = Location::fromGPR(wasmScratchGPR);
-    }
-
-#if CPU(X86_64)
-    if (resultLoLocation.asGPR() == rhsLoLocation.asGPR()) {
-        m_jit.move(lhsLoLocation.asGPR(), wasmScratchGPR);
-        m_jit.sub64(rhsLoLocation.asGPR(), wasmScratchGPR);
-        m_jit.move(wasmScratchGPR, resultLoLocation.asGPR());
-    } else {
-        m_jit.move(lhsLoLocation.asGPR(), resultLoLocation.asGPR());
-        m_jit.sub64(rhsLoLocation.asGPR(), resultLoLocation.asGPR());
-    }
-    if (resultHiLocation.asGPR() == rhsHiLocation.asGPR()) {
-        m_jit.move(lhsHiLocation.asGPR(), wasmScratchGPR);
-        m_jit.subBorrow64(rhsHiLocation.asGPR(), wasmScratchGPR);
-        m_jit.move(wasmScratchGPR, resultHiLocation.asGPR());
-    } else {
-        m_jit.move(lhsHiLocation.asGPR(), resultHiLocation.asGPR());
-        m_jit.subBorrow64(rhsHiLocation.asGPR(), resultHiLocation.asGPR());
-    }
-#elif CPU(ARM64)
-    m_jit.sub64AndSetFlags(lhsLoLocation.asGPR(), rhsLoLocation.asGPR(), resultLoLocation.asGPR());
-    m_jit.subBorrow64(lhsHiLocation.asGPR(), rhsHiLocation.asGPR(), resultHiLocation.asGPR());
-#endif
-
-    return { };
-}
-
-[[nodiscard]] PartialResult BBQJIT::addI64MulWideU(Value lhs, Value rhs, Value& resultLo, Value& resultHi)
-{
-    Location lhsLocation = loadIfNecessary(lhs);
-    Location rhsLocation = loadIfNecessary(rhs);
-    consume(lhs);
-    consume(rhs);
-
-#if CPU(X86_64)
-    for (JSC::Reg reg : clobbersForDivX86())
-        clobber(reg);
-#endif
-
-    resultLo = topValue(TypeKind::I64);
-    resultHi = topValue(TypeKind::I64, 1);
-    Location resultLoLocation = allocate(resultLo);
-    Location resultHiLocation = allocate(resultHi);
-
-    LOG_INSTRUCTION("I64MulWideU", lhs, lhsLocation, rhs, rhsLocation, RESULT(resultLo), RESULT(resultHi));
-
-#if CPU(X86_64)
-    // x86 mul: rax * src -> rdx:rax
-    m_jit.move(lhsLocation.asGPR(), X86Registers::eax);
-    m_jit.x86UMulHigh64(rhsLocation.asGPR(), X86Registers::eax, X86Registers::edx);
-    if (resultLoLocation.asGPR() != X86Registers::edx) {
-        m_jit.move(X86Registers::eax, resultLoLocation.asGPR());
-        m_jit.move(X86Registers::edx, resultHiLocation.asGPR());
-    } else {
-        m_jit.move(X86Registers::edx, resultHiLocation.asGPR());
-        m_jit.move(X86Registers::eax, resultLoLocation.asGPR());
-    }
-#elif CPU(ARM64)
-    if (resultHiLocation.asGPR() == lhsLocation.asGPR()) {
-        m_jit.move(lhsLocation.asGPR(), wasmScratchGPR);
-        m_jit.uMulHigh64(wasmScratchGPR, rhsLocation.asGPR(), resultHiLocation.asGPR());
-        m_jit.mul64(wasmScratchGPR, rhsLocation.asGPR(), resultLoLocation.asGPR());
-    } else if (resultHiLocation.asGPR() == rhsLocation.asGPR()) {
-        m_jit.move(rhsLocation.asGPR(), wasmScratchGPR);
-        m_jit.uMulHigh64(lhsLocation.asGPR(), wasmScratchGPR, resultHiLocation.asGPR());
-        m_jit.mul64(lhsLocation.asGPR(), wasmScratchGPR, resultLoLocation.asGPR());
-    } else {
-        m_jit.uMulHigh64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultHiLocation.asGPR());
-        m_jit.mul64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultLoLocation.asGPR());
-    }
-#endif
-
-    return { };
-}
-
-[[nodiscard]] PartialResult BBQJIT::addI64MulWideS(Value lhs, Value rhs, Value& resultLo, Value& resultHi)
-{
-    Location lhsLocation = loadIfNecessary(lhs);
-    Location rhsLocation = loadIfNecessary(rhs);
-    consume(lhs);
-    consume(rhs);
-
-#if CPU(X86_64)
-    for (JSC::Reg reg : clobbersForDivX86())
-        clobber(reg);
-#endif
-
-    resultLo = topValue(TypeKind::I64);
-    resultHi = topValue(TypeKind::I64, 1);
-    Location resultLoLocation = allocate(resultLo);
-    Location resultHiLocation = allocate(resultHi);
-
-    LOG_INSTRUCTION("I64MulWideS", lhs, lhsLocation, rhs, rhsLocation, RESULT(resultLo), RESULT(resultHi));
-
-#if CPU(X86_64)
-    // x86 imul: rax * src -> rdx:rax (signed)
-    m_jit.move(lhsLocation.asGPR(), X86Registers::eax);
-    m_jit.x86MulHigh64(rhsLocation.asGPR(), X86Registers::eax, X86Registers::edx);
-    if (resultLoLocation.asGPR() != X86Registers::edx) {
-        m_jit.move(X86Registers::eax, resultLoLocation.asGPR());
-        m_jit.move(X86Registers::edx, resultHiLocation.asGPR());
-    } else {
-        m_jit.move(X86Registers::edx, resultHiLocation.asGPR());
-        m_jit.move(X86Registers::eax, resultLoLocation.asGPR());
-    }
-#elif CPU(ARM64)
-    if (resultHiLocation.asGPR() == lhsLocation.asGPR()) {
-        m_jit.move(lhsLocation.asGPR(), wasmScratchGPR);
-        m_jit.mulHigh64(wasmScratchGPR, rhsLocation.asGPR(), resultHiLocation.asGPR());
-        m_jit.mul64(wasmScratchGPR, rhsLocation.asGPR(), resultLoLocation.asGPR());
-    } else if (resultHiLocation.asGPR() == rhsLocation.asGPR()) {
-        m_jit.move(rhsLocation.asGPR(), wasmScratchGPR);
-        m_jit.mulHigh64(lhsLocation.asGPR(), wasmScratchGPR, resultHiLocation.asGPR());
-        m_jit.mul64(lhsLocation.asGPR(), wasmScratchGPR, resultLoLocation.asGPR());
-    } else {
-        m_jit.mulHigh64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultHiLocation.asGPR());
-        m_jit.mul64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultLoLocation.asGPR());
-    }
-#endif
-
-    return { };
-}
-
 void BBQJIT::emitThrowOnNullReference(ExceptionType type, Location ref)
 {
     recordJumpToThrowException(type, m_jit.branchIfNull(ref.asGPR()));
diff --git a/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp b/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp
index 294d5be676f2..73bd53ba3a63 100644
--- a/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp
+++ b/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp
@@ -294,10 +294,6 @@ class ConstExprGenerator {
     [[nodiscard]] PartialResult atomicFence(ExtAtomicOpType, uint8_t) CONST_EXPR_STUB
     [[nodiscard]] PartialResult truncTrapping(OpType, ExpressionType, ExpressionType&, Type, Type) CONST_EXPR_STUB
     [[nodiscard]] PartialResult truncSaturated(Ext1OpType, ExpressionType, ExpressionType&, Type, Type) CONST_EXPR_STUB
-    [[nodiscard]] PartialResult addI64Add128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB
-    [[nodiscard]] PartialResult addI64Sub128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB
-    [[nodiscard]] PartialResult addI64MulWideS(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB
-    [[nodiscard]] PartialResult addI64MulWideU(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB
 
     [[nodiscard]] PartialResult NODELETE addRefI31(ExpressionType value, ExpressionType& result)
     {
diff --git a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp
index 381f35d9bf40..ffb98d359244 100644
--- a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp
+++ b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp
@@ -58,95 +58,85 @@ void FunctionIPIntMetadataGenerator::addLength(size_t length)
     WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, instructionLength, IPInt::InstructionLengthMetadata);
 }
 
-void FunctionIPIntMetadataGenerator::addMemorySize(uint8_t memoryIndex)
+void FunctionIPIntMetadataGenerator::addMemoryIndex(uint8_t memoryIndex) // Appends one IPInt::MemoryIndexMetadata record carrying memoryIndex to m_metadata.
 {
-    IPInt::MemorySizeMetadata md {
+    IPInt::MemoryIndexMetadata mdConst {
         .memoryIndex = memoryIndex
     };
-    appendMetadata(md);
-}
-
-void FunctionIPIntMetadataGenerator::addMemoryGrow(uint8_t memoryIndex)
-{
-    IPInt::MemoryGrowMetadata md {
-        .memoryIndex = memoryIndex
-    };
-    appendMetadata(md);
-}
-
-void FunctionIPIntMetadataGenerator::addTableAccess(uint32_t index, size_t length)
-{
-    IPInt::TableAccessMetadata md {
-        .index = index,
-        .instructionLength = { .length = safeCast<uint8_t>(length) }
-    };
-    appendMetadata(md);
-}
-
-void FunctionIPIntMetadataGenerator::addRefFunc(uint32_t index, size_t length)
-{
-    IPInt::RefFuncMetadata md {
-        .index = index,
-        .instructionLength = { .length = safeCast<uint8_t>(length) }
-    };
-    appendMetadata(md);
-}
-
-void FunctionIPIntMetadataGenerator::addElemDrop(uint32_t index, size_t length)
-{
-    IPInt::ElemDropMetadata md {
-        .index = index,
-        .instructionLength = { .length = safeCast<uint8_t>(length) }
-    };
-    appendMetadata(md);
-}
-
-void FunctionIPIntMetadataGenerator::addDataAccess(uint32_t index, size_t length)
-{
-    IPInt::DataAccessMetadata md {
-        .index = index,
-        .instructionLength = { .length = safeCast<uint8_t>(length) }
-    };
-    appendMetadata(md);
+    size_t size = m_metadata.size(); // current end of the buffer, i.e. the offset where the new record goes
+    m_metadata.grow(size + sizeof(mdConst)); // make room for exactly one record
+    WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::MemoryIndexMetadata); // store the record at the old end (macro defined elsewhere)
 }
 
-void FunctionIPIntMetadataGenerator::addMemoryInit(uint8_t memoryIndex, uint32_t dataIndex, size_t length)
+void FunctionIPIntMetadataGenerator::addLEB128ConstantInt32AndLength(uint32_t value, size_t length) // Appends one IPInt::Const32Metadata record (instruction length + 32-bit value) to m_metadata.
 {
-    IPInt::MemoryInitMetadata md {
-        .memoryIndex = memoryIndex,
-        .dataIndex = dataIndex,
-        .instructionLength = { .length = safeCast<uint8_t>(length) }
+    IPInt::Const32Metadata mdConst {
+        .instructionLength = { .length = safeCast<uint8_t>(length) }, // length is narrowed to uint8_t through safeCast
+        .value = value
     };
-    appendMetadata(md);
+    size_t size = m_metadata.size(); // current end of the buffer, i.e. the offset where the new record goes
+    m_metadata.grow(size + sizeof(mdConst)); // make room for exactly one record
+    WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const32Metadata); // store the record at the old end (macro defined elsewhere)
 }
 
-void FunctionIPIntMetadataGenerator::addMemoryFill(uint8_t memoryIndex, size_t length)
+void FunctionIPIntMetadataGenerator::addLEB128ConstantInt64AndLength(uint64_t value, size_t length) // Appends one IPInt::Const64Metadata record (64-bit value + instruction length) to m_metadata.
 {
-    IPInt::MemoryFillMetadata md {
-        .memoryIndex = memoryIndex,
+    IPInt::Const64Metadata mdConst {
+        .value = value, // initialized before instructionLength here, unlike Const32Metadata where the length comes first
         .instructionLength = { .length = safeCast<uint8_t>(length) }
     };
-    appendMetadata(md);
+    size_t size = m_metadata.size(); // current end of the buffer, i.e. the offset where the new record goes
+    m_metadata.grow(size + sizeof(mdConst)); // make room for exactly one record
+    WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const64Metadata); // store the record at the old end (macro defined elsewhere)
 }
 
-void FunctionIPIntMetadataGenerator::addMemoryCopy(uint8_t dstMemoryIndex, uint8_t srcMemoryIndex, size_t length)
+void FunctionIPIntMetadataGenerator::addLEB128ConstantAndLengthForType(Type type, uint64_t value, size_t length) // Appends constant metadata whose record type depends on `type`; F32/F64 append nothing.
 {
-    IPInt::MemoryCopyMetadata md {
-        .dstMemoryIndex = dstMemoryIndex,
-        .srcMemoryIndex = srcMemoryIndex,
-        .instructionLength = { .length = safeCast<uint8_t>(length) }
-    };
-    appendMetadata(md);
+    if (type.isI32()) {
+        size_t size = m_metadata.size(); // append offset shared by both i32 forms below
+        if (length == 2) { // short form: only an InstructionLengthMetadata is written, no Const32Metadata
+            IPInt::InstructionLengthMetadata mdConst {
+                .length = safeCast<uint8_t>((value >> 7) & 1) // stores bit 7 of value, not a length; NOTE(review): presumably the interpreter decodes the one-byte immediate inline - confirm against the i32.const handler
+            };
+            m_metadata.grow(size + sizeof(mdConst));
+            WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::InstructionLengthMetadata);
+        } else { // general i32 form: instruction length plus the value truncated to 32 bits
+            IPInt::Const32Metadata mdConst {
+                .instructionLength = { .length = safeCast<uint8_t>(length) },
+                .value = static_cast<uint32_t>(value)
+            };
+            m_metadata.grow(size + sizeof(mdConst));
+            WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const32Metadata);
+        }
+    } else if (type.isI64()) { // i64: full 64-bit value plus instruction length
+        size_t size = m_metadata.size();
+        IPInt::Const64Metadata mdConst {
+            .value = static_cast<uint64_t>(value),
+            .instructionLength = { .length = safeCast<uint8_t>(length) }
+        };
+        m_metadata.grow(size + sizeof(mdConst));
+        WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const64Metadata);
+    } else if (type.isRef() || type.isRefNull() || type.isFuncref()) { // reference types reuse the Const32Metadata form, keeping only the low 32 bits of value
+        size_t size = m_metadata.size();
+        IPInt::Const32Metadata mdConst {
+            .instructionLength = { .length = safeCast<uint8_t>(length) },
+            .value = static_cast<uint32_t>(value)
+        };
+        m_metadata.grow(size + sizeof(mdConst));
+        WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const32Metadata);
+    } else if (!type.isF32() && !type.isF64()) // F32/F64 fall through without appending; any other type is unsupported
+        ASSERT_NOT_IMPLEMENTED_YET();
 }
 
-void FunctionIPIntMetadataGenerator::addAtomicMemoryAccess(uint8_t memoryIndex, uint64_t offset, size_t length)
+void FunctionIPIntMetadataGenerator::addLEB128V128Constant(v128_t value, size_t length) // Appends one IPInt::Const128Metadata record (128-bit value + instruction length) to m_metadata.
 {
-    IPInt::AtomicMemoryAccessMetadata md {
-        .memoryIndex = memoryIndex,
-        .offset = offset,
+    IPInt::Const128Metadata mdConst {
+        .value = value, // initialized before instructionLength, same order as Const64Metadata
         .instructionLength = { .length = safeCast<uint8_t>(length) }
     };
-    appendMetadata(md);
+    size_t size = m_metadata.size(); // current end of the buffer, i.e. the offset where the new record goes
+    m_metadata.grow(size + sizeof(mdConst)); // make room for exactly one record
+    WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const128Metadata); // store the record at the old end (macro defined elsewhere)
 }
 
 void FunctionIPIntMetadataGenerator::addReturnData(const FunctionSignature& sig, const CallInformation& returnCC)
diff --git a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h
index 1f2693662e17..70ab084beb13 100644
--- a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h
+++ b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h
@@ -117,16 +117,11 @@ class FunctionIPIntMetadataGenerator {
     };
 
     void addLength(size_t length);
-    void addMemorySize(uint8_t memoryIndex);
-    void addMemoryGrow(uint8_t memoryIndex);
-    void addTableAccess(uint32_t index, size_t length);
-    void addRefFunc(uint32_t index, size_t length);
-    void addElemDrop(uint32_t index, size_t length);
-    void addDataAccess(uint32_t index, size_t length);
-    void addMemoryInit(uint8_t memoryIndex, uint32_t dataIndex, size_t length);
-    void addMemoryFill(uint8_t memoryIndex, size_t length);
-    void addMemoryCopy(uint8_t dstMemoryIndex, uint8_t srcMemoryIndex, size_t length);
-    void addAtomicMemoryAccess(uint8_t memoryIndex, uint64_t offset, size_t length);
+    void addMemoryIndex(uint8_t memoryIndex); // appends a MemoryIndexMetadata record
+    void addLEB128ConstantInt32AndLength(uint32_t value, size_t length); // appends a Const32Metadata record (length, then value)
+    void addLEB128ConstantInt64AndLength(uint64_t value, size_t length); // appends a Const64Metadata record (value, then length)
+    void addLEB128ConstantAndLengthForType(Type, uint64_t value, size_t length); // picks the record form from the type; appends nothing for F32/F64
+    void addLEB128V128Constant(v128_t value, size_t length); // appends a Const128Metadata record (value, then length)
     void addReturnData(const FunctionSignature&, const CallInformation&);
 
     FunctionCodeIndex m_functionIndex;
diff --git a/Source/JavaScriptCore/wasm/WasmFunctionParser.h b/Source/JavaScriptCore/wasm/WasmFunctionParser.h
index 7410635c778e..d9b6d5f01671 100644
--- a/Source/JavaScriptCore/wasm/WasmFunctionParser.h
+++ b/Source/JavaScriptCore/wasm/WasmFunctionParser.h
@@ -2344,56 +2344,6 @@ FOR_EACH_WASM_MEMORY_STORE_OP(CREATE_CASE)
         FOR_EACH_WASM_TRUNC_SATURATED_OP(CREATE_CASE)
 #undef CREATE_CASE
 
-        case Ext1OpType::I64Add128:
-        case Ext1OpType::I64Sub128: {
-            WASM_PARSER_FAIL_IF(!Options::useWasmWideArithmetic(), "wasm wide arithmetic is not enabled"_s);
-
-            TypedExpression rhsHi;
-            TypedExpression rhsLo;
-            TypedExpression lhsHi;
-            TypedExpression lhsLo;
-            WASM_TRY_POP_EXPRESSION_STACK_INTO(rhsHi, "i64.add128/sub128"_s);
-            WASM_TRY_POP_EXPRESSION_STACK_INTO(rhsLo, "i64.add128/sub128"_s);
-            WASM_TRY_POP_EXPRESSION_STACK_INTO(lhsHi, "i64.add128/sub128"_s);
-            WASM_TRY_POP_EXPRESSION_STACK_INTO(lhsLo, "i64.add128/sub128"_s);
-            WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != lhsLo.type().kind, "i64.add128/sub128 lhs_lo to type "_s, lhsLo.type(), " expected "_s, TypeKind::I64);
-            WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != lhsHi.type().kind, "i64.add128/sub128 lhs_hi to type "_s, lhsHi.type(), " expected "_s, TypeKind::I64);
-            WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != rhsLo.type().kind, "i64.add128/sub128 rhs_lo to type "_s, rhsLo.type(), " expected "_s, TypeKind::I64);
-            WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != rhsHi.type().kind, "i64.add128/sub128 rhs_hi to type "_s, rhsHi.type(), " expected "_s, TypeKind::I64);
-
-            ExpressionType resultLo;
-            ExpressionType resultHi;
-            if (op == Ext1OpType::I64Add128)
-                WASM_TRY_ADD_TO_CONTEXT(addI64Add128(lhsLo, lhsHi, rhsLo, rhsHi, resultLo, resultHi));
-            else
-                WASM_TRY_ADD_TO_CONTEXT(addI64Sub128(lhsLo, lhsHi, rhsLo, rhsHi, resultLo, resultHi));
-            m_expressionStack.constructAndAppend(Types::I64, resultLo);
-            m_expressionStack.constructAndAppend(Types::I64, resultHi);
-            break;
-        }
-
-        case Ext1OpType::I64MulWideS:
-        case Ext1OpType::I64MulWideU: {
-            WASM_PARSER_FAIL_IF(!Options::useWasmWideArithmetic(), "wasm wide arithmetic is not enabled"_s);
-
-            TypedExpression rhs;
-            TypedExpression lhs;
-            WASM_TRY_POP_EXPRESSION_STACK_INTO(rhs, "i64.mul_wide"_s);
-            WASM_TRY_POP_EXPRESSION_STACK_INTO(lhs, "i64.mul_wide"_s);
-            WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != lhs.type().kind, "i64.mul_wide lhs to type "_s, lhs.type(), " expected "_s, TypeKind::I64);
-            WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != rhs.type().kind, "i64.mul_wide rhs to type "_s, rhs.type(), " expected "_s, TypeKind::I64);
-
-            ExpressionType resultLo;
-            ExpressionType resultHi;
-            if (op == Ext1OpType::I64MulWideS)
-                WASM_TRY_ADD_TO_CONTEXT(addI64MulWideS(lhs, rhs, resultLo, resultHi));
-            else
-                WASM_TRY_ADD_TO_CONTEXT(addI64MulWideU(lhs, rhs, resultLo, resultHi));
-            m_expressionStack.constructAndAppend(Types::I64, resultLo);
-            m_expressionStack.constructAndAppend(Types::I64, resultHi);
-            break;
-        }
-
         default:
             WASM_PARSER_FAIL_IF(true, "invalid 0xfc extended op "_s, m_currentExtOp);
             break;
diff --git a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp
index e48de1a9e370..8c8aaee111c4 100644
--- a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp
+++ b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp
@@ -328,13 +328,6 @@ class IPIntGenerator {
 
     [[nodiscard]] PartialResult truncSaturated(Ext1OpType, ExpressionType, ExpressionType&, Type, Type);
 
-    // Wide arithmetic
-
-    [[nodiscard]] PartialResult addI64Add128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&);
-    [[nodiscard]] PartialResult addI64Sub128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&);
-    [[nodiscard]] PartialResult addI64MulWideS(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&);
-    [[nodiscard]] PartialResult addI64MulWideU(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&);
-
     // GC
 
     [[nodiscard]] PartialResult addRefI31(ExpressionType, ExpressionType&);
@@ -701,46 +694,55 @@ IPIntGenerator::IPIntGenerator(ModuleInformation& info, FunctionCodeIndex functi
     return { };
 }
 
-Value IPIntGenerator::addConstant(Type, uint64_t)
+Value IPIntGenerator::addConstant(Type type, uint64_t value)
 {
     changeStackSize(1);
+    m_metadata->addLEB128ConstantAndLengthForType(type, value, getCurrentInstructionLength());
     return { };
 }
 
 // SIMD
 
-[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoad(ExpressionType, uint32_t, ExpressionType&, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoad(ExpressionType, uint32_t offset, ExpressionType&, uint8_t memoryIndex)
 {
     changeStackSize(0); // Pop address, push v128 value (net change = 0)
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::addSIMDStore(ExpressionType, ExpressionType, uint32_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::addSIMDStore(ExpressionType, ExpressionType, uint32_t offset, uint8_t memoryIndex)
 {
     changeStackSize(-2); // Pop address and v128 value
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDSplat(SIMDLane, ExpressionType, ExpressionType&)
 {
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDShuffle(v128_t, ExpressionType, ExpressionType, ExpressionType&)
 {
     changeStackSize(-1);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDShift(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, ExpressionType&)
 {
     changeStackSize(-1);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDExtmul(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, ExpressionType&)
 {
     changeStackSize(-1);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
@@ -749,15 +751,19 @@ Value IPIntGenerator::addConstant(Type, uint64_t)
     return addSIMDLoad(pointer, offset, result, memoryIndex);
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoadLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t, uint8_t, ExpressionType&, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoadLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t offset, uint8_t, ExpressionType&, uint8_t memoryIndex)
 {
     changeStackSize(-1);
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::addSIMDStoreLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t, uint8_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::addSIMDStoreLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t offset, uint8_t, uint8_t memoryIndex)
 {
     changeStackSize(-2);
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
@@ -774,33 +780,39 @@ Value IPIntGenerator::addConstant(Type, uint64_t)
 IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 {
     changeStackSize(1);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDExtractLane(SIMDInfo, uint8_t, ExpressionType, ExpressionType&)
 {
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDReplaceLane(SIMDInfo, uint8_t, ExpressionType, ExpressionType, ExpressionType&)
 {
     changeStackSize(-1);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDI_V(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType&)
 {
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDV_V(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType&)
 {
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDBitwiseSelect(ExpressionType, ExpressionType, ExpressionType, ExpressionType&)
 {
     changeStackSize(-2); // 3 operands, 1 result
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
@@ -808,6 +820,7 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDRelOp(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, B3::Air::Arg, ExpressionType&)
 {
     changeStackSize(-1);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 #endif
@@ -815,6 +828,7 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 [[nodiscard]] PartialResult IPIntGenerator::addSIMDV_VV(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, ExpressionType&)
 {
     changeStackSize(-1); // Pop two v128 values, push one v128 value
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
@@ -834,7 +848,7 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 [[nodiscard]] PartialResult IPIntGenerator::addRefFunc(FunctionSpaceIndex index, ExpressionType&)
 {
     changeStackSize(1);
-    m_metadata->addRefFunc(index, getCurrentInstructionLength());
+    m_metadata->addLEB128ConstantInt32AndLength(index, getCurrentInstructionLength());
     return { };
 }
 
@@ -853,14 +867,14 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 
 [[nodiscard]] PartialResult IPIntGenerator::addTableGet(unsigned index, ExpressionType, ExpressionType&)
 {
-    m_metadata->addTableAccess(index, getCurrentInstructionLength());
+    m_metadata->addLEB128ConstantInt32AndLength(index, getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addTableSet(unsigned index, ExpressionType, ExpressionType)
 {
     changeStackSize(-2);
-    m_metadata->addTableAccess(index, getCurrentInstructionLength());
+    m_metadata->addLEB128ConstantInt32AndLength(index, getCurrentInstructionLength());
     return { };
 }
 
@@ -878,14 +892,14 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 
 [[nodiscard]] PartialResult IPIntGenerator::addElemDrop(unsigned elementIndex)
 {
-    m_metadata->addElemDrop(elementIndex, getCurrentInstructionLength());
+    m_metadata->addLEB128ConstantInt32AndLength(elementIndex, getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addTableSize(unsigned tableIndex, ExpressionType&)
 {
     changeStackSize(1);
-    m_metadata->addTableAccess(tableIndex, getCurrentInstructionLength());
+    m_metadata->addLEB128ConstantInt32AndLength(tableIndex, getCurrentInstructionLength());
     return { };
 }
 
@@ -1073,14 +1087,24 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 
 // Loads and Stores
 
-[[nodiscard]] PartialResult IPIntGenerator::load(LoadOpType, ExpressionType, ExpressionType&, uint64_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::load(LoadOpType, ExpressionType, ExpressionType&, uint64_t offset, uint8_t memoryIndex)
 {
+    m_metadata->addMemoryIndex(memoryIndex);
+    if (m_info.memory(memoryIndex).isMemory64())
+        m_metadata->addLEB128ConstantInt64AndLength(offset, getCurrentInstructionLength());
+    else
+        m_metadata->addLEB128ConstantInt32AndLength(static_cast<uint32_t>(offset), getCurrentInstructionLength());
     return { };
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::store(StoreOpType, ExpressionType, ExpressionType, uint64_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::store(StoreOpType, ExpressionType, ExpressionType, uint64_t offset, uint8_t memoryIndex)
 {
     changeStackSize(-2);
+    m_metadata->addMemoryIndex(memoryIndex);
+    if (m_info.memory(memoryIndex).isMemory64())
+        m_metadata->addLEB128ConstantInt64AndLength(offset, getCurrentInstructionLength());
+    else
+        m_metadata->addLEB128ConstantInt32AndLength(static_cast<uint32_t>(offset), getCurrentInstructionLength());
     return { };
 }
 
@@ -1088,85 +1112,100 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
 
 [[nodiscard]] PartialResult IPIntGenerator::addGrowMemory(ExpressionType, ExpressionType&, uint8_t memoryIndex)
 {
-    m_metadata->addMemoryGrow(memoryIndex);
+    m_metadata->addMemoryIndex(memoryIndex);
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addCurrentMemory(ExpressionType&, uint8_t memoryIndex)
 {
     changeStackSize(1);
-    m_metadata->addMemorySize(memoryIndex);
+    m_metadata->addMemoryIndex(memoryIndex);
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addMemoryFill(ExpressionType, ExpressionType, ExpressionType, uint8_t memoryIndex)
 {
     changeStackSize(-3);
-    m_metadata->addMemoryFill(memoryIndex, getCurrentInstructionLength());
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addMemoryCopy(ExpressionType, ExpressionType, ExpressionType, uint8_t dstMemoryIndex, uint8_t srcMemoryIndex)
 {
     changeStackSize(-3);
-    m_metadata->addMemoryCopy(dstMemoryIndex, srcMemoryIndex, getCurrentInstructionLength());
+    m_metadata->addMemoryIndex(dstMemoryIndex);
+    m_metadata->addMemoryIndex(srcMemoryIndex);
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addMemoryInit(unsigned dataIndex, ExpressionType, ExpressionType, ExpressionType, uint8_t memoryIndex)
 {
     changeStackSize(-3);
-    m_metadata->addMemoryInit(memoryIndex, dataIndex, getCurrentInstructionLength());
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(dataIndex, getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::addDataDrop(unsigned dataIndex)
 {
-    m_metadata->addDataAccess(dataIndex, getCurrentInstructionLength());
+    m_metadata->addLEB128ConstantInt32AndLength(dataIndex, getCurrentInstructionLength());
     return { };
 }
 
 // Atomics
 
-[[nodiscard]] PartialResult IPIntGenerator::atomicLoad(ExtAtomicOpType, Type, ExpressionType, ExpressionType&, uint32_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::atomicLoad(ExtAtomicOpType, Type, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex)
 {
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::atomicStore(ExtAtomicOpType, Type, ExpressionType, ExpressionType, uint32_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::atomicStore(ExtAtomicOpType, Type, ExpressionType, ExpressionType, uint32_t offset, uint8_t memoryIndex)
 {
     changeStackSize(-2);
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::atomicBinaryRMW(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType&, uint32_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::atomicBinaryRMW(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex)
 {
     changeStackSize(-1);
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::atomicCompareExchange(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, uint32_t, uint8_t)
+[[nodiscard]] PartialResult IPIntGenerator::atomicCompareExchange(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex)
 {
     changeStackSize(-2);
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::atomicWait(ExtAtomicOpType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex)
 {
     changeStackSize(-2);
-    m_metadata->addAtomicMemoryAccess(memoryIndex, offset, getCurrentInstructionLength());
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::atomicNotify(ExtAtomicOpType, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex)
 {
     changeStackSize(-1);
-    m_metadata->addAtomicMemoryAccess(memoryIndex, offset, getCurrentInstructionLength());
+    m_metadata->addMemoryIndex(memoryIndex);
+    m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength());
     return { };
 }
 
 [[nodiscard]] PartialResult IPIntGenerator::atomicFence(ExtAtomicOpType, uint8_t)
 {
+    m_metadata->addLength(getCurrentInstructionLength());
     return { };
 }
 
@@ -2036,30 +2075,6 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t)
     return { };
 }
 
-[[nodiscard]] PartialResult IPIntGenerator::addI64Add128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&)
-{
-    changeStackSize(-2); // pops 4, pushes 2
-    return { };
-}
-
-[[nodiscard]] PartialResult IPIntGenerator::addI64Sub128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&)
-{
-    changeStackSize(-2); // pops 4, pushes 2
-    return { };
-}
-
-[[nodiscard]] PartialResult IPIntGenerator::addI64MulWideS(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&)
-{
-    changeStackSize(0); // pops 2, pushes 2
-    return { };
-}
-
-[[nodiscard]] PartialResult IPIntGenerator::addI64MulWideU(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&)
-{
-    changeStackSize(0); // pops 2, pushes 2
-    return { };
-}
-
 // Conversions
 
 [[nodiscard]] PartialResult IPIntGenerator::addI32WrapI64(ExpressionType, ExpressionType&)
diff --git a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h
index 67acb64512c0..824c44a04c47 100644
--- a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h
+++ b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h
@@ -82,6 +82,10 @@ struct InstructionLengthMetadata {
     uint8_t length; // 1B for length of current instruction
 };
 
+struct MemoryIndexMetadata {
+    uint8_t memoryIndex; // 1B for memory index (JS embedding of wasm is limited to 100 memories)
+};
+
 struct BlockMetadata {
     // Field order is significant, both may be loaded with one 'loadpairi' instruction.
     // Negative deltas are possible for some Wasm instructions and require sign extension to 64b before the addition.
@@ -133,60 +137,25 @@ struct GlobalMetadata {
     uint8_t isRef; // 1B for ref flag
 };
 
-// Metadata for instructions that pass a single index/offset to a C call.
-// Each category gets its own named type.
-
-struct TableAccessMetadata {
-    uint32_t index; // 4B for table index
-    InstructionLengthMetadata instructionLength;
-};
-
-struct RefFuncMetadata {
-    uint32_t index; // 4B for function space index
-    InstructionLengthMetadata instructionLength;
-};
-
-struct ElemDropMetadata {
-    uint32_t index; // 4B for element index
-    InstructionLengthMetadata instructionLength;
-};
+// Constant metadata structures
 
-struct DataAccessMetadata {
-    uint32_t index; // 4B for data index
+struct Const32Metadata {
+    // instructionLength needs to go first because we encode small
+    // i32 as just instructionLength with the value embedded in bytecode.
     InstructionLengthMetadata instructionLength;
+    uint32_t value;
 };
 
-struct MemoryInitMetadata {
-    uint8_t memoryIndex;
-    uint32_t dataIndex; // 4B for data index
+struct Const64Metadata {
+    uint64_t value;
     InstructionLengthMetadata instructionLength;
 };
 
-struct MemoryFillMetadata {
-    uint8_t memoryIndex;
+struct Const128Metadata {
+    v128_t value;
     InstructionLengthMetadata instructionLength;
 };
 
-struct MemoryCopyMetadata {
-    uint8_t dstMemoryIndex;
-    uint8_t srcMemoryIndex;
-    InstructionLengthMetadata instructionLength;
-};
-
-struct AtomicMemoryAccessMetadata {
-    uint8_t memoryIndex;
-    uint64_t offset;
-    InstructionLengthMetadata instructionLength;
-};
-
-struct MemorySizeMetadata {
-    uint8_t memoryIndex;
-};
-
-struct MemoryGrowMetadata {
-    uint8_t memoryIndex;
-};
-
 struct TableInitMetadata {
     uint32_t elementIndex; // 4B for index of element
     uint32_t tableIndex; // 4B for index of table
diff --git a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp
index 5846fc50ea0a..ca23c8915e62 100644
--- a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp
+++ b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp
@@ -63,33 +63,6 @@ namespace JSC { namespace IPInt {
         return encodeResult(first, second); \
     } while (false)
 
-static constexpr size_t ipintCalleeSaveSpaceStackAligned = WTF::roundUpToMultipleOf<stackAlignmentBytes()>((Wasm::numberOfIPIntCalleeSaveRegisters + Wasm::numberOfIPIntInternalRegisters) * sizeof(Register));
-static constexpr size_t ipintLocalsBaseOffset = ipintCalleeSaveSpaceStackAligned + IPInt::LOCAL_SIZE;
-
-IPIntLocal* FrameAccess::localBase()
-{
-    // Points to local[0], matching assembly's CFR - IPIntLocalsBaseOffset.
-    return reinterpret_cast<IPIntLocal*>(reinterpret_cast<uint8_t*>(m_callFrame) - ipintLocalsBaseOffset);
-}
-
-IPIntLocal* FrameAccess::localSlot(unsigned index)
-{
-    return &localBase()[-static_cast<ptrdiff_t>(index)];
-}
-
-IPIntLocal* FrameAccess::rethrowSlot(unsigned index)
-{
-    return &localBase()[-static_cast<ptrdiff_t>(m_callee->localSizeToAlloc() + index)];
-}
-
-IPIntStackEntry* FrameAccess::stackEnd()
-{
-    // CFR - calleeSaveSpace - (localSizeToAlloc + rethrowSlots) * LocalSize
-    return reinterpret_cast<IPIntStackEntry*>(
-        reinterpret_cast<uint8_t*>(m_callFrame) - ipintCalleeSaveSpaceStackAligned
-        - (m_callee->localSizeToAlloc() + m_callee->rethrowSlots()) * IPInt::LOCAL_SIZE);
-}
-
 #define WASM_CALL_RETURN(targetInstance, callTarget) do { \
         static_assert(callTarget.getTag() == WasmEntryPtrTag); \
         callTarget.validate(); \
@@ -259,7 +232,7 @@ WASM_IPINT_EXTERN_CPP_DECL(prologue_osr, CallFrame* callFrame)
 }
 
 // This needs to be kept in sync with BBQJIT::makeStackMap.
-static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopOSR(Wasm::IPIntCallee* ipintCallee, Wasm::BBQCallee* bbqCallee, JSWebAssemblyInstance* instance, const Wasm::IPIntTierUpCounter::OSREntryData& osrEntryData, CallFrame* callFrame, IPIntStackEntry* sp)
+static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopOSR(Wasm::IPIntCallee* ipintCallee, Wasm::BBQCallee* bbqCallee, JSWebAssemblyInstance* instance, const Wasm::IPIntTierUpCounter::OSREntryData& osrEntryData, IPIntLocal* pl)
 {
     ASSERT(bbqCallee->compilationMode() == Wasm::CompilationMode::BBQMode);
     size_t osrEntryScratchBufferSize = bbqCallee->osrEntryScratchBufferSize();
@@ -270,8 +243,8 @@ static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopO
     if (!buffer)
         return nullptr;
     auto* currentEntry = buffer;
-    auto copyValueToBuffer = [&](const auto& entry) ALWAYS_INLINE_LAMBDA {
-        *std::bit_cast<v128_t*>(currentEntry++) = entry.v128;
+    auto copyValueToBuffer = [&](const IPIntLocal& local) ALWAYS_INLINE_LAMBDA {
+        *std::bit_cast<v128_t*>(currentEntry++) = local.v128;
     };
 
     // The loop index isn't really an IPIntLocal value, but it occupies the first slot of the OSR scratch buffer
@@ -280,14 +253,13 @@ static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopO
     loopIndexLocal.v128.u64x2[1] = 0;
     copyValueToBuffer(loopIndexLocal);
 
-    FrameAccess frame(callFrame, ipintCallee);
     for (uint32_t i = 0; i < ipintCallee->numLocals(); ++i)
-        copyValueToBuffer(*frame.localSlot(i));
+        copyValueToBuffer(pl[i]);
 
     if (ipintCallee->rethrowSlots()) {
         ASSERT(osrEntryData.tryDepth <= ipintCallee->rethrowSlots());
         for (uint32_t i = 0; i < osrEntryData.tryDepth; ++i)
-            copyValueToBuffer(*frame.rethrowSlot(i));
+            copyValueToBuffer(pl[ipintCallee->localSizeToAlloc() + i]);
     } else {
         // If there's no rethrow slots just 0 fill the buffer.
         IPIntLocal zeroValue = { };
@@ -296,15 +268,15 @@ static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopO
             copyValueToBuffer(zeroValue);
     }
 
-    auto stackSlots = std::span { sp, sp + osrEntryData.numberOfStackValues };
-    for (auto& value : stackSlots | std::views::reverse)
-        copyValueToBuffer(value);
-
+    for (uint32_t i = 0; i < osrEntryData.numberOfStackValues; ++i) {
+        pl -= 1;
+        copyValueToBuffer(*pl);
+    }
     return buffer;
 }
 
 
-WASM_IPINT_EXTERN_CPP_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntStackEntry* sp)
+WASM_IPINT_EXTERN_CPP_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntLocal* pl)
 {
     Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame);
     Wasm::IPIntTierUpCounter& tierUpCounter = callee->tierUpCounter();
@@ -336,13 +308,13 @@ WASM_IPINT_EXTERN_CPP_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntSta
     // The BBQ frame may use more stack than the IPInt frame. If there's not enough stack space,
     // skip OSR and continue executing in IPInt.
     if (bbqCallee->stackCheckSize() != Wasm::stackCheckNotNeeded) {
-        auto stackAtOSREntry = reinterpret_cast<uintptr_t>(sp);
+        auto stackAtOSREntry = reinterpret_cast<uintptr_t>(pl - osrEntryData.numberOfStackValues);
         auto candidateNewStackPointer = reinterpret_cast<void*>(stackAtOSREntry - bbqCallee->stackCheckSize());
         if (candidateNewStackPointer < instance->softStackLimit()) [[unlikely]]
             WASM_RETURN_TWO(nullptr, nullptr);
     }
 
-    auto* buffer = buildEntryBufferForLoopOSR(callee, bbqCallee, instance, osrEntryData, callFrame, sp);
+    auto* buffer = buildEntryBufferForLoopOSR(callee, bbqCallee, instance, osrEntryData, pl);
     if (!buffer)
         WASM_RETURN_TWO(nullptr, nullptr);
 
@@ -399,7 +371,7 @@ static void NODELETE copyExceptionPayloadToStack(const Wasm::FunctionSignature&
     ASSERT(!payloadIndex);
 }
 
-WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer)
+WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer, IPIntLocal* pl)
 {
     VM& vm = instance->vm();
     auto throwScope = DECLARE_THROW_SCOPE(vm);
@@ -408,8 +380,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, I
     Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame);
     if (callee->rethrowSlots()) {
         RELEASE_ASSERT(vm.targetTryDepthForThrow <= callee->rethrowSlots());
-        FrameAccess frame(callFrame, callee);
-        frame.rethrowSlot(vm.targetTryDepthForThrow - 1)->i64 = std::bit_cast<uint64_t>(throwScope.exception()->value());
+        pl[callee->localSizeToAlloc() + vm.targetTryDepthForThrow - 1].i64 = std::bit_cast<uint64_t>(throwScope.exception()->value());
     }
 
     if (stackPointer) {
@@ -427,7 +398,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, I
     WASM_RETURN_TWO(nullptr, nullptr);
 }
 
-WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer)
+WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer, IPIntLocal* pl)
 {
     VM& vm = instance->vm();
     auto throwScope = DECLARE_THROW_SCOPE(vm);
@@ -436,8 +407,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFra
     Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame);
     if (callee->rethrowSlots()) {
         RELEASE_ASSERT(vm.targetTryDepthForThrow <= callee->rethrowSlots());
-        FrameAccess frame(callFrame, callee);
-        frame.rethrowSlot(vm.targetTryDepthForThrow - 1)->i64 = std::bit_cast<uint64_t>(throwScope.exception()->value());
+        pl[callee->localSizeToAlloc() + vm.targetTryDepthForThrow - 1].i64 = std::bit_cast<uint64_t>(throwScope.exception()->value());
     }
 
     Exception* exception = throwScope.exception();
@@ -451,7 +421,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFra
     WASM_RETURN_TWO(nullptr, nullptr);
 }
 
-WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame* callFrame, IPIntStackEntry* stackPointer)
+WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame* callFrame, IPIntStackEntry* stackPointer, IPIntLocal* pl)
 {
     VM& vm = instance->vm();
     auto throwScope = DECLARE_THROW_SCOPE(vm);
@@ -460,8 +430,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception_and_arguments, Call
     Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame);
     if (callee->rethrowSlots()) {
         RELEASE_ASSERT(vm.targetTryDepthForThrow <= callee->rethrowSlots());
-        FrameAccess frame(callFrame, callee);
-        frame.rethrowSlot(vm.targetTryDepthForThrow - 1)->i64 = std::bit_cast<uint64_t>(throwScope.exception()->value());
+        pl[callee->localSizeToAlloc() + vm.targetTryDepthForThrow - 1].i64 = std::bit_cast<uint64_t>(throwScope.exception()->value());
     }
 
     Exception* exception = throwScope.exception();
@@ -506,7 +475,7 @@ WASM_IPINT_EXTERN_CPP_DECL(throw_exception, CallFrame* callFrame, IPIntStackEntr
     WASM_RETURN_TWO(vm.targetMachinePCForThrow, nullptr);
 }
 
-WASM_IPINT_EXTERN_CPP_DECL(rethrow_exception, CallFrame* callFrame, unsigned tryDepth)
+WASM_IPINT_EXTERN_CPP_DECL(rethrow_exception, CallFrame* callFrame, IPIntStackEntry* pl, unsigned tryDepth)
 {
     SlowPathFrameTracer tracer(instance->vm(), callFrame);
 
@@ -516,11 +485,10 @@ WASM_IPINT_EXTERN_CPP_DECL(rethrow_exception, CallFrame* callFrame, unsigned try
 
     Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame);
     RELEASE_ASSERT(tryDepth <= callee->rethrowSlots());
-    FrameAccess frame(callFrame, callee);
 #if CPU(ADDRESS64)
-    JSWebAssemblyException* exception = std::bit_cast<JSWebAssemblyException*>(frame.rethrowSlot(tryDepth - 1)->i64);
+    JSWebAssemblyException* exception = std::bit_cast<JSWebAssemblyException*>(pl[callee->localSizeToAlloc() + tryDepth - 1].i64);
 #else
-    JSWebAssemblyException* exception = std::bit_cast<JSWebAssemblyException*>(frame.rethrowSlot(tryDepth - 1)->i32);
+    JSWebAssemblyException* exception = std::bit_cast<JSWebAssemblyException*>(pl[callee->localSizeToAlloc() + tryDepth - 1].i32);
 #endif
     RELEASE_ASSERT(exception);
     throwException(globalObject, throwScope, exception);
@@ -1068,13 +1036,11 @@ WASM_IPINT_EXTERN_CPP_DECL(prepare_function_body, CallFrame* callFrame)
 
 /**
  * Given a function index, determine the pointer to its executable code.
- * Return a pair of the target wasm instance and the code pointer (via WASM_CALL_RETURN).
- * For wasm imports, returns the target instance and the real entrypoint (bypassing the
- * wasm_to_wasm wrapper). For JS imports, returns the caller instance and the import stub.
+ * Return a pair of the wasm instance pointer received as the first argument and the code pointer.
  * Additionally, store the following into the 'calleeAndWasmInstanceReturn':
  *
  *  - calleeAndWasmInstanceReturn[0] - the callee to use, goes into the 'callee' slot of the CallFrame.
- *  - calleeAndWasmInstanceReturn[1] - the wasm instance to use, goes into the 'codeBlock' slot of the CallFrame. For JS this is reused for the function info.
+ *  - calleeAndWasmInstanceReturn[1] - the wasm instance to use, goes into the 'codeBlock' slot of the CallFrame.
  */
 WASM_IPINT_EXTERN_CPP_DECL(prepare_call, CallFrame* callFrame, CallMetadata* call, Register* calleeAndWasmInstanceReturn)
 {
@@ -1088,18 +1054,16 @@ WASM_IPINT_EXTERN_CPP_DECL(prepare_call, CallFrame* callFrame, CallMetadata* cal
     Register& calleeReturn = calleeAndWasmInstanceReturn[0];
     Register& wasmInstanceReturn = calleeAndWasmInstanceReturn[1];
     CodePtr<WasmEntryPtrTag> codePtr;
-    JSWebAssemblyInstance* targetInstance = instance;
+    bool isJSCallee = false;
     if (functionIndex < importFunctionCount) {
         auto* functionInfo = instance->importFunctionInfo(functionIndex);
+        codePtr = functionInfo->importFunctionStub;
         calleeReturn = functionInfo->boxedCallee.encodedBits();
         if (functionInfo->isJS()) {
-            codePtr = functionInfo->importFunctionStub;
+            isJSCallee = true;
             wasmInstanceReturn = reinterpret_cast<uintptr_t>(functionInfo);
-        } else {
-            codePtr = *functionInfo->entrypointLoadLocation;
-            targetInstance = functionInfo->targetInstance.get();
-            wasmInstanceReturn = targetInstance;
-        }
+        } else
+            wasmInstanceReturn = functionInfo->targetInstance.get();
     } else {
         // Target is a wasm function within the same instance
         codePtr = *instance->calleeGroup()->entrypointLoadLocationFromFunctionIndexSpace(functionIndex);
@@ -1108,15 +1072,14 @@ WASM_IPINT_EXTERN_CPP_DECL(prepare_call, CallFrame* callFrame, CallMetadata* cal
         wasmInstanceReturn = instance;
     }
 
+    JSWebAssemblyInstance* targetInstance = isJSCallee ? nullptr : jsDynamicCast<JSWebAssemblyInstance*>(wasmInstanceReturn.unboxedCell());
     IPINT_HANDLE_STEP_INTO_CALL(instance->vm(), CalleeBits(calleeReturn.encodedJSValue()), targetInstance);
 
     RELEASE_ASSERT(WTF::isTaggedWith<WasmEntryPtrTag>(codePtr));
 
-    WASM_CALL_RETURN(targetInstance, codePtr);
+    WASM_CALL_RETURN(instance, codePtr);
 }
 
-// Returns the same outputs as prepare_call: entrypoint and target instance
-// via result registers, callee and function-info/instance via the stack slots.
 WASM_IPINT_EXTERN_CPP_DECL(prepare_call_indirect, CallFrame* callFrame, Wasm::FunctionSpaceIndex* functionIndex, CallIndirectMetadata* call)
 {
     auto* callee = IPINT_CALLEE(callFrame);
@@ -1345,7 +1308,7 @@ WASM_IPINT_EXTERN_CPP_DECL(check_stack_and_vm_traps, void* candidateNewStackPoin
 }
 
 #if ENABLE(WEBASSEMBLY_DEBUGGER)
-static UNUSED_FUNCTION void displayWasmDebugState(JSWebAssemblyInstance* instance, Wasm::IPIntCallee* callee, CallFrame* callFrame, IPIntStackEntry* sp)
+static UNUSED_FUNCTION void displayWasmDebugState(JSWebAssemblyInstance* instance, Wasm::IPIntCallee* callee, IPIntStackEntry* sp, IPIntLocal* pl)
 {
     dataLogLn("=== WASM Debug State ===");
 
@@ -1354,14 +1317,12 @@ static UNUSED_FUNCTION void displayWasmDebugState(JSWebAssemblyInstance* instanc
     auto functionIndex = callee->functionIndex();
     const auto& moduleInfo = instance->module().moduleInformation();
     const Vector<Wasm::Type>& localTypes = moduleInfo.debugInfo->ensureFunctionDebugInfo(functionIndex).locals;
-    FrameAccess frame(callFrame, callee);
     for (uint32_t i = 0; i < numLocals; ++i)
-        logWasmLocalValue(i, *frame.localSlot(i), localTypes[i]);
+        logWasmLocalValue(i,  pl[i], localTypes[i]);
 
-    auto* stackEnd = frame.stackEnd();
-    if (sp && std::bit_cast<uintptr_t>(sp) <= std::bit_cast<uintptr_t>(stackEnd)) {
-        constexpr size_t STACK_ENTRY_SIZE = 16;
-        size_t stackDepth = (reinterpret_cast<uint8_t*>(stackEnd) - reinterpret_cast<uint8_t*>(sp)) / STACK_ENTRY_SIZE;
+    constexpr size_t STACK_ENTRY_SIZE = 16;
+    if (sp && pl && std::bit_cast<uintptr_t>(sp) <= std::bit_cast<uintptr_t>(pl)) {
+        size_t stackDepth = (reinterpret_cast<uint8_t*>(pl) - reinterpret_cast<uint8_t*>(sp)) / STACK_ENTRY_SIZE;
         dataLogLn("WASM Stack (", stackDepth, " entries - showing all type interpretations):");
 
         IPIntStackEntry* currentEntry = sp;
@@ -1387,12 +1348,13 @@ WASM_IPINT_EXTERN_CPP_DECL(handle_debugger_trap_if_needed, CallFrame* callFrame,
         if (debugServer.hasDebugger()) {
             uint8_t* pc = static_cast<uint8_t*>(sp[2].pointer());
             uint8_t* mc = static_cast<uint8_t*>(sp[3].pointer());
+            IPIntLocal* pl = static_cast<IPIntLocal*>(sp[0].pointer());
             auto* callee = static_cast<Wasm::IPIntCallee*>(sp[1].pointer());
             auto* stack = std::bit_cast<IPIntStackEntry*>(sp + 4);
             auto exceptionType = static_cast<Wasm::ExceptionType>(callFrame->argumentCountIncludingThis());
             if (Options::verboseWasmDebugger() && exceptionType == Wasm::ExceptionType::Unreachable)
-                displayWasmDebugState(instance, callee, callFrame, stack);
-            auto trapStatus = debugServer.execution().handleDebuggerTrapIfNeeded(callFrame, instance, callee, pc, mc, stack, exceptionType);
+                displayWasmDebugState(instance, callee, stack, pl);
+            auto trapStatus = debugServer.execution().handleDebuggerTrapIfNeeded(callFrame, instance, callee, pc, mc, pl, stack, exceptionType);
             shouldThrow = trapStatus == Wasm::DebuggerTrapStatus::NotResolvedByDebugger;
         }
     }
diff --git a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h
index 41a9aedbc86c..a4d9c9f6cb90 100644
--- a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h
+++ b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h
@@ -70,15 +70,15 @@ static constexpr uintptr_t SlowPathExceptionTag = JSValue::InvalidTag;
 
 #if ENABLE(WEBASSEMBLY_BBQJIT)
 WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(prologue_osr, CallFrame* callFrame);
-WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntStackEntry* sp);
+WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntLocal* pl);
 WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(epilogue_osr, CallFrame* callFrame);
 #endif
 
-WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_and_clear_exception, CallFrame*, IPIntStackEntry* stack);
-WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception, CallFrame*, IPIntStackEntry* stack);
-WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame*, IPIntStackEntry* stack);
+WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_and_clear_exception, CallFrame*, IPIntStackEntry* stack, IPIntLocal* pl);
+WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception, CallFrame*, IPIntStackEntry* stack, IPIntLocal* pl);
+WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame*, IPIntStackEntry* stack, IPIntLocal* pl);
 WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(throw_exception, CallFrame*, IPIntStackEntry* arguments, unsigned exceptionIndex);
-WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(rethrow_exception, CallFrame*, unsigned tryDepth);
+WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(rethrow_exception, CallFrame*, IPIntStackEntry* pl, unsigned tryDepth);
 WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(throw_ref, CallFrame* callFrame, EncodedJSValue exnref);
 
 WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(ref_func, unsigned index);
@@ -143,29 +143,6 @@ WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(memory_atomic_notify, IPIntStackEntry*);
 WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(check_stack_and_vm_traps, void* candidateNewStackPointer, Wasm::IPIntCallee*, CallFrame*);
 WASM_IPINT_EXTERN_CPP_DECL(handle_debugger_trap_if_needed, CallFrame*, Register*);
 
-
-class FrameAccess {
-public:
-    FrameAccess(CallFrame* callFrame, const Wasm::IPIntCallee* callee)
-        : m_callFrame(callFrame)
-        , m_callee(callee)
-    {
-    }
-
-    IPIntLocal* localSlot(unsigned);
-    IPIntLocal* rethrowSlot(unsigned);
-    // Past-the-end pointer for the expression stack area (= bottom of rethrow/locals area).
-    IPIntStackEntry* stackEnd();
-
-private:
-    // Returns pointer to local[0], matching assembly's CFR - IPIntLocalsBaseOffset.
-    // local[i] = localBase()[-i], rethrow[i] = localBase()[-(localSizeToAlloc + i)].
-    IPIntLocal* localBase();
-
-    CallFrame* m_callFrame;
-    SUPPRESS_UNCOUNTED_MEMBER const Wasm::IPIntCallee* m_callee;
-};
-
 } } // namespace JSC::IPInt
 
 #endif
diff --git a/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp b/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp
index 832f970bad38..0f4a6a3a096a 100644
--- a/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp
+++ b/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp
@@ -756,13 +756,6 @@ class OMGIRGenerator {
     // Saturated truncation.
     [[nodiscard]] PartialResult truncSaturated(Ext1OpType, ExpressionType operand, ExpressionType& result, Type returnType, Type operandType);
 
-    // Wide arithmetic.
-    [[nodiscard]] PartialResult addI64Add128(ExpressionType lhsLo, ExpressionType lhsHi, ExpressionType rhsLo, ExpressionType rhsHi, ExpressionType& resultLo, ExpressionType& resultHi);
-    [[nodiscard]] PartialResult addI64Sub128(ExpressionType lhsLo, ExpressionType lhsHi, ExpressionType rhsLo, ExpressionType rhsHi, ExpressionType& resultLo, ExpressionType& resultHi);
-    [[nodiscard]] PartialResult addI64MulWideS(ExpressionType lhs, ExpressionType rhs, ExpressionType& resultLo, ExpressionType& resultHi);
-    [[nodiscard]] PartialResult addI64MulWideU(ExpressionType lhs, ExpressionType rhs, ExpressionType& resultLo, ExpressionType& resultHi);
-    B3::Type int64PairTupleType();
-
     // GC
     [[nodiscard]] PartialResult addRefI31(ExpressionType value, ExpressionType& result);
     [[nodiscard]] PartialResult addI31GetS(TypedExpression ref, ExpressionType& result);
@@ -1125,7 +1118,6 @@ class OMGIRGenerator {
     unsigned* m_osrEntryScratchBufferSize;
     UncheckedKeyHashMap<ValueKey, Value*> m_constantPool;
     UncheckedKeyHashMap<const TypeDefinition*, B3::Type> m_tupleMap;
-    B3::Type m_int64PairTupleType { };
     InsertionSet m_constantInsertionValues;
     Value* m_framePointer { nullptr };
     bool m_makesCalls { false };
@@ -1560,13 +1552,6 @@ B3::Type OMGIRGenerator::toB3ResultType(const TypeDefinition* returnType)
     return result.iterator->value;
 }
 
-B3::Type OMGIRGenerator::int64PairTupleType()
-{
-    if (m_int64PairTupleType == B3::Void)
-        m_int64PairTupleType = m_proc.addTuple({ B3::Int64, B3::Int64 });
-    return m_int64PairTupleType;
-}
-
 auto OMGIRGenerator::addLocal(Type type, uint32_t count) -> PartialResult
 {
     size_t newSize = m_locals.size() + count;
@@ -3401,140 +3386,6 @@ auto OMGIRGenerator::truncSaturated(Ext1OpType op, ExpressionType argVar, Expres
     return { };
 }
 
-// Wide arithmetic
-
-auto OMGIRGenerator::addI64Add128(ExpressionType lhsLoVar, ExpressionType lhsHiVar, ExpressionType rhsLoVar, ExpressionType rhsHiVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult
-{
-    Value* lhsLo = get(lhsLoVar);
-    Value* lhsHi = get(lhsHiVar);
-    Value* rhsLo = get(rhsLoVar);
-    Value* rhsHi = get(rhsHiVar);
-
-    B3::Type tupleType = int64PairTupleType();
-    PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, tupleType, origin());
-    patchpoint->append(lhsLo, ValueRep::SomeRegister);
-    patchpoint->append(lhsHi, ValueRep::SomeRegister);
-    patchpoint->append(rhsLo, ValueRep::SomeRegister);
-    patchpoint->append(rhsHi, ValueRep::SomeRegister);
-    patchpoint->resultConstraints = { ValueRep::SomeEarlyRegister, isX86() ? ValueRep::SomeEarlyRegister : ValueRep::SomeRegister };
-    patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) {
-        GPRReg resLo = params[0].gpr();
-        GPRReg resHi = params[1].gpr();
-        GPRReg aLo = params[2].gpr();
-        GPRReg aHi = params[3].gpr();
-        GPRReg bLo = params[4].gpr();
-        GPRReg bHi = params[5].gpr();
-#if CPU(ARM64)
-        jit.add64AndSetFlags(aLo, bLo, resLo);
-        jit.addCarry64(aHi, bHi, resHi);
-#elif CPU(X86_64)
-        jit.move(aLo, resLo);
-        jit.add64(bLo, resLo);
-        jit.move(aHi, resHi);
-        jit.addCarry64(bHi, resHi);
-#endif
-    });
-    patchpoint->effects = Effects::none();
-
-    resultLo = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 0));
-    resultHi = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 1));
-    return { };
-}
-
-auto OMGIRGenerator::addI64Sub128(ExpressionType lhsLoVar, ExpressionType lhsHiVar, ExpressionType rhsLoVar, ExpressionType rhsHiVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult
-{
-    Value* lhsLo = get(lhsLoVar);
-    Value* lhsHi = get(lhsHiVar);
-    Value* rhsLo = get(rhsLoVar);
-    Value* rhsHi = get(rhsHiVar);
-
-    B3::Type tupleType = int64PairTupleType();
-    PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, tupleType, origin());
-    patchpoint->append(lhsLo, ValueRep::SomeRegister);
-    patchpoint->append(lhsHi, ValueRep::SomeRegister);
-    patchpoint->append(rhsLo, ValueRep::SomeRegister);
-    patchpoint->append(rhsHi, ValueRep::SomeRegister);
-    patchpoint->resultConstraints = { ValueRep::SomeEarlyRegister, isX86() ? ValueRep::SomeEarlyRegister : ValueRep::SomeRegister };
-    patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) {
-        GPRReg resLo = params[0].gpr();
-        GPRReg resHi = params[1].gpr();
-        GPRReg aLo = params[2].gpr();
-        GPRReg aHi = params[3].gpr();
-        GPRReg bLo = params[4].gpr();
-        GPRReg bHi = params[5].gpr();
-#if CPU(ARM64)
-        jit.sub64AndSetFlags(aLo, bLo, resLo);
-        jit.subBorrow64(aHi, bHi, resHi);
-#elif CPU(X86_64)
-        jit.move(aLo, resLo);
-        jit.sub64(bLo, resLo);
-        jit.move(aHi, resHi);
-        jit.subBorrow64(bHi, resHi);
-#endif
-    });
-    patchpoint->effects = Effects::none();
-
-    resultLo = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 0));
-    resultHi = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 1));
-    return { };
-}
-
-auto OMGIRGenerator::addI64MulWideU(ExpressionType lhsVar, ExpressionType rhsVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult
-{
-    Value* lhs = get(lhsVar);
-    Value* rhs = get(rhsVar);
-
-#if CPU(ARM64)
-    resultLo = push(m_currentBlock->appendNew<Value>(m_proc, Mul, origin(), lhs, rhs));
-    resultHi = push(m_currentBlock->appendNew<Value>(m_proc, UMulHigh, origin(), lhs, rhs));
-
-#elif CPU(X86_64)
-    // FIXME: We should get B3 on X86 to lower this to one instruction without a patchpoint.
-    B3::Type tupleType = int64PairTupleType();
-    PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, tupleType, origin());
-    patchpoint->append(lhs, ValueRep::reg(X86Registers::eax));
-    patchpoint->append(rhs, ValueRep::SomeRegister);
-    patchpoint->resultConstraints = { ValueRep::reg(X86Registers::eax), ValueRep::reg(X86Registers::edx) };
-    patchpoint->effects = Effects::none();
-    patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) {
-        jit.x86UMulHigh64(params[3].gpr(), params[0].gpr(), params[1].gpr());
-    });
-
-    resultLo = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 0));
-    resultHi = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 1));
-#endif
-
-    return { };
-}
-
-auto OMGIRGenerator::addI64MulWideS(ExpressionType lhsVar, ExpressionType rhsVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult
-{
-    Value* lhs = get(lhsVar);
-    Value* rhs = get(rhsVar);
-
-#if CPU(ARM64)
-    resultLo = push(m_currentBlock->appendNew<Value>(m_proc, Mul, origin(), lhs, rhs));
-    resultHi = push(m_currentBlock->appendNew<Value>(m_proc, MulHigh, origin(), lhs, rhs));
-
-#elif CPU(X86_64)
-    // FIXME: We should get B3 on X86 to lower this to one instruction without a patchpoint.
-    B3::Type tupleType = int64PairTupleType();
-    PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, tupleType, origin());
-    patchpoint->append(lhs, ValueRep::reg(X86Registers::eax));
-    patchpoint->append(rhs, ValueRep::SomeRegister);
-    patchpoint->resultConstraints = { ValueRep::reg(X86Registers::eax), ValueRep::reg(X86Registers::edx) };
-    patchpoint->effects = Effects::none();
-    patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) {
-        jit.x86MulHigh64(params[3].gpr(), params[0].gpr(), params[1].gpr());
-    });
-
-    resultLo = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 0));
-    resultHi = push(m_currentBlock->appendNew<ExtractValue>(m_proc, origin(), B3::Int64, patchpoint, 1));
-#endif
-
-    return { };
-}
-
 auto OMGIRGenerator::addRefI31(ExpressionType value, ExpressionType& result) -> PartialResult
 {
     ASSERT(value.type() == Int32);
diff --git a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp
index 54c05bbe8fbf..07d8159e1772 100644
--- a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp
+++ b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp
@@ -176,6 +176,15 @@ bool getWasmReturnPC(CallFrame* currentFrame, uint8_t*& returnPC, VirtualAddress
     return true;
 }
 
+// C++ equivalent of the "# Recompute PL" block in InPlaceInterpreter.asm: returns, as an IPIntLocal*, the address callFrame - ipintCalleeSaveSpaceStackAligned - (localSizeToAlloc + rethrowSlots) * LOCAL_SIZE for the given IPInt callee.
+IPInt::IPIntLocal* localsFromFrame(CallFrame* callFrame, const IPIntCallee* callee)
+{
+    // C++ mirror of IPIntCalleeSaveSpaceStackAligned (defined in InPlaceInterpreter.asm): the callee-save plus internal register slots, rounded up to the stack alignment. Keep both definitions in sync.
+    static constexpr size_t ipintCalleeSaveSpaceStackAligned = WTF::roundUpToMultipleOf<stackAlignmentBytes()>((Wasm::numberOfIPIntCalleeSaveRegisters + Wasm::numberOfIPIntInternalRegisters) * sizeof(Register));
+    size_t localsAndRethrowSize = (callee->localSizeToAlloc() + callee->rethrowSlots()) * IPInt::LOCAL_SIZE; // Combined size of the locals and rethrow slots.
+    auto pl = reinterpret_cast<uintptr_t>(callFrame) - ipintCalleeSaveSpaceStackAligned - localsAndRethrowSize; // Both areas are subtracted from the frame pointer address.
+    return reinterpret_cast<IPInt::IPIntLocal*>(pl);
+}
 
 // Walk the full CallFrame chain from a WASM breakpoint, collecting virtual addresses for
 // every WASM and JS frame. The result is consumed by qWasmCallStack to give LLDB a
@@ -338,11 +347,12 @@ StopData::StopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFra
 {
 }
 
-StopData::StopData(VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame)
+StopData::StopData(VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame)
     : address(address)
     , originalBytecode(originalBytecode)
     , pc(pc)
     , mc(mc)
+    , locals(locals)
     , stack(stack)
     , callee(callee)
     , instance(instance)
@@ -350,8 +360,8 @@ StopData::StopData(VirtualAddress address, uint8_t originalBytecode, uint8_t* pc
 {
 }
 
-StopData::StopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType type)
-    : StopData(VirtualAddress::toVirtual(instance, callee->functionIndex(), pc), 0, pc, mc, stack, callee, instance, callFrame)
+StopData::StopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType type)
+    : StopData(VirtualAddress::toVirtual(instance, callee->functionIndex(), pc), 0, pc, mc, locals, stack, callee, instance, callFrame)
 {
     wasmTrapType = type;
 }
@@ -364,6 +374,7 @@ void StopData::dump(PrintStream& out) const
     out.print(", originalBytecode:", originalBytecode);
     out.print(", pc:", RawPointer(pc));
     out.print(", mc:", RawPointer(mc));
+    out.print(", locals:", RawPointer(locals));
     out.print(", stack:", RawPointer(stack));
     out.print(", callee:", RawPointer(callee.get()));
     out.print(", instance:", RawPointer(instance));
diff --git a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h
index 6a001af51050..e156c5d96189 100644
--- a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h
+++ b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h
@@ -137,11 +137,11 @@ struct Breakpoint {
 struct StopData {
     WTF_MAKE_STRUCT_TZONE_ALLOCATED(StopData);
 
-    StopData(VirtualAddress, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry*, IPIntCallee*, JSWebAssemblyInstance*, CallFrame*);
+    StopData(VirtualAddress, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal*, IPInt::IPIntStackEntry*, IPIntCallee*, JSWebAssemblyInstance*, CallFrame*);
 
     StopData(IPIntCallee*, JSWebAssemblyInstance*, CallFrame*); // Prologue: no pc/mc
 
-    StopData(IPIntCallee*, JSWebAssemblyInstance*, CallFrame*, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry*, Wasm::ExceptionType); // Trap
+    StopData(IPIntCallee*, JSWebAssemblyInstance*, CallFrame*, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal*, IPInt::IPIntStackEntry*, Wasm::ExceptionType); // Trap
 
     ~StopData();
 
@@ -151,6 +151,7 @@ struct StopData {
     uint8_t originalBytecode { 0 };
     uint8_t* pc { nullptr };
     uint8_t* mc { nullptr };
+    IPInt::IPIntLocal* locals { nullptr };
     IPInt::IPIntStackEntry* stack { nullptr };
     RefPtr<IPIntCallee> callee;
     JSWebAssemblyInstance* instance { nullptr };
@@ -181,7 +182,7 @@ struct DebugState {
         stopData = makeUnique<StopData>(callee, instance, callFrame);
     }
 
-    void setBreakpointStopData(Breakpoint::Type type, VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame)
+    void setBreakpointStopData(Breakpoint::Type type, VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame)
     {
         switch (type) {
         case Breakpoint::Type::Step:
@@ -191,13 +192,13 @@ struct DebugState {
             stopReason = Reason::Breakpoint;
             break;
         }
-        stopData = makeUnique<StopData>(address, originalBytecode, pc, mc, stack, callee, instance, callFrame);
+        stopData = makeUnique<StopData>(address, originalBytecode, pc, mc, locals, stack, callee, instance, callFrame);
     }
 
-    void setTrapStopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType wasmTrapType)
+    void setTrapStopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType wasmTrapType)
     {
         stopReason = Reason::WasmTrap;
-        stopData = makeUnique<StopData>(callee, instance, callFrame, pc, mc, stack, wasmTrapType);
+        stopData = makeUnique<StopData>(callee, instance, callFrame, pc, mc, locals, stack, wasmTrapType);
     }
 
     // WHERE-based helpers — determined by stopData presence and pc:
@@ -296,6 +297,7 @@ struct FrameInfo {
 
 Vector<FrameInfo> collectCallStack(VirtualAddress stopAddress, CallFrame* startFrame, VM&, unsigned maxFrames = 100);
 
+IPInt::IPIntLocal* localsFromFrame(CallFrame*, const IPIntCallee*);
 
 inline StringView getErrorReply(ProtocolError error)
 {
diff --git a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp
index bd1125eab0f1..c2ba90fa761d 100644
--- a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp
+++ b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp
@@ -134,13 +134,13 @@ void ExecutionHandler::stopTheWorld(VM& debuggee, StopTheWorldEvent event)
     VMManager::singleton().notifyVMStop(debuggee, event);
 }
 
-DebuggerTrapStatus ExecutionHandler::handleDebuggerTrapIfNeeded(CallFrame* callFrame, JSWebAssemblyInstance* instance, IPIntCallee* callee, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType exceptionType)
+DebuggerTrapStatus ExecutionHandler::handleDebuggerTrapIfNeeded(CallFrame* callFrame, JSWebAssemblyInstance* instance, IPIntCallee* callee, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType exceptionType)
 {
     VM& debuggee = instance->vm();
     if (exceptionType == Wasm::ExceptionType::Unreachable && hasBreakpoints()) {
         VirtualAddress address = VirtualAddress::toVirtual(instance, callee->functionIndex(), pc);
         if (auto* breakpoint = m_breakpointManager->findBreakpoint(address)) {
-            debuggee.debugState()->setBreakpointStopData(breakpoint->type, address, breakpoint->originalBytecode, pc, mc, stack, callee, instance, callFrame);
+            debuggee.debugState()->setBreakpointStopData(breakpoint->type, address, breakpoint->originalBytecode, pc, mc, locals, stack, callee, instance, callFrame);
             dataLogLnIf(Options::verboseWasmDebugger(), "[Code][handleDebuggerTrapIfNeeded] Breakpoint at ", *breakpoint, " with ", *debuggee.debugState()->stopData);
             stopTheWorld(debuggee, StopTheWorldEvent::WasmProgramStop);
             return DebuggerTrapStatus::ResolvedByDebugger; // Don't throw; resume execution at this breakpoint
@@ -157,7 +157,7 @@ DebuggerTrapStatus ExecutionHandler::handleDebuggerTrapIfNeeded(CallFrame* callF
         debuggee.debugState()->stopReason = DebugState::Reason::WasmTrap;
         debuggee.debugState()->stopData->wasmTrapType = exceptionType;
     } else
-        debuggee.debugState()->setTrapStopData(callee, instance, callFrame, pc, mc, stack, exceptionType);
+        debuggee.debugState()->setTrapStopData(callee, instance, callFrame, pc, mc, locals, stack, exceptionType);
     dataLogLnIf(Options::verboseWasmDebugger(), "[Code][handleDebuggerTrapIfNeeded] Wasm trap at ", *debuggee.debugState()->stopData);
     stopTheWorld(debuggee, StopTheWorldEvent::WasmProgramStop);
     return DebuggerTrapStatus::NotResolvedByDebugger; // Throw; trap was reported, now propagate it
@@ -541,7 +541,8 @@ void ExecutionHandler::setStepIntoBreakpointForCall(VM& callerVM, CalleeBits box
         dataLogLnIf(Options::verboseWasmDebugger(), "[Code][StepIntoEvent] Start for call");
         RELEASE_ASSERT(m_debuggerState == DebuggerState::StepRequested);
 
-        RELEASE_ASSERT(calleeInstance);
+        if (!calleeInstance)
+            return;
         if (!boxedCallee.isNativeCallee())
             return;
         RefPtr wasmCallee = downcast<Wasm::Callee>(boxedCallee.asNativeCallee());
diff --git a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h
index cd5e8443cb35..46e0c097793b 100644
--- a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h
+++ b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h
@@ -78,7 +78,7 @@ class ExecutionHandler {
 
     ResumeMode stopCode(Locker<Lock>&, StopTheWorldEvent) WTF_REQUIRES_LOCK(m_lock);
 
-    DebuggerTrapStatus handleDebuggerTrapIfNeeded(CallFrame*, JSWebAssemblyInstance*, IPIntCallee*, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry*, Wasm::ExceptionType);
+    DebuggerTrapStatus handleDebuggerTrapIfNeeded(CallFrame*, JSWebAssemblyInstance*, IPIntCallee*, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal*, IPInt::IPIntStackEntry*, Wasm::ExceptionType);
 
     JS_EXPORT_PRIVATE void resume();
     JS_EXPORT_PRIVATE void step();
diff --git a/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp b/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp
index 0bde9d27d764..a49490f741b0 100644
--- a/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp
+++ b/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp
@@ -366,12 +366,12 @@ void QueryHandler::handleWasmLocal(StringView packet)
     }
 
     auto& stopData = *state->stopData;
-    CallFrame* localCallFrame = nullptr;
+    IPInt::IPIntLocal* locals = nullptr;
     RefPtr<IPIntCallee> localCallee;
     JSWebAssemblyInstance* instance = nullptr;
 
     if (!frameIndex) {
-        localCallFrame = stopData.callFrame;
+        locals = stopData.locals;
         localCallee = stopData.callee;
         instance = stopData.instance;
     } else {
@@ -381,7 +381,7 @@ void QueryHandler::handleWasmLocal(StringView packet)
             return;
         }
         const auto& frameInfo = frames[frameIndex];
-        localCallFrame = frameInfo.wasmCallFrame;
+        locals = localsFromFrame(frameInfo.wasmCallFrame, frameInfo.wasmCallee.get());
         localCallee = frameInfo.wasmCallee;
         instance = frameInfo.wasmCallFrame->wasmInstance();
     }
@@ -395,8 +395,7 @@ void QueryHandler::handleWasmLocal(StringView packet)
         return;
     }
 
-    IPInt::FrameAccess frame(localCallFrame, localCallee.get());
-    IPInt::IPIntLocal& local = *frame.localSlot(localIndex);
+    IPInt::IPIntLocal& local = locals[localIndex];
     Type localType = localTypes[localIndex];
     logWasmLocalValue(localIndex, local, localType);