diff --git a/JSTests/wasm/ipint-tests/ipint-test-leb-decode.js b/JSTests/wasm/ipint-tests/ipint-test-leb-decode.js deleted file mode 100644 index 20a9cbfb7655..000000000000 --- a/JSTests/wasm/ipint-tests/ipint-test-leb-decode.js +++ /dev/null @@ -1,296 +0,0 @@ -// Comprehensive LEB128 decode verification for IPInt -// Tests i32.const (signed LEB128 i32) and i64.const (signed LEB128 i64) - -// Helper: build a wasm module that returns a constant -function makeI32ConstModule(bytes) { - // (func (result i32) (i32.const ) ) - let code = [0x00, 0x41, ...bytes, 0x0b]; // no locals, i32.const, end - let funcBody = [code.length, ...code]; - let wasmBytes = new Uint8Array([ - 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7f, - 0x03, 0x02, 0x01, 0x00, - 0x07, 0x05, 0x01, 0x01, 0x66, 0x00, 0x00, - 0x0a, funcBody.length + 1, 0x01, ...funcBody - ]); - return new WebAssembly.Instance(new WebAssembly.Module(wasmBytes.buffer)); -} - -function makeI64ConstModule(bytes) { - // (func (result i64) (i64.const ) ) - let code = [0x00, 0x42, ...bytes, 0x0b]; // no locals, i64.const, end - let funcBody = [code.length, ...code]; - let wasmBytes = new Uint8Array([ - 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7e, - 0x03, 0x02, 0x01, 0x00, - 0x07, 0x05, 0x01, 0x01, 0x66, 0x00, 0x00, - 0x0a, funcBody.length + 1, 0x01, ...funcBody - ]); - return new WebAssembly.Instance(new WebAssembly.Module(wasmBytes.buffer)); -} - -// Encode signed LEB128 for i32 -function encodeSLEB128_i32(value) { - value = value | 0; // ensure i32 - let bytes = []; - while (true) { - let b = value & 0x7f; - value >>= 7; - if ((value === 0 && (b & 0x40) === 0) || (value === -1 && (b & 0x40) !== 0)) { - bytes.push(b); - break; - } - bytes.push(b | 0x80); - } - return bytes; -} - -// Encode signed LEB128 for i64 (using BigInt) -function encodeSLEB128_i64(value) { - value = BigInt(value); - let bytes = []; - while (true) { - let b = Number(value & 0x7fn); - value >>= 7n; - if ((value === 0n && (b & 0x40) === 0) || (value === -1n && (b & 0x40) !== 0)) { - bytes.push(b); - break; - } - bytes.push(b | 0x80); - } - return bytes; -} - -let failures = 0; - -function testI32(value, label) { - let bytes = encodeSLEB128_i32(value); - let m = makeI32ConstModule(bytes); - let result = m.exports.f(); - let expected = value | 0; - if (result !== expected) { - print("FAIL i32 " + label + ": value=" + value + " bytes=[" + bytes.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result); - failures++; - } -} - -function testI64(value, label) { - let bytes = encodeSLEB128_i64(value); - let m = makeI64ConstModule(bytes); - let result = m.exports.f(); - let expected = BigInt(value); - if (result !== expected) { - print("FAIL i64 " + label + ": value=" + value + " bytes=[" + bytes.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result); - failures++; - } -} - -// === i32.const tests === - -// Single byte (1 byte LEB128): values -64 to 63 -testI32(0, "zero"); -testI32(1, "one"); -testI32(-1, "neg_one"); -testI32(63, "max_single_pos"); // 0x3F — last positive single-byte -testI32(-64, "min_single_neg"); // 0x40 — last negative single-byte -testI32(42, "42"); - -// Two bytes: values -8192 to 8191 -testI32(64, "first_two_byte_pos"); // 0xC0 0x00 -testI32(-65, "first_two_byte_neg"); // 0xBF 0x7F -testI32(127, "127"); -testI32(-128, "-128"); -testI32(128, "128"); -testI32(8191, "max_two_byte_pos"); -testI32(-8192, "min_two_byte_neg"); - -// Three bytes -testI32(8192, "first_three_byte"); -testI32(-8193, "first_three_byte_neg"); -testI32(65535, "65535"); -testI32(-65535, "-65535"); - -// Four bytes -testI32(1048576, "1M"); -testI32(-1048576, "-1M"); -testI32(16777215, "16M-1"); - -// Five bytes (max for i32) -testI32(2147483647, "INT32_MAX"); -testI32(-2147483648, "INT32_MIN"); -testI32(1717661556, "1717661556"); // The value that was failing before -testI32(-1923807898, "-1923807898"); -testI32(219737259, "219737259"); -testI32(2371159398 | 0, "2371159398_as_i32"); // wraps to negative - -// === i64.const tests === - -// Single byte -testI64(0n, "zero"); -testI64(1n, "one"); -testI64(-1n, "neg_one"); -testI64(63n, "max_single_pos"); -testI64(-64n, "min_single_neg"); - -// Multi-byte -testI64(64n, "first_two_byte"); -testI64(-65n, "first_two_byte_neg"); -testI64(128n, "128"); -testI64(-128n, "-128"); - -// Large positive -testI64(2147483647n, "INT32_MAX"); -testI64(2147483648n, "INT32_MAX+1"); -testI64(4294967295n, "UINT32_MAX"); -testI64(4294967296n, "UINT32_MAX+1"); - -// Large negative -testI64(-2147483648n, "INT32_MIN"); -testI64(-2147483649n, "INT32_MIN-1"); - -// 64-bit range -testI64(9223372036854775807n, "INT64_MAX"); -testI64(-9223372036854775808n, "INT64_MIN"); -testI64(18231657398634828518n - (1n << 64n), "large_neg_from_test"); // The value from the failing test -testI64(5825195283807165538n, "large_pos_from_test"); - -// Values near sign extension boundaries (shift = 7, 14, 21, 28, 35, 42, 49, 56, 63) -testI64(0x40n, "sign_bit_shift7"); // bit 6 set at shift 0 -testI64(0x2000n, "sign_bit_shift14"); // bit 13 set -testI64(0x100000n, "sign_bit_shift21"); -testI64(0x8000000n, "sign_bit_shift28"); -testI64(0x400000000n, "sign_bit_shift35"); -testI64(-0x40n, "neg_sign_bit_shift7"); -testI64(-0x2000n, "neg_sign_bit_shift14"); -testI64(-0x100000n, "neg_sign_bit_shift21"); -testI64(-0x8000000n, "neg_sign_bit_shift28"); -testI64(-0x400000000n, "neg_sign_bit_shift35"); - -// === Non-canonical (padded) LEB128 tests === -// Non-canonical LEBs use more bytes than necessary by adding redundant -// continuation bytes with sign-extension padding. These are valid per the -// WebAssembly spec and must decode to the same value. - -// Pad a canonical signed LEB128 encoding to targetLength bytes. -// For non-negative values (bit 6 of last byte clear), pad with 0x00. -// For negative values (bit 6 of last byte set), pad with 0x7f. -function padSLEB128(canonicalBytes, targetLength) { - if (canonicalBytes.length >= targetLength) - return canonicalBytes; - let padded = [...canonicalBytes]; - while (padded.length < targetLength) { - let lastByte = padded[padded.length - 1]; - let signExtByte = (lastByte & 0x40) ? 0x7f : 0x00; - padded[padded.length - 1] = lastByte | 0x80; // set continuation bit - padded.push(signExtByte); // sign-extension terminator - } - return padded; -} - -function testI32Padded(value, padLen, label) { - let canonical = encodeSLEB128_i32(value); - let padded = padSLEB128(canonical, padLen); - let m = makeI32ConstModule(padded); - let result = m.exports.f(); - let expected = value | 0; - if (result !== expected) { - print("FAIL i32 padded " + label + ": value=" + value + " canonical=[" + canonical.map(b=>"0x"+b.toString(16)).join(",") + "] padded=[" + padded.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result); - failures++; - } -} - -function testI64Padded(value, padLen, label) { - let canonical = encodeSLEB128_i64(value); - let padded = padSLEB128(canonical, padLen); - let m = makeI64ConstModule(padded); - let result = m.exports.f(); - let expected = BigInt(value); - if (result !== expected) { - print("FAIL i64 padded " + label + ": value=" + value + " canonical=[" + canonical.map(b=>"0x"+b.toString(16)).join(",") + "] padded=[" + padded.map(b=>"0x"+b.toString(16)).join(",") + "] expected=" + expected + " got=" + result); - failures++; - } -} - -// i32 non-canonical: pad up to 5 bytes (max for i32 signed LEB128) - -// Zero with all possible padding lengths -testI32Padded(0, 2, "zero_2byte"); // [0x80, 0x00] instead of [0x00] -testI32Padded(0, 3, "zero_3byte"); // [0x80, 0x80, 0x00] -testI32Padded(0, 4, "zero_4byte"); // [0x80, 0x80, 0x80, 0x00] -testI32Padded(0, 5, "zero_5byte"); // [0x80, 0x80, 0x80, 0x80, 0x00] - -// -1 with all possible padding lengths -testI32Padded(-1, 2, "neg1_2byte"); // [0xff, 0x7f] instead of [0x7f] -testI32Padded(-1, 3, "neg1_3byte"); // [0xff, 0xff, 0x7f] -testI32Padded(-1, 4, "neg1_4byte"); // [0xff, 0xff, 0xff, 0x7f] -testI32Padded(-1, 5, "neg1_5byte"); // [0xff, 0xff, 0xff, 0xff, 0x7f] - -// Small positive values padded to max -testI32Padded(1, 5, "one_5byte"); -testI32Padded(42, 5, "42_5byte"); -testI32Padded(63, 5, "63_5byte"); // max single-byte positive, padded - -// Small negative values padded to max -testI32Padded(-2, 5, "neg2_5byte"); -testI32Padded(-64, 5, "neg64_5byte"); // max single-byte negative, padded - -// Two-byte canonical values padded to various lengths -testI32Padded(64, 3, "64_3byte"); -testI32Padded(64, 5, "64_5byte"); -testI32Padded(-65, 3, "neg65_3byte"); -testI32Padded(-65, 5, "neg65_5byte"); -testI32Padded(127, 5, "127_5byte"); -testI32Padded(-128, 5, "neg128_5byte"); -testI32Padded(8191, 5, "8191_5byte"); -testI32Padded(-8192, 5, "neg8192_5byte"); - -// Three-byte canonical values padded -testI32Padded(8192, 4, "8192_4byte"); -testI32Padded(8192, 5, "8192_5byte"); -testI32Padded(-8193, 5, "neg8193_5byte"); -testI32Padded(65535, 5, "65535_5byte"); - -// Four-byte canonical values padded to 5 -testI32Padded(1048576, 5, "1M_5byte"); -testI32Padded(-1048576, 5, "neg1M_5byte"); - -// i64 non-canonical: pad up to 10 bytes (max for i64 signed LEB128) - -// Zero with various padding lengths -testI64Padded(0n, 2, "zero_2byte"); -testI64Padded(0n, 5, "zero_5byte"); -testI64Padded(0n, 10, "zero_10byte"); - -// -1 with various padding lengths -testI64Padded(-1n, 2, "neg1_2byte"); -testI64Padded(-1n, 5, "neg1_5byte"); -testI64Padded(-1n, 10, "neg1_10byte"); - -// Small values padded to max -testI64Padded(1n, 10, "one_10byte"); -testI64Padded(-2n, 10, "neg2_10byte"); -testI64Padded(63n, 10, "63_10byte"); -testI64Padded(-64n, 10, "neg64_10byte"); - -// Values that cross byte boundaries, padded -testI64Padded(64n, 10, "64_10byte"); -testI64Padded(-65n, 10, "neg65_10byte"); -testI64Padded(8192n, 10, "8192_10byte"); -testI64Padded(-8193n, 10, "neg8193_10byte"); - -// 32-bit range values padded in 64-bit encoding -testI64Padded(2147483647n, 10, "INT32_MAX_10byte"); -testI64Padded(-2147483648n, 10, "INT32_MIN_10byte"); -testI64Padded(4294967295n, 10, "UINT32_MAX_10byte"); - -// Large 64-bit values padded (these are already 9-10 byte canonical, but test padding where possible) -testI64Padded(0x400000000n, 10, "shift35_10byte"); -testI64Padded(-0x400000000n, 10, "neg_shift35_10byte"); -testI64Padded(0x20000000000n, 10, "shift42_10byte"); -testI64Padded(-0x20000000000n, 10, "neg_shift42_10byte"); - -if (failures !== 0) { - print(failures + " tests FAILED"); - throw new Error(failures + " LEB128 decode failures"); -} diff --git a/JSTests/wasm/ipint-tests/ipint-test-simd-memory.js b/JSTests/wasm/ipint-tests/ipint-test-simd-memory.js deleted file mode 100644 index e16f161e5fb8..000000000000 --- a/JSTests/wasm/ipint-tests/ipint-test-simd-memory.js +++ /dev/null @@ -1,141 +0,0 @@ -import { instantiate } from "../wabt-wrapper.js" -import * as assert from "../assert.js" - -// Test SIMD memory operations through the fast/slow path memarg parsing. - -let wat = ` -(module - (memory 1) - - ;; Initialize memory with known pattern - (data (i32.const 0) "\\01\\02\\03\\04\\05\\06\\07\\08\\09\\0a\\0b\\0c\\0d\\0e\\0f\\10\\11\\12\\13\\14\\15\\16\\17\\18\\19\\1a\\1b\\1c\\1d\\1e\\1f\\20") - - ;; v128.load + v128.store round-trip - (func (export "test_load_store") (result i32) - ;; Load v128 from offset 0, store to offset 16 - (v128.store (i32.const 16) (v128.load (i32.const 0))) - ;; Verify by extracting lane 0 from reloaded value - (i32x4.extract_lane 0 (v128.load (i32.const 16))) - ) - - ;; v128.load8_splat - (func (export "test_load8_splat") (result i32) - ;; Load byte at offset 4 (value 0x05) and splat - (i8x16.extract_lane_u 7 (v128.load8_splat (i32.const 4))) - ) - - ;; v128.load16_splat - (func (export "test_load16_splat") (result i32) - ;; Load halfword at offset 0 (value 0x0201) and splat - (i16x8.extract_lane_u 3 (v128.load16_splat (i32.const 0))) - ) - - ;; v128.load32_splat - (func (export "test_load32_splat") (result i32) - ;; Load word at offset 0 (value 0x04030201) and splat - (i32x4.extract_lane 2 (v128.load32_splat (i32.const 0))) - ) - - ;; v128.load64_splat - (func (export "test_load64_splat") (result i64) - ;; Load doubleword at offset 0 and splat - (i64x2.extract_lane 1 (v128.load64_splat (i32.const 0))) - ) - - ;; v128.load8x8_s (sign extend 8->16) - (func (export "test_load8x8s") (result i32) - ;; Load 8 bytes from offset 0, sign-extend to i16x8 - (i16x8.extract_lane_u 0 (v128.load8x8_s (i32.const 0))) - ) - - ;; v128.load32_zero - (func (export "test_load32_zero") (result i32) - ;; Load 32-bit from offset 0, zero upper - (i32x4.extract_lane 1 (v128.load32_zero (i32.const 0))) - ) - - ;; v128.load8_lane - (func (export "test_load8_lane") (result i32) - (v128.load8_lane 3 (i32.const 5) (v128.const i32x4 0 0 0 0)) ;; Load byte at addr 5 (value 0x06), replace lane 3 - (i8x16.extract_lane_u 3) - ) - - ;; v128.store8_lane - (func (export "test_store8_lane") (result i32) - ;; Store lane 2 of a known vector to memory address 31 - (v128.store8_lane 2 (i32.const 31) (v128.const i8x16 0x41 0x42 0x43 0x44 0 0 0 0 0 0 0 0 0 0 0 0)) - ;; Read back - (i32.load8_u (i32.const 31)) - ) - - ;; v128.load16_lane - (func (export "test_load16_lane") (result i32) - (v128.load16_lane 1 (i32.const 2) (v128.const i32x4 0 0 0 0)) ;; Load halfword at addr 2 (value 0x0403), replace lane 1 - (i16x8.extract_lane_u 1) - ) - - ;; v128.store16_lane - (func (export "test_store16_lane") (result i32) - (v128.store16_lane 1 (i32.const 30) (v128.const i16x8 0x1234 0x5678 0 0 0 0 0 0)) - (i32.load16_u (i32.const 30)) - ) - - ;; v128.load32_lane - (func (export "test_load32_lane") (result i32) - (v128.load32_lane 2 (i32.const 0) (v128.const i32x4 0 0 0 0)) ;; Load word at addr 0 (0x04030201), replace lane 2 - (i32x4.extract_lane 2) - ) - - ;; v128.store32_lane - (func (export "test_store32_lane") (result i32) - (v128.store32_lane 0 (i32.const 28) (v128.const i32x4 0xDEADBEEF 0 0 0)) - (i32.load (i32.const 28)) - ) -) -` - -async function test() { - const instance = await instantiate(wat, {}); - const e = instance.exports - - // v128.load + v128.store: bytes 0-3 are 0x04030201 in little-endian - assert.eq(e.test_load_store(), 0x04030201) - - // v128.load8_splat: byte at offset 4 = 0x05 - assert.eq(e.test_load8_splat(), 5) - - // v128.load16_splat: halfword at offset 0 = 0x0201 - assert.eq(e.test_load16_splat(), 0x0201) - - // v128.load32_splat: word at offset 0 = 0x04030201 - assert.eq(e.test_load32_splat(), 0x04030201) - - // v128.load64_splat: dword at offset 0 = 0x0807060504030201 - assert.eq(e.test_load64_splat(), 0x0807060504030201n) - - // v128.load8x8_s: byte 0x01 sign-extended to i16 = 1 - assert.eq(e.test_load8x8s(), 1) - - // v128.load32_zero: upper lane should be 0 - assert.eq(e.test_load32_zero(), 0) - - // v128.load8_lane: byte at addr 5 = 0x06 - assert.eq(e.test_load8_lane(), 6) - - // v128.store8_lane: lane 2 = 0x43 - assert.eq(e.test_store8_lane(), 0x43) - - // v128.load16_lane: halfword at addr 2 = 0x0403 - assert.eq(e.test_load16_lane(), 0x0403) - - // v128.store16_lane: lane 1 = 0x5678 - assert.eq(e.test_store16_lane(), 0x5678) - - // v128.load32_lane: word at addr 0 = 0x04030201 - assert.eq(e.test_load32_lane(), 0x04030201) - - // v128.store32_lane: lane 0 = 0xDEADBEEF - assert.eq(e.test_store32_lane(), 0xDEADBEEF | 0) -} - -await assert.asyncTest(test()) diff --git a/JSTests/wasm/ipint-tests/ipint-test-simd-multi-byte-leb.js b/JSTests/wasm/ipint-tests/ipint-test-simd-multi-byte-leb.js deleted file mode 100644 index f491bcea9a81..000000000000 --- a/JSTests/wasm/ipint-tests/ipint-test-simd-multi-byte-leb.js +++ /dev/null @@ -1,160 +0,0 @@ -import * as assert from "../assert.js" - -// Test SIMD instructions with padded (non-minimal) LEB128 opcode encoding. -// The simd_prefix handler decodes the opcode LEB128 into t4, so t4 correctly -// points past however many bytes the opcode takes. Before this fix, the code -// used a hardcoded offset from PC (ImmLaneIdxOffset = 2), which assumed -// a 1-byte opcode. With padded LEB128, the opcode is 2+ bytes, making the -// hardcoded offset wrong. This test verifies the t4-based approach works. - -function buildModule(codeBody) { - // Minimal wasm module: 1 type (() -> i32), 1 func, 1 export "f" - const header = [0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00]; - const typeSection = [0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7f]; // () -> i32 - const funcSection = [0x03, 0x02, 0x01, 0x00]; - const exportSection = [0x07, 0x05, 0x01, 0x01, 0x66, 0x00, 0x00]; // "f" - - const bodyWithLocals = [0x00, ...codeBody, 0x0b]; // 0 locals, body, end - const bodyLen = bodyWithLocals.length; - const codeSection = [0x0a, ...uleb128(bodyLen + 1 + uleb128(bodyLen).length), 0x01, ...uleb128(bodyLen), ...bodyWithLocals]; - - return new Uint8Array([...header, ...typeSection, ...funcSection, ...exportSection, ...codeSection]); -} - -function uleb128(value) { - const bytes = []; - do { - let byte = value & 0x7f; - value >>>= 7; - if (value !== 0) byte |= 0x80; - bytes.push(byte); - } while (value !== 0); - return bytes; -} - -// Encode a SIMD opcode with padded LEB128 (add continuation byte). -// Opcode 0x16 -> [0xfd, 0x96, 0x00] instead of [0xfd, 0x16] -// Opcode 0xAE (already 2-byte: 0xAE,0x01) -> [0xfd, 0xAE, 0x81, 0x00] (3-byte padded) -function paddedSIMDOp(opcode) { - // Normal LEB128 encoding, then pad with one extra continuation byte - const lebBytes = uleb128(opcode); - // Set continuation bit on the last byte, then add 0x00 - lebBytes[lebBytes.length - 1] |= 0x80; - lebBytes.push(0x00); - return [0xfd, ...lebBytes]; -} - -// v128.const i32x4 with specified values -function v128ConstI32x4(a, b, c, d) { - const buf = new ArrayBuffer(16); - const view = new DataView(buf); - view.setInt32(0, a, true); - view.setInt32(4, b, true); - view.setInt32(8, c, true); - view.setInt32(12, d, true); - return [0xfd, 0x0c, ...new Uint8Array(buf)]; -} - -// v128.const with padded opcode -function v128ConstI32x4Padded(a, b, c, d) { - const buf = new ArrayBuffer(16); - const view = new DataView(buf); - view.setInt32(0, a, true); - view.setInt32(4, b, true); - view.setInt32(8, c, true); - view.setInt32(12, d, true); - return [...paddedSIMDOp(0x0c), ...new Uint8Array(buf)]; -} - -// i32.const with correct signed LEB128 encoding -function i32Const(value) { - const bytes = []; - let more = true; - while (more) { - let byte = value & 0x7f; - value >>= 7; - if ((value === 0 && (byte & 0x40) === 0) || (value === -1 && (byte & 0x40) !== 0)) - more = false; - else - byte |= 0x80; - bytes.push(byte); - } - return [0x41, ...bytes]; -} - -function instantiateAndRun(bytes) { - const mod = new WebAssembly.Module(bytes); - const inst = new WebAssembly.Instance(mod); - return inst.exports.f(); -} - -// Test 1: i8x16.extract_lane_u (opcode 0x16) with padded LEB128 -// Extracts byte at lane 4 from i32x4 [0x04030201, ...] -// Lane 4 = first byte of second i32 = 0x05 -{ - const bytes = buildModule([ - ...v128ConstI32x4(0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d), - ...paddedSIMDOp(0x16), 0x04, // i8x16.extract_lane_u 4 (padded opcode) - ]); - assert.eq(instantiateAndRun(bytes), 5); -} - -// Test 2: i32x4.extract_lane (opcode 0x1b) with padded LEB128 -{ - const bytes = buildModule([ - ...v128ConstI32x4(10, 20, 30, 40), - ...paddedSIMDOp(0x1b), 0x02, // i32x4.extract_lane 2 (padded opcode) - ]); - assert.eq(instantiateAndRun(bytes), 30); -} - -// Test 3: v128.const (opcode 0x0c) with padded LEB128 -{ - const bytes = buildModule([ - ...v128ConstI32x4Padded(42, 0, 0, 0), // v128.const with padded opcode - 0xfd, 0x1b, 0x00, // i32x4.extract_lane 0 (normal encoding) - ]); - assert.eq(instantiateAndRun(bytes), 42); -} - -// Test 4: i8x16.replace_lane (opcode 0x17) with padded LEB128 -{ - const bytes = buildModule([ - ...v128ConstI32x4(0, 0, 0, 0), - ...i32Const(99), // i32.const 99 (proper signed LEB128) - ...paddedSIMDOp(0x17), 0x02, // i8x16.replace_lane 2 (padded opcode) - ...paddedSIMDOp(0x16), 0x02, // i8x16.extract_lane_u 2 (padded opcode) - ]); - assert.eq(instantiateAndRun(bytes), 99); -} - -// Test 5: Chain of padded ops — v128.const (padded) + i32x4.add (padded, opcode 0xAE) -// + i32x4.extract_lane (padded) -{ - const bytes = buildModule([ - ...v128ConstI32x4Padded(10, 20, 30, 40), - ...v128ConstI32x4Padded(1, 2, 3, 4), - ...paddedSIMDOp(0xAE), // i32x4.add (padded) - ...paddedSIMDOp(0x1b), 0x01, // i32x4.extract_lane 1 (padded opcode) - ]); - assert.eq(instantiateAndRun(bytes), 22); -} - -// Test 6: i8x16.shuffle (opcode 0x0d) with padded LEB128 -// Shuffle: take first 4 bytes from second vector, then first 12 from first. -{ - const shuffleImm = [ - 0x10, 0x11, 0x12, 0x13, // lanes 16-19 (from second vector) - 0x00, 0x01, 0x02, 0x03, // lanes 0-3 (from first vector) - 0x04, 0x05, 0x06, 0x07, // lanes 4-7 (from first vector) - 0x08, 0x09, 0x0a, 0x0b, // lanes 8-11 (from first vector) - ]; - const bytes = buildModule([ - ...v128ConstI32x4(0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d), - ...v128ConstI32x4(0x44434241, 0x48474645, 0x4c4b4a49, 0x504f4e4d), - ...paddedSIMDOp(0x0d), ...shuffleImm, // i8x16.shuffle (padded opcode) - 0xfd, 0x1b, 0x00, // i32x4.extract_lane 0 (normal encoding) - ]); - // First 4 bytes of result = bytes 16-19 = first 4 bytes of second vector = 0x44434241 - assert.eq(instantiateAndRun(bytes), 0x44434241); -} diff --git a/JSTests/wasm/stress/atomic-cmpxchg-large-offset.js b/JSTests/wasm/stress/atomic-cmpxchg-large-offset.js deleted file mode 100644 index 3fe46252c1c2..000000000000 --- a/JSTests/wasm/stress/atomic-cmpxchg-large-offset.js +++ /dev/null @@ -1,116 +0,0 @@ -//@ skip if $architecture != "arm64" && $architecture != "x86_64" - -import * as assert from "../assert.js"; -import { instantiate } from "../wabt-wrapper.js"; - -// Test atomic cmpxchg with offset >= 128 to exercise the IPInt slow path. -// When offset >= 128 the LEB128 encoding is multi-byte (first byte >= 0x80), -// causing loadStoreMakePointerFast to fall through to the slow path. -// This specifically tests that expected and replacement are not swapped -// in the slow path's push/pop save/restore sequence. - -let wat = ` -(module - (memory 1 1 shared) - (export "memory" (memory 0)) - - (func (export "i32_cmpxchg") (param i32 i32 i32) (result i32) - (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw.cmpxchg offset=128)) - (func (export "i64_cmpxchg") (param i32 i64 i64) (result i64) - (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw.cmpxchg offset=128)) - (func (export "i32_cmpxchg8") (param i32 i32 i32) (result i32) - (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw8.cmpxchg_u offset=128)) - (func (export "i32_cmpxchg16") (param i32 i32 i32) (result i32) - (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw16.cmpxchg_u offset=128)) - (func (export "i64_cmpxchg8") (param i32 i64 i64) (result i64) - (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw8.cmpxchg_u offset=128)) - (func (export "i64_cmpxchg16") (param i32 i64 i64) (result i64) - (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw16.cmpxchg_u offset=128)) - (func (export "i64_cmpxchg32") (param i32 i64 i64) (result i64) - (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw32.cmpxchg_u offset=128)) -) -`; - -async function test() { - const instance = await instantiate(wat, {}, { threads: true }); - const e = instance.exports; - const mem = new DataView(e.memory.buffer); - - // The effective address is param0 + 128. We use param0=0 so effective addr=128. - const addr = 0; - const effectiveAddr = 128; - - for (let i = 0; i < wasmTestLoopCount; i++) { - // i32.atomic.rmw.cmpxchg - success: expected matches memory - mem.setInt32(effectiveAddr, 10, true); - assert.eq(e.i32_cmpxchg(addr, 10, 42), 10); // returns old value - assert.eq(mem.getInt32(effectiveAddr, true), 42); // memory updated to replacement - - // i32.atomic.rmw.cmpxchg - failure: expected doesn't match memory - mem.setInt32(effectiveAddr, 10, true); - assert.eq(e.i32_cmpxchg(addr, 99, 42), 10); // returns old value (no match) - assert.eq(mem.getInt32(effectiveAddr, true), 10); // memory unchanged - - // i64.atomic.rmw.cmpxchg - success - mem.setBigInt64(effectiveAddr, 10n, true); - assert.eq(e.i64_cmpxchg(addr, 10n, 42n), 10n); - assert.eq(mem.getBigInt64(effectiveAddr, true), 42n); - - // i64.atomic.rmw.cmpxchg - failure - mem.setBigInt64(effectiveAddr, 10n, true); - assert.eq(e.i64_cmpxchg(addr, 99n, 42n), 10n); - assert.eq(mem.getBigInt64(effectiveAddr, true), 10n); - - // i32.atomic.rmw8.cmpxchg_u - success - mem.setUint8(effectiveAddr, 10); - assert.eq(e.i32_cmpxchg8(addr, 10, 42), 10); - assert.eq(mem.getUint8(effectiveAddr), 42); - - // i32.atomic.rmw8.cmpxchg_u - failure - mem.setUint8(effectiveAddr, 10); - assert.eq(e.i32_cmpxchg8(addr, 99, 42), 10); - assert.eq(mem.getUint8(effectiveAddr), 10); - - // i32.atomic.rmw16.cmpxchg_u - success - mem.setUint16(effectiveAddr, 10, true); - assert.eq(e.i32_cmpxchg16(addr, 10, 42), 10); - assert.eq(mem.getUint16(effectiveAddr, true), 42); - - // i32.atomic.rmw16.cmpxchg_u - failure - mem.setUint16(effectiveAddr, 10, true); - assert.eq(e.i32_cmpxchg16(addr, 99, 42), 10); - assert.eq(mem.getUint16(effectiveAddr, true), 10); - - // i64.atomic.rmw8.cmpxchg_u - success - mem.setUint8(effectiveAddr, 10); - assert.eq(e.i64_cmpxchg8(addr, 10n, 42n), 10n); - assert.eq(mem.getUint8(effectiveAddr), 42); - - // i64.atomic.rmw8.cmpxchg_u - failure - mem.setUint8(effectiveAddr, 10); - assert.eq(e.i64_cmpxchg8(addr, 99n, 42n), 10n); - assert.eq(mem.getUint8(effectiveAddr), 10); - - // i64.atomic.rmw16.cmpxchg_u - success - mem.setUint16(effectiveAddr, 10, true); - assert.eq(e.i64_cmpxchg16(addr, 10n, 42n), 10n); - assert.eq(mem.getUint16(effectiveAddr, true), 42); - - // i64.atomic.rmw16.cmpxchg_u - failure - mem.setUint16(effectiveAddr, 10, true); - assert.eq(e.i64_cmpxchg16(addr, 99n, 42n), 10n); - assert.eq(mem.getUint16(effectiveAddr, true), 10); - - // i64.atomic.rmw32.cmpxchg_u - success - mem.setUint32(effectiveAddr, 10, true); - assert.eq(e.i64_cmpxchg32(addr, 10n, 42n), 10n); - assert.eq(mem.getUint32(effectiveAddr, true), 42); - - // i64.atomic.rmw32.cmpxchg_u - failure - mem.setUint32(effectiveAddr, 10, true); - assert.eq(e.i64_cmpxchg32(addr, 99n, 42n), 10n); - assert.eq(mem.getUint32(effectiveAddr, true), 10); - } -} - -await assert.asyncTest(test()); diff --git a/JSTests/wasm/stress/memory64-atomics.js b/JSTests/wasm/stress/memory64-atomics.js deleted file mode 100644 index 7b43297dc453..000000000000 --- a/JSTests/wasm/stress/memory64-atomics.js +++ /dev/null @@ -1,169 +0,0 @@ -//@ skip if $addressBits <= 32 -//@ runDefaultWasm("-m", "--useWasmMemory64=1", "--useOMGJIT=0") -import { instantiate } from "../wabt-wrapper.js"; -import * as assert from "../assert.js"; - -// Test atomic operations with memory64 and a constant offset. -// Uses memory64 (i64 addresses) with shared memory and offset=256 to exercise -// the 64-bit offset metadata path in IPInt. - -// First, test non-atomic load/store with memory64 to verify basic memory64 works. -{ - let wat = ` - (module - (memory (export "memory") i64 1) - - (func (export "i32_store") (param i64 i32) (local.get 0) (local.get 1) (i32.store offset=256)) - (func (export "i32_load") (param i64) (result i32) (local.get 0) (i32.load offset=256)) - (func (export "i64_store") (param i64 i64) (local.get 0) (local.get 1) (i64.store offset=256)) - (func (export "i64_load") (param i64) (result i64) (local.get 0) (i64.load offset=256)) - ) - `; - - const instance = await instantiate(wat, {}, { memory64: true }); - const e = instance.exports; - - for (let i = 0; i < wasmTestLoopCount; i++) { - e.i32_store(0n, 0x12345678); - assert.eq(e.i32_load(0n), 0x12345678); - - e.i64_store(0n, 0x123456789ABCDEF0n); - assert.eq(e.i64_load(0n), 0x123456789ABCDEF0n); - - e.i32_store(8n, 42); - assert.eq(e.i32_load(8n), 42); - - // Test with a larger base address - e.i32_store(128n, 0xDEADBEEF); - assert.eq(e.i32_load(128n), 0xDEADBEEF | 0); - } -} - -// Test atomics with shared memory (memory32) and a large offset to exercise -// the offset metadata path. -{ - const offset = 256; - - let wat = ` - (module - (memory (export "memory") 1 1 shared) - - ;; Non-atomic helpers (for setup/verification) - (func (export "i32_store") (param i32 i32) (local.get 0) (local.get 1) (i32.store offset=${offset})) - (func (export "i32_load") (param i32) (result i32) (local.get 0) (i32.load offset=${offset})) - (func (export "i64_store") (param i32 i64) (local.get 0) (local.get 1) (i64.store offset=${offset})) - (func (export "i64_load") (param i32) (result i64) (local.get 0) (i64.load offset=${offset})) - - ;; Atomic loads with offset - (func (export "test_i32_atomic_load") (param i32) (result i32) (local.get 0) (i32.atomic.load offset=${offset})) - (func (export "test_i64_atomic_load") (param i32) (result i64) (local.get 0) (i64.atomic.load offset=${offset})) - (func (export "test_i32_atomic_load8_u") (param i32) (result i32) (local.get 0) (i32.atomic.load8_u offset=${offset})) - (func (export "test_i32_atomic_load16_u") (param i32) (result i32) (local.get 0) (i32.atomic.load16_u offset=${offset})) - (func (export "test_i64_atomic_load8_u") (param i32) (result i64) (local.get 0) (i64.atomic.load8_u offset=${offset})) - (func (export "test_i64_atomic_load16_u") (param i32) (result i64) (local.get 0) (i64.atomic.load16_u offset=${offset})) - (func (export "test_i64_atomic_load32_u") (param i32) (result i64) (local.get 0) (i64.atomic.load32_u offset=${offset})) - - ;; Atomic stores with offset - (func (export "test_i32_atomic_store") (param i32 i32) (local.get 0) (local.get 1) (i32.atomic.store offset=${offset})) - (func (export "test_i64_atomic_store") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store offset=${offset})) - (func (export "test_i32_atomic_store8") (param i32 i32) (local.get 0) (local.get 1) (i32.atomic.store8 offset=${offset})) - (func (export "test_i32_atomic_store16") (param i32 i32) (local.get 0) (local.get 1) (i32.atomic.store16 offset=${offset})) - (func (export "test_i64_atomic_store8") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store8 offset=${offset})) - (func (export "test_i64_atomic_store16") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store16 offset=${offset})) - (func (export "test_i64_atomic_store32") (param i32 i64) (local.get 0) (local.get 1) (i64.atomic.store32 offset=${offset})) - - ;; Atomic RMW add with offset - (func (export "test_i32_atomic_rmw_add") (param i32 i32) (result i32) (local.get 0) (local.get 1) (i32.atomic.rmw.add offset=${offset})) - (func (export "test_i64_atomic_rmw_add") (param i32 i64) (result i64) (local.get 0) (local.get 1) (i64.atomic.rmw.add offset=${offset})) - - ;; Atomic RMW cmpxchg with offset - (func (export "test_i32_atomic_rmw_cmpxchg") (param i32 i32 i32) (result i32) (local.get 0) (local.get 1) (local.get 2) (i32.atomic.rmw.cmpxchg offset=${offset})) - (func (export "test_i64_atomic_rmw_cmpxchg") (param i32 i64 i64) (result i64) (local.get 0) (local.get 1) (local.get 2) (i64.atomic.rmw.cmpxchg offset=${offset})) - - ;; Atomic notify with offset - (func (export "test_memory_atomic_notify") (param i32 i32) (result i32) (local.get 0) (local.get 1) (memory.atomic.notify offset=${offset})) - ) - `; - - const instance = await instantiate(wat, {}, { threads: true }); - const e = instance.exports; - - function clear() { - e.i64_store(0, 0n); - e.i64_store(8, 0n); - } - - for (let i = 0; i < wasmTestLoopCount; i++) { - // === Atomic Loads === - clear(); - e.i64_store(0, 0x7766554433221142n); - - assert.eq(e.test_i32_atomic_load(0), 0x33221142); - assert.eq(e.test_i64_atomic_load(0), 0x7766554433221142n); - assert.eq(e.test_i32_atomic_load8_u(0), 0x42); - assert.eq(e.test_i32_atomic_load16_u(0), 0x1142); - assert.eq(e.test_i64_atomic_load8_u(0), 0x42n); - assert.eq(e.test_i64_atomic_load16_u(0), 0x1142n); - assert.eq(e.test_i64_atomic_load32_u(0), 0x33221142n); - - // === Atomic Stores === - clear(); - e.test_i32_atomic_store(0, 0x12345678); - assert.eq(e.i32_load(0), 0x12345678); - - clear(); - e.test_i64_atomic_store(0, 0x123456789ABCDEF0n); - assert.eq(e.i64_load(0), 0x123456789ABCDEF0n); - - clear(); - e.test_i32_atomic_store8(0, 0x42); - assert.eq(e.i32_load(0), 0x42); - - clear(); - e.test_i32_atomic_store16(0, 0x1234); - assert.eq(e.i32_load(0), 0x1234); - - clear(); - e.test_i64_atomic_store8(0, 0x42n); - assert.eq(e.i64_load(0), 0x42n); - - clear(); - e.test_i64_atomic_store16(0, 0x1234n); - assert.eq(e.i64_load(0), 0x1234n); - - clear(); - e.test_i64_atomic_store32(0, 0x12345678n); - assert.eq(e.i64_load(0), 0x12345678n); - - // === Atomic RMW add === - clear(); - e.i32_store(0, 10); - assert.eq(e.test_i32_atomic_rmw_add(0, 5), 10); // returns old value - assert.eq(e.i32_load(0), 15); - - clear(); - e.i64_store(0, 100n); - assert.eq(e.test_i64_atomic_rmw_add(0, 50n), 100n); - assert.eq(e.i64_load(0), 150n); - - // === Atomic RMW cmpxchg === - clear(); - e.i32_store(0, 42); - assert.eq(e.test_i32_atomic_rmw_cmpxchg(0, 42, 99), 42); // match: swap - assert.eq(e.i32_load(0), 99); - - clear(); - e.i32_store(0, 42); - assert.eq(e.test_i32_atomic_rmw_cmpxchg(0, 0, 99), 42); // no match: no swap - assert.eq(e.i32_load(0), 42); - - clear(); - e.i64_store(0, 42n); - assert.eq(e.test_i64_atomic_rmw_cmpxchg(0, 42n, 99n), 42n); - assert.eq(e.i64_load(0), 99n); - - // === memory.atomic.notify === - clear(); - assert.eq(e.test_memory_atomic_notify(0, 1), 0); // no waiters - } -} diff --git a/JSTests/wasm/stress/wide-arithmetic.js b/JSTests/wasm/stress/wide-arithmetic.js deleted file mode 100644 index 10cd88f54f1d..000000000000 --- a/JSTests/wasm/stress/wide-arithmetic.js +++ /dev/null @@ -1,684 +0,0 @@ -//@ requireOptions("--useWasmWideArithmetic=1") -import * as assert from '../assert.js'; - -// Helper: wasm i64 values are returned as signed BigInt in JS. -// u() converts to BigInt. Use string-form for large values to avoid Number precision loss. -const u = (v) => BigInt(v); - -// ============================================================================ -// Main module (binary format, since wabt doesn't know about wide arithmetic) -// ============================================================================ - -// Module with 4 exported functions: -// i64.add128(i64,i64,i64,i64) -> (i64,i64) -// i64.sub128(i64,i64,i64,i64) -> (i64,i64) -// i64.mul_wide_s(i64,i64) -> (i64,i64) -// i64.mul_wide_u(i64,i64) -> (i64,i64) -const mainBytes = new Uint8Array([ - 0x00, 0x61, 0x73, 0x6d, // magic - 0x01, 0x00, 0x00, 0x00, // version - - // type section - 0x01, 0x11, - 0x02, // 2 types - 0x60, // type0 = func - 0x04, 0x7e, 0x7e, 0x7e, 0x7e, // 4 i64 params - 0x02, 0x7e, 0x7e, // 2 i64 results - 0x60, // type1 = func - 0x02, 0x7e, 0x7e, // 2 i64 params - 0x02, 0x7e, 0x7e, // 2 i64 results - - // function section - 0x03, 0x05, - 0x04, // 4 functions - 0x00, 0x00, 0x01, 0x01, // types: 0, 0, 1, 1 - - // export section - 0x07, 0x3d, - 0x04, // 4 exports - 0x0a, 0x69, 0x36, 0x34, 0x2e, 0x61, 0x64, 0x64, 0x31, 0x32, 0x38, 0x00, 0x00, // "i64.add128" func 0 - 0x0a, 0x69, 0x36, 0x34, 0x2e, 0x73, 0x75, 0x62, 0x31, 0x32, 0x38, 0x00, 0x01, // "i64.sub128" func 1 - 0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x73, 0x00, 0x02, // "i64.mul_wide_s" func 2 - 0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x75, 0x00, 0x03, // "i64.mul_wide_u" func 3 - - // code section - 0x0a, 0x2d, - 0x04, // 4 functions - - // function 0: i64.add128 - 0x0c, // byte length = 12 - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0x20, 0x02, // local.get 2 - 0x20, 0x03, // local.get 3 - 0xfc, 0x13, // i64.add128 - 0x0b, // end - - // function 1: i64.sub128 - 0x0c, // byte length - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0x20, 0x02, // local.get 2 - 0x20, 0x03, // local.get 3 - 0xfc, 0x14, // i64.sub128 - 0x0b, // end - - // function 2: i64.mul_wide_s - 0x08, // byte length - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0xfc, 0x15, // i64.mul_wide_s - 0x0b, // end - - // function 3: i64.mul_wide_u - 0x08, // byte length - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0xfc, 0x16, // i64.mul_wide_u - 0x0b, // end -]); - -function testMain() { - const module = new WebAssembly.Module(mainBytes); - const instance = new WebAssembly.Instance(module); - const add128 = instance.exports["i64.add128"]; - const sub128 = instance.exports["i64.sub128"]; - const mul_wide_s = instance.exports["i64.mul_wide_s"]; - const mul_wide_u = instance.exports["i64.mul_wide_u"]; - - let r; - - for (let iteration = 0; iteration < wasmTestLoopCount; ++iteration) { - - // ==================================================================== - // Simple addition tests - // ==================================================================== - - r = add128(0n, 0n, 0n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = add128(0n, 1n, 1n, 0n); - assert.eq(r[0], 1n); - assert.eq(r[1], 1n); - - r = add128(1n, 0n, -1n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 1n); - - r = add128(1n, 1n, -1n, -1n); - assert.eq(r[0], 0n); - assert.eq(r[1], 1n); - - // ==================================================================== - // Simple subtraction tests - // ==================================================================== - - r = sub128(0n, 0n, 0n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = sub128(0n, 0n, 1n, 0n); - assert.eq(r[0], -1n); - assert.eq(r[1], -1n); - - r = sub128(0n, 1n, 1n, 1n); - assert.eq(r[0], -1n); - assert.eq(r[1], -1n); - - r = sub128(0n, 0n, 1n, 1n); - assert.eq(r[0], -1n); - assert.eq(r[1], -2n); - - // ==================================================================== - // Simple mul_wide tests - // ==================================================================== - - r = mul_wide_s(0n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(0n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_s(1n, 1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1n, 1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 0n); - - r = mul_wide_s(-1n, -1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 0n); - - r = mul_wide_s(-1n, 1n); - assert.eq(r[0], -1n); - assert.eq(r[1], -1n); - - r = mul_wide_u(-1n, 1n); - assert.eq(r[0], -1n); - assert.eq(r[1], 0n); - - // ==================================================================== - // 20 randomly generated test cases for i64.add128 - // ==================================================================== - - r = add128(-2418420703207364752n, -1n, -1n, -1n); - assert.eq(r[0], -2418420703207364753n); - assert.eq(r[1], -1n); - - r = add128(0n, 0n, -4579433644172935106n, -1n); - assert.eq(r[0], -4579433644172935106n); - assert.eq(r[1], -1n); - - r = add128(0n, 0n, 1n, -1n); - assert.eq(r[0], 1n); - assert.eq(r[1], -1n); - - r = add128(1n, 0n, 1n, 0n); - assert.eq(r[0], 2n); - assert.eq(r[1], 0n); - - r = add128(-1n, -1n, -1n, -1n); - assert.eq(r[0], -2n); - assert.eq(r[1], -1n); - - r = add128(0n, -1n, 1n, 0n); - assert.eq(r[0], 1n); - assert.eq(r[1], -1n); - - r = add128(0n, 0n, 0n, -1n); - assert.eq(r[0], 0n); - assert.eq(r[1], -1n); - - r = add128(1n, 0n, -1n, -1n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = add128(0n, 6184727276166606191n, 0n, 1n); - assert.eq(r[0], 0n); - assert.eq(r[1], 6184727276166606192n); - - r = add128(-8434911321912688222n, -1n, 1n, -1n); - assert.eq(r[0], -8434911321912688221n); - assert.eq(r[1], -2n); - - r = add128(1n, -1n, 0n, -1n); - assert.eq(r[0], 1n); - assert.eq(r[1], -2n); - - r = add128(1n, -5148941131328838092n, 0n, 0n); - assert.eq(r[0], 1n); - assert.eq(r[1], -5148941131328838092n); - - r = add128(1n, 1n, 1n, 0n); - assert.eq(r[0], 2n); - assert.eq(r[1], 1n); - - r = add128(-1n, -1n, -3636740005180858631n, -1n); - assert.eq(r[0], -3636740005180858632n); - assert.eq(r[1], -1n); - - r = add128(-5529682780229988275n, -1n, 0n, 0n); - assert.eq(r[0], -5529682780229988275n); - assert.eq(r[1], -1n); - - r = add128(1n, -5381447440966559717n, 1020031372481336745n, 1n); - assert.eq(r[0], 1020031372481336746n); - assert.eq(r[1], -5381447440966559716n); - - r = add128(1n, 1n, 0n, 0n); - assert.eq(r[0], 1n); - assert.eq(r[1], 1n); - - r = add128(-9133888546939907356n, -1n, 1n, 1n); - assert.eq(r[0], -9133888546939907355n); - assert.eq(r[1], 0n); - - r = add128(-4612047512704241719n, -1n, 0n, -1n); - assert.eq(r[0], -4612047512704241719n); - assert.eq(r[1], -2n); - - r = add128(414720966820876428n, -1n, 1n, 0n); - assert.eq(r[0], 414720966820876429n); - assert.eq(r[1], -1n); - - // ==================================================================== - // 20 randomly generated test cases for i64.sub128 - // ==================================================================== - - r = sub128(0n, -2459085471354756766n, -9151153060221070927n, -1n); - assert.eq(r[0], 9151153060221070927n); - assert.eq(r[1], -2459085471354756766n); - - r = sub128(4566502638724063423n, -4282658540409485563n, -6884077310018979971n, -1n); - assert.eq(r[0], -6996164124966508222n); - assert.eq(r[1], -4282658540409485563n); - - r = sub128(1n, 3118380319444903041n, 0n, 3283115686417695443n); - assert.eq(r[0], 1n); - assert.eq(r[1], -164735366972792402n); - - r = sub128(-7208415241680161810n, -1n, 1n, 0n); - assert.eq(r[0], -7208415241680161811n); - assert.eq(r[1], -1n); - - r = sub128(0n, 3944850126731328706n, 1n, 1n); - assert.eq(r[0], -1n); - assert.eq(r[1], 3944850126731328704n); - - r = sub128(1n, -1n, -1n, -1n); - assert.eq(r[0], 2n); - assert.eq(r[1], -1n); - - r = sub128(-1n, -1n, 4855833073346115923n, -6826437637438999645n); - assert.eq(r[0], -4855833073346115924n); - assert.eq(r[1], 6826437637438999644n); - - r = sub128(1n, 0n, -1n, -1n); - assert.eq(r[0], 2n); - assert.eq(r[1], 0n); - - r = sub128(1n, 0n, 1n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = sub128(-1n, -1n, 0n, 0n); - assert.eq(r[0], -1n); - assert.eq(r[1], -1n); - - r = sub128(1n, -1n, -6365475388498096428n, -1n); - assert.eq(r[0], 6365475388498096429n); - assert.eq(r[1], -1n); - - r = sub128(6804238617560992346n, -1n, 0n, -1n); - assert.eq(r[0], 6804238617560992346n); - assert.eq(r[1], 0n); - - r = sub128(0n, 1n, 1n, -7756145513466453619n); - assert.eq(r[0], -1n); - assert.eq(r[1], 7756145513466453619n); - - r = sub128(1n, -1n, 1n, 1n); - assert.eq(r[0], 0n); - assert.eq(r[1], -2n); - - r = sub128(0n, 1n, 1n, 0n); - assert.eq(r[0], -1n); - assert.eq(r[1], 0n); - - r = sub128(1n, 5602881641763648953n, -2110589244314239080n, -1n); - assert.eq(r[0], 2110589244314239081n); - assert.eq(r[1], 5602881641763648953n); - - r = sub128(0n, 1n, -1n, -1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 1n); - - r = sub128(0n, -1n, 3553816990259121806n, -2105235417856431622n); - assert.eq(r[0], -3553816990259121806n); - assert.eq(r[1], 2105235417856431620n); - - r = sub128(1861102705894987245n, 1n, 3713781778534059871n, 1n); - assert.eq(r[0], -1852679072639072626n); - assert.eq(r[1], -1n); - - r = sub128(0n, -1n, 1n, 1832524486821761762n); - assert.eq(r[0], -1n); - assert.eq(r[1], -1832524486821761764n); - - // ==================================================================== - // 20 randomly generated test cases for i64.mul_wide_s - // ==================================================================== - - r = mul_wide_s(1n, 1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 0n); - - r = mul_wide_s(0n, 6287758211025156705n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_s(-6643537319803451357n, 1n); - assert.eq(r[0], -6643537319803451357n); - assert.eq(r[1], -1n); - - r = mul_wide_s(-2483565146858803428n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_s(1n, 1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 0n); - - r = mul_wide_s(-3838951433439430085n, 3471602925362676030n); - assert.eq(r[0], 5186941893001237834n); - assert.eq(r[1], -722475195264825124n); - - r = mul_wide_s(-8262495286814853129n, 7883241869666573970n); - assert.eq(r[0], -8557189786755031842n); - assert.eq(r[1], -3530988912334554469n); - - r = mul_wide_s(4278371902407959701n, 1n); - assert.eq(r[0], 4278371902407959701n); - assert.eq(r[1], 0n); - - r = mul_wide_s(-8852706149487089182n, -1n); - assert.eq(r[0], 8852706149487089182n); - assert.eq(r[1], 0n); - - r = mul_wide_s(1n, -1n); - assert.eq(r[0], -1n); - assert.eq(r[1], -1n); - - r = mul_wide_s(-1n, -4329244561838653387n); - assert.eq(r[0], 4329244561838653387n); - assert.eq(r[1], 0n); - - r = mul_wide_s(-1n, -1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 0n); - - r = mul_wide_s(697896157315764057n, 1n); - assert.eq(r[0], 697896157315764057n); - assert.eq(r[1], 0n); - - r = mul_wide_s(1n, 1n); - assert.eq(r[0], 1n); - assert.eq(r[1], 0n); - - r = mul_wide_s(-1n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_s(0n, -3769664482072947073n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_s(1n, 8414291037346403854n); - assert.eq(r[0], 8414291037346403854n); - assert.eq(r[1], 0n); - - r = mul_wide_s(1n, -1n); - assert.eq(r[0], -1n); - assert.eq(r[1], -1n); - - r = mul_wide_s(5014655679779318485n, -5080037812563681985n); - assert.eq(r[0], 2842857627777395563n); - assert.eq(r[1], -1380983027057486843n); - - r = mul_wide_s(0n, 1n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - // ==================================================================== - // 20 randomly generated test cases for i64.mul_wide_u - // ==================================================================== - - r = mul_wide_u(-4734436040338162711n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(3270597527173764279n, 6636648075495406358n); - assert.eq(r[0], -5430303818902260550n); - assert.eq(r[1], 1176674035141685826n); - - r = mul_wide_u(-7771814344630108151n, 1n); - assert.eq(r[0], -7771814344630108151n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1n, -7864138787704962081n); - assert.eq(r[0], -7864138787704962081n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1n, 518555141550256010n); - assert.eq(r[0], 518555141550256010n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1n, -1n); - assert.eq(r[0], -1n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1118900477321231571n, -1n); - assert.eq(r[0], -1118900477321231571n); - assert.eq(r[1], 1118900477321231570n); - - r = mul_wide_u(-1n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(-5586890671027490027n, 1n); - assert.eq(r[0], -5586890671027490027n); - assert.eq(r[1], 0n); - - r = mul_wide_u(0n, 3603850799751152505n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(-1n, -1n); - assert.eq(r[0], 1n); - assert.eq(r[1], -2n); - - r = mul_wide_u(0n, 1n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(-7344082851774441644n, 3896439839137544024n); - assert.eq(r[0], 5738542512914895072n); - assert.eq(r[1], 2345175459296971666n); - - r = mul_wide_u(0n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(616395976148874061n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - r = mul_wide_u(2810729703362889816n, -1n); - assert.eq(r[0], -2810729703362889816n); - assert.eq(r[1], 2810729703362889815n); - - r = mul_wide_u(1n, -1n); - assert.eq(r[0], -1n); - assert.eq(r[1], 0n); - - r = mul_wide_u(1n, 0n); - assert.eq(r[0], 0n); - assert.eq(r[1], 0n); - - } // end wasmTestLoopCount loop -} - -testMain(); - -// ============================================================================ -// Overlong binary encoding module -// ============================================================================ - -function testOverlongEncoding() { - // This module uses overlong LEB128 encodings for each wide arithmetic - // instruction's opcode, which must be accepted per the spec. - const bytes = new Uint8Array([ - 0x00, 0x61, 0x73, 0x6d, // magic: \0asm - 0x01, 0x00, 0x00, 0x00, // version: 1 - - // type section, 17 bytes - 0x01, 0x11, - 0x02, // 2 types - 0x60, // type0 = function - 0x04, 0x7e, 0x7e, 0x7e, 0x7e, // 4 params - all i64 - 0x02, 0x7e, 0x7e, // 2 results - both i64 - 0x60, // type1 = function - 0x02, 0x7e, 0x7e, // 2 params - both i64 - 0x02, 0x7e, 0x7e, // 2 results - both i64 - - // function section, 5 bytes - 0x03, 0x05, - 0x04, // 4 functions - 0x00, 0x00, 0x01, 0x01, // types: 0, 0, 1, 1 - - // export section, 0x3d bytes - 0x07, 0x3d, - 0x04, // 4 exports - 0x0a, 0x69, 0x36, 0x34, 0x2e, 0x61, 0x64, 0x64, 0x31, 0x32, 0x38, 0x00, 0x00, // "i64.add128" func 0 - 0x0a, 0x69, 0x36, 0x34, 0x2e, 0x73, 0x75, 0x62, 0x31, 0x32, 0x38, 0x00, 0x01, // "i64.sub128" func 1 - 0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x73, 0x00, 0x02, // "i64.mul_wide_s" func 2 - 0x0e, 0x69, 0x36, 0x34, 0x2e, 0x6d, 0x75, 0x6c, 0x5f, 0x77, 0x69, 0x64, 0x65, 0x5f, 0x75, 0x00, 0x03, // "i64.mul_wide_u" func 3 - - // code section - 0x0a, 0x37, - 0x04, // 4 functions - - // function 0: i64.add128 with overlong encoding (0xfc 0x93 0x80 0x00) - 0x0e, // byte length - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0x20, 0x02, // local.get 2 - 0x20, 0x03, // local.get 3 - 0xfc, 0x93, 0x80, 0x00, // i64.add128 (overlong) - 0x0b, // end - - // function 1: i64.sub128 with overlong encoding (0xfc 0x94 0x00) - 0x0d, // byte length - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0x20, 0x02, // local.get 2 - 0x20, 0x03, // local.get 3 - 0xfc, 0x94, 0x00, // i64.sub128 (overlong) - 0x0b, // end - - // function 2: i64.mul_wide_s with overlong encoding (0xfc 0x95 0x80 0x80 0x80 0x00) - 0x0c, // byte length - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0xfc, 0x95, 0x80, 0x80, 0x80, 0x00, // i64.mul_wide_s (overlong) - 0x0b, // end - - // function 3: i64.mul_wide_u with overlong encoding (0xfc 0x96 0x80 0x80 0x00) - 0x0b, // byte length - 0x00, // no locals - 0x20, 0x00, // local.get 0 - 0x20, 0x01, // local.get 1 - 0xfc, 0x96, 0x80, 0x80, 0x00, // i64.mul_wide_u (overlong) - 0x0b, // end - ]); - - const module = new WebAssembly.Module(bytes); - const instance = new WebAssembly.Instance(module); - const add128 = instance.exports["i64.add128"]; - const sub128 = instance.exports["i64.sub128"]; - const mul_wide_s = instance.exports["i64.mul_wide_s"]; - const mul_wide_u = instance.exports["i64.mul_wide_u"]; - - let r; - - for (let iteration = 0; iteration < wasmTestLoopCount; ++iteration) { - r = add128(1n, 2n, 3n, 4n); - assert.eq(r[0], 4n); - assert.eq(r[1], 6n); - - r = sub128(2n, 5n, 1n, 2n); - assert.eq(r[0], 1n); - assert.eq(r[1], 3n); - - r = mul_wide_s(1n, -2n); - assert.eq(r[0], -2n); - assert.eq(r[1], -1n); - - r = mul_wide_u(3n, 2n); - assert.eq(r[0], 6n); - assert.eq(r[1], 0n); - } // end wasmTestLoopCount loop -} - -testOverlongEncoding(); - -// ============================================================================ -// assert_invalid tests: type mismatches (binary format) -// ============================================================================ - -// Helper: build a minimal wasm module with one function -// typeParams/typeResults are arrays of wasm valtype bytes (0x7e = i64) -// bodyLocals is the local.get sequence, opcode is the wide arith opcode -function makeInvalidModule(typeParams, typeResults, bodyGetCount, opcodeBytes) { - // Type section - const funcType = [0x60, typeParams.length, ...typeParams, typeResults.length, ...typeResults]; - const typeSection = [0x01, funcType.length + 1, 0x01, ...funcType]; - - // Function section - const funcSection = [0x03, 0x02, 0x01, 0x00]; - - // Code section - const bodyGets = []; - for (let i = 0; i < bodyGetCount; i++) - bodyGets.push(0x20, i); - const bodyContent = [0x00, ...bodyGets, ...opcodeBytes, 0x0b]; - const codeSection = [0x0a, bodyContent.length + 2, 0x01, bodyContent.length, ...bodyContent]; - - return new Uint8Array([ - 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, - ...typeSection, ...funcSection, ...codeSection - ]); -} - -const i64 = 0x7e; - -// i64.add128: too few results (1 instead of 2) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64, i64, i64, i64], [i64], 4, [0xfc, 0x13]) -), WebAssembly.CompileError, ""); - -// i64.add128: too few params (3 instead of 4) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64, i64, i64], [i64, i64], 3, [0xfc, 0x13]) -), WebAssembly.CompileError, ""); - -// i64.sub128: too few results (1 instead of 2) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64, i64, i64, i64], [i64], 4, [0xfc, 0x14]) -), WebAssembly.CompileError, ""); - -// i64.sub128: too few params (3 instead of 4) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64, i64, i64], [i64, i64], 3, [0xfc, 0x14]) -), WebAssembly.CompileError, ""); - -// i64.mul_wide_s: too few results (1 instead of 2) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64, i64], [i64], 2, [0xfc, 0x15]) -), WebAssembly.CompileError, ""); - -// i64.mul_wide_s: too few params (1 instead of 2) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64], [i64, i64], 1, [0xfc, 0x15]) -), WebAssembly.CompileError, ""); - -// i64.mul_wide_u: too few results (1 instead of 2) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64, i64], [i64], 2, [0xfc, 0x16]) -), WebAssembly.CompileError, ""); - -// i64.mul_wide_u: too few params (1 instead of 2) -assert.throws(() => new WebAssembly.Module( - makeInvalidModule([i64], [i64, i64], 1, [0xfc, 0x16]) -), WebAssembly.CompileError, ""); - diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h index 0edcc51ae075..35ec179dfb35 100644 --- a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h +++ b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h @@ -299,16 +299,6 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler { add64AndSetFlags(imm, dest, dest); } - void add64AndSetFlags(RegisterID a, RegisterID b, RegisterID dest) - { - m_assembler.add<64, S>(dest, a, b); - } - - void addCarry64(RegisterID a, RegisterID b, RegisterID dest) - { - m_assembler.adc<64>(dest, a, b); - } - void add64(TrustedImm64 imm, RegisterID dest) { add64(imm, dest, dest); @@ -1607,16 +1597,6 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler { } } - void sub64AndSetFlags(RegisterID a, RegisterID b, RegisterID dest) - { - m_assembler.sub<64, S>(dest, a, b); - } - - void subBorrow64(RegisterID a, RegisterID b, RegisterID dest) - { - m_assembler.sbc<64>(dest, a, b); - } - void urshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest) { m_assembler.lsr<32>(dest, src, shiftAmount); diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h b/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h index 84750f015d59..a20d0e9a4cd3 100644 --- a/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h +++ b/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h @@ -5067,17 +5067,7 @@ class MacroAssemblerX86_64 : public AbstractMacroAssembler { { m_assembler.addq_rr(src, dest); } - - void addCarry64(RegisterID src, RegisterID dest) - { - m_assembler.adcq_rr(src, dest); - } - - void subBorrow64(RegisterID src, RegisterID dest) - { - m_assembler.sbbq_rr(src, dest); - } - + void add64(Address src, RegisterID dest) { m_assembler.addq_mr(src.offset, src.base, dest); diff --git a/Source/JavaScriptCore/assembler/X86Assembler.h b/Source/JavaScriptCore/assembler/X86Assembler.h index ac8c02ee61d9..4c4d0e2f2412 100644 --- a/Source/JavaScriptCore/assembler/X86Assembler.h +++ b/Source/JavaScriptCore/assembler/X86Assembler.h @@ -175,8 +175,6 @@ class X86Assembler { OP_OR_GvEv = 0x0B, OP_OR_EAXIv = 0x0D, OP_2BYTE_ESCAPE = 0x0F, - OP_ADC_EvGv = 0x11, - OP_SBB_EvGv = 0x19, OP_AND_EvGb = 0x20, OP_AND_EvGv = 0x21, OP_AND_GvEv = 0x23, @@ -507,7 +505,6 @@ class X86Assembler { GROUP1_OP_ADD = 0, GROUP1_OP_OR = 1, GROUP1_OP_ADC = 2, - GROUP1_OP_SBB = 3, GROUP1_OP_AND = 4, GROUP1_OP_SUB = 5, GROUP1_OP_XOR = 6, @@ -1400,16 +1397,6 @@ class X86Assembler { m_formatter.oneByteOp64(OP_SUB_EvGv, src, dst); } - void adcq_rr(RegisterID src, RegisterID dst) - { - m_formatter.oneByteOp64(OP_ADC_EvGv, src, dst); - } - - void sbbq_rr(RegisterID src, RegisterID dst) - { - m_formatter.oneByteOp64(OP_SBB_EvGv, src, dst); - } - void subq_mr(int offset, RegisterID base, RegisterID dst) { m_formatter.oneByteOp64(OP_SUB_GvEv, dst, base, offset); diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm index a4307e69f32c..74697f1525a1 100644 --- a/Source/JavaScriptCore/llint/InPlaceInterpreter.asm +++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.asm @@ -50,12 +50,11 @@ # - MC: (Metadata Counter) IPInt's metadata pointer. This records the corresponding position in generated metadata. # - WI: (Wasm Instance) pointer to the current JSWebAssemblyInstance object. This is used for accessing # function-specific data (callee-save). +# - PL: (Pointer to Locals) pointer to the address of local 0 in the current function. This is used for accessing +# locals quickly. # - MB: (Memory Base) pointer to the current Wasm memory base address (callee-save). # - BC: (Bounds Check) the size of the current Wasm memory region, for bounds checking (callee-save). # -# Locals are accessed at a constant offset from CFR: -# local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize -# # Finally, we provide four "sc" (safe for call) registers which are guaranteed to not overlap with argument # registers (sc0, sc1, sc2, sc3) @@ -68,6 +67,7 @@ const alignMInt = constexpr JSC::IPInt::alignMInt if ARM64 or ARM64E const PC = csr7 const MC = csr6 + const PL = t6 # Wasm Pinned Registers const WI = csr0 @@ -81,6 +81,7 @@ if ARM64 or ARM64E elsif X86_64 const PC = csr2 const MC = csr1 + const PL = t5 # Wasm Pinned Registers const WI = csr0 @@ -94,6 +95,7 @@ elsif X86_64 elsif RISCV64 const PC = csr7 const MC = csr6 + const PL = csr10 # Wasm Pinned Registers const WI = csr0 @@ -107,6 +109,7 @@ elsif RISCV64 elsif ARMv7 const PC = csr1 const MC = t6 + const PL = t7 # Wasm Pinned Registers const WI = csr0 @@ -120,6 +123,7 @@ elsif ARMv7 else const PC = invalidGPR const MC = invalidGPR + const PL = invalidGPR # Wasm Pinned Registers const WI = invalidGPR @@ -167,9 +171,6 @@ const WasmToJSIPIntReturnPCSlot = constexpr Wasm::WasmToJSIPIntReturnPCSlot const IPIntCalleeSaveSpaceAsVirtualRegisters = constexpr Wasm::numberOfIPIntCalleeSaveRegisters + constexpr Wasm::numberOfIPIntInternalRegisters const IPIntCalleeSaveSpaceStackAligned = (IPIntCalleeSaveSpaceAsVirtualRegisters * SlotSize + StackAlignment - 1) & ~StackAlignmentMask -# Offset from CFR to local[0]: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize -const IPIntLocalsBaseOffset = IPIntCalleeSaveSpaceStackAligned + LocalSize - # Must match GPRInfo.h if X86_64 const NumberOfWasmArgumentGPRs = 6 @@ -235,112 +236,32 @@ macro advanceMCByReg(amount) addp amount, MC end -macro decodeLEBVarUInt(dst, cursor, scratch1, scratch2) - loadb [cursor], dst - addp 1, cursor - bbb dst, 0x80, .done - andq 0x7f, dst - move 7, scratch1 - validateOpcodeConfig(scratch2) -.loop: - loadb [cursor], scratch2 - addp 1, cursor - bbb scratch2, 0x80, .lastByte - andq 0x7f, scratch2 - lshiftq scratch1, scratch2 - orq scratch2, dst - addq 7, scratch1 - jmp .loop -.lastByte: - # bit 7 already 0, no AND needed - lshiftq scratch1, scratch2 - orq scratch2, dst -.done: -end - -macro decodeLEBVarSInt32(dst, cursor, scratch1, scratch2) - loadb [cursor], dst - addp 1, cursor - bbb dst, 0x80, .singleByte - andq 0x7f, dst - move 7, scratch1 - validateOpcodeConfig(scratch2) -.loop: - loadb [cursor], scratch2 - addp 1, cursor - bbb scratch2, 0x80, .lastByte - andq 0x7f, scratch2 - lshiftq scratch1, scratch2 - orq scratch2, dst - addq 7, scratch1 - jmp .loop -.lastByte: - # bit 7 already 0, no AND needed - # Check sign bit (0x40) BEFORE shifting - btiz scratch2, 0x40, .noSignExtend - lshiftq scratch1, scratch2 - ori scratch2, dst # Ensure output is always upper zero-cleared. - addq 7, scratch1 - # sign extend if shift < 32 - bigteq scratch1, 32, .done - move -1, scratch2 - lshiftq scratch1, scratch2 - ori scratch2, dst # Ensure output is always upper zero-cleared. - jmp .done -.noSignExtend: - lshiftq scratch1, scratch2 - ori scratch2, dst # Ensure output is always upper zero-cleared. - jmp .done -.singleByte: - lshifti 25, dst - rshifti 25, dst -.done: -end - -macro decodeLEBVarSInt64(dst, cursor, scratch1, scratch2) - loadb [cursor], dst - addp 1, cursor - bbb dst, 0x80, .singleByte - andq 0x7f, dst +macro decodeLEBVarUInt32(offset, dst, scratch1, scratch2, scratch3, scratch4) + # if it's a single byte, fastpath it + const tempPC = scratch4 + leap offset[PC], tempPC + loadb [tempPC], dst + + bbb dst, 0x80, .fastpath + # otherwise, set up for second iteration + # next shift is 7 move 7, scratch1 + # take off high bit + subi 0x80, dst validateOpcodeConfig(scratch2) .loop: - loadb [cursor], scratch2 - addp 1, cursor - bbb scratch2, 0x80, .lastByte - andq 0x7f, scratch2 - lshiftq scratch1, scratch2 - orq scratch2, dst - addq 7, scratch1 - jmp .loop -.lastByte: - # bit 7 already 0, no AND needed - # Check sign bit (0x40) BEFORE shifting - btiz scratch2, 0x40, .noSignExtend - lshiftq scratch1, scratch2 - orq scratch2, dst - addq 7, scratch1 - # sign extend if shift < 64 - bigteq scratch1, 64, .done - move -1, scratch2 - lshiftq scratch1, scratch2 - orq scratch2, dst - jmp .done -.noSignExtend: - lshiftq scratch1, scratch2 - orq scratch2, dst - jmp .done -.singleByte: - lshiftq 57, dst - rshiftq 57, dst -.done: -end - -macro skipLEB128(cursor, scratch) -.loop: - loadb [cursor], scratch - addp 1, cursor - bbaeq scratch, 0x80, .loop + addp 1, tempPC + loadb [tempPC], scratch2 + # scratch3 = high bit 7 + # leave scratch2 with low bits 6-0 + move 0x80, scratch3 + andi scratch2, scratch3 + xori scratch3, scratch2 + lshifti scratch1, scratch2 + addi 7, scratch1 + ori scratch2, dst + bbneq scratch3, 0, .loop +.fastpath: end macro checkStackOverflow(callee, scratch) @@ -386,6 +307,12 @@ macro instructionLabel(instrname) _ipint%instrname%: end +macro slowPathLabel(instrname) + aligned _ipint%instrname%_slow_path_validate alignIPInt + _ipint%instrname%_slow_path_validate: + _ipint%instrname%_slow_path: +end + macro unimplementedInstruction(instrname) instructionLabel(instrname) validateOpcodeConfig(a0) @@ -458,14 +385,18 @@ macro operationCall(fn) move wasmInstance, a0 push PC, MC if ARM64 or ARM64E - # Save ws0 with padding for 16-byte alignment (PC+MC=16, ws0+pad=16, total=32) - subp MachineRegisterSize * 2, sp - storep ws0, [sp] + push PL, ws0 + elsif X86_64 + push PL + # preserve 16 byte alignment. + subq MachineRegisterSize, sp end fn() if ARM64 or ARM64E - loadp [sp], ws0 - addp MachineRegisterSize * 2, sp + pop ws0, PL + elsif X86_64 + addq MachineRegisterSize, sp + pop PL end pop MC, PC end @@ -477,8 +408,11 @@ macro operationCallMayThrowImpl(fn, sizeOfExtraRegistersPreserved) move wasmInstance, a0 push PC, MC if ARM64 or ARM64E - # Save ws0 with padding for 16-byte alignment (PC+MC=16, ws0+ws0=16, total=32) - push ws0, ws0 + push PL, ws0 + elsif X86_64 + push PL + # preserve 16 byte alignment. + subq MachineRegisterSize, sp end fn() bpneq r1, (constexpr JSC::IPInt::SlowPathExceptionTag), .continuation @@ -488,15 +422,15 @@ macro operationCallMayThrowImpl(fn, sizeOfExtraRegistersPreserved) move cfr, a1 move sp, a2 operationCall(macro() cCall3(_ipint_extern_handle_debugger_trap_if_needed) end) - addp sizeOfExtraRegistersPreserved + (4 * MachineRegisterSize), sp - elsif X86_64 - addp sizeOfExtraRegistersPreserved + (2 * MachineRegisterSize), sp end + addp sizeOfExtraRegistersPreserved + (4 * MachineRegisterSize), sp jmp _wasm_throw_from_slow_path_trampoline .continuation: if ARM64 or ARM64E - loadp [sp], ws0 - addp MachineRegisterSize * 2, sp + pop ws0, PL + elsif X86_64 + addq MachineRegisterSize, sp + pop PL end pop MC, PC end @@ -576,7 +510,7 @@ if JIT and not ARMv7 move PC, a2 # Add 1 to the index due to WTF::UncheckedKeyHashMap not supporting 0 as a key addq 1, a2 - move sp, a3 + move PL, a3 operationCall(macro() cCall4(_ipint_extern_loop_osr) end) btpz r1, .recover restoreIPIntRegisters() @@ -1269,9 +1203,9 @@ end macro handleDebuggerTrapIfNeeded() push PC, MC - push ws0, ws0 # sp[0]=ws0 (unused), sp[1]=ws0 (IPIntCallee*), sp[2]=PC, sp[3]=MC + push PL, ws0 # sp[0]=PL, sp[1]=ws0 (IPIntCallee*), sp[2]=PC, sp[3]=MC move cfr, a1 - move sp, a2 # a2 = pointer to saved [ws0, ws0, PC, MC] + move sp, a2 # a2 = pointer to saved [PL, ws0, PC, MC] operationCall(macro() cCall3(_ipint_extern_handle_debugger_trap_if_needed) end) addp 4 * MachineRegisterSize, sp end @@ -1320,7 +1254,7 @@ end) op(wasm_throw_from_fault_handler_trampoline_reg_instance, macro () # enableWasmDebugger disables BBQ/OMG, so this trampoline is only # reached from IPInt when the debugger is active. The signal handler only patches - # the machine PC, so IPInt registers (PC, MC, ws0, cfr) are still live. + # the machine PC, so IPInt registers (PC, MC, PL, ws0, cfr) are still live. # Exception type comes from instance->m_exception; copy to CFR slot for handle_debugger_trap_if_needed. loadi JSWebAssemblyInstance::m_exception[wasmInstance], t0 storei t0, ArgumentCountIncludingThis + PayloadOffset[cfr] @@ -1375,6 +1309,7 @@ end operationCall(macro() cCall2(_ipint_extern_prepare_function_body) end) move r0, ws0 + move sp, PL loadp Wasm::IPIntCallee::m_bytecode[ws0], PC loadp Wasm::IPIntCallee::m_metadata + VectorBufferOffset[ws0], MC @@ -1423,9 +1358,18 @@ end loadp Wasm::IPIntCallee::m_metadata + VectorBufferOffset[ws0], t1 addp t1, MC - # Recompute SP from catch metadata. [MC] contains localSizeToAlloc + stackValues. - # Add rethrowSlots to get the total frame size below callee-save space. - loadi Wasm::IPIntCallee::m_numRethrowSlotsToAlloc[ws0], t1 + # Recompute PL + if ARM64 or ARM64E + loadpairi Wasm::IPIntCallee::m_localSizeToAlloc[ws0], t0, t1 + else + loadi Wasm::IPIntCallee::m_numRethrowSlotsToAlloc[ws0], t1 + loadi Wasm::IPIntCallee::m_localSizeToAlloc[ws0], t0 + end + addp t1, t0 + mulp LocalSize, t0 + addp IPIntCalleeSaveSpaceStackAligned, t0 + subp cfr, t0, PL + loadi [MC], t0 addp t1, t0 mulp StackValueSize, t0 @@ -1448,7 +1392,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64) move cfr, a1 move sp, a2 - operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end) + move PL, a3 + operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end) ipintReloadMemory() advanceMC(4) @@ -1464,7 +1409,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64) move cfr, a1 move 0, a2 - operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end) + move PL, a3 + operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end) ipintReloadMemory() advanceMC(4) @@ -1482,7 +1428,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) move cfr, a1 move sp, a2 - operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end) + move PL, a3 + operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end) ipintReloadMemory() advanceMC(4) @@ -1500,7 +1447,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) move cfr, a1 move sp, a2 - operationCall(macro() cCall3(_ipint_extern_retrieve_clear_and_push_exception_and_arguments) end) + move PL, a3 + operationCall(macro() cCall4(_ipint_extern_retrieve_clear_and_push_exception_and_arguments) end) ipintReloadMemory() advanceMC(4) @@ -1518,7 +1466,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) move cfr, a1 move 0, a2 - operationCall(macro() cCall3(_ipint_extern_retrieve_and_clear_exception) end) + move PL, a3 + operationCall(macro() cCall4(_ipint_extern_retrieve_and_clear_exception) end) ipintReloadMemory() advanceMC(4) @@ -1536,7 +1485,8 @@ if WEBASSEMBLY and (ARM64 or ARM64E or X86_64 or ARMv7) move cfr, a1 move sp, a2 - operationCall(macro() cCall3(_ipint_extern_retrieve_clear_and_push_exception) end) + move PL, a3 + operationCall(macro() cCall4(_ipint_extern_retrieve_clear_and_push_exception) end) ipintReloadMemory() advanceMC(4) diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp b/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp index e513fe49d189..a14c01138994 100644 --- a/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp +++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.cpp @@ -63,6 +63,7 @@ do { \ #define VALIDATE_IPINT_SIMD_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_simd_v128_load_mem_validate, alignIPInt, opcode, name) #define VALIDATE_IPINT_ATOMIC_OPCODE(opcode, name) VALIDATE_IPINT_ATOMIC_OPCODE_FROM_BASE(ipint_memory_atomic_notify_atomic_validate, alignAtomicIPInt, opcode, name) #define VALIDATE_IPINT_ARGUMINT_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_argumINT_a0_validate, alignArgumInt, opcode, name) +#define VALIDATE_IPINT_SLOW_PATH(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_local_get_slow_path_validate, alignIPInt, opcode, name) #define VALIDATE_IPINT_MINT_CALL_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_mint_a0_validate, alignMInt, opcode, name) #define VALIDATE_IPINT_MINT_RETURN_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_mint_r0_validate, alignMInt, opcode, name) #define VALIDATE_IPINT_UINT_OPCODE(opcode, name) VALIDATE_IPINT_OPCODE_FROM_BASE(ipint_uint_r0_validate, alignUInt, opcode, name) @@ -91,6 +92,7 @@ void initialize() FOR_EACH_IPINT_ATOMIC_OPCODE(VALIDATE_IPINT_ATOMIC_OPCODE); FOR_EACH_IPINT_ARGUMINT_OPCODE(VALIDATE_IPINT_ARGUMINT_OPCODE); + FOR_EACH_IPINT_SLOW_PATH(VALIDATE_IPINT_SLOW_PATH); FOR_EACH_IPINT_MINT_CALL_OPCODE(VALIDATE_IPINT_MINT_CALL_OPCODE); FOR_EACH_IPINT_MINT_RETURN_OPCODE(VALIDATE_IPINT_MINT_RETURN_OPCODE); FOR_EACH_IPINT_UINT_OPCODE(VALIDATE_IPINT_UINT_OPCODE); diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter.h b/Source/JavaScriptCore/llint/InPlaceInterpreter.h index 7e3e2793710b..6d6d95024df9 100644 --- a/Source/JavaScriptCore/llint/InPlaceInterpreter.h +++ b/Source/JavaScriptCore/llint/InPlaceInterpreter.h @@ -711,6 +711,11 @@ extern "C" void SYSV_ABI ipint_entry(); m(0x11, argumINT_stack_vector) \ m(0x12, argumINT_end) \ +#define FOR_EACH_IPINT_SLOW_PATH(m) \ + m(0x00, local_get_slow_path) \ + m(0x01, local_set_slow_path) \ + m(0x02, local_tee_slow_path) \ + #define FOR_EACH_IPINT_MINT_CALL_OPCODE(m) \ m(0x00, mint_a0) \ m(0x01, mint_a1) \ @@ -790,6 +795,7 @@ FOR_EACH_IPINT_CONVERSION_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); FOR_EACH_IPINT_SIMD_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); FOR_EACH_IPINT_ATOMIC_OPCODE(IPINT_ATOMIC_VALIDATE_DEFINE_FUNCTION); FOR_EACH_IPINT_ARGUMINT_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); +FOR_EACH_IPINT_SLOW_PATH(IPINT_VALIDATE_DEFINE_FUNCTION); FOR_EACH_IPINT_MINT_CALL_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); FOR_EACH_IPINT_MINT_RETURN_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); FOR_EACH_IPINT_UINT_OPCODE(IPINT_VALIDATE_DEFINE_FUNCTION); diff --git a/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm b/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm index 0d2a20cd338e..653b6a343537 100644 --- a/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm +++ b/Source/JavaScriptCore/llint/InPlaceInterpreter64.asm @@ -180,21 +180,15 @@ macro ipintEntry() end mulp LocalSize, argumINTEnd mulp LocalSize, argumINTTmp - # Allocate locals first (closest to CFR) + subp argumINTEnd, sp + move sp, argumINTEnd subp argumINTTmp, sp move sp, argumINTDsp - # Allocate rethrow slots below locals - subp argumINTEnd, sp - # argumINTEnd = boundary for zero-init loop. Handlers write [argumINTDst] then subp, - # so after localSizeToAlloc handlers, argumINTDst = argumINTDsp - LocalSize. - move argumINTDsp, argumINTEnd - subp LocalSize, argumINTEnd loadp Wasm::IPIntCallee::m_argumINTBytecode + VectorBufferOffset[ws0], MC push argumINTTmp, argumINTDst, argumINTSrc, argumINTEnd - # Start writing at local[0] = CFR - IPIntLocalsBaseOffset, going downward - leap -IPIntLocalsBaseOffset[cfr], argumINTDst + move argumINTDsp, argumINTDst leap FirstArgumentOffset[cfr], argumINTSrc validateOpcodeConfig(argumINTTmp) @@ -216,7 +210,7 @@ end end macro argumINTInitializeDefaultLocals() - # zero out remaining locals (argumINTDst moves downward toward argumINTEnd) + # zero out remaining locals bpeq argumINTDst, argumINTEnd, .ipint_entry_finish_zero loadb [MC], argumINTTmp addp 1, MC @@ -229,7 +223,7 @@ elsif X86_64 storep argumINTTmp, [argumINTDst] storep 0, 8[argumINTDst] end - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst end macro argumINTFinish() @@ -241,7 +235,7 @@ end ############################# ipintOp(_unreachable, macro() - jmp _ipint_throw_Unreachable + handleDebuggerTrapIfNeededAndThrowWasmTrap(Unreachable) end) ipintOp(_nop, macro() @@ -362,14 +356,15 @@ ipintOp(_rethrow, macro() copyCalleeSavesToEntryFrameCalleeSavesBuffer(t0) move cfr, a1 - loadi IPInt::RethrowMetadata::tryDepth[MC], a2 - operationCall(macro() cCall3(_ipint_extern_rethrow_exception) end) + move PL, a2 + loadi IPInt::RethrowMetadata::tryDepth[MC], a3 + operationCall(macro() cCall4(_ipint_extern_rethrow_exception) end) jumpToException() end) ipintOp(_throw_ref, macro() popQuad(a2) - bieq a2, ValueNull, _ipint_throw_NullExnrefReference + bieq a2, ValueNull, .throw_null_ref saveCallSiteIndex() @@ -380,6 +375,9 @@ ipintOp(_throw_ref, macro() move cfr, a1 operationCall(macro() cCall3(_ipint_extern_throw_ref) end) jumpToException() + +.throw_null_ref: + handleDebuggerTrapIfNeededAndThrowWasmTrap(NullExnrefReference) end) macro uintDispatch() @@ -745,10 +743,9 @@ end) ################################### macro localGetPostDecode() - # Index into locals: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize + # Index into locals mulq LocalSize, t0 - subp cfr, t0, t0 - loadv -IPIntLocalsBaseOffset[t0], v0 + loadv [PL, t0], v0 # Push to stack pushVec(v0) nextIPIntInstruction() @@ -758,17 +755,16 @@ ipintOp(_local_get, macro() # local.get loadb 1[PC], t0 advancePC(2) - bbaeq t0, 0x80, .ipint_local_get_slow_path + bbaeq t0, 128, _ipint_local_get_slow_path localGetPostDecode() end) macro localSetPostDecode() # Pop from stack popVec(v0) - # Store to locals: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize + # Store to locals mulq LocalSize, t0 - subp cfr, t0, t0 - storev v0, -IPIntLocalsBaseOffset[t0] + storev v0, [PL, t0] nextIPIntInstruction() end @@ -776,17 +772,16 @@ ipintOp(_local_set, macro() # local.set loadb 1[PC], t0 advancePC(2) - bbaeq t0, 0x80, .ipint_local_set_slow_path + bbaeq t0, 128, _ipint_local_set_slow_path localSetPostDecode() end) macro localTeePostDecode() # Load from stack loadv [sp], v0 - # Store to locals: local[i] = CFR - IPIntLocalsBaseOffset - i * LocalSize + # Store to locals mulq LocalSize, t0 - subp cfr, t0, t0 - storev v0, -IPIntLocalsBaseOffset[t0] + storev v0, [PL, t0] nextIPIntInstruction() end @@ -794,7 +789,7 @@ ipintOp(_local_tee, macro() # local.tee loadb 1[PC], t0 advancePC(2) - bbaeq t0, 0x80, .ipint_local_tee_slow_path + bbaeq t0, 128, _ipint_local_tee_slow_path localTeePostDecode() end) @@ -867,40 +862,44 @@ end) ipintOp(_table_get, macro() # Load pre-computed index from metadata - loadi IPInt::TableAccessMetadata::index[MC], a1 + loadi IPInt::Const32Metadata::value[MC], a1 popInt32(a2) operationCallMayThrow(macro() cCall3(_ipint_extern_table_get) end) pushQuad(r0) - loadb IPInt::TableAccessMetadata::instructionLength[MC], t0 + loadb IPInt::Const32Metadata::instructionLength[MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::TableAccessMetadata))) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_table_set, macro() # Load pre-computed index from metadata - loadi IPInt::TableAccessMetadata::index[MC], a1 + loadi IPInt::Const32Metadata::value[MC], a1 popQuad(a3) popInt32(a2) operationCallMayThrow(macro() cCall4(_ipint_extern_table_set) end) - loadb IPInt::TableAccessMetadata::instructionLength[MC], t0 + loadb IPInt::Const32Metadata::instructionLength[MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::TableAccessMetadata))) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) reservedOpcode(0x27) -macro popMemoryIndex(reg) - popInt64(reg) # Note that popInt32 and popInt64 are same implementation. - btbnz JSWebAssemblyInstance::m_cachedIsMemory64[wasmInstance], .done - zxi2q reg, reg +macro popMemoryIndex(reg, tmp) + loadb JSWebAssemblyInstance::m_cachedIsMemory64[wasmInstance], tmp + btiz tmp, .memory32 + popInt64(reg) + jmp .done +.memory32: + popInt32(reg) + ori 0, reg .done: end @@ -910,52 +909,33 @@ macro baddpc(src, dst, label) bpb dst, src, label # unsigned overflow check end +macro memoryOpAdvanceMCAndMakePointer(instrLenReg, wasmAddrReg, size, scratch, scratch2) + # overwrites wasmAddrReg with computed pointer -macro loadStoreMakePointerFast(alignAccess, offsetAccess, wasmAddrReg, size, scratch, scratch2, slowLabel) - # overwrites wasmAddrReg with computed pointer. - # Fast path: alignment byte < 0x40 (single-byte, no multi-memory), - # and offset byte < 0x80 (single-byte). Memory index is 0. - # alignAccess/offsetAccess are memory access patterns for the memarg bytes. - # For non-SIMD: pass (1[PC], 2[PC]). For SIMD: pass ([t4], 1[t4]). - - # Check alignment byte: if >= 0x40, it's multi-memory or unusual alignment - loadb alignAccess, scratch2 # alignment/flags byte - bbaeq scratch2, 0x40, slowLabel - loadb offsetAccess, scratch # offset byte - bbaeq scratch, 0x80, slowLabel - - # Both single-byte, memory index = 0. scratch = offset value. - baddpc(scratch, wasmAddrReg, _ipint_throw_OutOfBoundsMemoryAccess) - move size - 1, scratch2 - baddpc(wasmAddrReg, scratch2, _ipint_throw_OutOfBoundsMemoryAccess) - - bpaeq scratch2, boundsCheckingSize, _ipint_throw_OutOfBoundsMemoryAccess # scratch2 contains wasm address + size - 1 - addp memoryBase, wasmAddrReg -end + loadb JSWebAssemblyInstance::m_cachedIsMemory64[wasmInstance], scratch + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata -# Note: wasmAddrReg (t0) is set by the handler's popMemoryIndex before branching here. -# For store ops, the data register (t3 for int, ft0 for float) is also set by the handler. -macro loadStoreMakePointerSlow(cursor, wasmAddrReg, size, scratch, scratch2, decodeScratch1, decodeScratch2) - # 1. Decode flags/alignment, check multi-memory bit - decodeLEBVarUInt(scratch, cursor, decodeScratch1, decodeScratch2) + btiz scratch, .memory32 + loadq memoryIndexSize + IPInt::Const64Metadata::value[MC], instrLenReg # reuse instrLenReg to store offset + baddpc(instrLenReg, wasmAddrReg, .outOfBounds) # wasmAddrReg contains address + offset + loadb memoryIndexSize + IPInt::Const64Metadata::instructionLength[MC], instrLenReg + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], scratch # scratch contains memory index now + advanceMC(memoryIndexSize + sizeof IPInt::Const64Metadata) + jmp .commonMemoryCalculations - # 2. If multi-memory, decode memory index; otherwise 0 - btiz scratch, 0x40, .memoryIndex0 - decodeLEBVarUInt(scratch, cursor, decodeScratch1, decodeScratch2) - jmp .decodeOffset -.memoryIndex0: - move 0, scratch +.memory32: + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], instrLenReg # reuse instrLenReg to store offset + baddpc(instrLenReg, wasmAddrReg, .outOfBounds) # wasmAddrReg contains address + offset + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], instrLenReg + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], scratch # scratch contains memory index now + advanceMC(memoryIndexSize + sizeof IPInt::Const32Metadata) -.decodeOffset: - # 3. Decode offset - decodeLEBVarUInt(scratch2, cursor, decodeScratch1, decodeScratch2) - - baddpc(scratch2, wasmAddrReg, _ipint_throw_OutOfBoundsMemoryAccess) +.commonMemoryCalculations: move size - 1, scratch2 - baddpc(wasmAddrReg, scratch2, _ipint_throw_OutOfBoundsMemoryAccess) + baddpc(wasmAddrReg, scratch2, .outOfBounds) btinz scratch, .memoryIsNotZero - bpaeq scratch2, boundsCheckingSize, _ipint_throw_OutOfBoundsMemoryAccess # scratch2 contains wasm address + size - 1 + bpaeq scratch2, boundsCheckingSize, .outOfBounds # scratch2 contains wasm address + size - 1 addp memoryBase, wasmAddrReg jmp .done @@ -963,183 +943,199 @@ macro loadStoreMakePointerSlow(cursor, wasmAddrReg, size, scratch, scratch2, dec mulp constexpr (sizeof(JSWebAssemblyInstance::WasmMemoryBaseAndSize)), scratch # FIXME: it's probably worth trying to use a loadpair here, but that requires a separate x86 codepath loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0) + sizeof(void*))) [wasmInstance, scratch], scratch2 # bounds checking size - subp size - 1, scratch2 # wasmAddrReg + (size-1) >= scratch2 is equivalent to wasmAddrReg >= scratch2 - (size-1) - bpaeq wasmAddrReg, scratch2, _ipint_throw_OutOfBoundsMemoryAccess + bpaeq wasmAddrReg, scratch2, .outOfBounds loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0))) [wasmInstance, scratch], scratch2 # memory base addp scratch2, wasmAddrReg + jmp .done + +.outOfBounds: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess) .done: end ipintOp(_i32_load_mem, macro() # i32.load # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i32_load_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # load memory location loadi [t0], t1 pushInt32(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i64_load_mem, macro() # i32.load # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_i64_load_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) # load memory location loadq [t0], t1 pushInt64(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_f32_load_mem, macro() # f32.load # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_f32_load_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) # load memory location loadf [t0], ft0 pushFloat32(ft0) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_f64_load_mem, macro() # f64.load # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_f64_load_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) # load memory location loadd [t0], ft0 pushFloat64(ft0) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i32_load8s_mem, macro() # i32.load8_s # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i32_load8s_mem_slow_path) - loadbsi [t0], t1 + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + loadb [t0], t1 + sxb2i t1, t1 pushInt32(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i32_load8u_mem, macro() # i32.load8_u # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i32_load8u_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + # load memory location loadb [t0], t1 pushInt32(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i32_load16s_mem, macro() # i32.load16_s # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i32_load16s_mem_slow_path) - loadhsi [t0], t1 + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + # load memory location + loadh [t0], t1 + sxh2i t1, t1 pushInt32(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i32_load16u_mem, macro() # i32.load16_u # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i32_load16u_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + # load memory location loadh [t0], t1 pushInt32(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i64_load8s_mem, macro() # i64.load8_s # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i64_load8s_mem_slow_path) - loadbsq [t0], t1 + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + # load memory location + loadb [t0], t1 + sxb2q t1, t1 pushInt64(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i64_load8u_mem, macro() # i64.load8_u # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i64_load8u_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + # load memory location loadb [t0], t1 pushInt64(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i64_load16s_mem, macro() # i64.load16_s # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i64_load16s_mem_slow_path) - loadhsq [t0], t1 + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + # load memory location + loadh [t0], t1 + sxh2q t1, t1 pushInt64(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i64_load16u_mem, macro() # i64.load16_u # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i64_load16u_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + # load memory location loadh [t0], t1 pushInt64(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i64_load32s_mem, macro() # i64.load32_s # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i64_load32s_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + # load memory location loadi [t0], t1 sxi2q t1, t1 pushInt64(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_i64_load32u_mem, macro() # i64.load8_s # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i64_load32u_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + # load memory location loadi [t0], t1 pushInt64(t1) - advancePC(3) + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1148,10 +1144,12 @@ ipintOp(_i32_store_mem, macro() # pop data popInt32(t3) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i32_store_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + # load memory location storei t3, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1160,10 +1158,12 @@ ipintOp(_i64_store_mem, macro() # pop data popInt64(t3) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_i64_store_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + # load memory location storeq t3, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1172,10 +1172,12 @@ ipintOp(_f32_store_mem, macro() # pop data popFloat32(ft0) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_f32_store_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + # load memory location storef ft0, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1184,10 +1186,12 @@ ipintOp(_f64_store_mem, macro() # pop data popFloat64(ft0) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 8, t1, t2, .ipint_f64_store_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + # load memory location stored ft0, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1196,10 +1200,12 @@ ipintOp(_i32_store8_mem, macro() # pop data popInt32(t3) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i32_store8_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + # load memory location storeb t3, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1208,10 +1214,12 @@ ipintOp(_i32_store16_mem, macro() # pop data popInt32(t3) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i32_store16_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + # load memory location storeh t3, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1220,10 +1228,12 @@ ipintOp(_i64_store8_mem, macro() # pop data popInt64(t3) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 1, t1, t2, .ipint_i64_store8_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + # load memory location storeb t3, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1232,10 +1242,12 @@ ipintOp(_i64_store16_mem, macro() # pop data popInt64(t3) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 2, t1, t2, .ipint_i64_store16_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + # load memory location storeh t3, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) @@ -1244,16 +1256,18 @@ ipintOp(_i64_store32_mem, macro() # pop data popInt64(t3) # pop index - popMemoryIndex(t0) - loadStoreMakePointerFast(1[PC], 2[PC], t0, 4, t1, t2, .ipint_i64_store32_mem_slow_path) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + # load memory location storei t3, [t0] - advancePC(3) + + advancePCByReg(t4) nextIPIntInstruction() end) ipintOp(_memory_size, macro() - loadb IPInt::MemorySizeMetadata::memoryIndex[MC], t0 - advanceMC(constexpr (sizeof(IPInt::MemorySizeMetadata))) + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0 + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata))) btinz t0, .callMemorySize loadp constexpr (JSWebAssemblyInstance::offsetOfCachedMemory0Size())[wasmInstance], t0 # size of memory 0 jmp .doneLoadingMemorySize @@ -1271,8 +1285,8 @@ end) ipintOp(_memory_grow, macro() popInt32(a1) - loadb IPInt::MemoryGrowMetadata::memoryIndex[MC], a2 - advanceMC(constexpr (sizeof(IPInt::MemoryGrowMetadata))) + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a2 + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata))) operationCall(macro() cCall3(_ipint_extern_memory_grow) end) pushInt32(r0) ipintReloadMemory() @@ -1285,26 +1299,38 @@ end) ################################ ipintOp(_i32_const, macro() - # i32.const - decode signed LEB128 from bytecode + # i32.const + loadb IPInt::InstructionLengthMetadata::length[MC], t1 + bigteq t1, 2, .ipint_i32_const_slowpath loadb 1[PC], t0 - bbaeq t0, 0x80, .ipint_i32_const_slow_path - # single byte: sign extend from 7 bits - lshifti 25, t0 - rshifti 25, t0 + lshiftq 7, t1 + orq t1, t0 + sxb2i t0, t0 pushInt32(t0) advancePC(2) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) + nextIPIntInstruction() +.ipint_i32_const_slowpath: + # Load pre-computed value from metadata + loadi IPInt::Const32Metadata::value[MC], t0 + # Push to stack + pushInt32(t0) + + advancePCByReg(t1) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_i64_const, macro() - # i64.const - decode signed LEB128 from bytecode - loadb 1[PC], t0 - bbaeq t0, 0x80, .ipint_i64_const_slow_path - # single byte: sign extend from 7 bits - lshiftq 57, t0 - rshiftq 57, t0 + # i64.const + # Load pre-computed value from metadata + loadq IPInt::Const64Metadata::value[MC], t0 + # Push to stack pushInt64(t0) - advancePC(2) + loadb IPInt::Const64Metadata::instructionLength[MC], t0 + + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::Const64Metadata))) nextIPIntInstruction() end) @@ -1753,10 +1779,10 @@ ipintOp(_i32_div_s, macro() # i32.div_s popInt32(t1) popInt32(t0) - btiz t1, _ipint_throw_DivisionByZero + btiz t1, .ipint_i32_div_s_throwDivisionByZero bineq t1, -1, .ipint_i32_div_s_safe - bieq t0, constexpr INT32_MIN, _ipint_throw_IntegerOverflow + bieq t0, constexpr INT32_MIN, .ipint_i32_div_s_throwIntegerOverflow .ipint_i32_div_s_safe: if X86_64 @@ -1772,13 +1798,19 @@ ipintOp(_i32_div_s, macro() pushInt32(t0) advancePC(1) nextIPIntInstruction() + +.ipint_i32_div_s_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) + +.ipint_i32_div_s_throwIntegerOverflow: + handleDebuggerTrapIfNeededAndThrowWasmTrap(IntegerOverflow) end) ipintOp(_i32_div_u, macro() # i32.div_u popInt32(t1) popInt32(t0) - btiz t1, _ipint_throw_DivisionByZero + btiz t1, .ipint_i32_div_u_throwDivisionByZero if X86_64 xori t2, t2 @@ -1791,6 +1823,9 @@ ipintOp(_i32_div_u, macro() pushInt32(t0) advancePC(1) nextIPIntInstruction() + +.ipint_i32_div_u_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) end) ipintOp(_i32_rem_s, macro() @@ -1798,7 +1833,7 @@ ipintOp(_i32_rem_s, macro() popInt32(t1) popInt32(t0) - btiz t1, _ipint_throw_DivisionByZero + btiz t1, .ipint_i32_rem_s_throwDivisionByZero bineq t1, -1, .ipint_i32_rem_s_safe bineq t0, constexpr INT32_MIN, .ipint_i32_rem_s_safe @@ -1826,13 +1861,16 @@ ipintOp(_i32_rem_s, macro() pushInt32(t2) advancePC(1) nextIPIntInstruction() + +.ipint_i32_rem_s_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) end) ipintOp(_i32_rem_u, macro() # i32.rem_u popInt32(t1) popInt32(t0) - btiz t1, _ipint_throw_DivisionByZero + btiz t1, .ipint_i32_rem_u_throwDivisionByZero if X86_64 xori t2, t2 @@ -1849,6 +1887,9 @@ ipintOp(_i32_rem_u, macro() pushInt32(t2) advancePC(1) nextIPIntInstruction() + +.ipint_i32_rem_u_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) end) ipintOp(_i32_and, macro() @@ -2010,10 +2051,10 @@ ipintOp(_i64_div_s, macro() # i64.div_s popInt64(t1) popInt64(t0) - btqz t1, _ipint_throw_DivisionByZero + btqz t1, .ipint_i64_div_s_throwDivisionByZero bqneq t1, -1, .ipint_i64_div_s_safe - bqeq t0, constexpr INT64_MIN, _ipint_throw_IntegerOverflow + bqeq t0, constexpr INT64_MIN, .ipint_i64_div_s_throwIntegerOverflow .ipint_i64_div_s_safe: if X86_64 @@ -2029,13 +2070,19 @@ ipintOp(_i64_div_s, macro() pushInt64(t0) advancePC(1) nextIPIntInstruction() + +.ipint_i64_div_s_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) + +.ipint_i64_div_s_throwIntegerOverflow: + handleDebuggerTrapIfNeededAndThrowWasmTrap(IntegerOverflow) end) ipintOp(_i64_div_u, macro() # i64.div_u popInt64(t1) popInt64(t0) - btqz t1, _ipint_throw_DivisionByZero + btqz t1, .ipint_i64_div_u_throwDivisionByZero if X86_64 xorq t2, t2 @@ -2048,6 +2095,9 @@ ipintOp(_i64_div_u, macro() pushInt64(t0) advancePC(1) nextIPIntInstruction() + +.ipint_i64_div_u_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) end) ipintOp(_i64_rem_s, macro() @@ -2055,7 +2105,7 @@ ipintOp(_i64_rem_s, macro() popInt64(t1) popInt64(t0) - btqz t1, _ipint_throw_DivisionByZero + btqz t1, .ipint_i64_rem_s_throwDivisionByZero bqneq t1, -1, .ipint_i64_rem_s_safe bqneq t0, constexpr INT64_MIN, .ipint_i64_rem_s_safe @@ -2083,13 +2133,16 @@ ipintOp(_i64_rem_s, macro() pushInt64(t2) advancePC(1) nextIPIntInstruction() + +.ipint_i64_rem_s_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) end) ipintOp(_i64_rem_u, macro() # i64.rem_u popInt64(t1) popInt64(t0) - btqz t1, _ipint_throw_DivisionByZero + btqz t1, .ipint_i64_rem_u_throwDivisionByZero if X86_64 xorq t2, t2 @@ -2106,6 +2159,9 @@ ipintOp(_i64_rem_u, macro() pushInt64(t2) advancePC(1) nextIPIntInstruction() + +.ipint_i64_rem_u_throwDivisionByZero: + handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) end) ipintOp(_i64_and, macro() @@ -2621,67 +2677,76 @@ ipintOp(_i32_trunc_f32_s, macro() popFloat32(ft0) move 0xcf000000, t0 # INT32_MIN (Note that INT32_MIN - 1.0 in float is the same as INT32_MIN in float). fi2f t0, ft1 - bfltun ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfltun ft0, ft1, .ipint_trunc_i32_f32_s_outOfBoundsTrunc move 0x4f000000, t0 # -INT32_MIN fi2f t0, ft1 - bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfgtequn ft0, ft1, .ipint_trunc_i32_f32_s_outOfBoundsTrunc truncatef2is ft0, t0 pushInt32(t0) advancePC(1) nextIPIntInstruction() +.ipint_trunc_i32_f32_s_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_i32_trunc_f32_u, macro() popFloat32(ft0) move 0xbf800000, t0 # -1.0 fi2f t0, ft1 - bfltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfltequn ft0, ft1, .ipint_trunc_i32_f32_u_outOfBoundsTrunc move 0x4f800000, t0 # INT32_MIN * -2.0 fi2f t0, ft1 - bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfgtequn ft0, ft1, .ipint_trunc_i32_f32_u_outOfBoundsTrunc truncatef2i ft0, t0 pushInt32(t0) advancePC(1) nextIPIntInstruction() +.ipint_trunc_i32_f32_u_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_i32_trunc_f64_s, macro() popFloat64(ft0) move 0xc1e0000000200000, t0 # INT32_MIN - 1.0 fq2d t0, ft1 - bdltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdltequn ft0, ft1, .ipint_trunc_i32_f64_s_outOfBoundsTrunc move 0x41e0000000000000, t0 # -INT32_MIN fq2d t0, ft1 - bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdgtequn ft0, ft1, .ipint_trunc_i32_f64_s_outOfBoundsTrunc truncated2is ft0, t0 pushInt32(t0) advancePC(1) nextIPIntInstruction() +.ipint_trunc_i32_f64_s_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_i32_trunc_f64_u, macro() popFloat64(ft0) move 0xbff0000000000000, t0 # -1.0 fq2d t0, ft1 - bdltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdltequn ft0, ft1, .ipint_trunc_i32_f64_u_outOfBoundsTrunc move 0x41f0000000000000, t0 # INT32_MIN * -2.0 fq2d t0, ft1 - bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdgtequn ft0, ft1, .ipint_trunc_i32_f64_u_outOfBoundsTrunc truncated2i ft0, t0 pushInt32(t0) advancePC(1) nextIPIntInstruction() + +.ipint_trunc_i32_f64_u_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_i64_extend_i32_s, macro() @@ -2706,68 +2771,76 @@ ipintOp(_i64_trunc_f32_s, macro() popFloat32(ft0) move 0xdf000000, t0 # INT64_MIN fi2f t0, ft1 - bfltun ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfltun ft0, ft1, .ipint_trunc_i64_f32_s_outOfBoundsTrunc move 0x5f000000, t0 # -INT64_MIN fi2f t0, ft1 - bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfgtequn ft0, ft1, .ipint_trunc_i64_f32_s_outOfBoundsTrunc truncatef2qs ft0, t0 pushInt64(t0) advancePC(1) nextIPIntInstruction() +.ipint_trunc_i64_f32_s_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_i64_trunc_f32_u, macro() popFloat32(ft0) move 0xbf800000, t0 # -1.0 fi2f t0, ft1 - bfltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfltequn ft0, ft1, .ipint_i64_f32_u_outOfBoundsTrunc move 0x5f800000, t0 # INT64_MIN * -2.0 fi2f t0, ft1 - bfgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bfgtequn ft0, ft1, .ipint_i64_f32_u_outOfBoundsTrunc truncatef2q ft0, t0 pushInt64(t0) advancePC(1) nextIPIntInstruction() +.ipint_i64_f32_u_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_i64_trunc_f64_s, macro() popFloat64(ft0) move 0xc3e0000000000000, t0 # INT64_MIN fq2d t0, ft1 - bdltun ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdltun ft0, ft1, .ipint_i64_f64_s_outOfBoundsTrunc move 0x43e0000000000000, t0 # -INT64_MIN fq2d t0, ft1 - bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdgtequn ft0, ft1, .ipint_i64_f64_s_outOfBoundsTrunc truncated2qs ft0, t0 pushInt64(t0) advancePC(1) nextIPIntInstruction() +.ipint_i64_f64_s_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_i64_trunc_f64_u, macro() popFloat64(ft0) move 0xbff0000000000000, t0 # -1.0 fq2d t0, ft1 - bdltequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdltequn ft0, ft1, .ipint_i64_f64_u_outOfBoundsTrunc move 0x43f0000000000000, t0 # INT64_MIN * -2.0 fq2d t0, ft1 - bdgtequn ft0, ft1, _ipint_throw_OutOfBoundsTrunc + bdgtequn ft0, ft1, .ipint_i64_f64_u_outOfBoundsTrunc truncated2q ft0, t0 pushInt64(t0) advancePC(1) nextIPIntInstruction() +.ipint_i64_f64_u_outOfBoundsTrunc: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) end) ipintOp(_f32_convert_i32_s, macro() @@ -2952,11 +3025,11 @@ reservedOpcode(0xcf) ##################### ipintOp(_ref_null_t, macro() - # Push null value, skip heap type LEB128 in bytecode - move ValueNull, t0 + loadi IPInt::Const32Metadata::value[MC], t0 pushQuad(t0) - leap 1[PC], PC - skipLEB128(PC, t0) + loadb IPInt::Const32Metadata::instructionLength[MC], t0 + advancePC(t0) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) @@ -2969,12 +3042,12 @@ ipintOp(_ref_is_null, macro() end) ipintOp(_ref_func, macro() - loadi IPInt::RefFuncMetadata::index[MC], a1 + loadi IPInt::Const32Metadata::value[MC], a1 operationCall(macro() cCall2(_ipint_extern_ref_func) end) pushQuad(r0) - loadb IPInt::RefFuncMetadata::instructionLength[MC], t0 + loadb IPInt::Const32Metadata::instructionLength[MC], t0 advancePC(t0) - advanceMC(constexpr (sizeof(IPInt::RefFuncMetadata))) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) @@ -2989,9 +3062,11 @@ end) ipintOp(_ref_as_non_null, macro() loadq [sp], t0 - bqeq t0, ValueNull, _ipint_throw_NullRefAsNonNull + bqeq t0, ValueNull, .ref_as_non_null_nullRef advancePC(1) nextIPIntInstruction() +.ref_as_non_null_nullRef: + handleDebuggerTrapIfNeededAndThrowWasmTrap(NullRefAsNonNull) end) ipintOp(_br_on_null, macro() @@ -3061,17 +3136,16 @@ reservedOpcode(0xfa) # the changes should be matched in IPINT_INSTRUCTIONS in Tools/lldb/debug_ipint.py ipintOp(_gc_prefix, macro() - leap 1[PC], t4 - decodeLEBVarUInt(t0, t4, t1, t2) + decodeLEBVarUInt32(1, t0, t1, t2, t3, t4) # Security guarantee: always less than 30 (0x00 -> 0x1e) biaeq t0, 0x1f, .ipint_gc_nonexistent leap _os_script_config_storage, t1 loadp JSC::LLInt::OpcodeConfig::ipint_gc_dispatch_base[t1], t1 if ARM64 or ARM64E - addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 + addlshiftp t1, t0, 8, t0 jmp t0 elsif X86_64 - lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 + lshiftq 8, t0 addq t1, t0 jmp t0 end @@ -3081,17 +3155,16 @@ ipintOp(_gc_prefix, macro() end) ipintOp(_conversion_prefix, macro() - leap 1[PC], t4 - decodeLEBVarUInt(t0, t4, t1, t2) - # Security guarantee: always less than 23 (0x00 -> 0x16) - biaeq t0, 0x17, .ipint_conversion_nonexistent + decodeLEBVarUInt32(1, t0, t1, t2, t3, t4) + # Security guarantee: always less than 18 (0x00 -> 0x11) + biaeq t0, 0x12, .ipint_conversion_nonexistent leap _os_script_config_storage, t1 loadp JSC::LLInt::OpcodeConfig::ipint_conversion_dispatch_base[t1], t1 if ARM64 or ARM64E - addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 + addlshiftp t1, t0, 8, t0 jmp t0 elsif X86_64 - lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 + lshiftq 8, t0 addq t1, t0 jmp t0 end @@ -3101,17 +3174,16 @@ ipintOp(_conversion_prefix, macro() end) ipintOp(_simd_prefix, macro() - leap 1[PC], t4 - decodeLEBVarUInt(t0, t4, t1, t2) + decodeLEBVarUInt32(1, t0, t1, t2, t3, t4) # Security guarantee: always less than 256 (0x00 -> 0xff) biaeq t0, 0x100, .ipint_simd_nonexistent leap _os_script_config_storage, t1 loadp JSC::LLInt::OpcodeConfig::ipint_simd_dispatch_base[t1], t1 if ARM64 or ARM64E - addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 + addlshiftp t1, t0, 8, t0 jmp t0 elsif X86_64 - lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignIPInt))), t0 + lshiftq 8, t0 addq t1, t0 jmp t0 end @@ -3121,17 +3193,16 @@ ipintOp(_simd_prefix, macro() end) ipintOp(_atomic_prefix, macro() - leap 1[PC], t4 - decodeLEBVarUInt(t0, t4, t1, t2) + decodeLEBVarUInt32(1, t0, t1, t2, t3, t4) # Security guarantee: always less than 78 (0x00 -> 0x4e) biaeq t0, 0x4f, .ipint_atomic_nonexistent leap _os_script_config_storage, t1 loadp JSC::LLInt::OpcodeConfig::ipint_atomic_dispatch_base[t1], t1 if ARM64 or ARM64E - addlshiftp t1, t0, (constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt))), t0 + addlshiftp t1, t0, constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt)), t0 jmp t0 elsif X86_64 - lshiftq (constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt))), t0 + lshiftq constexpr (WTF::fastLog2(JSC::IPInt::alignAtomicIPInt)), t0 addq t1, t0 jmp t0 end @@ -3352,13 +3423,16 @@ end) ipintOp(_array_len, macro() popQuad(t0) # array into t0 - bqeq t0, ValueNull, _ipint_throw_NullAccess + bqeq t0, ValueNull, .nullArray loadi JSWebAssemblyArray::m_size[t0], t0 pushInt32(t0) loadb IPInt::InstructionLengthMetadata::length[MC], t0 advancePCByReg(t0) advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() + +.nullArray: + handleDebuggerTrapIfNeededAndThrowWasmTrap(NullAccess) end) ipintOp(_array_fill, macro() @@ -3536,18 +3610,20 @@ end) ipintOp(_i31_get_s, macro() popQuad(t0) - bqeq t0, ValueNull, _ipint_throw_NullI31Get + bqeq t0, ValueNull, .i31_get_throw pushInt32(t0) loadb IPInt::InstructionLengthMetadata::length[MC], t0 advancePCByReg(t0) advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() +.i31_get_throw: + handleDebuggerTrapIfNeededAndThrowWasmTrap(NullI31Get) end) ipintOp(_i31_get_u, macro() popQuad(t0) - bqeq t0, ValueNull, _ipint_throw_NullI31Get + bqeq t0, ValueNull, .i31_get_throw andq 0x7fffffff, t0 pushInt32(t0) @@ -3555,6 +3631,8 @@ ipintOp(_i31_get_u, macro() advancePCByReg(t0) advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() +.i31_get_throw: + handleDebuggerTrapIfNeededAndThrowWasmTrap(NullI31Get) end) ############################# @@ -3834,56 +3912,60 @@ end) ipintOp(_memory_init, macro() # memory.init - loadb IPInt::MemoryInitMetadata::memoryIndex[MC], a3 + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a3 + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata))) move sp, a2 - loadi IPInt::MemoryInitMetadata::dataIndex[MC], a1 + loadi 1[MC], a1 operationCallMayThrow(macro() cCall4(_ipint_extern_memory_init) end) addq 3 * StackValueSize, sp - loadb IPInt::MemoryInitMetadata::instructionLength[MC], t0 + loadb [MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::MemoryInitMetadata))) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) # xxx check nextIPIntInstruction() end) ipintOp(_data_drop, macro() # data.drop - loadi IPInt::DataAccessMetadata::index[MC], a1 + loadi 1[MC], a1 operationCall(macro() cCall2(_ipint_extern_data_drop) end) - loadb IPInt::DataAccessMetadata::instructionLength[MC], t0 + loadb [MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::DataAccessMetadata))) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) # xxx check nextIPIntInstruction() end) ipintOp(_memory_copy, macro() # memory.copy - loadb IPInt::MemoryCopyMetadata::dstMemoryIndex[MC], a1 + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a1 + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata))) pushQuad(a1) - loadb IPInt::MemoryCopyMetadata::srcMemoryIndex[MC], a1 + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a1 + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata))) pushQuad(a1) move sp, a1 # starting at top of stack: src memory index, dst memory index, n, s, d operationCallMayThrow(macro() cCall2(_ipint_extern_memory_copy) end) addq 5 * StackValueSize, sp - loadb IPInt::MemoryCopyMetadata::instructionLength[MC], t0 + loadb IPInt::InstructionLengthMetadata::length[MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::MemoryCopyMetadata))) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_memory_fill, macro() # memory.fill - loadb IPInt::MemoryFillMetadata::memoryIndex[MC], a1 + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], a1 + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata))) pushQuad(a1) move sp, a1 # starting at top of stack: memory index, n, val, d operationCallMayThrow(macro() cCall2(_ipint_extern_memory_fill) end) addq 4 * StackValueSize, sp - loadb IPInt::MemoryFillMetadata::instructionLength[MC], t0 + loadb IPInt::InstructionLengthMetadata::length[MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::MemoryFillMetadata))) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -3901,11 +3983,11 @@ end) ipintOp(_elem_drop, macro() # elem.drop - loadi IPInt::ElemDropMetadata::index[MC], a1 + loadi IPInt::Const32Metadata::value[MC], a1 operationCall(macro() cCall2(_ipint_extern_elem_drop) end) - loadb IPInt::ElemDropMetadata::instructionLength[MC], t0 + loadb IPInt::Const32Metadata::instructionLength[MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::ElemDropMetadata))) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) @@ -3936,12 +4018,12 @@ end) ipintOp(_table_size, macro() # table.size - loadi IPInt::TableAccessMetadata::index[MC], a1 + loadi IPInt::Const32Metadata::value[MC], a1 operationCall(macro() cCall2(_ipint_extern_table_size) end) pushQuad(r0) - loadb IPInt::TableAccessMetadata::instructionLength[MC], t0 + loadb IPInt::Const32Metadata::instructionLength[MC], t0 advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::TableAccessMetadata))) + advanceMC(constexpr (sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) @@ -3957,347 +4039,232 @@ ipintOp(_table_fill, macro() nextIPIntInstruction() end) -reservedOpcode(misc_0x12) - break - -################################## -## Wide Arithmetic Instructions ## -################################## - -ipintOp(_i64_add128, macro() - # i64.add128: [lhsLo lhsHi rhsLo rhsHi] -> [resultLo resultHi] - # Stack layout (top first): sp[0]=rhsHi, sp[1]=rhsLo, sp[2]=lhsHi, sp[3]=lhsLo - popQuad(t3) # rhsHi - popQuad(t2) # rhsLo - popQuad(t1) # lhsHi - popQuad(t0) # lhsLo - if ARM64 or ARM64E - addqs t0, t2, t0 # resultLo = lhsLo + rhsLo, sets carry flag - adcq t1, t3, t1 # resultHi = lhsHi + rhsHi + carry flag - elsif X86_64 - addq t2, t0 # resultLo = lhsLo + rhsLo, sets carry flag - adcq t3, t1 # resultHi = lhsHi + rhsHi + carry flag - end - pushQuad(t0) - pushQuad(t1) - move t4, PC - nextIPIntInstruction() -end) - -ipintOp(_i64_sub128, macro() - # i64.sub128: [lhsLo lhsHi rhsLo rhsHi] -> [resultLo resultHi] - # Stack layout (top first): sp[0]=rhsHi, sp[1]=rhsLo, sp[2]=lhsHi, sp[3]=lhsLo - popQuad(t3) # rhsHi - popQuad(t2) # rhsLo - popQuad(t1) # lhsHi - popQuad(t0) # lhsLo - if ARM64 or ARM64E - subqs t0, t2, t0 # resultLo = lhsLo - rhsLo, sets carry flag (borrow) - sbcq t1, t3, t1 # resultHi = lhsHi - rhsHi - carry flag - elsif X86_64 - subq t2, t0 # resultLo = lhsLo - rhsLo, sets carry flag (borrow) - sbcq t3, t1 # resultHi = lhsHi - rhsHi - carry flag - end - pushQuad(t0) - pushQuad(t1) - move t4, PC - nextIPIntInstruction() -end) - -ipintOp(_i64_mul_wide_s, macro() - # i64.mul_wide_s: [lhs rhs] -> [resultLo resultHi] - # Stack layout (top first): sp[0]=rhs, sp[1]=lhs - popQuad(t1) # rhs - popQuad(t0) # lhs - if ARM64 or ARM64E - smulhq t0, t1, t2 # resultHi = smulh(lhs, rhs) - must precede mulq - mulq t1, t0 # resultLo = lhs * rhs - elsif X86_64 - # t0 = rax - # t2 = rdx - smulhq t1 # imulq %rsi: rdx:rax = rax * rsi -> t0=resultLo, t2=resultHi - end - pushQuad(t0) - pushQuad(t2) - move t4, PC - nextIPIntInstruction() -end) - -ipintOp(_i64_mul_wide_u, macro() - # i64.mul_wide_u: [lhs rhs] -> [resultLo resultHi] - # Stack layout (top first): sp[0]=rhs, sp[1]=lhs - popQuad(t1) # rhs - popQuad(t0) # lhs - if ARM64 or ARM64E - umulhq t0, t1, t2 # resultHi = umulh(lhs, rhs) - must precede mulq - mulq t1, t0 # resultLo = lhs * rhs - elsif X86_64 - # t0 = rax - # t2 = rdx - umulhq t1 # mulq %rsi: rdx:rax = rax * rsi -> t0=resultLo, t2=resultHi - end - pushQuad(t0) - pushQuad(t2) - move t4, PC - nextIPIntInstruction() -end) - ####################### ## SIMD Instructions ## ####################### -const ImmLaneIdxOffset = 0 # Offset from t4 (points past the decoded SIMD opcode) +const ImmLaneIdxOffset = 2 # Offset in bytecode const ImmLaneIdx16Mask = 0xf const ImmLaneIdx8Mask = 0x7 const ImmLaneIdx4Mask = 0x3 const ImmLaneIdx2Mask = 0x1 -# Platform-specific SIMD load macros (shared between fast and slow paths). -# Input: t0 = host pointer (rax on x86_64). Output: v0 = loaded vector. -# Clobbers: ft0 (ARM64), t1 (splat ops on ARM64). - -macro simdLoad8x8s() - if ARM64 or ARM64E - loadd [t0], ft0 - emit "sxtl v16.8h, v0.8b" - elsif X86_64 - emit "pmovsxbw (%rax), %xmm0" - else - break - end -end - -macro simdLoad8x8u() - if ARM64 or ARM64E - loadd [t0], ft0 - emit "uxtl v16.8h, v0.8b" - elsif X86_64 - emit "pmovzxbw (%rax), %xmm0" - else - break - end -end - -macro simdLoad16x4s() - if ARM64 or ARM64E - loadd [t0], ft0 - emit "sxtl v16.4s, v0.4h" - elsif X86_64 - emit "pmovsxwd (%rax), %xmm0" - else - break - end -end - -macro simdLoad16x4u() - if ARM64 or ARM64E - loadd [t0], ft0 - emit "uxtl v16.4s, v0.4h" - elsif X86_64 - emit "pmovzxwd (%rax), %xmm0" - else - break - end -end - -macro simdLoad32x2s() - if ARM64 or ARM64E - loadd [t0], ft0 - emit "sxtl v16.2d, v0.2s" - elsif X86_64 - emit "pmovsxdq (%rax), %xmm0" - else - break - end -end - -macro simdLoad32x2u() - if ARM64 or ARM64E - loadd [t0], ft0 - emit "uxtl v16.2d, v0.2s" - elsif X86_64 - emit "pmovzxdq (%rax), %xmm0" - else - break - end -end - -macro simdLoadSplat8() - if ARM64 or ARM64E - loadb [t0], t1 - emit "dup v16.16b, w1" - elsif X86_64 - emit "vpinsrb $0, (%rax), %xmm0, %xmm0" - emit "vpxor %xmm1, %xmm1, %xmm1" - emit "vpshufb %xmm1, %xmm0, %xmm0" - else - break - end -end +# 0xFD 0x00 - 0xFD 0x0B: memory -macro simdLoadSplat16() - if ARM64 or ARM64E - loadh [t0], t1 - emit "dup v16.8h, w1" - elsif X86_64 - emit "vpinsrw $0, (%rax), %xmm0, %xmm0" - emit "vpshuflw $0, %xmm0, %xmm0" - emit "vpunpcklqdq %xmm0, %xmm0, %xmm0" - else - break - end -end +# Wrapper for SIMD load/store operations. Places linear address in t0 for memOp() +macro simdMemoryOp(accessSize, memOp) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, accessSize, t1, t2) -macro simdLoadSplat32() - if ARM64 or ARM64E - loadi [t0], t1 - emit "dup v16.4s, w1" - elsif X86_64 - emit "vbroadcastss (%rax), %xmm0" - else - break - end -end + # memOp must not clobber t4 + memOp() -macro simdLoadSplat64() - if ARM64 or ARM64E - loadq [t0], t1 - emit "dup v16.2d, x1" - elsif X86_64 - emit "vmovddup (%rax), %xmm0" - else - break - end + advancePCByReg(t4) + nextIPIntInstruction() end -# 0xFD 0x00 - 0xFD 0x0B: memory - ipintOp(_simd_v128_load_mem, macro() # v128.load - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 16, t1, t2, .simd_v128_load_slow_path) - loadv [t0], v0 - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + simdMemoryOp(16, macro() + loadv [t0], v0 + pushVec(v0) + end) end) ipintOp(_simd_v128_load_8x8s_mem, macro() - # v128.load8x8_s - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_8x8s_slow_path) - simdLoad8x8s() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load8x8_s - load 8 8-bit values, sign-extend each to i16 + simdMemoryOp(8, macro() + if ARM64 or ARM64E + loadd [t0], ft0 + # offlineasm ft0 = ARM v0 + # offlineasm v0 = ARM v16 + emit "sxtl v16.8h, v0.8b" + elsif X86_64 + # t0 is eax + emit "pmovsxbw (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load_8x8u_mem, macro() - # v128.load8x8_u - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_8x8u_slow_path) - simdLoad8x8u() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load8x8_u - load 8 8-bit values, zero-extend each to i16 + simdMemoryOp(8, macro() + if ARM64 or ARM64E + loadd [t0], ft0 + # offlineasm ft0 = ARM v0 + # offlineasm v0 = ARM v16 + emit "uxtl v16.8h, v0.8b" + elsif X86_64 + # t0 is eax + emit "pmovzxbw (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load_16x4s_mem, macro() - # v128.load16x4_s - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_16x4s_slow_path) - simdLoad16x4s() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load16x4_s - load 4 16-bit values, sign-extend each to i32 + simdMemoryOp(8, macro() + if ARM64 or ARM64E + loadd [t0], ft0 + # offlineasm ft0 = ARM v0 + # offlineasm v0 = ARM v16 + emit "sxtl v16.4s, v0.4h" + elsif X86_64 + # t0 is eax + emit "pmovsxwd (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load_16x4u_mem, macro() - # v128.load16x4_u - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_16x4u_slow_path) - simdLoad16x4u() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load16x4_u - load 4 16-bit values, zero-extend each to i32 + simdMemoryOp(8, macro() + if ARM64 or ARM64E + loadd [t0], ft0 + # offlineasm ft0 = ARM v0 + # offlineasm v0 = ARM v16 + emit "uxtl v16.4s, v0.4h" + elsif X86_64 + # t0 is eax + emit "pmovzxwd (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load_32x2s_mem, macro() - # v128.load32x2_s - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_32x2s_slow_path) - simdLoad32x2s() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load32x2_s - load 2 32-bit values, sign-extend each to i64 + simdMemoryOp(8, macro() + if ARM64 or ARM64E + loadd [t0], ft0 + # offlineasm ft0 = ARM v0 + # offlineasm v0 = ARM v16 + emit "sxtl v16.2d, v0.2s" + elsif X86_64 + # t0 is eax + emit "pmovsxdq (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load_32x2u_mem, macro() - # v128.load32x2_u - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load_32x2u_slow_path) - simdLoad32x2u() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load32x2_u - load 2 32-bit values, zero-extend each to i64 + simdMemoryOp(8, macro() + if ARM64 or ARM64E + loadd [t0], ft0 + # offlineasm ft0 = ARM v0 + # offlineasm v0 = ARM v16 + emit "uxtl v16.2d, v0.2s" + elsif X86_64 + # t0 is eax + emit "pmovzxdq (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load8_splat_mem, macro() - # v128.load8_splat - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .simd_v128_load8_splat_slow_path) - simdLoadSplat8() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load8_splat - load 1 8-bit value and splat to all 16 lanes + simdMemoryOp(1, macro() + if ARM64 or ARM64E + loadb [t0], t1 + emit "dup v16.16b, w1" + elsif X86_64 + # t0 is eax + emit "vpinsrb $0, (%rax), %xmm0, %xmm0" + emit "vpxor %xmm1, %xmm1, %xmm1" + emit "vpshufb %xmm1, %xmm0, %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load16_splat_mem, macro() - # v128.load16_splat - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .simd_v128_load16_splat_slow_path) - simdLoadSplat16() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load16_splat - load 1 16-bit value and splat to all 8 lanes + simdMemoryOp(2, macro() + if ARM64 or ARM64E + loadh [t0], t1 + emit "dup v16.8h, w1" + elsif X86_64 + # t0 is eax + emit "vpinsrw $0, (%rax), %xmm0, %xmm0" + emit "vpshuflw $0, %xmm0, %xmm0" + emit "vpunpcklqdq %xmm0, %xmm0, %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load32_splat_mem, macro() - # v128.load32_splat - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_load32_splat_slow_path) - simdLoadSplat32() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load32_splat - load 1 32-bit value and splat to all 4 lanes + simdMemoryOp(4, macro() + if ARM64 or ARM64E + loadi [t0], t1 + emit "dup v16.4s, w1" + elsif X86_64 + # Load and broadcast 32-bit value directly from memory to all 4 dwords + # t0 is eax + emit "vbroadcastss (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_load64_splat_mem, macro() - # v128.load64_splat - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load64_splat_slow_path) - simdLoadSplat64() - pushVec(v0) - leap 2[t4], PC - nextIPIntInstruction() + # v128.load64_splat - load 1 64-bit value and splat to all 2 lanes + simdMemoryOp(8, macro() + if ARM64 or ARM64E + loadq [t0], t1 + emit "dup v16.2d, x1" + elsif X86_64 + # Load and broadcast 64-bit value directly from memory to both qwords + # t0 is eax + emit "vmovddup (%rax), %xmm0" + else + break # Not implemented + end + pushVec(v0) + end) end) ipintOp(_simd_v128_store_mem, macro() # v128.store popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 16, t1, t2, .simd_v128_store_slow_path) - storev v0, [t0] - leap 2[t4], PC - nextIPIntInstruction() + simdMemoryOp(16, macro() + storev v0, [t0] + end) end) # 0xFD 0x0C: v128.const ipintOp(_simd_v128_const, macro() # v128.const - loadv [t4], v0 + loadv 2[PC], v0 pushVec(v0) - leap 16[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4308,7 +4275,7 @@ ipintOp(_simd_i8x16_shuffle, macro() if ARM64 or ARM64E popVec(v1) popVec(v0) - loadv [t4], v2 + loadv ImmLaneIdxOffset[PC], v2 emit "tbl v16.16b, {v16.16b, v17.16b}, v18.16b" pushVec(v0) else @@ -4319,7 +4286,7 @@ ipintOp(_simd_i8x16_shuffle, macro() move 0, t0 .shuffleLoop: - loadb [t4, t0, 1], t1 + loadb ImmLaneIdxOffset[PC, t0, 1], t1 bigt t1, 31, .outOfBounds bigt t1, 15, .useRightVector @@ -4350,7 +4317,9 @@ ipintOp(_simd_i8x16_shuffle, macro() addp 2 * V128ISize, sp # Pop temp result and right vector end - leap 16[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4377,7 +4346,9 @@ ipintOp(_simd_i8x16_swizzle, macro() end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4398,7 +4369,9 @@ ipintOp(_simd_i8x16_splat, macro() end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4418,9 +4391,11 @@ ipintOp(_simd_i16x8_splat, macro() end pushVec(v0) - move t4, PC - nextIPIntInstruction() -end) + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) + nextIPIntInstruction() +end) ipintOp(_simd_i32x4_splat, macro() # i32x4.splat - splat i32 value to all 4 32-bit lanes @@ -4437,7 +4412,9 @@ ipintOp(_simd_i32x4_splat, macro() end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4456,7 +4433,9 @@ ipintOp(_simd_i64x2_splat, macro() end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4474,7 +4453,9 @@ ipintOp(_simd_f32x4_splat, macro() end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4492,156 +4473,186 @@ ipintOp(_simd_f64x2_splat, macro() end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) # 0xFD 0x15 - 0xFD 0x22: extract and replace lanes ipintOp(_simd_i8x16_extract_lane_s, macro() # i8x16.extract_lane_s (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx16Mask, t0 loadbsi [sp, t0], t0 addp V128ISize, sp pushInt32(t0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i8x16_extract_lane_u, macro() # i8x16.extract_lane_u (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx16Mask, t0 loadb [sp, t0], t0 addp V128ISize, sp pushInt32(t0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i8x16_replace_lane, macro() # i8x16.replace_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx16Mask, t0 popInt32(t1) # value to replace with storeb t1, [sp, t0] # replace the byte at lane index - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i16x8_extract_lane_s, macro() # i16x8.extract_lane_s (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx8Mask, t0 loadhsi [sp, t0, 2], t0 addp V128ISize, sp pushInt32(t0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i16x8_extract_lane_u, macro() # i16x8.extract_lane_u (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx8Mask, t0 loadh [sp, t0, 2], t0 addp V128ISize, sp pushInt32(t0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i16x8_replace_lane, macro() # i16x8.replace_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx8Mask, t0 popInt32(t1) # value to replace with storeh t1, [sp, t0, 2] # replace the 16-bit value at lane index - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i32x4_extract_lane, macro() # i32x4.extract_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx4Mask, t0 loadi [sp, t0, 4], t0 addp V128ISize, sp pushInt32(t0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i32x4_replace_lane, macro() # i32x4.replace_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx4Mask, t0 popInt32(t1) # value to replace with storei t1, [sp, t0, 4] # replace the 32-bit value at lane index - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i64x2_extract_lane, macro() # i64x2.extract_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx2Mask, t0 loadq [sp, t0, 8], t0 addp V128ISize, sp pushInt64(t0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_i64x2_replace_lane, macro() # i64x2.replace_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx2Mask, t0 popInt64(t1) # value to replace with storeq t1, [sp, t0, 8] # replace the 64-bit value at lane index - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_f32x4_extract_lane, macro() # f32x4.extract_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx4Mask, t0 loadf [sp, t0, 4], ft0 addp V128ISize, sp pushFloat32(ft0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_f32x4_replace_lane, macro() # f32x4.replace_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx4Mask, t0 popFloat32(ft0) # value to replace with storef ft0, [sp, t0, 4] # replace the 32-bit float at lane index - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_f64x2_extract_lane, macro() # f64x2.extract_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx2Mask, t0 loadd [sp, t0, 8], ft0 addp V128ISize, sp pushFloat64(ft0) - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) ipintOp(_simd_f64x2_replace_lane, macro() # f64x2.replace_lane (lane) - loadb ImmLaneIdxOffset[t4], t0 + loadb ImmLaneIdxOffset[PC], t0 andi ImmLaneIdx2Mask, t0 popFloat64(ft0) # value to replace with stored ft0, [sp, t0, 8] # replace the 64-bit float at lane index - leap 1[t4], PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4658,7 +4669,9 @@ ipintOp(_simd_i8x16_eq, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4679,7 +4692,9 @@ ipintOp(_simd_i8x16_ne, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4697,7 +4712,9 @@ ipintOp(_simd_i8x16_lt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4718,7 +4735,9 @@ ipintOp(_simd_i8x16_lt_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4734,7 +4753,9 @@ ipintOp(_simd_i8x16_gt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4754,7 +4775,9 @@ ipintOp(_simd_i8x16_gt_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4774,7 +4797,9 @@ ipintOp(_simd_i8x16_le_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4793,7 +4818,9 @@ ipintOp(_simd_i8x16_le_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4812,7 +4839,9 @@ ipintOp(_simd_i8x16_ge_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4830,7 +4859,9 @@ ipintOp(_simd_i8x16_ge_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4848,7 +4879,9 @@ ipintOp(_simd_i16x8_eq, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4868,7 +4901,9 @@ ipintOp(_simd_i16x8_ne, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4886,7 +4921,9 @@ ipintOp(_simd_i16x8_lt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4907,7 +4944,9 @@ ipintOp(_simd_i16x8_lt_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4923,7 +4962,9 @@ ipintOp(_simd_i16x8_gt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4943,7 +4984,9 @@ ipintOp(_simd_i16x8_gt_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4963,7 +5006,9 @@ ipintOp(_simd_i16x8_le_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -4982,7 +5027,9 @@ ipintOp(_simd_i16x8_le_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5001,7 +5048,9 @@ ipintOp(_simd_i16x8_ge_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5019,7 +5068,9 @@ ipintOp(_simd_i16x8_ge_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5036,7 +5087,9 @@ ipintOp(_simd_i32x4_eq, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5056,7 +5109,9 @@ ipintOp(_simd_i32x4_ne, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5074,7 +5129,9 @@ ipintOp(_simd_i32x4_lt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5095,7 +5152,9 @@ ipintOp(_simd_i32x4_lt_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5111,7 +5170,9 @@ ipintOp(_simd_i32x4_gt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5131,7 +5192,9 @@ ipintOp(_simd_i32x4_gt_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5151,7 +5214,9 @@ ipintOp(_simd_i32x4_le_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5170,7 +5235,9 @@ ipintOp(_simd_i32x4_le_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5189,7 +5256,9 @@ ipintOp(_simd_i32x4_ge_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5207,7 +5276,9 @@ ipintOp(_simd_i32x4_ge_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5224,7 +5295,9 @@ ipintOp(_simd_f32x4_eq, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5241,7 +5314,9 @@ ipintOp(_simd_f32x4_ne, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5258,7 +5333,9 @@ ipintOp(_simd_f32x4_lt, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5274,7 +5351,9 @@ ipintOp(_simd_f32x4_gt, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5291,7 +5370,9 @@ ipintOp(_simd_f32x4_le, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5307,7 +5388,9 @@ ipintOp(_simd_f32x4_ge, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5324,7 +5407,9 @@ ipintOp(_simd_f64x2_eq, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5341,7 +5426,9 @@ ipintOp(_simd_f64x2_ne, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5358,7 +5445,9 @@ ipintOp(_simd_f64x2_lt, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5374,7 +5463,9 @@ ipintOp(_simd_f64x2_gt, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5391,7 +5482,9 @@ ipintOp(_simd_f64x2_le, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5407,7 +5500,9 @@ ipintOp(_simd_f64x2_ge, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5425,7 +5520,9 @@ ipintOp(_simd_v128_not, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5441,7 +5538,9 @@ ipintOp(_simd_v128_and, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5457,7 +5556,9 @@ ipintOp(_simd_v128_andnot, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5473,7 +5574,9 @@ ipintOp(_simd_v128_or, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5489,7 +5592,9 @@ ipintOp(_simd_v128_xor, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5514,7 +5619,9 @@ ipintOp(_simd_v128_bitselect, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5537,150 +5644,290 @@ ipintOp(_simd_v128_any_true, macro() break # Not implemented end pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) # 0xFD 0x54 - 0xFD 0x5D: v128 load/store lane -# For load_lane: stack is [v128, i32_addr]. Pop addr, do memarg, load from memory, -# read lane index, replace lane in the v128 still on stack. -# For store_lane: stack is [v128, i32_addr]. Pop addr, do memarg, read lane index, -# extract value from v128 on stack, pop v128, store to memory. -# Lane index is the last byte of the instruction, right after the memarg. + +# If simd ops used memoryOpAdvanceMCAndMakePointer the macro would read +# memory index and advance MC and then the handler would read the constant +# and advance MC, so there is a performance optimization here to only +# advance MC once + +macro ipintCheckMemoryBoundAndMakePointer(whichMemory, mem, scratch, size) + # overwrites mem with computed pointer + btiz whichMemory, .checkBounds + # overwrites whichMemory + mulp (constexpr (sizeof(JSWebAssemblyInstance::WasmMemoryBaseAndSize))), whichMemory + addp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0))), whichMemory + addp wasmInstance, whichMemory + loadp [whichMemory], memoryBase + loadp (constexpr (sizeof(void*)))[whichMemory], boundsCheckingSize + move 1, whichMemory # restore base and size registers afterward if using nonzero memory +.checkBounds: + # Memory indices are 32 bit + leap size - 1[mem], scratch + bpb scratch, boundsCheckingSize, .continuation + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess) +.continuation: + addp memoryBase, mem + btiz whichMemory, .done + loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0))) [wasmInstance], memoryBase + loadp (constexpr (JSWebAssemblyInstance::offsetOfCachedMemoryBaseSizePair(0) + sizeof(void*))) [wasmInstance], boundsCheckingSize +.done: +end ipintOp(_simd_v128_load8_lane_mem, macro() + # v128.load8_lane - load 8-bit value from memory and replace lane in existing vector + popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .simd_v128_load8_lane_slow_path) + popMemoryIndex(t0, t2) + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 1) loadb [t0], t0 - loadb 2[t4], t1 + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1 + advancePCByReg(t1) + loadb -1[PC], t1 andi ImmLaneIdx16Mask, t1 + + # Push the result and then replace one lane of the result with the loaded value pushVec(v0) storeb t0, [sp, t1] - leap 3[t4], PC + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_simd_v128_load16_lane_mem, macro() + # v128.load16_lane - load 16-bit value from memory and replace lane in existing vector + popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .simd_v128_load16_lane_slow_path) + popMemoryIndex(t0, t2) + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 2) loadh [t0], t0 - loadb 2[t4], t1 + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1 + advancePCByReg(t1) + loadb -1[PC], t1 andi ImmLaneIdx8Mask, t1 + + # Push the result and then replace one lane of the result with the loaded value pushVec(v0) storeh t0, [sp, t1, 2] - leap 3[t4], PC + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_simd_v128_load32_lane_mem, macro() + # v128.load32_lane - load 32-bit value from memory and replace lane in existing vector + popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_load32_lane_slow_path) + popMemoryIndex(t0, t2) + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 4) loadi [t0], t0 - loadb 2[t4], t1 + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1 + advancePCByReg(t1) + loadb -1[PC], t1 andi ImmLaneIdx4Mask, t1 + + # Push the result and then replace one lane of the result with the loaded value pushVec(v0) storei t0, [sp, t1, 4] - leap 3[t4], PC + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_simd_v128_load64_lane_mem, macro() + # v128.load64_lane - load 64-bit value from memory and replace lane in existing vector + popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load64_lane_slow_path) + popMemoryIndex(t0, t2) + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 8) loadq [t0], t0 - loadb 2[t4], t1 + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t1 + advancePCByReg(t1) + loadb -1[PC], t1 andi ImmLaneIdx2Mask, t1 + + # Push the result and then replace one lane of the result with the loaded value pushVec(v0) storeq t0, [sp, t1, 8] - leap 3[t4], PC + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_simd_v128_store8_lane_mem, macro() - # Stack: [addr, v128] with v128 on top. Pop both, parse memarg, extract lane, store. - popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .simd_v128_store8_lane_slow_path) - loadb 2[t4], t1 + # v128.store8_lane - extract 8-bit value from lane and store to memory + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0 + advancePCByReg(t0) + loadb -1[PC], t1 andi ImmLaneIdx16Mask, t1 - # Extract byte from v0 via temp push - pushVec(v0) - loadb [sp, t1], t1 - addp V128ISize, sp + + loadb [sp, t1], t1 # Load value from lane in vector on stack + addp V128ISize, sp # Pop the vector + + popMemoryIndex(t0, t2) + + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 1) + storeb t1, [t0] - leap 3[t4], PC + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_simd_v128_store16_lane_mem, macro() - popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .simd_v128_store16_lane_slow_path) - loadb 2[t4], t1 + # v128.store16_lane - extract 16-bit value from lane and store to memory + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0 + advancePCByReg(t0) + loadb -1[PC], t1 andi ImmLaneIdx8Mask, t1 - pushVec(v0) - loadh [sp, t1, 2], t1 - addp V128ISize, sp - storeh t1, [t0] - leap 3[t4], PC - nextIPIntInstruction() -end) -ipintOp(_simd_v128_store32_lane_mem, macro() - popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_store32_lane_slow_path) - loadb 2[t4], t1 + loadh [sp, t1, 2], t1 # Load value from lane in vector on stack + addp V128ISize, sp # Pop the vector + + popMemoryIndex(t0, t2) + + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 2) + + storeh t1, [t0] + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) + nextIPIntInstruction() +end) + +ipintOp(_simd_v128_store32_lane_mem, macro() + # v128.store32_lane - extract 32-bit value from lane and store to memory + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0 + advancePCByReg(t0) + loadb -1[PC], t1 andi ImmLaneIdx4Mask, t1 - pushVec(v0) - loadi [sp, t1, 4], t1 - addp V128ISize, sp + + loadi [sp, t1, 4], t1 # Load value from lane in vector on stack + addp V128ISize, sp # Pop the vector + + popMemoryIndex(t0, t2) + + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 4) + storei t1, [t0] - leap 3[t4], PC + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_simd_v128_store64_lane_mem, macro() - popVec(v0) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_store64_lane_slow_path) - loadb 2[t4], t1 + # v128.store64_lane - extract 64-bit value from lane and store to memory + + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t3 + const memoryIndexSize = sizeof IPInt::MemoryIndexMetadata + + # The lane index comes after the variable length memory offset, so find it by + # advancing the PC and loading the byte before the next instruction. + loadb memoryIndexSize + IPInt::Const32Metadata::instructionLength[MC], t0 + advancePCByReg(t0) + loadb -1[PC], t1 andi ImmLaneIdx2Mask, t1 - pushVec(v0) - loadq [sp, t1, 8], t1 - addp V128ISize, sp + + loadq [sp, t1, 8], t1 # Load value from lane in vector on stack + addp V128ISize, sp # Pop the vector + + popMemoryIndex(t0, t2) + loadi memoryIndexSize + IPInt::Const32Metadata::value[MC], t2 + addp t2, t0 + ipintCheckMemoryBoundAndMakePointer(t3, t0, t2, 8) + storeq t1, [t0] - leap 3[t4], PC + + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) nextIPIntInstruction() end) ipintOp(_simd_v128_load32_zero_mem, macro() # v128.load32_zero - load 32-bit value from memory and zero-pad to 128 bits - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .simd_v128_load32_zero_slow_path) - loadi [t0], t0 - subp V128ISize, sp - storei t0, [sp] - storei 0, 4[sp] - storeq 0, 8[sp] - leap 2[t4], PC - nextIPIntInstruction() + simdMemoryOp(4, macro() + loadi [t0], t0 + + subp V128ISize, sp + storei t0, [sp] + storei 0, 4[sp] + storeq 0, 8[sp] + end) end) ipintOp(_simd_v128_load64_zero_mem, macro() # v128.load64_zero - load 64-bit value from memory and zero-pad to 128 bits - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .simd_v128_load64_zero_slow_path) - loadq [t0], t0 - subp V128ISize, sp - storeq t0, [sp] - storeq 0, 8[sp] - leap 2[t4], PC - nextIPIntInstruction() + simdMemoryOp(8, macro() + loadq [t0], t0 + + subp V128ISize, sp + storeq t0, [sp] + storeq 0, 8[sp] + end) end) # 0xFD 0x5E - 0xFD 0x5F: f32x4/f64x2 conversion @@ -5699,7 +5946,9 @@ ipintOp(_simd_f32x4_demote_f64x2_zero, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5714,7 +5963,9 @@ ipintOp(_simd_f64x2_promote_low_f32x4, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5731,7 +5982,9 @@ ipintOp(_simd_i8x16_abs, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5748,7 +6001,9 @@ ipintOp(_simd_i8x16_neg, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5790,7 +6045,9 @@ ipintOp(_simd_i8x16_popcnt, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5815,7 +6072,9 @@ ipintOp(_simd_i8x16_all_true, macro() break # Not implemented end pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5843,7 +6102,9 @@ ipintOp(_simd_i8x16_bitmask, macro() addp V128ISize, sp # Pop the vector pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5861,7 +6122,9 @@ ipintOp(_simd_i8x16_narrow_i16x8_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5879,7 +6142,9 @@ ipintOp(_simd_i8x16_narrow_i16x8_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5896,7 +6161,9 @@ ipintOp(_simd_f32x4_ceil, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5911,7 +6178,9 @@ ipintOp(_simd_f32x4_floor, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5926,7 +6195,9 @@ ipintOp(_simd_f32x4_trunc, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5941,7 +6212,9 @@ ipintOp(_simd_f32x4_nearest, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -5989,7 +6262,9 @@ ipintOp(_simd_i8x16_shl, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6031,7 +6306,9 @@ ipintOp(_simd_i8x16_shr_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6073,7 +6350,9 @@ ipintOp(_simd_i8x16_shr_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6089,7 +6368,9 @@ ipintOp(_simd_i8x16_add, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6105,7 +6386,9 @@ ipintOp(_simd_i8x16_add_sat_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6121,7 +6404,9 @@ ipintOp(_simd_i8x16_add_sat_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6137,7 +6422,9 @@ ipintOp(_simd_i8x16_sub, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6153,7 +6440,9 @@ ipintOp(_simd_i8x16_sub_sat_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6169,7 +6458,9 @@ ipintOp(_simd_i8x16_sub_sat_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6186,7 +6477,9 @@ ipintOp(_simd_f64x2_ceil, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6201,7 +6494,9 @@ ipintOp(_simd_f64x2_floor, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6218,7 +6513,9 @@ ipintOp(_simd_i8x16_min_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6234,7 +6531,9 @@ ipintOp(_simd_i8x16_min_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6250,7 +6549,9 @@ ipintOp(_simd_i8x16_max_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6266,7 +6567,9 @@ ipintOp(_simd_i8x16_max_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6283,7 +6586,9 @@ ipintOp(_simd_f64x2_trunc, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6301,7 +6606,9 @@ ipintOp(_simd_i8x16_avgr_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6321,7 +6628,9 @@ ipintOp(_simd_i16x8_extadd_pairwise_i8x16_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6339,7 +6648,9 @@ ipintOp(_simd_i16x8_extadd_pairwise_i8x16_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6357,7 +6668,9 @@ ipintOp(_simd_i32x4_extadd_pairwise_i16x8_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6374,7 +6687,9 @@ ipintOp(_simd_i32x4_extadd_pairwise_i16x8_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6391,7 +6706,9 @@ ipintOp(_simd_i16x8_abs, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6408,7 +6725,9 @@ ipintOp(_simd_i16x8_neg, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6432,7 +6751,9 @@ ipintOp(_simd_i16x8_q15mulr_sat_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6459,7 +6780,9 @@ ipintOp(_simd_i16x8_all_true, macro() break # Not implemented end pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6487,7 +6810,9 @@ ipintOp(_simd_i16x8_bitmask, macro() addp V128ISize, sp # Pop the vector pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6505,7 +6830,9 @@ ipintOp(_simd_i16x8_narrow_i32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6523,7 +6850,9 @@ ipintOp(_simd_i16x8_narrow_i32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6538,7 +6867,9 @@ ipintOp(_simd_i16x8_extend_low_i8x16_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6555,7 +6886,9 @@ ipintOp(_simd_i16x8_extend_high_i8x16_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6570,7 +6903,9 @@ ipintOp(_simd_i16x8_extend_low_i8x16_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6587,7 +6922,9 @@ ipintOp(_simd_i16x8_extend_high_i8x16_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6612,7 +6949,9 @@ ipintOp(_simd_i16x8_shl, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6639,7 +6978,9 @@ ipintOp(_simd_i16x8_shr_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6664,7 +7005,9 @@ ipintOp(_simd_i16x8_shr_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6680,7 +7023,9 @@ ipintOp(_simd_i16x8_add, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6696,7 +7041,9 @@ ipintOp(_simd_i16x8_add_sat_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6712,7 +7059,9 @@ ipintOp(_simd_i16x8_add_sat_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6728,7 +7077,9 @@ ipintOp(_simd_i16x8_sub, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6744,7 +7095,9 @@ ipintOp(_simd_i16x8_sub_sat_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6760,7 +7113,9 @@ ipintOp(_simd_i16x8_sub_sat_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6777,7 +7132,9 @@ ipintOp(_simd_f64x2_nearest, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6795,7 +7152,9 @@ ipintOp(_simd_i16x8_mul, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6811,7 +7170,9 @@ ipintOp(_simd_i16x8_min_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6827,7 +7188,9 @@ ipintOp(_simd_i16x8_min_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6843,7 +7206,9 @@ ipintOp(_simd_i16x8_max_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6859,12 +7224,13 @@ ipintOp(_simd_i16x8_max_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) reservedOpcode(0xfd9a01) - ipintOp(_simd_i16x8_avgr_u, macro() # i16x8.avgr_u - average of 8 16-bit unsigned integers with rounding popVec(v1) @@ -6877,7 +7243,9 @@ ipintOp(_simd_i16x8_avgr_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6896,7 +7264,9 @@ ipintOp(_simd_i16x8_extmul_low_i8x16_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6917,7 +7287,9 @@ ipintOp(_simd_i16x8_extmul_high_i8x16_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6936,7 +7308,9 @@ ipintOp(_simd_i16x8_extmul_low_i8x16_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6956,7 +7330,9 @@ ipintOp(_simd_i16x8_extmul_high_i8x16_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6973,7 +7349,9 @@ ipintOp(_simd_i32x4_abs, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -6990,7 +7368,9 @@ ipintOp(_simd_i32x4_neg, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7019,7 +7399,9 @@ ipintOp(_simd_i32x4_all_true, macro() break # Not implemented end pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7047,7 +7429,9 @@ ipintOp(_simd_i32x4_bitmask, macro() addp V128ISize, sp # Pop the vector pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7065,7 +7449,9 @@ ipintOp(_simd_i32x4_extend_low_i16x8_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7082,7 +7468,9 @@ ipintOp(_simd_i32x4_extend_high_i16x8_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7097,7 +7485,9 @@ ipintOp(_simd_i32x4_extend_low_i16x8_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7114,7 +7504,9 @@ ipintOp(_simd_i32x4_extend_high_i16x8_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7137,7 +7529,9 @@ ipintOp(_simd_i32x4_shl, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7162,7 +7556,9 @@ ipintOp(_simd_i32x4_shr_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7187,7 +7583,9 @@ ipintOp(_simd_i32x4_shr_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7203,7 +7601,9 @@ ipintOp(_simd_i32x4_add, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7222,7 +7622,9 @@ ipintOp(_simd_i32x4_sub, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7242,7 +7644,9 @@ ipintOp(_simd_i32x4_mul, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7258,7 +7662,9 @@ ipintOp(_simd_i32x4_min_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7274,7 +7680,9 @@ ipintOp(_simd_i32x4_min_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7290,7 +7698,9 @@ ipintOp(_simd_i32x4_max_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7306,7 +7716,9 @@ ipintOp(_simd_i32x4_max_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7327,7 +7739,9 @@ ipintOp(_simd_i32x4_dot_i16x8_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) reservedOpcode(0xfdbb01) @@ -7347,7 +7761,9 @@ ipintOp(_simd_i32x4_extmul_low_i16x8_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7366,7 +7782,9 @@ ipintOp(_simd_i32x4_extmul_high_i16x8_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7385,7 +7803,9 @@ ipintOp(_simd_i32x4_extmul_low_i16x8_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7404,7 +7824,9 @@ ipintOp(_simd_i32x4_extmul_high_i16x8_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7426,7 +7848,9 @@ ipintOp(_simd_i64x2_abs, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7443,7 +7867,9 @@ ipintOp(_simd_i64x2_neg, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7472,7 +7898,9 @@ ipintOp(_simd_i64x2_all_true, macro() break # Not implemented end pushInt32(t0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7502,7 +7930,9 @@ ipintOp(_simd_i64x2_bitmask, macro() .bitmask_i64x2_done: pushInt32(t2) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7520,7 +7950,9 @@ ipintOp(_simd_i64x2_extend_low_i32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7537,7 +7969,9 @@ ipintOp(_simd_i64x2_extend_high_i32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7552,7 +7986,9 @@ ipintOp(_simd_i64x2_extend_low_i32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7569,7 +8005,9 @@ ipintOp(_simd_i64x2_extend_high_i32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7592,7 +8030,9 @@ ipintOp(_simd_i64x2_shl, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7610,7 +8050,9 @@ ipintOp(_simd_i64x2_shr_s, macro() rshiftq t0, t1 storeq t1, [sp] - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7635,7 +8077,9 @@ ipintOp(_simd_i64x2_shr_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7651,7 +8095,9 @@ ipintOp(_simd_i64x2_add, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7670,7 +8116,9 @@ ipintOp(_simd_i64x2_sub, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7695,7 +8143,9 @@ ipintOp(_simd_i64x2_mul, macro() # Pop vector1, result in vector0 addp V128ISize, sp # Remove first vector from stack, leaving result - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7711,7 +8161,9 @@ ipintOp(_simd_i64x2_eq, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7731,7 +8183,9 @@ ipintOp(_simd_i64x2_ne, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7749,7 +8203,9 @@ ipintOp(_simd_i64x2_lt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7765,7 +8221,9 @@ ipintOp(_simd_i64x2_gt_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7785,7 +8243,9 @@ ipintOp(_simd_i64x2_le_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7804,7 +8264,9 @@ ipintOp(_simd_i64x2_ge_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7823,7 +8285,9 @@ ipintOp(_simd_i64x2_extmul_low_i32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7842,7 +8306,9 @@ ipintOp(_simd_i64x2_extmul_high_i32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7861,7 +8327,9 @@ ipintOp(_simd_i64x2_extmul_low_i32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7880,7 +8348,9 @@ ipintOp(_simd_i64x2_extmul_high_i32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7901,7 +8371,9 @@ ipintOp(_simd_f32x4_abs, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7920,7 +8392,9 @@ ipintOp(_simd_f32x4_neg, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7937,7 +8411,9 @@ ipintOp(_simd_f32x4_sqrt, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7953,7 +8429,9 @@ ipintOp(_simd_f32x4_add, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7969,7 +8447,9 @@ ipintOp(_simd_f32x4_sub, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -7985,7 +8465,9 @@ ipintOp(_simd_f32x4_mul, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8001,7 +8483,9 @@ ipintOp(_simd_f32x4_div, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8030,7 +8514,9 @@ ipintOp(_simd_f32x4_min, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8064,7 +8550,9 @@ ipintOp(_simd_f32x4_max, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8084,7 +8572,9 @@ ipintOp(_simd_f32x4_pmin, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8104,7 +8594,9 @@ ipintOp(_simd_f32x4_pmax, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8125,7 +8617,9 @@ ipintOp(_simd_f64x2_abs, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8144,7 +8638,9 @@ ipintOp(_simd_f64x2_neg, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8161,7 +8657,9 @@ ipintOp(_simd_f64x2_sqrt, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8177,7 +8675,9 @@ ipintOp(_simd_f64x2_add, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8193,7 +8693,9 @@ ipintOp(_simd_f64x2_sub, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8209,7 +8711,9 @@ ipintOp(_simd_f64x2_mul, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8225,7 +8729,9 @@ ipintOp(_simd_f64x2_div, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8254,7 +8760,9 @@ ipintOp(_simd_f64x2_min, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8288,7 +8796,9 @@ ipintOp(_simd_f64x2_max, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8308,7 +8818,9 @@ ipintOp(_simd_f64x2_pmin, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8328,7 +8840,9 @@ ipintOp(_simd_f64x2_pmax, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8357,7 +8871,9 @@ ipintOp(_simd_i32x4_trunc_sat_f32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8391,7 +8907,9 @@ ipintOp(_simd_i32x4_trunc_sat_f32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8406,7 +8924,9 @@ ipintOp(_simd_f32x4_convert_i32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8429,7 +8949,9 @@ ipintOp(_simd_f32x4_convert_i32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8458,7 +8980,9 @@ ipintOp(_simd_i32x4_trunc_sat_f64x2_s_zero, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8493,7 +9017,9 @@ ipintOp(_simd_i32x4_trunc_sat_f64x2_u_zero, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8510,7 +9036,9 @@ ipintOp(_simd_f64x2_convert_low_i32x4_s, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8542,7 +9070,9 @@ ipintOp(_simd_f64x2_convert_low_i32x4_u, macro() break # Not implemented end pushVec(v0) - move t4, PC + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) nextIPIntInstruction() end) @@ -8565,2785 +9095,1842 @@ macro checkAlignment8(mem, label) btpnz mem, 7, label end -macro weakCASLoopByte(mem, value, scratch1AndOldValue, scratch2, fn) - validateOpcodeConfig(scratch1AndOldValue) - if X86_64 - loadb [mem], scratch1AndOldValue - .loop: - move scratch1AndOldValue, scratch2 - fn(value, scratch2) - batomicweakcasb scratch1AndOldValue, scratch2, [mem], .loop - else - .loop: - loadlinkacqb [mem], scratch1AndOldValue - fn(value, scratch1AndOldValue, scratch2) - storecondrelb ws2, scratch2, [mem] - bineq ws2, 0, .loop - end -end +ipintAtomicOp(_memory_atomic_notify, macro() + # starting at sp: count, pointer + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0 + pushInt32(t0) + const miMetaSize = sizeof IPInt::MemoryIndexMetadata + loadi miMetaSize + IPInt::Const32Metadata::value[MC], t0 + pushInt32(t0) # offset -macro weakCASLoopHalf(mem, value, scratch1AndOldValue, scratch2, fn) - validateOpcodeConfig(scratch1AndOldValue) - if X86_64 - loadh [mem], scratch1AndOldValue - .loop: - move scratch1AndOldValue, scratch2 - fn(value, scratch2) - batomicweakcash scratch1AndOldValue, scratch2, [mem], .loop - else - .loop: - loadlinkacqh [mem], scratch1AndOldValue - fn(value, scratch1AndOldValue, scratch2) - storecondrelh ws2, scratch2, [mem] - bineq ws2, 0, .loop - end -end + move sp, a1 -macro weakCASLoopInt(mem, value, scratch1AndOldValue, scratch2, fn) - validateOpcodeConfig(scratch1AndOldValue) - if X86_64 - loadi [mem], scratch1AndOldValue - .loop: - move scratch1AndOldValue, scratch2 - fn(value, scratch2) - batomicweakcasi scratch1AndOldValue, scratch2, [mem], .loop - else - .loop: - loadlinkacqi [mem], scratch1AndOldValue - fn(value, scratch1AndOldValue, scratch2) - storecondreli ws2, scratch2, [mem] - bineq ws2, 0, .loop - end -end + operationCall(macro() cCall2(_ipint_extern_memory_atomic_notify) end) + bilt r0, 0, .atomic_notify_throw -macro weakCASLoopQuad(mem, value, scratch1AndOldValue, scratch2, fn) - validateOpcodeConfig(scratch1AndOldValue) - if X86_64 - loadq [mem], scratch1AndOldValue - .loop: - move scratch1AndOldValue, scratch2 - fn(value, scratch2) - batomicweakcasq scratch1AndOldValue, scratch2, [mem], .loop + addq (StackValueSize * 4), sp + + pushInt32(r0) + loadb miMetaSize + IPInt::Const32Metadata::instructionLength[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) + nextIPIntInstruction() + +.atomic_notify_throw: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess) +end) + +ipintAtomicOp(_memory_atomic_wait32, macro() + # starting at sp: timeout, value, pointer + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0 + pushInt32(t0) + loadq (StackValueSize * 3)[sp], t0 + const miMetaSize = sizeof IPInt::MemoryIndexMetadata + loadi miMetaSize + IPInt::Const32Metadata::value[MC], t1 + addq t1, t0 + storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset + + move sp, a1 + + operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait32) end) + bilt r0, 0, .atomic_wait32_throw + + addq (StackValueSize * 4), sp + + pushInt32(r0) + loadb miMetaSize + IPInt::Const32Metadata::instructionLength[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) + nextIPIntInstruction() + +.atomic_wait32_throw: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess) +end) + +ipintAtomicOp(_memory_atomic_wait64, macro() + # starting at sp: timeout, value, pointer + loadb IPInt::MemoryIndexMetadata::memoryIndex[MC], t0 + pushInt32(t0) + loadq (StackValueSize * 3)[sp], t0 + const miMetaSize = sizeof IPInt::MemoryIndexMetadata + loadi miMetaSize + IPInt::Const32Metadata::value[MC], t1 + addq t1, t0 + storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset + + move sp, a1 + + operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait64) end) + bilt r0, 0, .atomic_wait64_throw + + addq (StackValueSize * 4), sp + + pushInt32(r0) + loadb miMetaSize + IPInt::Const32Metadata::instructionLength[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::MemoryIndexMetadata) + sizeof(IPInt::Const32Metadata))) + nextIPIntInstruction() + +.atomic_wait64_throw: + handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess) +end) + +ipintAtomicOp(_atomic_fence, macro() + fence + + loadb IPInt::InstructionLengthMetadata::length[MC], t0 + advancePCByReg(t0) + advanceMC(constexpr (sizeof(IPInt::InstructionLengthMetadata))) + nextIPIntInstruction() +end) + +reservedAtomicOpcode(atomic_0x4) +reservedAtomicOpcode(atomic_0x5) +reservedAtomicOpcode(atomic_0x6) +reservedAtomicOpcode(atomic_0x7) +reservedAtomicOpcode(atomic_0x8) +reservedAtomicOpcode(atomic_0x9) +reservedAtomicOpcode(atomic_0xa) +reservedAtomicOpcode(atomic_0xb) +reservedAtomicOpcode(atomic_0xc) +reservedAtomicOpcode(atomic_0xd) +reservedAtomicOpcode(atomic_0xe) +reservedAtomicOpcode(atomic_0xf) + +ipintAtomicOp(_i32_atomic_load, macro() + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + if ARM64 or ARM64E or X86_64 + atomicloadi [t0], t2 else - .loop: - loadlinkacqq [mem], scratch1AndOldValue - fn(value, scratch1AndOldValue, scratch2) - storecondrelq ws2, scratch2, [mem] - bineq ws2, 0, .loop + error end -end + pushInt32(t2) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicLoad(mem, dst) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) +ipintAtomicOp(_i64_atomic_load, macro() + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) if ARM64 or ARM64E or X86_64 - atomicloadi [mem], dst + atomicloadq [t0], t2 else error end -end + pushInt64(t2) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicLoad(mem, dst) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) +ipintAtomicOp(_i32_atomic_load8_u, macro() + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) if ARM64 or ARM64E or X86_64 - atomicloadq [mem], dst + atomicloadb [t0], t2 else error end -end + pushInt32(t2) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicLoad8(mem, dst) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) +ipintAtomicOp(_i32_atomic_load16_u, macro() + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) if ARM64 or ARM64E or X86_64 - atomicloadb [mem], dst + atomicloadh [t0], t2 else error end -end + pushInt32(t2) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicLoad16(mem, dst) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) +ipintAtomicOp(_i64_atomic_load8_u, macro() + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) if ARM64 or ARM64E or X86_64 - atomicloadh [mem], dst + atomicloadb [t0], t2 else error end -end + pushInt64(t2) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicLoad8(mem, dst) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) +ipintAtomicOp(_i64_atomic_load16_u, macro() + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) if ARM64 or ARM64E or X86_64 - atomicloadb [mem], dst + atomicloadh [t0], t2 else error end -end + pushInt64(t2) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicLoad16(mem, dst) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) +ipintAtomicOp(_i64_atomic_load32_u, macro() + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) if ARM64 or ARM64E or X86_64 - atomicloadh [mem], dst + atomicloadi [t0], t2 else error end + pushInt64(t2) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) + +macro weakCASLoopByte(mem, value, scratch1AndOldValue, scratch2, fn) + validateOpcodeConfig(scratch1AndOldValue) + if X86_64 + loadb [mem], scratch1AndOldValue + .loop: + move scratch1AndOldValue, scratch2 + fn(value, scratch2) + batomicweakcasb scratch1AndOldValue, scratch2, [mem], .loop + else + .loop: + loadlinkacqb [mem], scratch1AndOldValue + fn(value, scratch1AndOldValue, scratch2) + storecondrelb ws2, scratch2, [mem] + bineq ws2, 0, .loop + end end -macro doI64AtomicLoad32(mem, dst) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - if ARM64 or ARM64E or X86_64 - atomicloadi [mem], dst +macro weakCASLoopHalf(mem, value, scratch1AndOldValue, scratch2, fn) + validateOpcodeConfig(scratch1AndOldValue) + if X86_64 + loadh [mem], scratch1AndOldValue + .loop: + move scratch1AndOldValue, scratch2 + fn(value, scratch2) + batomicweakcash scratch1AndOldValue, scratch2, [mem], .loop else - error + .loop: + loadlinkacqh [mem], scratch1AndOldValue + fn(value, scratch1AndOldValue, scratch2) + storecondrelh ws2, scratch2, [mem] + bineq ws2, 0, .loop end end -macro doI32AtomicStore(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - if ARM64E - atomicxchgi val, [memCopy], val - elsif X86_64 - atomicxchgi val, [memCopy] - elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) - move value, newValue - end) +macro weakCASLoopInt(mem, value, scratch1AndOldValue, scratch2, fn) + validateOpcodeConfig(scratch1AndOldValue) + if X86_64 + loadi [mem], scratch1AndOldValue + .loop: + move scratch1AndOldValue, scratch2 + fn(value, scratch2) + batomicweakcasi scratch1AndOldValue, scratch2, [mem], .loop else - error + .loop: + loadlinkacqi [mem], scratch1AndOldValue + fn(value, scratch1AndOldValue, scratch2) + storecondreli ws2, scratch2, [mem] + bineq ws2, 0, .loop + end +end + +macro weakCASLoopQuad(mem, value, scratch1AndOldValue, scratch2, fn) + validateOpcodeConfig(scratch1AndOldValue) + if X86_64 + loadq [mem], scratch1AndOldValue + .loop: + move scratch1AndOldValue, scratch2 + fn(value, scratch2) + batomicweakcasq scratch1AndOldValue, scratch2, [mem], .loop + else + .loop: + loadlinkacqq [mem], scratch1AndOldValue + fn(value, scratch1AndOldValue, scratch2) + storecondrelq ws2, scratch2, [mem] + bineq ws2, 0, .loop end end -macro doI64AtomicStore(mem, val, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy + +ipintAtomicOp(_i32_atomic_store, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgq val, [memCopy], val + atomicxchgi t3, [t2], t3 elsif X86_64 - atomicxchgq val, [memCopy] + atomicxchgi t3, [t2] elsif ARM64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicStore8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_store, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgb val, [memCopy], val + atomicxchgq t3, [t2], t3 elsif X86_64 - atomicxchgb val, [memCopy] + atomicxchgq t3, [t2] elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicStore16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_store8_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgh val, [memCopy], val + atomicxchgb t3, [t2], t3 elsif X86_64 - atomicxchgh val, [memCopy] + atomicxchgb t3, [t2] elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicStore8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_store16_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgb val, [memCopy], val + atomicxchgh t3, [t2], t3 elsif X86_64 - atomicxchgb val, [memCopy] + atomicxchgh t3, [t2] elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicStore16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_store8_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgh val, [memCopy], val + atomicxchgb t3, [t2], t3 elsif X86_64 - atomicxchgh val, [memCopy] + atomicxchgb t3, [t2] elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicStore32(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_store16_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgi val, [memCopy], val + atomicxchgh t3, [t2], t3 elsif X86_64 - atomicxchgi val, [memCopy] + atomicxchgh t3, [t2] elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwAdd(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_store32_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgaddi val, [memCopy], mem + atomicxchgi t3, [t2], t3 elsif X86_64 - atomicxchgaddi val, [memCopy] - move val, mem + atomicxchgi t3, [t2] elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) - addi value, oldValue, newValue + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) + move value, newValue end) else error end -end + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAdd(mem, val, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw_add, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgaddq val, [memCopy], mem + atomicxchgaddi t3, [t2], t0 elsif X86_64 - atomicxchgaddq val, [memCopy] - move val, mem + atomicxchgaddi t3, [t2] + move t3, t0 elsif ARM64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue) - addq value, oldValue, newValue + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) + addi value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) + +ipintAtomicOp(_i64_atomic_rmw_add, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 + if ARM64E + atomicxchgaddq t3, [t2], t0 + elsif X86_64 + atomicxchgaddq t3, [t2] + move t3, t0 + elsif ARM64 + weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) + addq value, oldValue, newValue + end) + else + error + end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwAdd8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw8_add_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgaddb val, [memCopy], mem + atomicxchgaddb t3, [t2], t0 elsif X86_64 - atomicxchgaddb val, [memCopy] - move val, mem - andi 0xff, mem + atomicxchgaddb t3, [t2] + move t3, t0 + andi 0xff, t0 elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) addi value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwAdd16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw16_add_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgaddh val, [memCopy], mem + atomicxchgaddh t3, [t2], t0 elsif X86_64 - atomicxchgaddh val, [memCopy] - move val, mem - andi 0xffff, mem + atomicxchgaddh t3, [t2] + move t3, t0 + andi 0xffff, t0 elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) addi value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAdd8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw8_add_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgaddb val, [memCopy], mem + atomicxchgaddb t3, [t2], t0 elsif X86_64 - atomicxchgaddb val, [memCopy] - move val, mem - andi 0xff, mem + atomicxchgaddb t3, [t2] + move t3, t0 + andi 0xff, t0 elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) addi value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAdd16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw16_add_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgaddh val, [memCopy], mem + atomicxchgaddh t3, [t2], t0 elsif X86_64 - atomicxchgaddh val, [memCopy] - move val, mem - andi 0xffff, mem + atomicxchgaddh t3, [t2] + move t3, t0 + andi 0xffff, t0 elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) addi value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAdd32(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw32_add_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgaddi val, [memCopy], mem + atomicxchgaddi t3, [t2], t0 elsif X86_64 - atomicxchgaddi val, [memCopy] - move val, mem - ori 0, mem + atomicxchgaddi t3, [t2] + move t3, t0 + ori 0, t0 elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) addi value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwSub(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw_sub, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - negi val - atomicxchgaddi val, [memCopy], mem + negi t3 + atomicxchgaddi t3, [t2], t0 elsif X86_64 - negi val - atomicxchgaddi val, [memCopy] - move val, mem + negi t3 + atomicxchgaddi t3, [t2] + move t3, t0 elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) subi oldValue, value, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwSub(mem, val, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw_sub, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 if ARM64E - negq val - atomicxchgaddq val, [memCopy], mem + negq t3 + atomicxchgaddq t3, [t2], t0 elsif X86_64 - negq val - atomicxchgaddq val, [memCopy] - move val, mem + negq t3 + atomicxchgaddq t3, [t2] + move t3, t0 elsif ARM64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) subq oldValue, value, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwSub8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw8_sub_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - negi val - atomicxchgaddb val, [memCopy], mem + negi t3 + atomicxchgaddb t3, [t2], t0 elsif X86_64 - negi val - atomicxchgaddb val, [memCopy] - move val, mem - andi 0xff, mem + negi t3 + atomicxchgaddb t3, [t2] + move t3, t0 + andi 0xff, t0 elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) subi oldValue, value, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwSub16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw16_sub_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - negi val - atomicxchgaddh val, [memCopy], mem + negi t3 + atomicxchgaddh t3, [t2], t0 elsif X86_64 - negi val - atomicxchgaddh val, [memCopy] - move val, mem - andi 0xffff, mem + negi t3 + atomicxchgaddh t3, [t2] + move t3, t0 + andi 0xffff, t0 elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) subi oldValue, value, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwSub8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw8_sub_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - negq val - atomicxchgaddb val, [memCopy], mem + negq t3 + atomicxchgaddb t3, [t2], t0 elsif X86_64 - negq val - atomicxchgaddb val, [memCopy] - move val, mem - andi 0xff, mem + negq t3 + atomicxchgaddb t3, [t2] + move t3, t0 + andi 0xff, t0 elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) subi oldValue, value, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwSub16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw16_sub_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - negq val - atomicxchgaddh val, [memCopy], mem + negq t3 + atomicxchgaddh t3, [t2], t0 elsif X86_64 - negq val - atomicxchgaddh val, [memCopy] - move val, mem - andi 0xffff, mem + negq t3 + atomicxchgaddh t3, [t2] + move t3, t0 + andi 0xffff, t0 elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) subi oldValue, value, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwSub32(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw32_sub_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - negq val - atomicxchgaddi val, [memCopy], mem + negq t3 + atomicxchgaddi t3, [t2], t0 elsif X86_64 - negq val - atomicxchgaddi val, [memCopy] - move val, mem - ori 0, mem + negq t3 + atomicxchgaddi t3, [t2] + move t3, t0 + ori 0, t0 elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) subi oldValue, value, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwAnd(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw_and, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - noti val - atomicxchgcleari val, [memCopy], mem + noti t3 + atomicxchgcleari t3, [t2], t0 elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) andq value, dst end) elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) andi value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAnd(mem, val, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw_and, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 if ARM64E - notq val - atomicxchgclearq val, [memCopy], mem + notq t3 + atomicxchgclearq t3, [t2], t0 elsif X86_64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst) andq value, dst end) elsif ARM64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) andq value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwAnd8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw8_and_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - noti val - atomicxchgclearb val, [memCopy], mem + noti t3 + atomicxchgclearb t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) andq value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) andi value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwAnd16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw16_and_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - noti val - atomicxchgclearh val, [memCopy], mem + noti t3 + atomicxchgclearh t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) andq value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) andi value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAnd8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw8_and_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - notq val - atomicxchgclearb val, [memCopy], mem + notq t3 + atomicxchgclearb t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) andq value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) andi value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAnd16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw16_and_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - notq val - atomicxchgclearh val, [memCopy], mem + notq t3 + atomicxchgclearh t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) andq value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) andi value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwAnd32(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw32_and_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - notq val - atomicxchgcleari val, [memCopy], mem + notq t3 + atomicxchgcleari t3, [t2], t0 elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) andq value, dst end) elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) andi value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwOr(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw_or, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgori val, [memCopy], mem + atomicxchgori t3, [t2], t0 elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) ori value, dst end) elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) ori value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwOr(mem, val, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw_or, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgorq val, [memCopy], mem + atomicxchgorq t3, [t2], t0 elsif X86_64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst) orq value, dst end) elsif ARM64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) orq value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwOr8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw8_or_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgorb val, [memCopy], mem + atomicxchgorb t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) orq value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) ori value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwOr16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw16_or_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgorh val, [memCopy], mem + atomicxchgorh t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) orq value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) ori value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwOr8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw8_or_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgorb val, [memCopy], mem + atomicxchgorb t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) orq value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) ori value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwOr16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw16_or_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgorh val, [memCopy], mem + atomicxchgorh t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) orq value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) ori value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwOr32(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw32_or_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgori val, [memCopy], mem + atomicxchgori t3, [t2], t0 elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) orq value, dst end) elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) ori value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwXor(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw_xor, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgxori val, [memCopy], mem + atomicxchgxori t3, [t2], t0 elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) xorq value, dst end) elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) xori value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwXor(mem, val, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw_xor, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgxorq val, [memCopy], mem + atomicxchgxorq t3, [t2], t0 elsif X86_64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst) xorq value, dst end) elsif ARM64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) xorq value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwXor8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw8_xor_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgxorb val, [memCopy], mem + atomicxchgxorb t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) xorq value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) xori value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwXor16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw16_xor_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgxorh val, [memCopy], mem + atomicxchgxorh t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) xorq value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) xori value, oldValue, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwXor8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw8_xor_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgxorb val, [memCopy], mem + atomicxchgxorb t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) xorq value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) xori value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwXor16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw16_xor_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgxorh val, [memCopy], mem + atomicxchgxorh t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) xorq value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) xori value, oldValue, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwXor32(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw32_xor_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgxori val, [memCopy], mem + atomicxchgxori t3, [t2], t0 elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) xorq value, dst end) elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) xori value, oldValue, newValue end) else error end -end - -macro doI32AtomicRmwXchg(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - if ARM64E - atomicxchgi val, [memCopy], mem - elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) - move value, dst - end) - elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) - move value, newValue - end) - else - error - end -end - -macro doI64AtomicRmwXchg(mem, val, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - if ARM64E - atomicxchgq val, [memCopy], mem - elsif X86_64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro (value, dst) - move value, dst - end) - elsif ARM64 - weakCASLoopQuad(memCopy, val, mem, scratch, macro(value, oldValue, newValue) - move value, newValue - end) - else - error - end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwXchg8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw_xchg, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgb val, [memCopy], mem + atomicxchgi t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) move value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI32AtomicRmwXchg16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw_xchg, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgh val, [memCopy], mem + atomicxchgq t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopQuad(t2, t3, t0, t1, macro (value, dst) move value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopQuad(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + pushInt64(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwXchg8(mem, val, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw8_xchg_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgb val, [memCopy], mem + atomicxchgb t3, [t2], t0 elsif X86_64 - weakCASLoopByte(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) move value, dst end) elsif ARM64 - weakCASLoopByte(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwXchg16(mem, val, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i32_atomic_rmw16_xchg_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgh val, [memCopy], mem + atomicxchgh t3, [t2], t0 elsif X86_64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) move value, dst end) elsif ARM64 - weakCASLoopHalf(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end + pushInt32(t0) + advancePCByReg(t4) + nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -macro doI64AtomicRmwXchg32(mem, val, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy +ipintAtomicOp(_i64_atomic_rmw8_xchg_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 if ARM64E - atomicxchgi val, [memCopy], mem + atomicxchgb t3, [t2], t0 elsif X86_64 - weakCASLoopInt(memCopy, val, mem, scratch, macro (value, dst) + weakCASLoopByte(t2, t3, t0, t1, macro (value, dst) move value, dst end) elsif ARM64 - weakCASLoopInt(memCopy, val, mem, scratch, macro(value, oldValue, newValue) + weakCASLoopByte(t2, t3, t0, t1, macro(value, oldValue, newValue) move value, newValue end) else error end -end - -macro doI32AtomicCmpxchg(mem, expected, newVal, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - move expected, mem - andq 0xffffffff, mem - if ARM64E or X86_64 - atomicweakcasi mem, newVal, [memCopy] - elsif ARM64 - weakCASExchangeInt(memCopy, newVal, mem, scratch, expected) - else - error - end -end - -macro doI64AtomicCmpxchg(mem, expected, newVal, memCopy, scratch) - checkAlignment8(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - move expected, mem - if ARM64E or X86_64 - atomicweakcasq mem, newVal, [memCopy] - elsif ARM64 - weakCASExchangeQuad(memCopy, newVal, mem, scratch, expected) - else - error - end -end - -macro doI32AtomicCmpxchg8(mem, expected, newVal, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - move expected, mem - andq 0xff, mem - if ARM64E or X86_64 - atomicweakcasb mem, newVal, [memCopy] - elsif ARM64 - weakCASExchangeByte(memCopy, newVal, mem, scratch, expected) - else - error - end -end - -macro doI32AtomicCmpxchg16(mem, expected, newVal, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - move expected, mem - andq 0xffff, mem - if ARM64E or X86_64 - atomicweakcash mem, newVal, [memCopy] - elsif ARM64 - weakCASExchangeHalf(memCopy, newVal, mem, scratch, expected) - else - error - end -end - -macro doI64AtomicCmpxchg8(mem, expected, newVal, memCopy, scratch) - noAlignmentCheck(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - move expected, mem - andq 0xff, mem - if ARM64E or X86_64 - atomicweakcasb mem, newVal, [memCopy] - elsif ARM64 - weakCASExchangeByte(memCopy, newVal, mem, scratch, expected) - else - error - end -end - -macro doI64AtomicCmpxchg16(mem, expected, newVal, memCopy, scratch) - checkAlignment2(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - move expected, mem - andq 0xffff, mem - if ARM64E or X86_64 - atomicweakcash mem, newVal, [memCopy] - elsif ARM64 - weakCASExchangeHalf(memCopy, newVal, mem, scratch, expected) - else - error - end -end - -macro doI64AtomicCmpxchg32(mem, expected, newVal, memCopy, scratch) - checkAlignment4(mem, _ipint_throw_UnalignedMemoryAccess) - move mem, memCopy - move expected, mem - andq 0xffffffff, mem - if ARM64E or X86_64 - atomicweakcasi mem, newVal, [memCopy] - elsif ARM64 - weakCASExchangeInt(memCopy, newVal, mem, scratch, expected) - else - error - end -end - -ipintAtomicOp(_memory_atomic_notify, macro() - # starting at sp: count, pointer - loadb IPInt::AtomicMemoryAccessMetadata::memoryIndex[MC], t0 - pushInt32(t0) - loadq IPInt::AtomicMemoryAccessMetadata::offset[MC], t0 - pushInt32(t0) # offset - - move sp, a1 - - operationCall(macro() cCall2(_ipint_extern_memory_atomic_notify) end) - bilt r0, 0, _ipint_throw_OutOfBoundsMemoryAccess - - addq (StackValueSize * 4), sp - - pushInt32(r0) - loadb IPInt::AtomicMemoryAccessMetadata::instructionLength[MC], t0 - advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::AtomicMemoryAccessMetadata))) - nextIPIntInstruction() -end) - -ipintAtomicOp(_memory_atomic_wait32, macro() - # starting at sp: timeout, value, pointer - loadb IPInt::AtomicMemoryAccessMetadata::memoryIndex[MC], t0 - pushInt32(t0) - loadq (StackValueSize * 3)[sp], t0 - loadq IPInt::AtomicMemoryAccessMetadata::offset[MC], t1 - addq t1, t0 - storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset - - move sp, a1 - - operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait32) end) - bilt r0, 0, _ipint_throw_OutOfBoundsMemoryAccess - - addq (StackValueSize * 4), sp - - pushInt32(r0) - loadb IPInt::AtomicMemoryAccessMetadata::instructionLength[MC], t0 - advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::AtomicMemoryAccessMetadata))) - nextIPIntInstruction() -end) - -ipintAtomicOp(_memory_atomic_wait64, macro() - # starting at sp: timeout, value, pointer - loadb IPInt::AtomicMemoryAccessMetadata::memoryIndex[MC], t0 - pushInt32(t0) - loadq (StackValueSize * 3)[sp], t0 - loadq IPInt::AtomicMemoryAccessMetadata::offset[MC], t1 - addq t1, t0 - storeq t0, (StackValueSize * 3)[sp] # replace pointer with pointer + offset - - move sp, a1 - - operationCall(macro() cCall2(_ipint_extern_memory_atomic_wait64) end) - bilt r0, 0, _ipint_throw_OutOfBoundsMemoryAccess - - addq (StackValueSize * 4), sp - - pushInt32(r0) - loadb IPInt::AtomicMemoryAccessMetadata::instructionLength[MC], t0 - advancePCByReg(t0) - advanceMC(constexpr (sizeof(IPInt::AtomicMemoryAccessMetadata))) - nextIPIntInstruction() -end) - -ipintAtomicOp(_atomic_fence, macro() - fence - leap 1[t4], PC - nextIPIntInstruction() -end) - -reservedAtomicOpcode(atomic_0x4) -reservedAtomicOpcode(atomic_0x5) -reservedAtomicOpcode(atomic_0x6) -reservedAtomicOpcode(atomic_0x7) -reservedAtomicOpcode(atomic_0x8) -reservedAtomicOpcode(atomic_0x9) -reservedAtomicOpcode(atomic_0xa) -reservedAtomicOpcode(atomic_0xb) -reservedAtomicOpcode(atomic_0xc) -reservedAtomicOpcode(atomic_0xd) -reservedAtomicOpcode(atomic_0xe) -reservedAtomicOpcode(atomic_0xf) - -ipintAtomicOp(_i32_atomic_load, macro() - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_load_slow_path) - doI32AtomicLoad(t0, t2) - pushInt32(t2) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_load, macro() - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_load_slow_path) - doI64AtomicLoad(t0, t2) - pushInt64(t2) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_load8_u, macro() - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_load8_u_slow_path) - doI32AtomicLoad8(t0, t2) - pushInt32(t2) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_load16_u, macro() - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_load16_u_slow_path) - doI32AtomicLoad16(t0, t2) - pushInt32(t2) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_load8_u, macro() - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_load8_u_slow_path) - doI64AtomicLoad8(t0, t2) - pushInt64(t2) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_load16_u, macro() - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_load16_u_slow_path) - doI64AtomicLoad16(t0, t2) - pushInt64(t2) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_load32_u, macro() - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_load32_u_slow_path) - doI64AtomicLoad32(t0, t2) - pushInt64(t2) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_store, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_store_slow_path) - doI32AtomicStore(t0, t3, t2, t1) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_store, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_store_slow_path) - doI64AtomicStore(t0, t3, t2, t1) - leap 2[t4], PC + pushInt64(t0) + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) end) -ipintAtomicOp(_i32_atomic_store8_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_store8_u_slow_path) - doI32AtomicStore8(t0, t3, t2, t1) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_store16_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_store16_u_slow_path) - doI32AtomicStore16(t0, t3, t2, t1) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_store8_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_store8_u_slow_path) - doI64AtomicStore8(t0, t3, t2, t1) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_store16_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_store16_u_slow_path) - doI64AtomicStore16(t0, t3, t2, t1) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_store32_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_store32_u_slow_path) - doI64AtomicStore32(t0, t3, t2, t1) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw_add, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_add_slow_path) - doI32AtomicRmwAdd(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw_add, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_add_slow_path) - doI64AtomicRmwAdd(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw8_add_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_add_u_slow_path) - doI32AtomicRmwAdd8(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw16_add_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_add_u_slow_path) - doI32AtomicRmwAdd16(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw8_add_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_add_u_slow_path) - doI64AtomicRmwAdd8(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw16_add_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_add_u_slow_path) - doI64AtomicRmwAdd16(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw32_add_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_add_u_slow_path) - doI64AtomicRmwAdd32(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw_sub, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_sub_slow_path) - doI32AtomicRmwSub(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw_sub, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_sub_slow_path) - doI64AtomicRmwSub(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw8_sub_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_sub_u_slow_path) - doI32AtomicRmwSub8(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw16_sub_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_sub_u_slow_path) - doI32AtomicRmwSub16(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw8_sub_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_sub_u_slow_path) - doI64AtomicRmwSub8(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw16_sub_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_sub_u_slow_path) - doI64AtomicRmwSub16(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw32_sub_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_sub_u_slow_path) - doI64AtomicRmwSub32(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw_and, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_and_slow_path) - doI32AtomicRmwAnd(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw_and, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_and_slow_path) - doI64AtomicRmwAnd(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw8_and_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_and_u_slow_path) - doI32AtomicRmwAnd8(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw16_and_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_and_u_slow_path) - doI32AtomicRmwAnd16(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw8_and_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_and_u_slow_path) - doI64AtomicRmwAnd8(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw16_and_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_and_u_slow_path) - doI64AtomicRmwAnd16(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw32_and_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_and_u_slow_path) - doI64AtomicRmwAnd32(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw_or, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_or_slow_path) - doI32AtomicRmwOr(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw_or, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_or_slow_path) - doI64AtomicRmwOr(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw8_or_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_or_u_slow_path) - doI32AtomicRmwOr8(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw16_or_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_or_u_slow_path) - doI32AtomicRmwOr16(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw8_or_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_or_u_slow_path) - doI64AtomicRmwOr8(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw16_or_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_or_u_slow_path) - doI64AtomicRmwOr16(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw32_or_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_or_u_slow_path) - doI64AtomicRmwOr32(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw_xor, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_xor_slow_path) - doI32AtomicRmwXor(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw_xor, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_xor_slow_path) - doI64AtomicRmwXor(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw8_xor_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_xor_u_slow_path) - doI32AtomicRmwXor8(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw16_xor_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_xor_u_slow_path) - doI32AtomicRmwXor16(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw8_xor_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_xor_u_slow_path) - doI64AtomicRmwXor8(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw16_xor_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_xor_u_slow_path) - doI64AtomicRmwXor16(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw32_xor_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_xor_u_slow_path) - doI64AtomicRmwXor32(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw_xchg, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_xchg_slow_path) - doI32AtomicRmwXchg(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw_xchg, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_xchg_slow_path) - doI64AtomicRmwXchg(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw8_xchg_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_xchg_u_slow_path) - doI32AtomicRmwXchg8(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw16_xchg_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_xchg_u_slow_path) - doI32AtomicRmwXchg16(t0, t3, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw8_xchg_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_xchg_u_slow_path) - doI64AtomicRmwXchg8(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw16_xchg_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_xchg_u_slow_path) - doI64AtomicRmwXchg16(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw32_xchg_u, macro() - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_xchg_u_slow_path) - doI64AtomicRmwXchg32(t0, t3, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -macro weakCASExchangeByte(mem, value, expected, scratch, scratch2) - if ARM64 - validateOpcodeConfig(scratch2) - .loop: - loadlinkacqb [mem], scratch2 - bqneq expected, scratch2, .fail - storecondrelb scratch, value, [mem] - bieq scratch, 0, .done - jmp .loop - .fail: - storecondrelb scratch, scratch2, [mem] - bieq scratch, 0, .done - jmp .loop - .done: - move scratch2, expected - else - error - end -end - -macro weakCASExchangeHalf(mem, value, expected, scratch, scratch2) - if ARM64 - validateOpcodeConfig(scratch2) - .loop: - loadlinkacqh [mem], scratch2 - bqneq expected, scratch2, .fail - storecondrelh scratch, value, [mem] - bieq scratch, 0, .done - jmp .loop - .fail: - storecondrelh scratch, scratch2, [mem] - bieq scratch, 0, .done - jmp .loop - .done: - move scratch2, expected - else - error - end -end - -macro weakCASExchangeInt(mem, value, expected, scratch, scratch2) - if ARM64 - validateOpcodeConfig(scratch2) - .loop: - loadlinkacqi [mem], scratch2 - bqneq expected, scratch2, .fail - storecondreli scratch, value, [mem] - bieq scratch, 0, .done - jmp .loop - .fail: - storecondreli scratch, scratch2, [mem] - bieq scratch, 0, .done - jmp .loop - .done: - move scratch2, expected - else - error - end -end - -macro weakCASExchangeQuad(mem, value, expected, scratch, scratch2) - if ARM64 - validateOpcodeConfig(scratch2) - .loop: - loadlinkacqq [mem], scratch2 - bqneq expected, scratch2, .fail - storecondrelq scratch, value, [mem] - bieq scratch, 0, .done - jmp .loop - .fail: - storecondrelq scratch, scratch2, [mem] - bieq scratch, 0, .done - jmp .loop - .done: - move scratch2, expected - else - error - end -end - -ipintAtomicOp(_i32_atomic_rmw_cmpxchg, macro() - popInt64(t7) - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i32_atomic_rmw_cmpxchg_slow_path) - doI32AtomicCmpxchg(t0, t3, t7, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw_cmpxchg, macro() - popInt64(t7) - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 8, t1, t2, .ipint_i64_atomic_rmw_cmpxchg_slow_path) - doI64AtomicCmpxchg(t0, t3, t7, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw8_cmpxchg_u, macro() - popInt64(t7) - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i32_atomic_rmw8_cmpxchg_u_slow_path) - doI32AtomicCmpxchg8(t0, t3, t7, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i32_atomic_rmw16_cmpxchg_u, macro() - popInt64(t7) - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i32_atomic_rmw16_cmpxchg_u_slow_path) - doI32AtomicCmpxchg16(t0, t3, t7, t2, t1) - pushInt32(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw8_cmpxchg_u, macro() - popInt64(t7) - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 1, t1, t2, .ipint_i64_atomic_rmw8_cmpxchg_u_slow_path) - doI64AtomicCmpxchg8(t0, t3, t7, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw16_cmpxchg_u, macro() - popInt64(t7) - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 2, t1, t2, .ipint_i64_atomic_rmw16_cmpxchg_u_slow_path) - doI64AtomicCmpxchg16(t0, t3, t7, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -ipintAtomicOp(_i64_atomic_rmw32_cmpxchg_u, macro() - popInt64(t7) - popInt64(t3) - popMemoryIndex(t0) - loadStoreMakePointerFast([t4], 1[t4], t0, 4, t1, t2, .ipint_i64_atomic_rmw32_cmpxchg_u_slow_path) - doI64AtomicCmpxchg32(t0, t3, t7, t2, t1) - pushInt64(t0) - leap 2[t4], PC - nextIPIntInstruction() -end) - -####################################### -## ULEB128 decoding logic for locals ## -####################################### - -macro decodeULEB128(result) - # result should already be the first byte. - andq 0x7f, result - move 7, t2 # t1 holds the shift. - validateOpcodeConfig(t3) -.loop: - loadb [PC], t3 - andq t3, 0x7f, t1 - lshiftq t2, t1 - orq t1, result - addq 7, t2 - advancePC(1) - bbaeq t3, 128, .loop -end - -.ipint_local_get_slow_path: - decodeULEB128(t0) - localGetPostDecode() - -.ipint_local_set_slow_path: - decodeULEB128(t0) - localSetPostDecode() - -.ipint_local_tee_slow_path: - decodeULEB128(t0) - localTeePostDecode() - -########################################## -## Out-of-line LEB128 decode slow paths ## -########################################## - -.ipint_i32_const_slow_path: - leap 1[PC], t4 - decodeLEBVarSInt32(t0, t4, t1, t2) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_const_slow_path: - leap 1[PC], t4 - decodeLEBVarSInt64(t0, t4, t1, t2) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -################################################## -## Out-of-line slow paths for memory load/store ## -################################################## - -# The handler's fast path pops values and branches here on multi-byte memarg. -# t0 = wasm address (from popMemoryIndex), t3 = data value (for int stores), -# ft0 = data value (for float stores). These must survive loadStoreMakePointerSlow. -# For int stores, t3 is saved/restored around the macro since t3 is used as scratch. - -.ipint_i32_load_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - loadi [t0], t1 - pushInt32(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_load_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - loadq [t0], t1 - pushInt64(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_f32_load_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - loadf [t0], ft0 - pushFloat32(ft0) - move t4, PC - nextIPIntInstruction() - -.ipint_f64_load_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - loadd [t0], ft0 - pushFloat64(ft0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_load8s_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - loadbsi [t0], t1 - pushInt32(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_load8u_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - loadb [t0], t1 - pushInt32(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_load16s_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - loadhsi [t0], t1 - pushInt32(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_load16u_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - loadh [t0], t1 - pushInt32(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_load8s_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - loadbsq [t0], t1 - pushInt64(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_load8u_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - loadb [t0], t1 - pushInt64(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_load16s_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - loadhsq [t0], t1 - pushInt64(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_load16u_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - loadh [t0], t1 - pushInt64(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_load32s_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - loadi [t0], t1 - sxi2q t1, t1 - pushInt64(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_load32u_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - loadi [t0], t1 - pushInt64(t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_store_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - storei t3, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_i64_store_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - storeq t3, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_f32_store_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - storef ft0, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_f64_store_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - stored ft0, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_i32_store8_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - storeb t3, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_i32_store16_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - storeh t3, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_i64_store8_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - storeb t3, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_i64_store16_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - storeh t3, [t0] - move t4, PC - nextIPIntInstruction() - -.ipint_i64_store32_mem_slow_path: - leap 1[PC], t4 - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - storei t3, [t0] - move t4, PC - nextIPIntInstruction() - -################################################### -## Out-of-line slow paths for SIMD memory access ## -################################################### - -# t0 = wasm address (from popMemoryIndex before branching). -# t4 = cursor pointing to start of memarg (past SIMD opcode, set by simd_prefix). -# After loadStoreMakePointerSlow, t4 points past the memarg. - -.simd_v128_load_slow_path: - loadStoreMakePointerSlow(t4, t0, 16, t1, t2, t5, t6) - loadv [t0], v0 - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load_8x8s_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - simdLoad8x8s() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load_8x8u_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - simdLoad8x8u() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load_16x4s_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - simdLoad16x4s() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load_16x4u_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - simdLoad16x4u() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load_32x2s_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - simdLoad32x2s() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load_32x2u_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - simdLoad32x2u() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load8_splat_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - simdLoadSplat8() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load16_splat_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - simdLoadSplat16() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load32_splat_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - simdLoadSplat32() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_load64_splat_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - simdLoadSplat64() - pushVec(v0) - move t4, PC - nextIPIntInstruction() - -.simd_v128_store_slow_path: - loadStoreMakePointerSlow(t4, t0, 16, t1, t2, t5, t6) - storev v0, [t0] - move t4, PC - nextIPIntInstruction() - -.simd_v128_load32_zero_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - loadi [t0], t0 - subp V128ISize, sp - storei t0, [sp] - storei 0, 4[sp] - storeq 0, 8[sp] - move t4, PC - nextIPIntInstruction() - -.simd_v128_load64_zero_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - loadq [t0], t0 - subp V128ISize, sp - storeq t0, [sp] - storeq 0, 8[sp] - move t4, PC - nextIPIntInstruction() - -# Load lane slow paths: v0 = vector (already popped), t0 = wasm addr. -# t4 points past memarg after loadStoreMakePointerSlow. Lane index is at [t4]. - -.simd_v128_load8_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - loadb [t0], t0 - loadb [t4], t1 - andi ImmLaneIdx16Mask, t1 - pushVec(v0) - storeb t0, [sp, t1] - leap 1[t4], PC - nextIPIntInstruction() - -.simd_v128_load16_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - loadh [t0], t0 - loadb [t4], t1 - andi ImmLaneIdx8Mask, t1 - pushVec(v0) - storeh t0, [sp, t1, 2] - leap 1[t4], PC - nextIPIntInstruction() - -.simd_v128_load32_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - loadi [t0], t0 - loadb [t4], t1 - andi ImmLaneIdx4Mask, t1 - pushVec(v0) - storei t0, [sp, t1, 4] - leap 1[t4], PC - nextIPIntInstruction() - -.simd_v128_load64_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - loadq [t0], t0 - loadb [t4], t1 - andi ImmLaneIdx2Mask, t1 - pushVec(v0) - storeq t0, [sp, t1, 8] - leap 1[t4], PC - nextIPIntInstruction() - -# Store lane slow paths: v0 = vector (already popped), t0 = wasm addr. -# t4 points past memarg. Lane index is at [t4]. - -.simd_v128_store8_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - loadb [t4], t1 - andi ImmLaneIdx16Mask, t1 - pushVec(v0) - loadb [sp, t1], t1 - addp V128ISize, sp - storeb t1, [t0] - leap 1[t4], PC - nextIPIntInstruction() - -.simd_v128_store16_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - loadb [t4], t1 - andi ImmLaneIdx8Mask, t1 - pushVec(v0) - loadh [sp, t1, 2], t1 - addp V128ISize, sp - storeh t1, [t0] - leap 1[t4], PC - nextIPIntInstruction() - -.simd_v128_store32_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - loadb [t4], t1 - andi ImmLaneIdx4Mask, t1 - pushVec(v0) - loadi [sp, t1, 4], t1 - addp V128ISize, sp - storei t1, [t0] - leap 1[t4], PC - nextIPIntInstruction() - -.simd_v128_store64_lane_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - loadb [t4], t1 - andi ImmLaneIdx2Mask, t1 - pushVec(v0) - loadq [sp, t1, 8], t1 - addp V128ISize, sp - storeq t1, [t0] - leap 1[t4], PC - nextIPIntInstruction() - -######################################################### -## Out-of-line slow paths for atomic memory operations ## -######################################################### - -# t0 = wasm address (from popMemoryIndex before branching). -# t4 = cursor pointing to start of memarg (past atomic sub-opcode, set by atomic_prefix). -# t3 = data value (for store/RMW ops, survives loadStoreMakePointerSlow). -# t7 = new value for CAS (must be push/popped around loadStoreMakePointerSlow). -# After loadStoreMakePointerSlow, t4 points past the memarg. - -.ipint_i32_atomic_load_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicLoad(t0, t2) - pushInt32(t2) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_load_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicLoad(t0, t2) - pushInt64(t2) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_load8_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicLoad8(t0, t2) - pushInt32(t2) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_load16_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicLoad16(t0, t2) - pushInt32(t2) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_load8_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicLoad8(t0, t2) - pushInt64(t2) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_load16_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicLoad16(t0, t2) - pushInt64(t2) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_load32_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicLoad32(t0, t2) - pushInt64(t2) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_store_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicStore(t0, t3, t2, t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_store_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicStore(t0, t3, t2, t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_store8_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicStore8(t0, t3, t2, t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_store16_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicStore16(t0, t3, t2, t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_store8_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicStore8(t0, t3, t2, t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_store16_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicStore16(t0, t3, t2, t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_store32_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicStore32(t0, t3, t2, t1) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw_add_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicRmwAdd(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw_add_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicRmwAdd(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw8_add_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicRmwAdd8(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw16_add_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicRmwAdd16(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw8_add_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicRmwAdd8(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw16_add_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicRmwAdd16(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw32_add_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicRmwAdd32(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw_sub_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicRmwSub(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw_sub_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicRmwSub(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw8_sub_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicRmwSub8(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw16_sub_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicRmwSub16(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw8_sub_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicRmwSub8(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw16_sub_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicRmwSub16(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw32_sub_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicRmwSub32(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw_and_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicRmwAnd(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw_and_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicRmwAnd(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw8_and_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicRmwAnd8(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw16_and_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicRmwAnd16(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw8_and_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicRmwAnd8(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw16_and_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicRmwAnd16(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw32_and_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicRmwAnd32(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw_or_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicRmwOr(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw_or_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicRmwOr(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw8_or_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicRmwOr8(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw16_or_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicRmwOr16(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw8_or_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicRmwOr8(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw16_or_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicRmwOr16(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw32_or_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicRmwOr32(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw_xor_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicRmwXor(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw_xor_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicRmwXor(t0, t3, t2, t1) +ipintAtomicOp(_i64_atomic_rmw16_xchg_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 + if ARM64E + atomicxchgh t3, [t2], t0 + elsif X86_64 + weakCASLoopHalf(t2, t3, t0, t1, macro (value, dst) + move value, dst + end) + elsif ARM64 + weakCASLoopHalf(t2, t3, t0, t1, macro(value, oldValue, newValue) + move value, newValue + end) + else + error + end pushInt64(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i32_atomic_rmw8_xor_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicRmwXor8(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC +ipintAtomicOp(_i64_atomic_rmw32_xchg_u, macro() + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 + if ARM64E + atomicxchgi t3, [t2], t0 + elsif X86_64 + weakCASLoopInt(t2, t3, t0, t1, macro (value, dst) + move value, dst + end) + elsif ARM64 + weakCASLoopInt(t2, t3, t0, t1, macro(value, oldValue, newValue) + move value, newValue + end) + else + error + end + pushInt64(t0) + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i32_atomic_rmw16_xor_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicRmwXor16(t0, t3, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() +macro weakCASExchangeByte(mem, value, expected, scratch, scratch2) + if ARM64 + validateOpcodeConfig(scratch2) + .loop: + loadlinkacqb [mem], scratch2 + bqneq expected, scratch2, .fail + storecondrelb scratch, value, [mem] + bieq scratch, 0, .done + jmp .loop + .fail: + storecondrelb scratch, scratch2, [mem] + bieq scratch, 0, .done + jmp .loop + .done: + move scratch2, expected + else + error + end +end -.ipint_i64_atomic_rmw8_xor_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicRmwXor8(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() +macro weakCASExchangeHalf(mem, value, expected, scratch, scratch2) + if ARM64 + validateOpcodeConfig(scratch2) + .loop: + loadlinkacqh [mem], scratch2 + bqneq expected, scratch2, .fail + storecondrelh scratch, value, [mem] + bieq scratch, 0, .done + jmp .loop + .fail: + storecondrelh scratch, scratch2, [mem] + bieq scratch, 0, .done + jmp .loop + .done: + move scratch2, expected + else + error + end +end -.ipint_i64_atomic_rmw16_xor_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicRmwXor16(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() +macro weakCASExchangeInt(mem, value, expected, scratch, scratch2) + if ARM64 + validateOpcodeConfig(scratch2) + .loop: + loadlinkacqi [mem], scratch2 + bqneq expected, scratch2, .fail + storecondreli scratch, value, [mem] + bieq scratch, 0, .done + jmp .loop + .fail: + storecondreli scratch, scratch2, [mem] + bieq scratch, 0, .done + jmp .loop + .done: + move scratch2, expected + else + error + end +end -.ipint_i64_atomic_rmw32_xor_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicRmwXor32(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() +macro weakCASExchangeQuad(mem, value, expected, scratch, scratch2) + if ARM64 + validateOpcodeConfig(scratch2) + .loop: + loadlinkacqq [mem], scratch2 + bqneq expected, scratch2, .fail + storecondrelq scratch, value, [mem] + bieq scratch, 0, .done + jmp .loop + .fail: + storecondrelq scratch, scratch2, [mem] + bieq scratch, 0, .done + jmp .loop + .done: + move scratch2, expected + else + error + end +end -.ipint_i32_atomic_rmw_xchg_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicRmwXchg(t0, t3, t2, t1) +ipintAtomicOp(_i32_atomic_rmw_cmpxchg, macro() + # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64. + # ARMv7 (where PL=t7) does not run 64-bit atomic instructions. + popInt64(t7) + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 + move t3, t0 + andq 0xffffffff, t0 + if ARM64E or X86_64 + atomicweakcasi t0, t7, [t2] + elsif ARM64 + weakCASExchangeInt(t2, t7, t0, t1, t3) + else + error + end pushInt32(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i64_atomic_rmw_xchg_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicRmwXchg(t0, t3, t2, t1) +ipintAtomicOp(_i64_atomic_rmw_cmpxchg, macro() + # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64. + # ARMv7 (where PL=t7) does not run 64-bit atomic instructions. + popInt64(t7) + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 8, t1, t2) + checkAlignment8(t0, .throwUnaligned) + move t0, t2 + move t3, t0 + if ARM64E or X86_64 + atomicweakcasq t0, t7, [t2] + elsif ARM64 + weakCASExchangeQuad(t2, t7, t0, t1, t3) + else + error + end pushInt64(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i32_atomic_rmw8_xchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicRmwXchg8(t0, t3, t2, t1) +ipintAtomicOp(_i32_atomic_rmw8_cmpxchg_u, macro() + # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64. + # ARMv7 (where PL=t7) does not run 64-bit atomic instructions. + popInt64(t7) + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 + move t3, t0 + andq 0xff, t0 + if ARM64E or X86_64 + atomicweakcasb t0, t7, [t2] + elsif ARM64 + weakCASExchangeByte(t2, t7, t0, t1, t3) + else + error + end pushInt32(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i32_atomic_rmw16_xchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicRmwXchg16(t0, t3, t2, t1) +ipintAtomicOp(_i32_atomic_rmw16_cmpxchg_u, macro() + # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64. + # ARMv7 (where PL=t7) does not run 64-bit atomic instructions. + popInt64(t7) + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 + move t3, t0 + andq 0xffff, t0 + if ARM64E or X86_64 + atomicweakcash t0, t7, [t2] + elsif ARM64 + weakCASExchangeHalf(t2, t7, t0, t1, t3) + else + error + end pushInt32(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i64_atomic_rmw8_xchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicRmwXchg8(t0, t3, t2, t1) - pushInt64(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i64_atomic_rmw16_xchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicRmwXchg16(t0, t3, t2, t1) +ipintAtomicOp(_i64_atomic_rmw8_cmpxchg_u, macro() + # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64. + # ARMv7 (where PL=t7) does not run 64-bit atomic instructions. + popInt64(t7) + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 1, t1, t2) + noAlignmentCheck(t0, .throwUnaligned) + move t0, t2 + move t3, t0 + andq 0xff, t0 + if ARM64E or X86_64 + atomicweakcasb t0, t7, [t2] + elsif ARM64 + weakCASExchangeByte(t2, t7, t0, t1, t3) + else + error + end pushInt64(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i64_atomic_rmw32_xchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicRmwXchg32(t0, t3, t2, t1) +ipintAtomicOp(_i64_atomic_rmw16_cmpxchg_u, macro() + # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64. + # ARMv7 (where PL=t7) does not run 64-bit atomic instructions. + popInt64(t7) + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 2, t1, t2) + checkAlignment2(t0, .throwUnaligned) + move t0, t2 + move t3, t0 + andq 0xffff, t0 + if ARM64E or X86_64 + atomicweakcash t0, t7, [t2] + elsif ARM64 + weakCASExchangeHalf(t2, t7, t0, t1, t3) + else + error + end pushInt64(t0) - move t4, PC - nextIPIntInstruction() - -.ipint_i32_atomic_rmw_cmpxchg_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI32AtomicCmpxchg(t0, t3, t7, t2, t1) - pushInt32(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i64_atomic_rmw_cmpxchg_slow_path: - loadStoreMakePointerSlow(t4, t0, 8, t1, t2, t5, t6) - doI64AtomicCmpxchg(t0, t3, t7, t2, t1) +ipintAtomicOp(_i64_atomic_rmw32_cmpxchg_u, macro() + # t7 is safe for value: PL is t6 on ARM64, t5 on x86, csr10 on RISCV64. + # ARMv7 (where PL=t7) does not run 64-bit atomic instructions. + popInt64(t7) + popInt64(t3) + popMemoryIndex(t0, t2) + memoryOpAdvanceMCAndMakePointer(t4, t0, 4, t1, t2) + checkAlignment4(t0, .throwUnaligned) + move t0, t2 + move t3, t0 + andq 0xffffffff, t0 + if ARM64E or X86_64 + atomicweakcasi t0, t7, [t2] + elsif ARM64 + weakCASExchangeInt(t2, t7, t0, t1, t3) + else + error + end pushInt64(t0) - move t4, PC + advancePCByReg(t4) nextIPIntInstruction() +.throwUnaligned: + handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) +end) -.ipint_i32_atomic_rmw8_cmpxchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI32AtomicCmpxchg8(t0, t3, t7, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() +####################################### +## ULEB128 decoding logic for locals ## +####################################### -.ipint_i32_atomic_rmw16_cmpxchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI32AtomicCmpxchg16(t0, t3, t7, t2, t1) - pushInt32(t0) - move t4, PC - nextIPIntInstruction() +macro decodeULEB128(result) + # result should already be the first byte. + andq 0x7f, result + move 7, t2 # t1 holds the shift. + validateOpcodeConfig(t3) +.loop: + loadb [PC], t3 + andq t3, 0x7f, t1 + lshiftq t2, t1 + orq t1, result + addq 7, t2 + advancePC(1) + bbaeq t3, 128, .loop +end -.ipint_i64_atomic_rmw8_cmpxchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 1, t1, t2, t5, t6) - doI64AtomicCmpxchg8(t0, t3, t7, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() +slowPathLabel(_local_get) + decodeULEB128(t0) + localGetPostDecode() -.ipint_i64_atomic_rmw16_cmpxchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 2, t1, t2, t5, t6) - doI64AtomicCmpxchg16(t0, t3, t7, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() +slowPathLabel(_local_set) + decodeULEB128(t0) + localSetPostDecode() -.ipint_i64_atomic_rmw32_cmpxchg_u_slow_path: - loadStoreMakePointerSlow(t4, t0, 4, t1, t2, t5, t6) - doI64AtomicCmpxchg32(t0, t3, t7, t2, t1) - pushInt64(t0) - move t4, PC - nextIPIntInstruction() +slowPathLabel(_local_tee) + decodeULEB128(t0) + localTeePostDecode() ################################## ## "Out of line" logic for call ## @@ -11456,6 +11043,8 @@ end # t3 is not used after this subp cfr, t3 push t3, PC + # ditto for PL, t3 is okay to use as scratch + subp PL, cfr, t3 push t3, wasmInstance # set up the call frame @@ -11470,7 +11059,7 @@ end # reserved # reserved # (first_non_arg_addr - cfr), PC - # unused, wasmInstance <- t2 = native argument stack (pushed by mINT) + # (PL - cfr), wasmInstance <- t2 = native argument stack (pushed by mINT) # call frame # call frame # call frame @@ -11786,6 +11375,7 @@ mintAlign(_tail_call) # CallArgumentBytecode::Call (0x1b) mintAlign(_call) pop wasmInstance, ws0 + # pop targetInstance, targetEntrypoint # Save stack pointer, if we tail call someone who changes the frame above's stack argument size. # Store its value relative to cfp so stack frames can be easily relocated for JSPI. @@ -11793,11 +11383,16 @@ mintAlign(_call) subp cfr, sc1 storep sc1, ThisArgumentOffset[cfr] + # Swap instances + # move targetInstance, wasmInstance + # Set up memory push t2, t3 ipintReloadMemory() pop t3, t2 + # move targetEntrypoint, ws0 + # Make the call if ARM64E leap _g_config, ws1 @@ -11973,7 +11568,7 @@ mintAlign(_end) # return result # return result <- mintRetDst => new SP # (first_non_arg_addr - cfr), PC - # unused, wasmInstance <- sc3 + # (PL - cfr), wasmInstance <- sc3 # call frame # call frame # call frame @@ -11993,6 +11588,7 @@ end loadp Callee[cfr], ws0 unboxWasmCallee(ws0, ws1) storep ws0, UnboxedWasmCalleeStackSlot[cfr] + addp t3, cfr, PL # Restore memory ipintReloadMemory() @@ -12139,36 +11735,6 @@ _ipint_mint_ret_dispatch_err: move 0x88, a0 break -_ipint_throw_Unreachable: - handleDebuggerTrapIfNeededAndThrowWasmTrap(Unreachable) - -_ipint_throw_NullExnrefReference: - handleDebuggerTrapIfNeededAndThrowWasmTrap(NullExnrefReference) - -_ipint_throw_OutOfBoundsMemoryAccess: - handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsMemoryAccess) - -_ipint_throw_DivisionByZero: - handleDebuggerTrapIfNeededAndThrowWasmTrap(DivisionByZero) - -_ipint_throw_IntegerOverflow: - handleDebuggerTrapIfNeededAndThrowWasmTrap(IntegerOverflow) - -_ipint_throw_OutOfBoundsTrunc: - handleDebuggerTrapIfNeededAndThrowWasmTrap(OutOfBoundsTrunc) - -_ipint_throw_NullRefAsNonNull: - handleDebuggerTrapIfNeededAndThrowWasmTrap(NullRefAsNonNull) - -_ipint_throw_NullAccess: - handleDebuggerTrapIfNeededAndThrowWasmTrap(NullAccess) - -_ipint_throw_NullI31Get: - handleDebuggerTrapIfNeededAndThrowWasmTrap(NullI31Get) - -_ipint_throw_UnalignedMemoryAccess: - handleDebuggerTrapIfNeededAndThrowWasmTrap(UnalignedMemoryAccess) - ########################################### # uINT: function return value interpreter # ########################################### @@ -12279,18 +11845,18 @@ uintAlign(_ret) argumINTAlign(_a0) _argumINT_begin: storeq wa0, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_a1) storeq wa1, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_a2) if ARM64 or ARM64E or X86_64 storeq wa2, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() else break @@ -12300,7 +11866,7 @@ end argumINTAlign(_a3) if ARM64 or ARM64E or X86_64 storeq wa3, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() else break @@ -12309,7 +11875,7 @@ end argumINTAlign(_a4) if ARM64 or ARM64E or X86_64 storeq wa4, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() else break @@ -12318,7 +11884,7 @@ end argumINTAlign(_a5) if ARM64 or ARM64E or X86_64 storeq wa5, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() else break @@ -12327,7 +11893,7 @@ end argumINTAlign(_a6) if ARM64 or ARM64E storeq wa6, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() else break @@ -12336,7 +11902,7 @@ end argumINTAlign(_a7) if ARM64 or ARM64E storeq wa7, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() else break @@ -12344,49 +11910,49 @@ end argumINTAlign(_fa0) storev wfa0, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_fa1) storev wfa1, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_fa2) storev wfa2, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_fa3) storev wfa3, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_fa4) storev wfa4, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_fa5) storev wfa5, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_fa6) storev wfa6, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_fa7) storev wfa7, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_stack) loadq [argumINTSrc], csr0 addp SlotSize, argumINTSrc storeq csr0, [argumINTDst] - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_stack_vector) @@ -12395,7 +11961,7 @@ argumINTAlign(_stack_vector) loadq 8[argumINTSrc], csr0 storeq csr0, 8[argumINTDst] addp 2 * SlotSize, argumINTSrc - subp LocalSize, argumINTDst + addp LocalSize, argumINTDst argumINTDispatch() argumINTAlign(_end) diff --git a/Source/JavaScriptCore/offlineasm/arm64.rb b/Source/JavaScriptCore/offlineasm/arm64.rb index e792bb0d52b6..7f776a73487f 100644 --- a/Source/JavaScriptCore/offlineasm/arm64.rb +++ b/Source/JavaScriptCore/offlineasm/arm64.rb @@ -958,18 +958,6 @@ def lowerARM64 emitARM64Add("add", operands, :quad) when 'addlshiftp' emitARM64AddShift("add", operands, :quad) - when 'addqs' - emitARM64Add("adds", operands, :quad) - when 'subqs' - emitARM64Sub("subs", operands, :quad) - when "adcq" - emitARM64TAC("adc", operands, :quad) - when "sbcq" - emitARM64TAC("sbc", operands, :quad) - when "smulhq" - emitARM64TAC("smulh", operands, :quad) - when "umulhq" - emitARM64TAC("umulh", operands, :quad) when "andi" emitARM64TAC("and", operands, :word) when "andp" diff --git a/Source/JavaScriptCore/offlineasm/instructions.rb b/Source/JavaScriptCore/offlineasm/instructions.rb index db7a2c899591..da4d19ab59f0 100644 --- a/Source/JavaScriptCore/offlineasm/instructions.rb +++ b/Source/JavaScriptCore/offlineasm/instructions.rb @@ -383,10 +383,6 @@ "atomicloadi", "atomicloadq", "fence", - "adcq", - "sbcq", - "umulhq", - "smulhq", ] X86_SIMD_INSTRUCTIONS = @@ -468,13 +464,7 @@ "storepaird", "loadpairv", "storepairv", - "addlshiftp", - "addqs", - "subqs", - "adcq", - "sbcq", - "smulhq", - "umulhq" + "addlshiftp" ] ARM64_SIMD_INSTRUCTIONS = diff --git a/Source/JavaScriptCore/offlineasm/x86.rb b/Source/JavaScriptCore/offlineasm/x86.rb index efaf5448c88b..50ae09c235d9 100644 --- a/Source/JavaScriptCore/offlineasm/x86.rb +++ b/Source/JavaScriptCore/offlineasm/x86.rb @@ -1654,14 +1654,6 @@ def lowerX86Common $asm.puts "idiv#{x86Suffix(:quad)} #{operands[0].x86Operand(:quad)}" when "udivq" $asm.puts "div#{x86Suffix(:quad)} #{operands[0].x86Operand(:quad)}" - when "adcq" - $asm.puts "adcq #{x86Operands(:quad, :quad)}" - when "sbcq" - $asm.puts "sbbq #{x86Operands(:quad, :quad)}" - when "umulhq" - $asm.puts "mulq #{operands[0].x86Operand(:quad)}" - when "smulhq" - $asm.puts "imulq #{operands[0].x86Operand(:quad)}" when "popcnti" $asm.puts "popcnt#{x86Suffix(:int)} #{x86Operands(:int, :int)}" when "popcntq" diff --git a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp index 42009e9b0ecb..7f0496e69eea 100644 --- a/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp +++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp @@ -1031,9 +1031,9 @@ PartialResult BBQJIT::addLocal(Type type, uint32_t numberOfLocals) // Globals -Value BBQJIT::topValue(TypeKind type, unsigned offset) +Value BBQJIT::topValue(TypeKind type) { - return Value::fromTemp(type, currentControlData().enclosedHeight() + currentControlData().implicitSlots() + m_parser->expressionStack().size() + offset); + return Value::fromTemp(type, currentControlData().enclosedHeight() + currentControlData().implicitSlots() + m_parser->expressionStack().size()); } Value BBQJIT::exception(const ControlData& control) @@ -4330,7 +4330,7 @@ template void BBQJIT::returnValuesFromCall(Vector& results, const FunctionSignature& functionType, const CallInformation& callInfo) { for (size_t i = 0; i < callInfo.results.size(); i ++) { - Value result = topValue(functionType.returnType(i).kind, i); + Value result = Value::fromTemp(functionType.returnType(i).kind, currentControlData().enclosedHeight() + currentControlData().implicitSlots() + m_parser->expressionStack().size() + i); Location returnLocation = Location::fromArgumentLocation(callInfo.results[i], result.type()); if (returnLocation.isRegister()) { RegisterBinding currentBinding; diff --git a/Source/JavaScriptCore/wasm/WasmBBQJIT.h b/Source/JavaScriptCore/wasm/WasmBBQJIT.h index 15a502d297b8..0d1b4c63da60 100644 --- a/Source/JavaScriptCore/wasm/WasmBBQJIT.h +++ b/Source/JavaScriptCore/wasm/WasmBBQJIT.h @@ -1133,7 +1133,7 @@ class BBQJIT { // Globals - Value NODELETE topValue(TypeKind type, unsigned offset = 0); + Value NODELETE topValue(TypeKind type); Value NODELETE exception(const ControlData& control); @@ -1405,11 +1405,6 @@ class BBQJIT { [[nodiscard]] PartialResult truncTrapping(OpType truncationOp, Value operand, Value& result, Type returnType, Type operandType); [[nodiscard]] PartialResult truncSaturated(Ext1OpType truncationOp, Value operand, Value& result, Type returnType, Type operandType); - // Wide arithmetic - [[nodiscard]] PartialResult addI64Add128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi); - [[nodiscard]] PartialResult addI64Sub128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi); - [[nodiscard]] PartialResult addI64MulWideS(Value lhs, Value rhs, Value& resultLo, Value& resultHi); - [[nodiscard]] PartialResult addI64MulWideU(Value lhs, Value rhs, Value& resultLo, Value& resultHi); // GC [[nodiscard]] PartialResult addRefI31(ExpressionType value, ExpressionType& result); diff --git a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp index 6ed7a0895d38..31c1d9106e59 100644 --- a/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp +++ b/Source/JavaScriptCore/wasm/WasmBBQJIT64.cpp @@ -2461,202 +2461,6 @@ void BBQJIT::emitRefTestOrCast(CastKind castKind, const TypedExpression& typedVa ); } -// Wide Arithmetic - -[[nodiscard]] PartialResult BBQJIT::addI64Add128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi) -{ - Location lhsLoLocation = loadIfNecessary(lhsLo); - Location lhsHiLocation = loadIfNecessary(lhsHi); - Location rhsLoLocation = loadIfNecessary(rhsLo); - Location rhsHiLocation = loadIfNecessary(rhsHi); - consume(lhsLo); - consume(lhsHi); - consume(rhsLo); - consume(rhsHi); - - resultLo = topValue(TypeKind::I64); - resultHi = topValue(TypeKind::I64, 1); - Location resultLoLocation = allocate(resultLo); - Location resultHiLocation = allocate(resultHi); - - LOG_INSTRUCTION("I64Add128", lhsLo, lhsLoLocation, lhsHi, lhsHiLocation, rhsLo, rhsLoLocation, rhsHi, rhsHiLocation, RESULT(resultLo), RESULT(resultHi)); - - if (resultLoLocation.asGPR() == lhsHiLocation.asGPR()) { - m_jit.move(lhsHiLocation.asGPR(), wasmScratchGPR); - lhsHiLocation = Location::fromGPR(wasmScratchGPR); - } else if (resultLoLocation.asGPR() == rhsHiLocation.asGPR()) { - m_jit.move(rhsHiLocation.asGPR(), wasmScratchGPR); - rhsHiLocation = Location::fromGPR(wasmScratchGPR); - } - -#if CPU(X86_64) - if (resultLoLocation.asGPR() == rhsLoLocation.asGPR()) - m_jit.add64(lhsLoLocation.asGPR(), resultLoLocation.asGPR()); - else { - m_jit.move(lhsLoLocation.asGPR(), resultLoLocation.asGPR()); - m_jit.add64(rhsLoLocation.asGPR(), resultLoLocation.asGPR()); - } - if (resultHiLocation.asGPR() == rhsHiLocation.asGPR()) - m_jit.addCarry64(lhsHiLocation.asGPR(), resultHiLocation.asGPR()); - else { - m_jit.move(lhsHiLocation.asGPR(), resultHiLocation.asGPR()); - m_jit.addCarry64(rhsHiLocation.asGPR(), resultHiLocation.asGPR()); - } -#elif CPU(ARM64) - m_jit.add64AndSetFlags(lhsLoLocation.asGPR(), rhsLoLocation.asGPR(), resultLoLocation.asGPR()); - m_jit.addCarry64(lhsHiLocation.asGPR(), rhsHiLocation.asGPR(), resultHiLocation.asGPR()); -#endif - - return { }; -} - -[[nodiscard]] PartialResult BBQJIT::addI64Sub128(Value lhsLo, Value lhsHi, Value rhsLo, Value rhsHi, Value& resultLo, Value& resultHi) -{ - Location lhsLoLocation = loadIfNecessary(lhsLo); - Location lhsHiLocation = loadIfNecessary(lhsHi); - Location rhsLoLocation = loadIfNecessary(rhsLo); - Location rhsHiLocation = loadIfNecessary(rhsHi); - consume(lhsLo); - consume(lhsHi); - consume(rhsLo); - consume(rhsHi); - - resultLo = topValue(TypeKind::I64); - resultHi = topValue(TypeKind::I64, 1); - Location resultLoLocation = allocate(resultLo); - Location resultHiLocation = allocate(resultHi); - - LOG_INSTRUCTION("I64Sub128", lhsLo, lhsLoLocation, lhsHi, lhsHiLocation, rhsLo, rhsLoLocation, rhsHi, rhsHiLocation, RESULT(resultLo), RESULT(resultHi)); - - if (resultLoLocation.asGPR() == lhsHiLocation.asGPR()) { - m_jit.move(lhsHiLocation.asGPR(), wasmScratchGPR); - lhsHiLocation = Location::fromGPR(wasmScratchGPR); - } else if (resultLoLocation.asGPR() == rhsHiLocation.asGPR()) { - m_jit.move(rhsHiLocation.asGPR(), wasmScratchGPR); - rhsHiLocation = Location::fromGPR(wasmScratchGPR); - } - -#if CPU(X86_64) - if (resultLoLocation.asGPR() == rhsLoLocation.asGPR()) { - m_jit.move(lhsLoLocation.asGPR(), wasmScratchGPR); - m_jit.sub64(rhsLoLocation.asGPR(), wasmScratchGPR); - m_jit.move(wasmScratchGPR, resultLoLocation.asGPR()); - } else { - m_jit.move(lhsLoLocation.asGPR(), resultLoLocation.asGPR()); - m_jit.sub64(rhsLoLocation.asGPR(), resultLoLocation.asGPR()); - } - if (resultHiLocation.asGPR() == rhsHiLocation.asGPR()) { - m_jit.move(lhsHiLocation.asGPR(), wasmScratchGPR); - m_jit.subBorrow64(rhsHiLocation.asGPR(), wasmScratchGPR); - m_jit.move(wasmScratchGPR, resultHiLocation.asGPR()); - } else { - m_jit.move(lhsHiLocation.asGPR(), resultHiLocation.asGPR()); - m_jit.subBorrow64(rhsHiLocation.asGPR(), resultHiLocation.asGPR()); - } -#elif CPU(ARM64) - m_jit.sub64AndSetFlags(lhsLoLocation.asGPR(), rhsLoLocation.asGPR(), resultLoLocation.asGPR()); - m_jit.subBorrow64(lhsHiLocation.asGPR(), rhsHiLocation.asGPR(), resultHiLocation.asGPR()); -#endif - - return { }; -} - -[[nodiscard]] PartialResult BBQJIT::addI64MulWideU(Value lhs, Value rhs, Value& resultLo, Value& resultHi) -{ - Location lhsLocation = loadIfNecessary(lhs); - Location rhsLocation = loadIfNecessary(rhs); - consume(lhs); - consume(rhs); - -#if CPU(X86_64) - for (JSC::Reg reg : clobbersForDivX86()) - clobber(reg); -#endif - - resultLo = topValue(TypeKind::I64); - resultHi = topValue(TypeKind::I64, 1); - Location resultLoLocation = allocate(resultLo); - Location resultHiLocation = allocate(resultHi); - - LOG_INSTRUCTION("I64MulWideU", lhs, lhsLocation, rhs, rhsLocation, RESULT(resultLo), RESULT(resultHi)); - -#if CPU(X86_64) - // x86 mul: rax * src -> rdx:rax - m_jit.move(lhsLocation.asGPR(), X86Registers::eax); - m_jit.x86UMulHigh64(rhsLocation.asGPR(), X86Registers::eax, X86Registers::edx); - if (resultLoLocation.asGPR() != X86Registers::edx) { - m_jit.move(X86Registers::eax, resultLoLocation.asGPR()); - m_jit.move(X86Registers::edx, resultHiLocation.asGPR()); - } else { - m_jit.move(X86Registers::edx, resultHiLocation.asGPR()); - m_jit.move(X86Registers::eax, resultLoLocation.asGPR()); - } -#elif CPU(ARM64) - if (resultHiLocation.asGPR() == lhsLocation.asGPR()) { - m_jit.move(lhsLocation.asGPR(), wasmScratchGPR); - m_jit.uMulHigh64(wasmScratchGPR, rhsLocation.asGPR(), resultHiLocation.asGPR()); - m_jit.mul64(wasmScratchGPR, rhsLocation.asGPR(), resultLoLocation.asGPR()); - } else if (resultHiLocation.asGPR() == rhsLocation.asGPR()) { - m_jit.move(rhsLocation.asGPR(), wasmScratchGPR); - m_jit.uMulHigh64(lhsLocation.asGPR(), wasmScratchGPR, resultHiLocation.asGPR()); - m_jit.mul64(lhsLocation.asGPR(), wasmScratchGPR, resultLoLocation.asGPR()); - } else { - m_jit.uMulHigh64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultHiLocation.asGPR()); - m_jit.mul64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultLoLocation.asGPR()); - } -#endif - - return { }; -} - -[[nodiscard]] PartialResult BBQJIT::addI64MulWideS(Value lhs, Value rhs, Value& resultLo, Value& resultHi) -{ - Location lhsLocation = loadIfNecessary(lhs); - Location rhsLocation = loadIfNecessary(rhs); - consume(lhs); - consume(rhs); - -#if CPU(X86_64) - for (JSC::Reg reg : clobbersForDivX86()) - clobber(reg); -#endif - - resultLo = topValue(TypeKind::I64); - resultHi = topValue(TypeKind::I64, 1); - Location resultLoLocation = allocate(resultLo); - Location resultHiLocation = allocate(resultHi); - - LOG_INSTRUCTION("I64MulWideS", lhs, lhsLocation, rhs, rhsLocation, RESULT(resultLo), RESULT(resultHi)); - -#if CPU(X86_64) - // x86 imul: rax * src -> rdx:rax (signed) - m_jit.move(lhsLocation.asGPR(), X86Registers::eax); - m_jit.x86MulHigh64(rhsLocation.asGPR(), X86Registers::eax, X86Registers::edx); - if (resultLoLocation.asGPR() != X86Registers::edx) { - m_jit.move(X86Registers::eax, resultLoLocation.asGPR()); - m_jit.move(X86Registers::edx, resultHiLocation.asGPR()); - } else { - m_jit.move(X86Registers::edx, resultHiLocation.asGPR()); - m_jit.move(X86Registers::eax, resultLoLocation.asGPR()); - } -#elif CPU(ARM64) - if (resultHiLocation.asGPR() == lhsLocation.asGPR()) { - m_jit.move(lhsLocation.asGPR(), wasmScratchGPR); - m_jit.mulHigh64(wasmScratchGPR, rhsLocation.asGPR(), resultHiLocation.asGPR()); - m_jit.mul64(wasmScratchGPR, rhsLocation.asGPR(), resultLoLocation.asGPR()); - } else if (resultHiLocation.asGPR() == rhsLocation.asGPR()) { - m_jit.move(rhsLocation.asGPR(), wasmScratchGPR); - m_jit.mulHigh64(lhsLocation.asGPR(), wasmScratchGPR, resultHiLocation.asGPR()); - m_jit.mul64(lhsLocation.asGPR(), wasmScratchGPR, resultLoLocation.asGPR()); - } else { - m_jit.mulHigh64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultHiLocation.asGPR()); - m_jit.mul64(lhsLocation.asGPR(), rhsLocation.asGPR(), resultLoLocation.asGPR()); - } -#endif - - return { }; -} - void BBQJIT::emitThrowOnNullReference(ExceptionType type, Location ref) { recordJumpToThrowException(type, m_jit.branchIfNull(ref.asGPR())); diff --git a/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp b/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp index 294d5be676f2..73bd53ba3a63 100644 --- a/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp +++ b/Source/JavaScriptCore/wasm/WasmConstExprGenerator.cpp @@ -294,10 +294,6 @@ class ConstExprGenerator { [[nodiscard]] PartialResult atomicFence(ExtAtomicOpType, uint8_t) CONST_EXPR_STUB [[nodiscard]] PartialResult truncTrapping(OpType, ExpressionType, ExpressionType&, Type, Type) CONST_EXPR_STUB [[nodiscard]] PartialResult truncSaturated(Ext1OpType, ExpressionType, ExpressionType&, Type, Type) CONST_EXPR_STUB - [[nodiscard]] PartialResult addI64Add128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB - [[nodiscard]] PartialResult addI64Sub128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB - [[nodiscard]] PartialResult addI64MulWideS(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB - [[nodiscard]] PartialResult addI64MulWideU(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) CONST_EXPR_STUB [[nodiscard]] PartialResult NODELETE addRefI31(ExpressionType value, ExpressionType& result) { diff --git a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp index 381f35d9bf40..ffb98d359244 100644 --- a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp +++ b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.cpp @@ -58,95 +58,85 @@ void FunctionIPIntMetadataGenerator::addLength(size_t length) WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, instructionLength, IPInt::InstructionLengthMetadata); } -void FunctionIPIntMetadataGenerator::addMemorySize(uint8_t memoryIndex) +void FunctionIPIntMetadataGenerator::addMemoryIndex(uint8_t memoryIndex) { - IPInt::MemorySizeMetadata md { + IPInt::MemoryIndexMetadata mdConst { .memoryIndex = memoryIndex }; - appendMetadata(md); -} - -void FunctionIPIntMetadataGenerator::addMemoryGrow(uint8_t memoryIndex) -{ - IPInt::MemoryGrowMetadata md { - .memoryIndex = memoryIndex - }; - appendMetadata(md); -} - -void FunctionIPIntMetadataGenerator::addTableAccess(uint32_t index, size_t length) -{ - IPInt::TableAccessMetadata md { - .index = index, - .instructionLength = { .length = safeCast(length) } - }; - appendMetadata(md); -} - -void FunctionIPIntMetadataGenerator::addRefFunc(uint32_t index, size_t length) -{ - IPInt::RefFuncMetadata md { - .index = index, - .instructionLength = { .length = safeCast(length) } - }; - appendMetadata(md); -} - -void FunctionIPIntMetadataGenerator::addElemDrop(uint32_t index, size_t length) -{ - IPInt::ElemDropMetadata md { - .index = index, - .instructionLength = { .length = safeCast(length) } - }; - appendMetadata(md); -} - -void FunctionIPIntMetadataGenerator::addDataAccess(uint32_t index, size_t length) -{ - IPInt::DataAccessMetadata md { - .index = index, - .instructionLength = { .length = safeCast(length) } - }; - appendMetadata(md); + size_t size = m_metadata.size(); + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::MemoryIndexMetadata); } -void FunctionIPIntMetadataGenerator::addMemoryInit(uint8_t memoryIndex, uint32_t dataIndex, size_t length) +void FunctionIPIntMetadataGenerator::addLEB128ConstantInt32AndLength(uint32_t value, size_t length) { - IPInt::MemoryInitMetadata md { - .memoryIndex = memoryIndex, - .dataIndex = dataIndex, - .instructionLength = { .length = safeCast(length) } + IPInt::Const32Metadata mdConst { + .instructionLength = { .length = safeCast(length) }, + .value = value }; - appendMetadata(md); + size_t size = m_metadata.size(); + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const32Metadata); } -void FunctionIPIntMetadataGenerator::addMemoryFill(uint8_t memoryIndex, size_t length) +void FunctionIPIntMetadataGenerator::addLEB128ConstantInt64AndLength(uint64_t value, size_t length) { - IPInt::MemoryFillMetadata md { - .memoryIndex = memoryIndex, + IPInt::Const64Metadata mdConst { + .value = value, .instructionLength = { .length = safeCast(length) } }; - appendMetadata(md); + size_t size = m_metadata.size(); + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const64Metadata); } -void FunctionIPIntMetadataGenerator::addMemoryCopy(uint8_t dstMemoryIndex, uint8_t srcMemoryIndex, size_t length) +void FunctionIPIntMetadataGenerator::addLEB128ConstantAndLengthForType(Type type, uint64_t value, size_t length) { - IPInt::MemoryCopyMetadata md { - .dstMemoryIndex = dstMemoryIndex, - .srcMemoryIndex = srcMemoryIndex, - .instructionLength = { .length = safeCast(length) } - }; - appendMetadata(md); + if (type.isI32()) { + size_t size = m_metadata.size(); + if (length == 2) { + IPInt::InstructionLengthMetadata mdConst { + .length = safeCast((value >> 7) & 1) + }; + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::InstructionLengthMetadata); + } else { + IPInt::Const32Metadata mdConst { + .instructionLength = { .length = safeCast(length) }, + .value = static_cast(value) + }; + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const32Metadata); + } + } else if (type.isI64()) { + size_t size = m_metadata.size(); + IPInt::Const64Metadata mdConst { + .value = static_cast(value), + .instructionLength = { .length = safeCast(length) } + }; + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const64Metadata); + } else if (type.isRef() || type.isRefNull() || type.isFuncref()) { + size_t size = m_metadata.size(); + IPInt::Const32Metadata mdConst { + .instructionLength = { .length = safeCast(length) }, + .value = static_cast(value) + }; + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const32Metadata); + } else if (!type.isF32() && !type.isF64()) + ASSERT_NOT_IMPLEMENTED_YET(); } -void FunctionIPIntMetadataGenerator::addAtomicMemoryAccess(uint8_t memoryIndex, uint64_t offset, size_t length) +void FunctionIPIntMetadataGenerator::addLEB128V128Constant(v128_t value, size_t length) { - IPInt::AtomicMemoryAccessMetadata md { - .memoryIndex = memoryIndex, - .offset = offset, + IPInt::Const128Metadata mdConst { + .value = value, .instructionLength = { .length = safeCast(length) } }; - appendMetadata(md); + size_t size = m_metadata.size(); + m_metadata.grow(size + sizeof(mdConst)); + WRITE_TO_METADATA(m_metadata.mutableSpan().data() + size, mdConst, IPInt::Const128Metadata); } void FunctionIPIntMetadataGenerator::addReturnData(const FunctionSignature& sig, const CallInformation& returnCC) diff --git a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h index 1f2693662e17..70ab084beb13 100644 --- a/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h +++ b/Source/JavaScriptCore/wasm/WasmFunctionIPIntMetadataGenerator.h @@ -117,16 +117,11 @@ class FunctionIPIntMetadataGenerator { }; void addLength(size_t length); - void addMemorySize(uint8_t memoryIndex); - void addMemoryGrow(uint8_t memoryIndex); - void addTableAccess(uint32_t index, size_t length); - void addRefFunc(uint32_t index, size_t length); - void addElemDrop(uint32_t index, size_t length); - void addDataAccess(uint32_t index, size_t length); - void addMemoryInit(uint8_t memoryIndex, uint32_t dataIndex, size_t length); - void addMemoryFill(uint8_t memoryIndex, size_t length); - void addMemoryCopy(uint8_t dstMemoryIndex, uint8_t srcMemoryIndex, size_t length); - void addAtomicMemoryAccess(uint8_t memoryIndex, uint64_t offset, size_t length); + void addMemoryIndex(uint8_t memoryIndex); + void addLEB128ConstantInt32AndLength(uint32_t value, size_t length); + void addLEB128ConstantInt64AndLength(uint64_t value, size_t length); + void addLEB128ConstantAndLengthForType(Type, uint64_t value, size_t length); + void addLEB128V128Constant(v128_t value, size_t length); void addReturnData(const FunctionSignature&, const CallInformation&); FunctionCodeIndex m_functionIndex; diff --git a/Source/JavaScriptCore/wasm/WasmFunctionParser.h b/Source/JavaScriptCore/wasm/WasmFunctionParser.h index 7410635c778e..d9b6d5f01671 100644 --- a/Source/JavaScriptCore/wasm/WasmFunctionParser.h +++ b/Source/JavaScriptCore/wasm/WasmFunctionParser.h @@ -2344,56 +2344,6 @@ FOR_EACH_WASM_MEMORY_STORE_OP(CREATE_CASE) FOR_EACH_WASM_TRUNC_SATURATED_OP(CREATE_CASE) #undef CREATE_CASE - case Ext1OpType::I64Add128: - case Ext1OpType::I64Sub128: { - WASM_PARSER_FAIL_IF(!Options::useWasmWideArithmetic(), "wasm wide arithmetic is not enabled"_s); - - TypedExpression rhsHi; - TypedExpression rhsLo; - TypedExpression lhsHi; - TypedExpression lhsLo; - WASM_TRY_POP_EXPRESSION_STACK_INTO(rhsHi, "i64.add128/sub128"_s); - WASM_TRY_POP_EXPRESSION_STACK_INTO(rhsLo, "i64.add128/sub128"_s); - WASM_TRY_POP_EXPRESSION_STACK_INTO(lhsHi, "i64.add128/sub128"_s); - WASM_TRY_POP_EXPRESSION_STACK_INTO(lhsLo, "i64.add128/sub128"_s); - WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != lhsLo.type().kind, "i64.add128/sub128 lhs_lo to type "_s, lhsLo.type(), " expected "_s, TypeKind::I64); - WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != lhsHi.type().kind, "i64.add128/sub128 lhs_hi to type "_s, lhsHi.type(), " expected "_s, TypeKind::I64); - WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != rhsLo.type().kind, "i64.add128/sub128 rhs_lo to type "_s, rhsLo.type(), " expected "_s, TypeKind::I64); - WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != rhsHi.type().kind, "i64.add128/sub128 rhs_hi to type "_s, rhsHi.type(), " expected "_s, TypeKind::I64); - - ExpressionType resultLo; - ExpressionType resultHi; - if (op == Ext1OpType::I64Add128) - WASM_TRY_ADD_TO_CONTEXT(addI64Add128(lhsLo, lhsHi, rhsLo, rhsHi, resultLo, resultHi)); - else - WASM_TRY_ADD_TO_CONTEXT(addI64Sub128(lhsLo, lhsHi, rhsLo, rhsHi, resultLo, resultHi)); - m_expressionStack.constructAndAppend(Types::I64, resultLo); - m_expressionStack.constructAndAppend(Types::I64, resultHi); - break; - } - - case Ext1OpType::I64MulWideS: - case Ext1OpType::I64MulWideU: { - WASM_PARSER_FAIL_IF(!Options::useWasmWideArithmetic(), "wasm wide arithmetic is not enabled"_s); - - TypedExpression rhs; - TypedExpression lhs; - WASM_TRY_POP_EXPRESSION_STACK_INTO(rhs, "i64.mul_wide"_s); - WASM_TRY_POP_EXPRESSION_STACK_INTO(lhs, "i64.mul_wide"_s); - WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != lhs.type().kind, "i64.mul_wide lhs to type "_s, lhs.type(), " expected "_s, TypeKind::I64); - WASM_VALIDATOR_FAIL_IF(TypeKind::I64 != rhs.type().kind, "i64.mul_wide rhs to type "_s, rhs.type(), " expected "_s, TypeKind::I64); - - ExpressionType resultLo; - ExpressionType resultHi; - if (op == Ext1OpType::I64MulWideS) - WASM_TRY_ADD_TO_CONTEXT(addI64MulWideS(lhs, rhs, resultLo, resultHi)); - else - WASM_TRY_ADD_TO_CONTEXT(addI64MulWideU(lhs, rhs, resultLo, resultHi)); - m_expressionStack.constructAndAppend(Types::I64, resultLo); - m_expressionStack.constructAndAppend(Types::I64, resultHi); - break; - } - default: WASM_PARSER_FAIL_IF(true, "invalid 0xfc extended op "_s, m_currentExtOp); break; diff --git a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp index e48de1a9e370..8c8aaee111c4 100644 --- a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp +++ b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.cpp @@ -328,13 +328,6 @@ class IPIntGenerator { [[nodiscard]] PartialResult truncSaturated(Ext1OpType, ExpressionType, ExpressionType&, Type, Type); - // Wide arithmetic - - [[nodiscard]] PartialResult addI64Add128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&); - [[nodiscard]] PartialResult addI64Sub128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&); - [[nodiscard]] PartialResult addI64MulWideS(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&); - [[nodiscard]] PartialResult addI64MulWideU(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&); - // GC [[nodiscard]] PartialResult addRefI31(ExpressionType, ExpressionType&); @@ -701,46 +694,55 @@ IPIntGenerator::IPIntGenerator(ModuleInformation& info, FunctionCodeIndex functi return { }; } -Value IPIntGenerator::addConstant(Type, uint64_t) +Value IPIntGenerator::addConstant(Type type, uint64_t value) { changeStackSize(1); + m_metadata->addLEB128ConstantAndLengthForType(type, value, getCurrentInstructionLength()); return { }; } // SIMD -[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoad(ExpressionType, uint32_t, ExpressionType&, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoad(ExpressionType, uint32_t offset, ExpressionType&, uint8_t memoryIndex) { changeStackSize(0); // Pop address, push v128 value (net change = 0) + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } -[[nodiscard]] PartialResult IPIntGenerator::addSIMDStore(ExpressionType, ExpressionType, uint32_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::addSIMDStore(ExpressionType, ExpressionType, uint32_t offset, uint8_t memoryIndex) { changeStackSize(-2); // Pop address and v128 value + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDSplat(SIMDLane, ExpressionType, ExpressionType&) { + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDShuffle(v128_t, ExpressionType, ExpressionType, ExpressionType&) { changeStackSize(-1); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDShift(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, ExpressionType&) { changeStackSize(-1); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDExtmul(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, ExpressionType&) { changeStackSize(-1); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } @@ -749,15 +751,19 @@ Value IPIntGenerator::addConstant(Type, uint64_t) return addSIMDLoad(pointer, offset, result, memoryIndex); } -[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoadLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t, uint8_t, ExpressionType&, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::addSIMDLoadLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t offset, uint8_t, ExpressionType&, uint8_t memoryIndex) { changeStackSize(-1); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } -[[nodiscard]] PartialResult IPIntGenerator::addSIMDStoreLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t, uint8_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::addSIMDStoreLane(SIMDLaneOperation, ExpressionType, ExpressionType, uint32_t offset, uint8_t, uint8_t memoryIndex) { changeStackSize(-2); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } @@ -774,33 +780,39 @@ Value IPIntGenerator::addConstant(Type, uint64_t) IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) { changeStackSize(1); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDExtractLane(SIMDInfo, uint8_t, ExpressionType, ExpressionType&) { + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDReplaceLane(SIMDInfo, uint8_t, ExpressionType, ExpressionType, ExpressionType&) { changeStackSize(-1); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDI_V(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType&) { + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDV_V(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType&) { + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addSIMDBitwiseSelect(ExpressionType, ExpressionType, ExpressionType, ExpressionType&) { changeStackSize(-2); // 3 operands, 1 result + m_metadata->addLength(getCurrentInstructionLength()); return { }; } @@ -808,6 +820,7 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) [[nodiscard]] PartialResult IPIntGenerator::addSIMDRelOp(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, B3::Air::Arg, ExpressionType&) { changeStackSize(-1); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } #endif @@ -815,6 +828,7 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) [[nodiscard]] PartialResult IPIntGenerator::addSIMDV_VV(SIMDLaneOperation, SIMDInfo, ExpressionType, ExpressionType, ExpressionType&) { changeStackSize(-1); // Pop two v128 values, push one v128 value + m_metadata->addLength(getCurrentInstructionLength()); return { }; } @@ -834,7 +848,7 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) [[nodiscard]] PartialResult IPIntGenerator::addRefFunc(FunctionSpaceIndex index, ExpressionType&) { changeStackSize(1); - m_metadata->addRefFunc(index, getCurrentInstructionLength()); + m_metadata->addLEB128ConstantInt32AndLength(index, getCurrentInstructionLength()); return { }; } @@ -853,14 +867,14 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) [[nodiscard]] PartialResult IPIntGenerator::addTableGet(unsigned index, ExpressionType, ExpressionType&) { - m_metadata->addTableAccess(index, getCurrentInstructionLength()); + m_metadata->addLEB128ConstantInt32AndLength(index, getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addTableSet(unsigned index, ExpressionType, ExpressionType) { changeStackSize(-2); - m_metadata->addTableAccess(index, getCurrentInstructionLength()); + m_metadata->addLEB128ConstantInt32AndLength(index, getCurrentInstructionLength()); return { }; } @@ -878,14 +892,14 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) [[nodiscard]] PartialResult IPIntGenerator::addElemDrop(unsigned elementIndex) { - m_metadata->addElemDrop(elementIndex, getCurrentInstructionLength()); + m_metadata->addLEB128ConstantInt32AndLength(elementIndex, getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addTableSize(unsigned tableIndex, ExpressionType&) { changeStackSize(1); - m_metadata->addTableAccess(tableIndex, getCurrentInstructionLength()); + m_metadata->addLEB128ConstantInt32AndLength(tableIndex, getCurrentInstructionLength()); return { }; } @@ -1073,14 +1087,24 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) // Loads and Stores -[[nodiscard]] PartialResult IPIntGenerator::load(LoadOpType, ExpressionType, ExpressionType&, uint64_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::load(LoadOpType, ExpressionType, ExpressionType&, uint64_t offset, uint8_t memoryIndex) { + m_metadata->addMemoryIndex(memoryIndex); + if (m_info.memory(memoryIndex).isMemory64()) + m_metadata->addLEB128ConstantInt64AndLength(offset, getCurrentInstructionLength()); + else + m_metadata->addLEB128ConstantInt32AndLength(static_cast(offset), getCurrentInstructionLength()); return { }; } -[[nodiscard]] PartialResult IPIntGenerator::store(StoreOpType, ExpressionType, ExpressionType, uint64_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::store(StoreOpType, ExpressionType, ExpressionType, uint64_t offset, uint8_t memoryIndex) { changeStackSize(-2); + m_metadata->addMemoryIndex(memoryIndex); + if (m_info.memory(memoryIndex).isMemory64()) + m_metadata->addLEB128ConstantInt64AndLength(offset, getCurrentInstructionLength()); + else + m_metadata->addLEB128ConstantInt32AndLength(static_cast(offset), getCurrentInstructionLength()); return { }; } @@ -1088,85 +1112,100 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) [[nodiscard]] PartialResult IPIntGenerator::addGrowMemory(ExpressionType, ExpressionType&, uint8_t memoryIndex) { - m_metadata->addMemoryGrow(memoryIndex); + m_metadata->addMemoryIndex(memoryIndex); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addCurrentMemory(ExpressionType&, uint8_t memoryIndex) { changeStackSize(1); - m_metadata->addMemorySize(memoryIndex); + m_metadata->addMemoryIndex(memoryIndex); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addMemoryFill(ExpressionType, ExpressionType, ExpressionType, uint8_t memoryIndex) { changeStackSize(-3); - m_metadata->addMemoryFill(memoryIndex, getCurrentInstructionLength()); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addMemoryCopy(ExpressionType, ExpressionType, ExpressionType, uint8_t dstMemoryIndex, uint8_t srcMemoryIndex) { changeStackSize(-3); - m_metadata->addMemoryCopy(dstMemoryIndex, srcMemoryIndex, getCurrentInstructionLength()); + m_metadata->addMemoryIndex(dstMemoryIndex); + m_metadata->addMemoryIndex(srcMemoryIndex); + m_metadata->addLength(getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addMemoryInit(unsigned dataIndex, ExpressionType, ExpressionType, ExpressionType, uint8_t memoryIndex) { changeStackSize(-3); - m_metadata->addMemoryInit(memoryIndex, dataIndex, getCurrentInstructionLength()); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(dataIndex, getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::addDataDrop(unsigned dataIndex) { - m_metadata->addDataAccess(dataIndex, getCurrentInstructionLength()); + m_metadata->addLEB128ConstantInt32AndLength(dataIndex, getCurrentInstructionLength()); return { }; } // Atomics -[[nodiscard]] PartialResult IPIntGenerator::atomicLoad(ExtAtomicOpType, Type, ExpressionType, ExpressionType&, uint32_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::atomicLoad(ExtAtomicOpType, Type, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex) { + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } -[[nodiscard]] PartialResult IPIntGenerator::atomicStore(ExtAtomicOpType, Type, ExpressionType, ExpressionType, uint32_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::atomicStore(ExtAtomicOpType, Type, ExpressionType, ExpressionType, uint32_t offset, uint8_t memoryIndex) { changeStackSize(-2); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } -[[nodiscard]] PartialResult IPIntGenerator::atomicBinaryRMW(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType&, uint32_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::atomicBinaryRMW(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex) { changeStackSize(-1); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } -[[nodiscard]] PartialResult IPIntGenerator::atomicCompareExchange(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, uint32_t, uint8_t) +[[nodiscard]] PartialResult IPIntGenerator::atomicCompareExchange(ExtAtomicOpType, Type, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex) { changeStackSize(-2); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::atomicWait(ExtAtomicOpType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex) { changeStackSize(-2); - m_metadata->addAtomicMemoryAccess(memoryIndex, offset, getCurrentInstructionLength()); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::atomicNotify(ExtAtomicOpType, ExpressionType, ExpressionType, ExpressionType&, uint32_t offset, uint8_t memoryIndex) { changeStackSize(-1); - m_metadata->addAtomicMemoryAccess(memoryIndex, offset, getCurrentInstructionLength()); + m_metadata->addMemoryIndex(memoryIndex); + m_metadata->addLEB128ConstantInt32AndLength(offset, getCurrentInstructionLength()); return { }; } [[nodiscard]] PartialResult IPIntGenerator::atomicFence(ExtAtomicOpType, uint8_t) { + m_metadata->addLength(getCurrentInstructionLength()); return { }; } @@ -2036,30 +2075,6 @@ IPIntGenerator::ExpressionType IPIntGenerator::addSIMDConstant(v128_t) return { }; } -[[nodiscard]] PartialResult IPIntGenerator::addI64Add128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) -{ - changeStackSize(-2); // pops 4, pushes 2 - return { }; -} - -[[nodiscard]] PartialResult IPIntGenerator::addI64Sub128(ExpressionType, ExpressionType, ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) -{ - changeStackSize(-2); // pops 4, pushes 2 - return { }; -} - -[[nodiscard]] PartialResult IPIntGenerator::addI64MulWideS(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) -{ - changeStackSize(0); // pops 2, pushes 2 - return { }; -} - -[[nodiscard]] PartialResult IPIntGenerator::addI64MulWideU(ExpressionType, ExpressionType, ExpressionType&, ExpressionType&) -{ - changeStackSize(0); // pops 2, pushes 2 - return { }; -} - // Conversions [[nodiscard]] PartialResult IPIntGenerator::addI32WrapI64(ExpressionType, ExpressionType&) diff --git a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h index 67acb64512c0..824c44a04c47 100644 --- a/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h +++ b/Source/JavaScriptCore/wasm/WasmIPIntGenerator.h @@ -82,6 +82,10 @@ struct InstructionLengthMetadata { uint8_t length; // 1B for length of current instruction }; +struct MemoryIndexMetadata { + uint8_t memoryIndex; // 1B for memory index (JS embedding of wasm is limited to 100 memories) +}; + struct BlockMetadata { // Field order is significant, both may be loaded with one 'loadpairi' instruction. // Negative deltas are possible for some Wasm instructions and require sign extension to 64b before the addition. @@ -133,60 +137,25 @@ struct GlobalMetadata { uint8_t isRef; // 1B for ref flag }; -// Metadata for instructions that pass a single index/offset to a C call. -// Each category gets its own named type. - -struct TableAccessMetadata { - uint32_t index; // 4B for table index - InstructionLengthMetadata instructionLength; -}; - -struct RefFuncMetadata { - uint32_t index; // 4B for function space index - InstructionLengthMetadata instructionLength; -}; - -struct ElemDropMetadata { - uint32_t index; // 4B for element index - InstructionLengthMetadata instructionLength; -}; +// Constant metadata structures -struct DataAccessMetadata { - uint32_t index; // 4B for data index +struct Const32Metadata { + // instructionLength needs to go first because we encode small + // i32 as just instructionLength with the value embedded in bytecode. InstructionLengthMetadata instructionLength; + uint32_t value; }; -struct MemoryInitMetadata { - uint8_t memoryIndex; - uint32_t dataIndex; // 4B for data index +struct Const64Metadata { + uint64_t value; InstructionLengthMetadata instructionLength; }; -struct MemoryFillMetadata { - uint8_t memoryIndex; +struct Const128Metadata { + v128_t value; InstructionLengthMetadata instructionLength; }; -struct MemoryCopyMetadata { - uint8_t dstMemoryIndex; - uint8_t srcMemoryIndex; - InstructionLengthMetadata instructionLength; -}; - -struct AtomicMemoryAccessMetadata { - uint8_t memoryIndex; - uint64_t offset; - InstructionLengthMetadata instructionLength; -}; - -struct MemorySizeMetadata { - uint8_t memoryIndex; -}; - -struct MemoryGrowMetadata { - uint8_t memoryIndex; -}; - struct TableInitMetadata { uint32_t elementIndex; // 4B for index of element uint32_t tableIndex; // 4B for index of table diff --git a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp index 5846fc50ea0a..ca23c8915e62 100644 --- a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp +++ b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.cpp @@ -63,33 +63,6 @@ namespace JSC { namespace IPInt { return encodeResult(first, second); \ } while (false) -static constexpr size_t ipintCalleeSaveSpaceStackAligned = WTF::roundUpToMultipleOf((Wasm::numberOfIPIntCalleeSaveRegisters + Wasm::numberOfIPIntInternalRegisters) * sizeof(Register)); -static constexpr size_t ipintLocalsBaseOffset = ipintCalleeSaveSpaceStackAligned + IPInt::LOCAL_SIZE; - -IPIntLocal* FrameAccess::localBase() -{ - // Points to local[0], matching assembly's CFR - IPIntLocalsBaseOffset. - return reinterpret_cast(reinterpret_cast(m_callFrame) - ipintLocalsBaseOffset); -} - -IPIntLocal* FrameAccess::localSlot(unsigned index) -{ - return &localBase()[-static_cast(index)]; -} - -IPIntLocal* FrameAccess::rethrowSlot(unsigned index) -{ - return &localBase()[-static_cast(m_callee->localSizeToAlloc() + index)]; -} - -IPIntStackEntry* FrameAccess::stackEnd() -{ - // CFR - calleeSaveSpace - (localSizeToAlloc + rethrowSlots) * LocalSize - return reinterpret_cast( - reinterpret_cast(m_callFrame) - ipintCalleeSaveSpaceStackAligned - - (m_callee->localSizeToAlloc() + m_callee->rethrowSlots()) * IPInt::LOCAL_SIZE); -} - #define WASM_CALL_RETURN(targetInstance, callTarget) do { \ static_assert(callTarget.getTag() == WasmEntryPtrTag); \ callTarget.validate(); \ @@ -259,7 +232,7 @@ WASM_IPINT_EXTERN_CPP_DECL(prologue_osr, CallFrame* callFrame) } // This needs to be kept in sync with BBQJIT::makeStackMap. -static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopOSR(Wasm::IPIntCallee* ipintCallee, Wasm::BBQCallee* bbqCallee, JSWebAssemblyInstance* instance, const Wasm::IPIntTierUpCounter::OSREntryData& osrEntryData, CallFrame* callFrame, IPIntStackEntry* sp) +static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopOSR(Wasm::IPIntCallee* ipintCallee, Wasm::BBQCallee* bbqCallee, JSWebAssemblyInstance* instance, const Wasm::IPIntTierUpCounter::OSREntryData& osrEntryData, IPIntLocal* pl) { ASSERT(bbqCallee->compilationMode() == Wasm::CompilationMode::BBQMode); size_t osrEntryScratchBufferSize = bbqCallee->osrEntryScratchBufferSize(); @@ -270,8 +243,8 @@ static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopO if (!buffer) return nullptr; auto* currentEntry = buffer; - auto copyValueToBuffer = [&](const auto& entry) ALWAYS_INLINE_LAMBDA { - *std::bit_cast(currentEntry++) = entry.v128; + auto copyValueToBuffer = [&](const IPIntLocal& local) ALWAYS_INLINE_LAMBDA { + *std::bit_cast(currentEntry++) = local.v128; }; // The loop index isn't really an IPIntLocal value, but it occupies the first slot of the OSR scratch buffer @@ -280,14 +253,13 @@ static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopO loopIndexLocal.v128.u64x2[1] = 0; copyValueToBuffer(loopIndexLocal); - FrameAccess frame(callFrame, ipintCallee); for (uint32_t i = 0; i < ipintCallee->numLocals(); ++i) - copyValueToBuffer(*frame.localSlot(i)); + copyValueToBuffer(pl[i]); if (ipintCallee->rethrowSlots()) { ASSERT(osrEntryData.tryDepth <= ipintCallee->rethrowSlots()); for (uint32_t i = 0; i < osrEntryData.tryDepth; ++i) - copyValueToBuffer(*frame.rethrowSlot(i)); + copyValueToBuffer(pl[ipintCallee->localSizeToAlloc() + i]); } else { // If there's no rethrow slots just 0 fill the buffer. IPIntLocal zeroValue = { }; @@ -296,15 +268,15 @@ static ALWAYS_INLINE Wasm::Context::ScratchBufferEntry* buildEntryBufferForLoopO copyValueToBuffer(zeroValue); } - auto stackSlots = std::span { sp, sp + osrEntryData.numberOfStackValues }; - for (auto& value : stackSlots | std::views::reverse) - copyValueToBuffer(value); - + for (uint32_t i = 0; i < osrEntryData.numberOfStackValues; ++i) { + pl -= 1; + copyValueToBuffer(*pl); + } return buffer; } -WASM_IPINT_EXTERN_CPP_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntStackEntry* sp) +WASM_IPINT_EXTERN_CPP_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntLocal* pl) { Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame); Wasm::IPIntTierUpCounter& tierUpCounter = callee->tierUpCounter(); @@ -336,13 +308,13 @@ WASM_IPINT_EXTERN_CPP_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntSta // The BBQ frame may use more stack than the IPInt frame. If there's not enough stack space, // skip OSR and continue executing in IPInt. if (bbqCallee->stackCheckSize() != Wasm::stackCheckNotNeeded) { - auto stackAtOSREntry = reinterpret_cast(sp); + auto stackAtOSREntry = reinterpret_cast(pl - osrEntryData.numberOfStackValues); auto candidateNewStackPointer = reinterpret_cast(stackAtOSREntry - bbqCallee->stackCheckSize()); if (candidateNewStackPointer < instance->softStackLimit()) [[unlikely]] WASM_RETURN_TWO(nullptr, nullptr); } - auto* buffer = buildEntryBufferForLoopOSR(callee, bbqCallee, instance, osrEntryData, callFrame, sp); + auto* buffer = buildEntryBufferForLoopOSR(callee, bbqCallee, instance, osrEntryData, pl); if (!buffer) WASM_RETURN_TWO(nullptr, nullptr); @@ -399,7 +371,7 @@ static void NODELETE copyExceptionPayloadToStack(const Wasm::FunctionSignature& ASSERT(!payloadIndex); } -WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer) +WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer, IPIntLocal* pl) { VM& vm = instance->vm(); auto throwScope = DECLARE_THROW_SCOPE(vm); @@ -408,8 +380,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, I Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame); if (callee->rethrowSlots()) { RELEASE_ASSERT(vm.targetTryDepthForThrow <= callee->rethrowSlots()); - FrameAccess frame(callFrame, callee); - frame.rethrowSlot(vm.targetTryDepthForThrow - 1)->i64 = std::bit_cast(throwScope.exception()->value()); + pl[callee->localSizeToAlloc() + vm.targetTryDepthForThrow - 1].i64 = std::bit_cast(throwScope.exception()->value()); } if (stackPointer) { @@ -427,7 +398,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_and_clear_exception, CallFrame* callFrame, I WASM_RETURN_TWO(nullptr, nullptr); } -WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer) +WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFrame, IPIntStackEntry* stackPointer, IPIntLocal* pl) { VM& vm = instance->vm(); auto throwScope = DECLARE_THROW_SCOPE(vm); @@ -436,8 +407,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFra Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame); if (callee->rethrowSlots()) { RELEASE_ASSERT(vm.targetTryDepthForThrow <= callee->rethrowSlots()); - FrameAccess frame(callFrame, callee); - frame.rethrowSlot(vm.targetTryDepthForThrow - 1)->i64 = std::bit_cast(throwScope.exception()->value()); + pl[callee->localSizeToAlloc() + vm.targetTryDepthForThrow - 1].i64 = std::bit_cast(throwScope.exception()->value()); } Exception* exception = throwScope.exception(); @@ -451,7 +421,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception, CallFrame* callFra WASM_RETURN_TWO(nullptr, nullptr); } -WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame* callFrame, IPIntStackEntry* stackPointer) +WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame* callFrame, IPIntStackEntry* stackPointer, IPIntLocal* pl) { VM& vm = instance->vm(); auto throwScope = DECLARE_THROW_SCOPE(vm); @@ -460,8 +430,7 @@ WASM_IPINT_EXTERN_CPP_DECL(retrieve_clear_and_push_exception_and_arguments, Call Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame); if (callee->rethrowSlots()) { RELEASE_ASSERT(vm.targetTryDepthForThrow <= callee->rethrowSlots()); - FrameAccess frame(callFrame, callee); - frame.rethrowSlot(vm.targetTryDepthForThrow - 1)->i64 = std::bit_cast(throwScope.exception()->value()); + pl[callee->localSizeToAlloc() + vm.targetTryDepthForThrow - 1].i64 = std::bit_cast(throwScope.exception()->value()); } Exception* exception = throwScope.exception(); @@ -506,7 +475,7 @@ WASM_IPINT_EXTERN_CPP_DECL(throw_exception, CallFrame* callFrame, IPIntStackEntr WASM_RETURN_TWO(vm.targetMachinePCForThrow, nullptr); } -WASM_IPINT_EXTERN_CPP_DECL(rethrow_exception, CallFrame* callFrame, unsigned tryDepth) +WASM_IPINT_EXTERN_CPP_DECL(rethrow_exception, CallFrame* callFrame, IPIntStackEntry* pl, unsigned tryDepth) { SlowPathFrameTracer tracer(instance->vm(), callFrame); @@ -516,11 +485,10 @@ WASM_IPINT_EXTERN_CPP_DECL(rethrow_exception, CallFrame* callFrame, unsigned try Wasm::IPIntCallee* callee = IPINT_CALLEE(callFrame); RELEASE_ASSERT(tryDepth <= callee->rethrowSlots()); - FrameAccess frame(callFrame, callee); #if CPU(ADDRESS64) - JSWebAssemblyException* exception = std::bit_cast(frame.rethrowSlot(tryDepth - 1)->i64); + JSWebAssemblyException* exception = std::bit_cast(pl[callee->localSizeToAlloc() + tryDepth - 1].i64); #else - JSWebAssemblyException* exception = std::bit_cast(frame.rethrowSlot(tryDepth - 1)->i32); + JSWebAssemblyException* exception = std::bit_cast(pl[callee->localSizeToAlloc() + tryDepth - 1].i32); #endif RELEASE_ASSERT(exception); throwException(globalObject, throwScope, exception); @@ -1068,13 +1036,11 @@ WASM_IPINT_EXTERN_CPP_DECL(prepare_function_body, CallFrame* callFrame) /** * Given a function index, determine the pointer to its executable code. - * Return a pair of the target wasm instance and the code pointer (via WASM_CALL_RETURN). - * For wasm imports, returns the target instance and the real entrypoint (bypassing the - * wasm_to_wasm wrapper). For JS imports, returns the caller instance and the import stub. + * Return a pair of the wasm instance pointer received as the first argument and the code pointer. * Additionally, store the following into the 'calleeAndWasmInstanceReturn': * * - calleeAndWasmInstanceReturn[0] - the callee to use, goes into the 'callee' slot of the CallFrame. - * - calleeAndWasmInstanceReturn[1] - the wasm instance to use, goes into the 'codeBlock' slot of the CallFrame. For JS this is reused for the function info. + * - calleeAndWasmInstanceReturn[1] - the wasm instance to use, goes into the 'codeBlock' slot of the CallFrame. */ WASM_IPINT_EXTERN_CPP_DECL(prepare_call, CallFrame* callFrame, CallMetadata* call, Register* calleeAndWasmInstanceReturn) { @@ -1088,18 +1054,16 @@ WASM_IPINT_EXTERN_CPP_DECL(prepare_call, CallFrame* callFrame, CallMetadata* cal Register& calleeReturn = calleeAndWasmInstanceReturn[0]; Register& wasmInstanceReturn = calleeAndWasmInstanceReturn[1]; CodePtr codePtr; - JSWebAssemblyInstance* targetInstance = instance; + bool isJSCallee = false; if (functionIndex < importFunctionCount) { auto* functionInfo = instance->importFunctionInfo(functionIndex); + codePtr = functionInfo->importFunctionStub; calleeReturn = functionInfo->boxedCallee.encodedBits(); if (functionInfo->isJS()) { - codePtr = functionInfo->importFunctionStub; + isJSCallee = true; wasmInstanceReturn = reinterpret_cast(functionInfo); - } else { - codePtr = *functionInfo->entrypointLoadLocation; - targetInstance = functionInfo->targetInstance.get(); - wasmInstanceReturn = targetInstance; - } + } else + wasmInstanceReturn = functionInfo->targetInstance.get(); } else { // Target is a wasm function within the same instance codePtr = *instance->calleeGroup()->entrypointLoadLocationFromFunctionIndexSpace(functionIndex); @@ -1108,15 +1072,14 @@ WASM_IPINT_EXTERN_CPP_DECL(prepare_call, CallFrame* callFrame, CallMetadata* cal wasmInstanceReturn = instance; } + JSWebAssemblyInstance* targetInstance = isJSCallee ? nullptr : jsDynamicCast(wasmInstanceReturn.unboxedCell()); IPINT_HANDLE_STEP_INTO_CALL(instance->vm(), CalleeBits(calleeReturn.encodedJSValue()), targetInstance); RELEASE_ASSERT(WTF::isTaggedWith(codePtr)); - WASM_CALL_RETURN(targetInstance, codePtr); + WASM_CALL_RETURN(instance, codePtr); } -// Returns the same outputs as prepare_call: entrypoint and target instance -// via result registers, callee and function-info/instance via the stack slots. WASM_IPINT_EXTERN_CPP_DECL(prepare_call_indirect, CallFrame* callFrame, Wasm::FunctionSpaceIndex* functionIndex, CallIndirectMetadata* call) { auto* callee = IPINT_CALLEE(callFrame); @@ -1345,7 +1308,7 @@ WASM_IPINT_EXTERN_CPP_DECL(check_stack_and_vm_traps, void* candidateNewStackPoin } #if ENABLE(WEBASSEMBLY_DEBUGGER) -static UNUSED_FUNCTION void displayWasmDebugState(JSWebAssemblyInstance* instance, Wasm::IPIntCallee* callee, CallFrame* callFrame, IPIntStackEntry* sp) +static UNUSED_FUNCTION void displayWasmDebugState(JSWebAssemblyInstance* instance, Wasm::IPIntCallee* callee, IPIntStackEntry* sp, IPIntLocal* pl) { dataLogLn("=== WASM Debug State ==="); @@ -1354,14 +1317,12 @@ static UNUSED_FUNCTION void displayWasmDebugState(JSWebAssemblyInstance* instanc auto functionIndex = callee->functionIndex(); const auto& moduleInfo = instance->module().moduleInformation(); const Vector& localTypes = moduleInfo.debugInfo->ensureFunctionDebugInfo(functionIndex).locals; - FrameAccess frame(callFrame, callee); for (uint32_t i = 0; i < numLocals; ++i) - logWasmLocalValue(i, *frame.localSlot(i), localTypes[i]); + logWasmLocalValue(i, pl[i], localTypes[i]); - auto* stackEnd = frame.stackEnd(); - if (sp && std::bit_cast(sp) <= std::bit_cast(stackEnd)) { - constexpr size_t STACK_ENTRY_SIZE = 16; - size_t stackDepth = (reinterpret_cast(stackEnd) - reinterpret_cast(sp)) / STACK_ENTRY_SIZE; + constexpr size_t STACK_ENTRY_SIZE = 16; + if (sp && pl && std::bit_cast(sp) <= std::bit_cast(pl)) { + size_t stackDepth = (reinterpret_cast(pl) - reinterpret_cast(sp)) / STACK_ENTRY_SIZE; dataLogLn("WASM Stack (", stackDepth, " entries - showing all type interpretations):"); IPIntStackEntry* currentEntry = sp; @@ -1387,12 +1348,13 @@ WASM_IPINT_EXTERN_CPP_DECL(handle_debugger_trap_if_needed, CallFrame* callFrame, if (debugServer.hasDebugger()) { uint8_t* pc = static_cast(sp[2].pointer()); uint8_t* mc = static_cast(sp[3].pointer()); + IPIntLocal* pl = static_cast(sp[0].pointer()); auto* callee = static_cast(sp[1].pointer()); auto* stack = std::bit_cast(sp + 4); auto exceptionType = static_cast(callFrame->argumentCountIncludingThis()); if (Options::verboseWasmDebugger() && exceptionType == Wasm::ExceptionType::Unreachable) - displayWasmDebugState(instance, callee, callFrame, stack); - auto trapStatus = debugServer.execution().handleDebuggerTrapIfNeeded(callFrame, instance, callee, pc, mc, stack, exceptionType); + displayWasmDebugState(instance, callee, stack, pl); + auto trapStatus = debugServer.execution().handleDebuggerTrapIfNeeded(callFrame, instance, callee, pc, mc, pl, stack, exceptionType); shouldThrow = trapStatus == Wasm::DebuggerTrapStatus::NotResolvedByDebugger; } } diff --git a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h index 41a9aedbc86c..a4d9c9f6cb90 100644 --- a/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h +++ b/Source/JavaScriptCore/wasm/WasmIPIntSlowPaths.h @@ -70,15 +70,15 @@ static constexpr uintptr_t SlowPathExceptionTag = JSValue::InvalidTag; #if ENABLE(WEBASSEMBLY_BBQJIT) WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(prologue_osr, CallFrame* callFrame); -WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntStackEntry* sp); +WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(loop_osr, CallFrame* callFrame, uint8_t* pc, IPIntLocal* pl); WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(epilogue_osr, CallFrame* callFrame); #endif -WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_and_clear_exception, CallFrame*, IPIntStackEntry* stack); -WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception, CallFrame*, IPIntStackEntry* stack); -WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame*, IPIntStackEntry* stack); +WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_and_clear_exception, CallFrame*, IPIntStackEntry* stack, IPIntLocal* pl); +WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception, CallFrame*, IPIntStackEntry* stack, IPIntLocal* pl); +WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(retrieve_clear_and_push_exception_and_arguments, CallFrame*, IPIntStackEntry* stack, IPIntLocal* pl); WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(throw_exception, CallFrame*, IPIntStackEntry* arguments, unsigned exceptionIndex); -WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(rethrow_exception, CallFrame*, unsigned tryDepth); +WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(rethrow_exception, CallFrame*, IPIntStackEntry* pl, unsigned tryDepth); WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(throw_ref, CallFrame* callFrame, EncodedJSValue exnref); WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(ref_func, unsigned index); @@ -143,29 +143,6 @@ WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(memory_atomic_notify, IPIntStackEntry*); WASM_IPINT_EXTERN_CPP_HIDDEN_DECL(check_stack_and_vm_traps, void* candidateNewStackPointer, Wasm::IPIntCallee*, CallFrame*); WASM_IPINT_EXTERN_CPP_DECL(handle_debugger_trap_if_needed, CallFrame*, Register*); - -class FrameAccess { -public: - FrameAccess(CallFrame* callFrame, const Wasm::IPIntCallee* callee) - : m_callFrame(callFrame) - , m_callee(callee) - { - } - - IPIntLocal* localSlot(unsigned); - IPIntLocal* rethrowSlot(unsigned); - // Past-the-end pointer for the expression stack area (= bottom of rethrow/locals area). - IPIntStackEntry* stackEnd(); - -private: - // Returns pointer to local[0], matching assembly's CFR - IPIntLocalsBaseOffset. - // local[i] = localBase()[-i], rethrow[i] = localBase()[-(localSizeToAlloc + i)]. - IPIntLocal* localBase(); - - CallFrame* m_callFrame; - SUPPRESS_UNCOUNTED_MEMBER const Wasm::IPIntCallee* m_callee; -}; - } } // namespace JSC::IPInt #endif diff --git a/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp b/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp index 832f970bad38..0f4a6a3a096a 100644 --- a/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp +++ b/Source/JavaScriptCore/wasm/WasmOMGIRGenerator.cpp @@ -756,13 +756,6 @@ class OMGIRGenerator { // Saturated truncation. [[nodiscard]] PartialResult truncSaturated(Ext1OpType, ExpressionType operand, ExpressionType& result, Type returnType, Type operandType); - // Wide arithmetic. - [[nodiscard]] PartialResult addI64Add128(ExpressionType lhsLo, ExpressionType lhsHi, ExpressionType rhsLo, ExpressionType rhsHi, ExpressionType& resultLo, ExpressionType& resultHi); - [[nodiscard]] PartialResult addI64Sub128(ExpressionType lhsLo, ExpressionType lhsHi, ExpressionType rhsLo, ExpressionType rhsHi, ExpressionType& resultLo, ExpressionType& resultHi); - [[nodiscard]] PartialResult addI64MulWideS(ExpressionType lhs, ExpressionType rhs, ExpressionType& resultLo, ExpressionType& resultHi); - [[nodiscard]] PartialResult addI64MulWideU(ExpressionType lhs, ExpressionType rhs, ExpressionType& resultLo, ExpressionType& resultHi); - B3::Type int64PairTupleType(); - // GC [[nodiscard]] PartialResult addRefI31(ExpressionType value, ExpressionType& result); [[nodiscard]] PartialResult addI31GetS(TypedExpression ref, ExpressionType& result); @@ -1125,7 +1118,6 @@ class OMGIRGenerator { unsigned* m_osrEntryScratchBufferSize; UncheckedKeyHashMap m_constantPool; UncheckedKeyHashMap m_tupleMap; - B3::Type m_int64PairTupleType { }; InsertionSet m_constantInsertionValues; Value* m_framePointer { nullptr }; bool m_makesCalls { false }; @@ -1560,13 +1552,6 @@ B3::Type OMGIRGenerator::toB3ResultType(const TypeDefinition* returnType) return result.iterator->value; } -B3::Type OMGIRGenerator::int64PairTupleType() -{ - if (m_int64PairTupleType == B3::Void) - m_int64PairTupleType = m_proc.addTuple({ B3::Int64, B3::Int64 }); - return m_int64PairTupleType; -} - auto OMGIRGenerator::addLocal(Type type, uint32_t count) -> PartialResult { size_t newSize = m_locals.size() + count; @@ -3401,140 +3386,6 @@ auto OMGIRGenerator::truncSaturated(Ext1OpType op, ExpressionType argVar, Expres return { }; } -// Wide arithmetic - -auto OMGIRGenerator::addI64Add128(ExpressionType lhsLoVar, ExpressionType lhsHiVar, ExpressionType rhsLoVar, ExpressionType rhsHiVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult -{ - Value* lhsLo = get(lhsLoVar); - Value* lhsHi = get(lhsHiVar); - Value* rhsLo = get(rhsLoVar); - Value* rhsHi = get(rhsHiVar); - - B3::Type tupleType = int64PairTupleType(); - PatchpointValue* patchpoint = m_currentBlock->appendNew(m_proc, tupleType, origin()); - patchpoint->append(lhsLo, ValueRep::SomeRegister); - patchpoint->append(lhsHi, ValueRep::SomeRegister); - patchpoint->append(rhsLo, ValueRep::SomeRegister); - patchpoint->append(rhsHi, ValueRep::SomeRegister); - patchpoint->resultConstraints = { ValueRep::SomeEarlyRegister, isX86() ? ValueRep::SomeEarlyRegister : ValueRep::SomeRegister }; - patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) { - GPRReg resLo = params[0].gpr(); - GPRReg resHi = params[1].gpr(); - GPRReg aLo = params[2].gpr(); - GPRReg aHi = params[3].gpr(); - GPRReg bLo = params[4].gpr(); - GPRReg bHi = params[5].gpr(); -#if CPU(ARM64) - jit.add64AndSetFlags(aLo, bLo, resLo); - jit.addCarry64(aHi, bHi, resHi); -#elif CPU(X86_64) - jit.move(aLo, resLo); - jit.add64(bLo, resLo); - jit.move(aHi, resHi); - jit.addCarry64(bHi, resHi); -#endif - }); - patchpoint->effects = Effects::none(); - - resultLo = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 0)); - resultHi = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 1)); - return { }; -} - -auto OMGIRGenerator::addI64Sub128(ExpressionType lhsLoVar, ExpressionType lhsHiVar, ExpressionType rhsLoVar, ExpressionType rhsHiVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult -{ - Value* lhsLo = get(lhsLoVar); - Value* lhsHi = get(lhsHiVar); - Value* rhsLo = get(rhsLoVar); - Value* rhsHi = get(rhsHiVar); - - B3::Type tupleType = int64PairTupleType(); - PatchpointValue* patchpoint = m_currentBlock->appendNew(m_proc, tupleType, origin()); - patchpoint->append(lhsLo, ValueRep::SomeRegister); - patchpoint->append(lhsHi, ValueRep::SomeRegister); - patchpoint->append(rhsLo, ValueRep::SomeRegister); - patchpoint->append(rhsHi, ValueRep::SomeRegister); - patchpoint->resultConstraints = { ValueRep::SomeEarlyRegister, isX86() ? ValueRep::SomeEarlyRegister : ValueRep::SomeRegister }; - patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) { - GPRReg resLo = params[0].gpr(); - GPRReg resHi = params[1].gpr(); - GPRReg aLo = params[2].gpr(); - GPRReg aHi = params[3].gpr(); - GPRReg bLo = params[4].gpr(); - GPRReg bHi = params[5].gpr(); -#if CPU(ARM64) - jit.sub64AndSetFlags(aLo, bLo, resLo); - jit.subBorrow64(aHi, bHi, resHi); -#elif CPU(X86_64) - jit.move(aLo, resLo); - jit.sub64(bLo, resLo); - jit.move(aHi, resHi); - jit.subBorrow64(bHi, resHi); -#endif - }); - patchpoint->effects = Effects::none(); - - resultLo = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 0)); - resultHi = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 1)); - return { }; -} - -auto OMGIRGenerator::addI64MulWideU(ExpressionType lhsVar, ExpressionType rhsVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult -{ - Value* lhs = get(lhsVar); - Value* rhs = get(rhsVar); - -#if CPU(ARM64) - resultLo = push(m_currentBlock->appendNew(m_proc, Mul, origin(), lhs, rhs)); - resultHi = push(m_currentBlock->appendNew(m_proc, UMulHigh, origin(), lhs, rhs)); - -#elif CPU(X86_64) - // FIXME: We should get B3 on X86 to lower this to one instruction without a patchpoint. - B3::Type tupleType = int64PairTupleType(); - PatchpointValue* patchpoint = m_currentBlock->appendNew(m_proc, tupleType, origin()); - patchpoint->append(lhs, ValueRep::reg(X86Registers::eax)); - patchpoint->append(rhs, ValueRep::SomeRegister); - patchpoint->resultConstraints = { ValueRep::reg(X86Registers::eax), ValueRep::reg(X86Registers::edx) }; - patchpoint->effects = Effects::none(); - patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) { - jit.x86UMulHigh64(params[3].gpr(), params[0].gpr(), params[1].gpr()); - }); - - resultLo = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 0)); - resultHi = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 1)); -#endif - - return { }; -} - -auto OMGIRGenerator::addI64MulWideS(ExpressionType lhsVar, ExpressionType rhsVar, ExpressionType& resultLo, ExpressionType& resultHi) -> PartialResult -{ - Value* lhs = get(lhsVar); - Value* rhs = get(rhsVar); - -#if CPU(ARM64) - resultLo = push(m_currentBlock->appendNew(m_proc, Mul, origin(), lhs, rhs)); - resultHi = push(m_currentBlock->appendNew(m_proc, MulHigh, origin(), lhs, rhs)); - -#elif CPU(X86_64) - // FIXME: We should get B3 on X86 to lower this to one instruction without a patchpoint. - B3::Type tupleType = int64PairTupleType(); - PatchpointValue* patchpoint = m_currentBlock->appendNew(m_proc, tupleType, origin()); - patchpoint->append(lhs, ValueRep::reg(X86Registers::eax)); - patchpoint->append(rhs, ValueRep::SomeRegister); - patchpoint->resultConstraints = { ValueRep::reg(X86Registers::eax), ValueRep::reg(X86Registers::edx) }; - patchpoint->effects = Effects::none(); - patchpoint->setGenerator([=](CCallHelpers& jit, const StackmapGenerationParams& params) { - jit.x86MulHigh64(params[3].gpr(), params[0].gpr(), params[1].gpr()); - }); - - resultLo = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 0)); - resultHi = push(m_currentBlock->appendNew(m_proc, origin(), B3::Int64, patchpoint, 1)); -#endif - - return { }; -} - auto OMGIRGenerator::addRefI31(ExpressionType value, ExpressionType& result) -> PartialResult { ASSERT(value.type() == Int32); diff --git a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp index 54c05bbe8fbf..07d8159e1772 100644 --- a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp +++ b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.cpp @@ -176,6 +176,15 @@ bool getWasmReturnPC(CallFrame* currentFrame, uint8_t*& returnPC, VirtualAddress return true; } +// This is the C++ equivalent of the "# Recompute PL" block in InPlaceInterpreter.asm. +IPInt::IPIntLocal* localsFromFrame(CallFrame* callFrame, const IPIntCallee* callee) +{ + // IPIntCalleeSaveSpaceStackAligned is defined in InPlaceInterpreter.asm. + static constexpr size_t ipintCalleeSaveSpaceStackAligned = WTF::roundUpToMultipleOf((Wasm::numberOfIPIntCalleeSaveRegisters + Wasm::numberOfIPIntInternalRegisters) * sizeof(Register)); + size_t localsAndRethrowSize = (callee->localSizeToAlloc() + callee->rethrowSlots()) * IPInt::LOCAL_SIZE; + auto pl = reinterpret_cast(callFrame) - ipintCalleeSaveSpaceStackAligned - localsAndRethrowSize; + return reinterpret_cast(pl); +} // Walk the full CallFrame chain from a WASM breakpoint, collecting virtual addresses for // every WASM and JS frame. The result is consumed by qWasmCallStack to give LLDB a @@ -338,11 +347,12 @@ StopData::StopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFra { } -StopData::StopData(VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame) +StopData::StopData(VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame) : address(address) , originalBytecode(originalBytecode) , pc(pc) , mc(mc) + , locals(locals) , stack(stack) , callee(callee) , instance(instance) @@ -350,8 +360,8 @@ StopData::StopData(VirtualAddress address, uint8_t originalBytecode, uint8_t* pc { } -StopData::StopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType type) - : StopData(VirtualAddress::toVirtual(instance, callee->functionIndex(), pc), 0, pc, mc, stack, callee, instance, callFrame) +StopData::StopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType type) + : StopData(VirtualAddress::toVirtual(instance, callee->functionIndex(), pc), 0, pc, mc, locals, stack, callee, instance, callFrame) { wasmTrapType = type; } @@ -364,6 +374,7 @@ void StopData::dump(PrintStream& out) const out.print(", originalBytecode:", originalBytecode); out.print(", pc:", RawPointer(pc)); out.print(", mc:", RawPointer(mc)); + out.print(", locals:", RawPointer(locals)); out.print(", stack:", RawPointer(stack)); out.print(", callee:", RawPointer(callee.get())); out.print(", instance:", RawPointer(instance)); diff --git a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h index 6a001af51050..e156c5d96189 100644 --- a/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h +++ b/Source/JavaScriptCore/wasm/debugger/WasmDebugServerUtilities.h @@ -137,11 +137,11 @@ struct Breakpoint { struct StopData { WTF_MAKE_STRUCT_TZONE_ALLOCATED(StopData); - StopData(VirtualAddress, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry*, IPIntCallee*, JSWebAssemblyInstance*, CallFrame*); + StopData(VirtualAddress, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal*, IPInt::IPIntStackEntry*, IPIntCallee*, JSWebAssemblyInstance*, CallFrame*); StopData(IPIntCallee*, JSWebAssemblyInstance*, CallFrame*); // Prologue: no pc/mc - StopData(IPIntCallee*, JSWebAssemblyInstance*, CallFrame*, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry*, Wasm::ExceptionType); // Trap + StopData(IPIntCallee*, JSWebAssemblyInstance*, CallFrame*, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal*, IPInt::IPIntStackEntry*, Wasm::ExceptionType); // Trap ~StopData(); @@ -151,6 +151,7 @@ struct StopData { uint8_t originalBytecode { 0 }; uint8_t* pc { nullptr }; uint8_t* mc { nullptr }; + IPInt::IPIntLocal* locals { nullptr }; IPInt::IPIntStackEntry* stack { nullptr }; RefPtr callee; JSWebAssemblyInstance* instance { nullptr }; @@ -181,7 +182,7 @@ struct DebugState { stopData = makeUnique(callee, instance, callFrame); } - void setBreakpointStopData(Breakpoint::Type type, VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame) + void setBreakpointStopData(Breakpoint::Type type, VirtualAddress address, uint8_t originalBytecode, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame) { switch (type) { case Breakpoint::Type::Step: @@ -191,13 +192,13 @@ struct DebugState { stopReason = Reason::Breakpoint; break; } - stopData = makeUnique(address, originalBytecode, pc, mc, stack, callee, instance, callFrame); + stopData = makeUnique(address, originalBytecode, pc, mc, locals, stack, callee, instance, callFrame); } - void setTrapStopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType wasmTrapType) + void setTrapStopData(IPIntCallee* callee, JSWebAssemblyInstance* instance, CallFrame* callFrame, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType wasmTrapType) { stopReason = Reason::WasmTrap; - stopData = makeUnique(callee, instance, callFrame, pc, mc, stack, wasmTrapType); + stopData = makeUnique(callee, instance, callFrame, pc, mc, locals, stack, wasmTrapType); } // WHERE-based helpers — determined by stopData presence and pc: @@ -296,6 +297,7 @@ struct FrameInfo { Vector collectCallStack(VirtualAddress stopAddress, CallFrame* startFrame, VM&, unsigned maxFrames = 100); +IPInt::IPIntLocal* localsFromFrame(CallFrame*, const IPIntCallee*); inline StringView getErrorReply(ProtocolError error) { diff --git a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp index bd1125eab0f1..c2ba90fa761d 100644 --- a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp +++ b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.cpp @@ -134,13 +134,13 @@ void ExecutionHandler::stopTheWorld(VM& debuggee, StopTheWorldEvent event) VMManager::singleton().notifyVMStop(debuggee, event); } -DebuggerTrapStatus ExecutionHandler::handleDebuggerTrapIfNeeded(CallFrame* callFrame, JSWebAssemblyInstance* instance, IPIntCallee* callee, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType exceptionType) +DebuggerTrapStatus ExecutionHandler::handleDebuggerTrapIfNeeded(CallFrame* callFrame, JSWebAssemblyInstance* instance, IPIntCallee* callee, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal* locals, IPInt::IPIntStackEntry* stack, Wasm::ExceptionType exceptionType) { VM& debuggee = instance->vm(); if (exceptionType == Wasm::ExceptionType::Unreachable && hasBreakpoints()) { VirtualAddress address = VirtualAddress::toVirtual(instance, callee->functionIndex(), pc); if (auto* breakpoint = m_breakpointManager->findBreakpoint(address)) { - debuggee.debugState()->setBreakpointStopData(breakpoint->type, address, breakpoint->originalBytecode, pc, mc, stack, callee, instance, callFrame); + debuggee.debugState()->setBreakpointStopData(breakpoint->type, address, breakpoint->originalBytecode, pc, mc, locals, stack, callee, instance, callFrame); dataLogLnIf(Options::verboseWasmDebugger(), "[Code][handleDebuggerTrapIfNeeded] Breakpoint at ", *breakpoint, " with ", *debuggee.debugState()->stopData); stopTheWorld(debuggee, StopTheWorldEvent::WasmProgramStop); return DebuggerTrapStatus::ResolvedByDebugger; // Don't throw; resume execution at this breakpoint @@ -157,7 +157,7 @@ DebuggerTrapStatus ExecutionHandler::handleDebuggerTrapIfNeeded(CallFrame* callF debuggee.debugState()->stopReason = DebugState::Reason::WasmTrap; debuggee.debugState()->stopData->wasmTrapType = exceptionType; } else - debuggee.debugState()->setTrapStopData(callee, instance, callFrame, pc, mc, stack, exceptionType); + debuggee.debugState()->setTrapStopData(callee, instance, callFrame, pc, mc, locals, stack, exceptionType); dataLogLnIf(Options::verboseWasmDebugger(), "[Code][handleDebuggerTrapIfNeeded] Wasm trap at ", *debuggee.debugState()->stopData); stopTheWorld(debuggee, StopTheWorldEvent::WasmProgramStop); return DebuggerTrapStatus::NotResolvedByDebugger; // Throw; trap was reported, now propagate it @@ -541,7 +541,8 @@ void ExecutionHandler::setStepIntoBreakpointForCall(VM& callerVM, CalleeBits box dataLogLnIf(Options::verboseWasmDebugger(), "[Code][StepIntoEvent] Start for call"); RELEASE_ASSERT(m_debuggerState == DebuggerState::StepRequested); - RELEASE_ASSERT(calleeInstance); + if (!calleeInstance) + return; if (!boxedCallee.isNativeCallee()) return; RefPtr wasmCallee = downcast(boxedCallee.asNativeCallee()); diff --git a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h index cd5e8443cb35..46e0c097793b 100644 --- a/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h +++ b/Source/JavaScriptCore/wasm/debugger/WasmExecutionHandler.h @@ -78,7 +78,7 @@ class ExecutionHandler { ResumeMode stopCode(Locker&, StopTheWorldEvent) WTF_REQUIRES_LOCK(m_lock); - DebuggerTrapStatus handleDebuggerTrapIfNeeded(CallFrame*, JSWebAssemblyInstance*, IPIntCallee*, uint8_t* pc, uint8_t* mc, IPInt::IPIntStackEntry*, Wasm::ExceptionType); + DebuggerTrapStatus handleDebuggerTrapIfNeeded(CallFrame*, JSWebAssemblyInstance*, IPIntCallee*, uint8_t* pc, uint8_t* mc, IPInt::IPIntLocal*, IPInt::IPIntStackEntry*, Wasm::ExceptionType); JS_EXPORT_PRIVATE void resume(); JS_EXPORT_PRIVATE void step(); diff --git a/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp b/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp index 0bde9d27d764..a49490f741b0 100644 --- a/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp +++ b/Source/JavaScriptCore/wasm/debugger/WasmQueryHandler.cpp @@ -366,12 +366,12 @@ void QueryHandler::handleWasmLocal(StringView packet) } auto& stopData = *state->stopData; - CallFrame* localCallFrame = nullptr; + IPInt::IPIntLocal* locals = nullptr; RefPtr localCallee; JSWebAssemblyInstance* instance = nullptr; if (!frameIndex) { - localCallFrame = stopData.callFrame; + locals = stopData.locals; localCallee = stopData.callee; instance = stopData.instance; } else { @@ -381,7 +381,7 @@ void QueryHandler::handleWasmLocal(StringView packet) return; } const auto& frameInfo = frames[frameIndex]; - localCallFrame = frameInfo.wasmCallFrame; + locals = localsFromFrame(frameInfo.wasmCallFrame, frameInfo.wasmCallee.get()); localCallee = frameInfo.wasmCallee; instance = frameInfo.wasmCallFrame->wasmInstance(); } @@ -395,8 +395,7 @@ void QueryHandler::handleWasmLocal(StringView packet) return; } - IPInt::FrameAccess frame(localCallFrame, localCallee.get()); - IPInt::IPIntLocal& local = *frame.localSlot(localIndex); + IPInt::IPIntLocal& local = locals[localIndex]; Type localType = localTypes[localIndex]; logWasmLocalValue(localIndex, local, localType);