From 85f16f488d4a0047e40a885fdacda832d46815e8 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 4 Aug 2021 15:57:56 -0500 Subject: [PATCH] Consolidate address calculations for atomics (#3143) * Consolidate address calculations for atomics This commit consolidates all calculations of guest addresses into one `prepare_addr` function. This notably removes the atomics-specific paths as well as the `prepare_load` function (now renamed to `prepare_addr` and folded into `get_heap_addr`). The goal of this commit is to simplify how addresses are managed in the code generator for atomics to use all the shared infrastructure of other loads/stores as well. This additionally fixes #3132 via the use of `heap_addr` in clif for all operations. I also added a number of tests for loads/stores with varying alignments. Originally I was going to allow loads/stores to not be aligned since that's what the current formal specification says, but the overview of the threads proposal disagrees with the formal specification, so I figured I'd leave it as-is but adding tests probably doesn't hurt. Closes #3132 * Fix old backend * Guarantee misalignment checks happen before out-of-bounds --- cranelift/wasm/src/code_translator.rs | 289 +++++++----------- cranelift/wasm/src/environ/spec.rs | 8 + crates/environ/src/builtin.rs | 6 +- crates/runtime/src/libcalls.rs | 102 +++++-- tests/all/wast.rs | 5 + .../threads/load-store-alignment.wast | 126 ++++++++ 6 files changed, 328 insertions(+), 208 deletions(-) create mode 100644 tests/misc_testsuite/threads/load-store-alignment.wast diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index da53a5dc48bc..914b7beb0595 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -694,32 +694,32 @@ pub fn translate_operator( translate_load(memarg, ir::Opcode::Load, I8X16, builder, state, environ)?; } Operator::V128Load8x8S { memarg } => { - let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; let loaded = builder.ins().sload8x8(flags, base, offset); state.push1(loaded); } Operator::V128Load8x8U { memarg } => { - let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; let loaded = builder.ins().uload8x8(flags, base, offset); state.push1(loaded); } Operator::V128Load16x4S { memarg } => { - let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; let loaded = builder.ins().sload16x4(flags, base, offset); state.push1(loaded); } Operator::V128Load16x4U { memarg } => { - let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; let loaded = builder.ins().uload16x4(flags, base, offset); state.push1(loaded); } Operator::V128Load32x2S { memarg } => { - let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; let loaded = builder.ins().sload32x2(flags, base, offset); state.push1(loaded); } Operator::V128Load32x2U { memarg } => { - let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; let loaded = 
builder.ins().uload32x2(flags, base, offset); state.push1(loaded); } @@ -1064,8 +1064,8 @@ pub fn translate_operator( let heap = state.get_heap(builder.func, memarg.memory, environ)?; let timeout = state.pop1(); // 64 (fixed) let expected = state.pop1(); // 32 or 64 (per the `Ixx` in `IxxAtomicWait`) - let addr = state.pop1(); // 32 (fixed) - let addr = fold_atomic_mem_addr(addr, memarg, implied_ty, builder); + let (_flags, addr) = + prepare_atomic_addr(memarg, implied_ty.bytes(), builder, state, environ)?; assert!(builder.func.dfg.value_type(expected) == implied_ty); // `fn translate_atomic_wait` can inspect the type of `expected` to figure out what // code it needs to generate, if it wants. @@ -1083,8 +1083,10 @@ pub fn translate_operator( let heap_index = MemoryIndex::from_u32(memarg.memory); let heap = state.get_heap(builder.func, memarg.memory, environ)?; let count = state.pop1(); // 32 (fixed) - let addr = state.pop1(); // 32 (fixed) - let addr = fold_atomic_mem_addr(addr, memarg, I32, builder); + + // `memory.atomic.notify` is defined to have an access size of 4 + // bytes in the spec, even though it doesn't necessarily access memory. + let (_flags, addr) = prepare_atomic_addr(memarg, 4, builder, state, environ)?; let res = environ.translate_atomic_notify(builder.cursor(), heap_index, heap, addr, count)?; state.push1(res); @@ -2146,23 +2148,29 @@ fn translate_unreachable_operator( Ok(()) } -/// Get the address+offset to use for a heap access. -fn get_heap_addr( - heap: ir::Heap, - addr32: ir::Value, - offset: u64, - width: u32, - addr_ty: Type, +/// This function is a generalized helper for validating that a wasm-supplied +/// heap address is in-bounds. +/// +/// This function takes a litany of parameters and requires that the address to +/// be verified is at the top of the stack in `state`. This will generate +/// necessary IR to validate that the heap address is correctly in-bounds, and +/// various parameters are returned describing the valid heap address if +/// execution reaches that point. +fn prepare_addr( + memarg: &MemoryImmediate, + access_size: u32, builder: &mut FunctionBuilder, -) -> (ir::Value, i32) { + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<(MemFlags, Value, Offset32)> { + let addr = state.pop1(); // This function will need updates for 64-bit memories - debug_assert_eq!(builder.func.dfg.value_type(addr32), I32); + debug_assert_eq!(builder.func.dfg.value_type(addr), I32); + let offset = u32::try_from(memarg.offset).unwrap(); + let heap = state.get_heap(builder.func, memarg.memory, environ)?; let offset_guard_size: u64 = builder.func.heaps[heap].offset_guard_size.into(); - // Currently this function only supports 32-bit memories. - let offset = u32::try_from(offset).unwrap(); - // How exactly the bounds check is performed here and what it's performed // on is a bit tricky. Generally we want to rely on access violations (e.g. // segfaults) to generate traps since that means we don't have to bounds @@ -2214,54 +2222,86 @@ fn get_heap_addr( // offsets we're checking here are zero. 
This means that we'll hit the fast // path and emit zero conditional traps for bounds checks let adjusted_offset = if offset_guard_size == 0 { - u64::from(offset) + u64::from(width) + u64::from(offset) + u64::from(access_size) } else { - assert!(width < 1024); + assert!(access_size < 1024); cmp::max(u64::from(offset) / offset_guard_size * offset_guard_size, 1) }; debug_assert!(adjusted_offset > 0); // want to bounds check at least 1 byte let check_size = u32::try_from(adjusted_offset).unwrap_or(u32::MAX); - let base = builder.ins().heap_addr(addr_ty, heap, addr32, check_size); + let base = builder + .ins() + .heap_addr(environ.pointer_type(), heap, addr, check_size); // Native load/store instructions take a signed `Offset32` immediate, so adjust the base // pointer if necessary. - if offset > i32::MAX as u32 { + let (addr, offset) = if offset > i32::MAX as u32 { // Offset doesn't fit in the load/store instruction. let adj = builder.ins().iadd_imm(base, i64::from(i32::MAX) + 1); (adj, (offset - (i32::MAX as u32 + 1)) as i32) } else { (base, offset as i32) - } + }; + + // Note that we don't set `is_aligned` here, even if the load instruction's + // alignment immediate says it's aligned, because WebAssembly's + // immediate field is just a hint, while Cranelift's aligned flag needs a + // guarantee. WebAssembly memory accesses are always little-endian. + let mut flags = MemFlags::new(); + flags.set_endianness(ir::Endianness::Little); + + Ok((flags, addr, offset.into())) } -/// Prepare for a load; factors out common functionality between load and load_extend operations. -fn prepare_load( +fn prepare_atomic_addr( memarg: &MemoryImmediate, loaded_bytes: u32, builder: &mut FunctionBuilder, state: &mut FuncTranslationState, environ: &mut FE, -) -> WasmResult<(MemFlags, Value, Offset32)> { - let addr = state.pop1(); +) -> WasmResult<(MemFlags, Value)> { + // Atomic addresses must all be aligned correctly, and for now we check + // alignment before we check out-of-bounds-ness. The order of this check may + // need to be updated depending on the outcome of the official threads + // proposal itself. + // + // Note that with an offset>0 we generate an `iadd_imm` where the result is + // thrown away after the offset check. This may truncate the offset and the + // result may overflow as well, but those conditions won't affect the + // alignment check itself. This can probably be optimized better and we + // should do so in the future as well. + if loaded_bytes > 1 { + let addr = state.pop1(); // "peek" via pop then push + state.push1(addr); + let effective_addr = if memarg.offset == 0 { + addr + } else { + builder + .ins() + .iadd_imm(addr, i64::from(memarg.offset as i32)) + }; + debug_assert!(loaded_bytes.is_power_of_two()); + let misalignment = builder + .ins() + .band_imm(effective_addr, i64::from(loaded_bytes - 1)); + let f = builder.ins().ifcmp_imm(misalignment, 0); + builder + .ins() + .trapif(IntCC::NotEqual, f, ir::TrapCode::HeapMisaligned); + } - let heap = state.get_heap(builder.func, memarg.memory, environ)?; - let (base, offset) = get_heap_addr( - heap, - addr, - memarg.offset, - loaded_bytes, - environ.pointer_type(), - builder, - ); + let (flags, mut addr, offset) = prepare_addr(memarg, loaded_bytes, builder, state, environ)?; - // Note that we don't set `is_aligned` here, even if the load instruction's - // alignment immediate says it's aligned, because WebAssembly's immediate - // field is just a hint, while Cranelift's aligned flag needs a guarantee. 
- // WebAssembly memory accesses are always little-endian. - let mut flags = MemFlags::new(); - flags.set_endianness(ir::Endianness::Little); + // Currently cranelift IR operations for atomics don't have offsets + // associated with them so we fold the offset into the address itself. Note + // that via the `prepare_addr` helper we know that if execution reaches + // this point that this addition won't overflow. + let offset: i64 = offset.into(); + if offset != 0 { + addr = builder.ins().iadd_imm(addr, offset); + } - Ok((flags, base, offset.into())) + Ok((flags, addr)) } /// Translate a load instruction. @@ -2273,7 +2313,7 @@ fn translate_load( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult<()> { - let (flags, base, offset) = prepare_load( + let (flags, base, offset) = prepare_addr( memarg, mem_op_size(opcode, result_ty), builder, @@ -2293,21 +2333,11 @@ fn translate_store( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult<()> { - let (addr32, val) = state.pop2(); + let val = state.pop1(); let val_ty = builder.func.dfg.value_type(val); - let heap = state.get_heap(builder.func, memarg.memory, environ)?; - let (base, offset) = get_heap_addr( - heap, - addr32, - memarg.offset, - mem_op_size(opcode, val_ty), - environ.pointer_type(), - builder, - ); - // See the comments in `prepare_load` about the flags. - let mut flags = MemFlags::new(); - flags.set_endianness(ir::Endianness::Little); + let (flags, base, offset) = + prepare_addr(memarg, mem_op_size(opcode, val_ty), builder, state, environ)?; builder .ins() .Store(opcode, val_ty, flags, offset.into(), val, base); @@ -2330,92 +2360,6 @@ fn translate_icmp(cc: IntCC, builder: &mut FunctionBuilder, state: &mut FuncTran state.push1(builder.ins().bint(I32, val)); } -fn fold_atomic_mem_addr( - linear_mem_addr: Value, - memarg: &MemoryImmediate, - access_ty: Type, - builder: &mut FunctionBuilder, -) -> Value { - let access_ty_bytes = access_ty.bytes(); - let final_lma = if memarg.offset > 0 { - // Note that 32-bit memories are only supported here at this time, the - // logic here (e.g. the `iadd_imm` will need to check for overflow and - // other bits and pieces for 64-bit memories. - assert!(builder.func.dfg.value_type(linear_mem_addr) == I32); - let linear_mem_addr = builder.ins().uextend(I64, linear_mem_addr); - let a = builder - .ins() - .iadd_imm(linear_mem_addr, i64::try_from(memarg.offset).unwrap()); - let cflags = builder.ins().ifcmp_imm(a, 0x1_0000_0000i64); - builder.ins().trapif( - IntCC::UnsignedGreaterThanOrEqual, - cflags, - ir::TrapCode::HeapOutOfBounds, - ); - builder.ins().ireduce(I32, a) - } else { - linear_mem_addr - }; - assert!(access_ty_bytes == 4 || access_ty_bytes == 8); - let final_lma_misalignment = builder - .ins() - .band_imm(final_lma, i64::from(access_ty_bytes - 1)); - let f = builder - .ins() - .ifcmp_imm(final_lma_misalignment, i64::from(0)); - builder - .ins() - .trapif(IntCC::NotEqual, f, ir::TrapCode::HeapMisaligned); - final_lma -} - -// For an atomic memory operation, emit an alignment check for the linear memory address, -// and then compute the final effective address. -fn finalise_atomic_mem_addr( - linear_mem_addr: Value, - memarg: &MemoryImmediate, - access_ty: Type, - builder: &mut FunctionBuilder, - state: &mut FuncTranslationState, - environ: &mut FE, -) -> WasmResult { - // Check the alignment of `linear_mem_addr`. - // - // Note that the `iadd_imm` here and the `try_from` only works for 32-bit - // memories. 
- let access_ty_bytes = access_ty.bytes(); - assert!(builder.func.dfg.value_type(linear_mem_addr) == I32); - let final_lma = builder - .ins() - .iadd_imm(linear_mem_addr, i64::try_from(memarg.offset).unwrap()); - if access_ty_bytes != 1 { - assert!(access_ty_bytes == 2 || access_ty_bytes == 4 || access_ty_bytes == 8); - let final_lma_misalignment = builder - .ins() - .band_imm(final_lma, i64::from(access_ty_bytes - 1)); - let f = builder - .ins() - .ifcmp_imm(final_lma_misalignment, i64::from(0)); - builder - .ins() - .trapif(IntCC::NotEqual, f, ir::TrapCode::HeapMisaligned); - } - - // Compute the final effective address. - let heap = state.get_heap(builder.func, memarg.memory, environ)?; - let (base, offset) = get_heap_addr( - heap, - final_lma, - /*offset=*/ 0, - access_ty.bytes(), - environ.pointer_type(), - builder, - ); - - let final_effective_address = builder.ins().iadd_imm(base, i64::from(offset)); - Ok(final_effective_address) -} - fn translate_atomic_rmw( widened_ty: Type, access_ty: Type, @@ -2425,7 +2369,7 @@ fn translate_atomic_rmw( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult<()> { - let (linear_mem_addr, mut arg2) = state.pop2(); + let mut arg2 = state.pop1(); let arg2_ty = builder.func.dfg.value_type(arg2); // The operation is performed at type `access_ty`, and the old value is zero-extended @@ -2450,15 +2394,9 @@ fn translate_atomic_rmw( arg2 = builder.ins().ireduce(access_ty, arg2); } - let final_effective_address = - finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; + let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; - // See the comments in `prepare_load` about the flags. - let mut flags = MemFlags::new(); - flags.set_endianness(ir::Endianness::Little); - let mut res = builder - .ins() - .atomic_rmw(access_ty, flags, op, final_effective_address, arg2); + let mut res = builder.ins().atomic_rmw(access_ty, flags, op, addr, arg2); if access_ty != widened_ty { res = builder.ins().uextend(widened_ty, res); } @@ -2474,7 +2412,7 @@ fn translate_atomic_cas( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult<()> { - let (linear_mem_addr, mut expected, mut replacement) = state.pop3(); + let (mut expected, mut replacement) = state.pop2(); let expected_ty = builder.func.dfg.value_type(expected); let replacement_ty = builder.func.dfg.value_type(replacement); @@ -2504,15 +2442,8 @@ fn translate_atomic_cas( replacement = builder.ins().ireduce(access_ty, replacement); } - let final_effective_address = - finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; - - // See the comments in `prepare_load` about the flags. - let mut flags = MemFlags::new(); - flags.set_endianness(ir::Endianness::Little); - let mut res = builder - .ins() - .atomic_cas(flags, final_effective_address, expected, replacement); + let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; + let mut res = builder.ins().atomic_cas(flags, addr, expected, replacement); if access_ty != widened_ty { res = builder.ins().uextend(widened_ty, res); } @@ -2528,8 +2459,6 @@ fn translate_atomic_load( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult<()> { - let linear_mem_addr = state.pop1(); - // The load is performed at type `access_ty`, and the loaded value is zero extended // to `widened_ty`. 
match access_ty { @@ -2547,15 +2476,8 @@ fn translate_atomic_load( }; assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes()); - let final_effective_address = - finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; - - // See the comments in `prepare_load` about the flags. - let mut flags = MemFlags::new(); - flags.set_endianness(ir::Endianness::Little); - let mut res = builder - .ins() - .atomic_load(access_ty, flags, final_effective_address); + let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; + let mut res = builder.ins().atomic_load(access_ty, flags, addr); if access_ty != widened_ty { res = builder.ins().uextend(widened_ty, res); } @@ -2570,7 +2492,7 @@ fn translate_atomic_store( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult<()> { - let (linear_mem_addr, mut data) = state.pop2(); + let mut data = state.pop1(); let data_ty = builder.func.dfg.value_type(data); // The operation is performed at type `access_ty`, and the data to be stored may first @@ -2594,15 +2516,8 @@ fn translate_atomic_store( data = builder.ins().ireduce(access_ty, data); } - let final_effective_address = - finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; - - // See the comments in `prepare_load` about the flags. - let mut flags = MemFlags::new(); - flags.set_endianness(ir::Endianness::Little); - builder - .ins() - .atomic_store(flags, data, final_effective_address); + let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; + builder.ins().atomic_store(flags, data, addr); Ok(()) } diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs index 474aa059014d..7949acc6b0c1 100644 --- a/cranelift/wasm/src/environ/spec.rs +++ b/cranelift/wasm/src/environ/spec.rs @@ -613,6 +613,10 @@ pub trait FuncEnvironment: TargetEnvironment { /// for the same index. Whether the waited-on value is 32- or 64-bit can be /// determined by examining the type of `expected`, which must be only I32 or I64. /// + /// Note that the `addr` here is the host linear memory address rather + /// than a relative wasm linear memory address. The type of this value is + /// the same as the host's pointer. + /// /// Returns an i32, which is negative if the helper call failed. fn translate_atomic_wait( &mut self, @@ -629,6 +633,10 @@ pub trait FuncEnvironment: TargetEnvironment { /// to wait on, and `heap` is the heap reference returned by `make_heap` /// for the same index. /// + /// Note that the `addr` here is the host linear memory address rather + /// than a relative wasm linear memory address. The type of this value is + /// the same as the host's pointer. + /// /// Returns an i64, which is negative if the helper call failed. fn translate_atomic_notify( &mut self, diff --git a/crates/environ/src/builtin.rs b/crates/environ/src/builtin.rs index 2d56cf9b10ad..49c3f16dfe9c 100644 --- a/crates/environ/src/builtin.rs +++ b/crates/environ/src/builtin.rs @@ -38,11 +38,11 @@ macro_rules! foreach_builtin_function { /// Returns an index for Wasm's `global.get` instruction for `externref`s. externref_global_set(vmctx, i32, reference) -> (); /// Returns an index for wasm's `memory.atomic.notify` instruction. - memory_atomic_notify(vmctx, i32, i32, i32) -> (i32); + memory_atomic_notify(vmctx, i32, pointer, i32) -> (i32); /// Returns an index for wasm's `memory.atomic.wait32` instruction. 
- memory_atomic_wait32(vmctx, i32, i32, i32, i64) -> (i32); + memory_atomic_wait32(vmctx, i32, pointer, i32, i64) -> (i32); /// Returns an index for wasm's `memory.atomic.wait64` instruction. - memory_atomic_wait64(vmctx, i32, i32, i64, i64) -> (i32); + memory_atomic_wait64(vmctx, i32, pointer, i64, i64) -> (i32); /// Invoked when fuel has run out while executing a function. out_of_gas(vmctx) -> (); } diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index a578e7f3d605..816e888a3169 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -57,11 +57,14 @@ //! ``` use crate::externref::VMExternRef; +use crate::instance::Instance; use crate::table::Table; use crate::traphandlers::{raise_lib_trap, Trap}; use crate::vmcontext::{VMCallerCheckedAnyfunc, VMContext}; +use backtrace::Backtrace; use std::mem; use std::ptr::{self, NonNull}; +use wasmtime_environ::ir::TrapCode; use wasmtime_environ::wasm::{ DataIndex, ElemIndex, GlobalIndex, MemoryIndex, TableElementType, TableIndex, }; @@ -449,40 +452,103 @@ impl std::fmt::Display for Unimplemented { /// Implementation of `memory.atomic.notify` for locally defined memories. pub unsafe extern "C" fn wasmtime_memory_atomic_notify( - _vmctx: *mut VMContext, - _memory_index: u32, - _addr: u32, + vmctx: *mut VMContext, + memory_index: u32, + addr: usize, _count: u32, ) -> u32 { - raise_lib_trap(Trap::User(Box::new(Unimplemented( - "wasm atomics (fn wasmtime_memory_atomic_notify) unsupported", - )))); + let result = { + let memory = MemoryIndex::from_u32(memory_index); + let instance = (*vmctx).instance(); + // this should never overflow since addr + 4 either hits a guard page + // or it's been validated to be in-bounds already. Double-check for now + // just to be sure. + let addr_to_check = addr.checked_add(4).unwrap(); + validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| { + Err(Trap::User(Box::new(Unimplemented( + "wasm atomics (fn wasmtime_memory_atomic_notify) unsupported", + )))) + }) + }; + match result { + Ok(n) => n, + Err(e) => raise_lib_trap(e), + } } /// Implementation of `memory.atomic.wait32` for locally defined memories. pub unsafe extern "C" fn wasmtime_memory_atomic_wait32( - _vmctx: *mut VMContext, - _memory_index: u32, - _addr: u32, + vmctx: *mut VMContext, + memory_index: u32, + addr: usize, _expected: u32, _timeout: u64, ) -> u32 { - raise_lib_trap(Trap::User(Box::new(Unimplemented( - "wasm atomics (fn wasmtime_memory_atomic_wait32) unsupported", - )))); + let result = { + let memory = MemoryIndex::from_u32(memory_index); + let instance = (*vmctx).instance(); + // see wasmtime_memory_atomic_notify for why this shouldn't overflow + // but we still double-check + let addr_to_check = addr.checked_add(4).unwrap(); + validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| { + Err(Trap::User(Box::new(Unimplemented( + "wasm atomics (fn wasmtime_memory_atomic_wait32) unsupported", + )))) + }) + }; + match result { + Ok(n) => n, + Err(e) => raise_lib_trap(e), + } } /// Implementation of `memory.atomic.wait64` for locally defined memories. 
pub unsafe extern "C" fn wasmtime_memory_atomic_wait64( - _vmctx: *mut VMContext, - _memory_index: u32, - _addr: u32, + vmctx: *mut VMContext, + memory_index: u32, + addr: usize, _expected: u64, _timeout: u64, ) -> u32 { - raise_lib_trap(Trap::User(Box::new(Unimplemented( - "wasm atomics (fn wasmtime_memory_atomic_wait32) unsupported", - )))); + let result = { + let memory = MemoryIndex::from_u32(memory_index); + let instance = (*vmctx).instance(); + // see wasmtime_memory_atomic_notify for why this shouldn't overflow + // but we still double-check + let addr_to_check = addr.checked_add(8).unwrap(); + validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| { + Err(Trap::User(Box::new(Unimplemented( + "wasm atomics (fn wasmtime_memory_atomic_wait64) unsupported", + )))) + }) + }; + match result { + Ok(n) => n, + Err(e) => raise_lib_trap(e), + } +} + +/// For atomic operations we still check the actual address despite this also +/// being checked via the `heap_addr` instruction in cranelift. The reason for +/// that is because the `heap_addr` instruction can defer to a later segfault to +/// actually recognize the out-of-bounds whereas once we're running Rust code +/// here we don't want to segfault. +/// +/// In the situations where bounds checks were elided in JIT code (because oob +/// would then be later guaranteed to segfault) this manual check is here +/// so we don't segfault from Rust. +unsafe fn validate_atomic_addr( + instance: &Instance, + memory: MemoryIndex, + addr: usize, +) -> Result<(), Trap> { + if addr > instance.get_memory(memory).current_length { + return Err(Trap::Wasm { + trap_code: TrapCode::HeapOutOfBounds, + backtrace: Backtrace::new_unresolved(), + }); + } + Ok(()) } /// Hook for when an instance runs out of fuel. diff --git a/tests/all/wast.rs b/tests/all/wast.rs index 45e874da1f31..c4e10da30d07 100644 --- a/tests/all/wast.rs +++ b/tests/all/wast.rs @@ -25,6 +25,11 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()> // by reference types. let reftypes = simd || wast.iter().any(|s| s == "reference-types"); + // Threads aren't implemented in the old backend, so skip those tests. 
+ if threads && cfg!(feature = "old-x86-backend") { + return Ok(()); + } + let mut cfg = Config::new(); cfg.wasm_simd(simd) .wasm_bulk_memory(bulk_mem) diff --git a/tests/misc_testsuite/threads/load-store-alignment.wast b/tests/misc_testsuite/threads/load-store-alignment.wast new file mode 100644 index 000000000000..5d8fff8db7ff --- /dev/null +++ b/tests/misc_testsuite/threads/load-store-alignment.wast @@ -0,0 +1,126 @@ +(module + ;; NB this should use a shared memory when it's supported + (memory 1) + + (func (export "32.load8u") (param i32) (result i32) + local.get 0 i32.atomic.load8_u) + (func (export "32.load16u") (param i32) (result i32) + local.get 0 i32.atomic.load16_u) + (func (export "32.load32u") (param i32) (result i32) + local.get 0 i32.atomic.load) + (func (export "64.load8u") (param i32) (result i64) + local.get 0 i64.atomic.load8_u) + (func (export "64.load16u") (param i32) (result i64) + local.get 0 i64.atomic.load16_u) + (func (export "64.load32u") (param i32) (result i64) + local.get 0 i64.atomic.load32_u) + (func (export "64.load64u") (param i32) (result i64) + local.get 0 i64.atomic.load) + + (func (export "32.store8") (param i32) + local.get 0 i32.const 0 i32.atomic.store8) + (func (export "32.store16") (param i32) + local.get 0 i32.const 0 i32.atomic.store16) + (func (export "32.store32") (param i32) + local.get 0 i32.const 0 i32.atomic.store) + (func (export "64.store8") (param i32) + local.get 0 i64.const 0 i64.atomic.store8) + (func (export "64.store16") (param i32) + local.get 0 i64.const 0 i64.atomic.store16) + (func (export "64.store32") (param i32) + local.get 0 i64.const 0 i64.atomic.store32) + (func (export "64.store64") (param i32) + local.get 0 i64.const 0 i64.atomic.store) + + (func (export "32.load8u o1") (param i32) (result i32) + local.get 0 i32.atomic.load8_u offset=1) + (func (export "32.load16u o1") (param i32) (result i32) + local.get 0 i32.atomic.load16_u offset=1) + (func (export "32.load32u o1") (param i32) (result i32) + local.get 0 i32.atomic.load offset=1) + (func (export "64.load8u o1") (param i32) (result i64) + local.get 0 i64.atomic.load8_u offset=1) + (func (export "64.load16u o1") (param i32) (result i64) + local.get 0 i64.atomic.load16_u offset=1) + (func (export "64.load32u o1") (param i32) (result i64) + local.get 0 i64.atomic.load32_u offset=1) + (func (export "64.load64u o1") (param i32) (result i64) + local.get 0 i64.atomic.load offset=1) + + (func (export "32.store8 o1") (param i32) + local.get 0 i32.const 0 i32.atomic.store8 offset=1) + (func (export "32.store16 o1") (param i32) + local.get 0 i32.const 0 i32.atomic.store16 offset=1) + (func (export "32.store32 o1") (param i32) + local.get 0 i32.const 0 i32.atomic.store offset=1) + (func (export "64.store8 o1") (param i32) + local.get 0 i64.const 0 i64.atomic.store8 offset=1) + (func (export "64.store16 o1") (param i32) + local.get 0 i64.const 0 i64.atomic.store16 offset=1) + (func (export "64.store32 o1") (param i32) + local.get 0 i64.const 0 i64.atomic.store32 offset=1) + (func (export "64.store64 o1") (param i32) + local.get 0 i64.const 0 i64.atomic.store offset=1) +) + +;; aligned loads +(assert_return (invoke "32.load8u" (i32.const 0)) (i32.const 0)) +(assert_return (invoke "32.load16u" (i32.const 0)) (i32.const 0)) +(assert_return (invoke "32.load32u" (i32.const 0)) (i32.const 0)) +(assert_return (invoke "64.load8u" (i32.const 0)) (i64.const 0)) +(assert_return (invoke "64.load16u" (i32.const 0)) (i64.const 0)) +(assert_return (invoke "64.load64u" (i32.const 0)) (i64.const 
0)) +(assert_return (invoke "32.load8u o1" (i32.const 0)) (i32.const 0)) +(assert_return (invoke "32.load16u o1" (i32.const 1)) (i32.const 0)) +(assert_return (invoke "32.load32u o1" (i32.const 3)) (i32.const 0)) +(assert_return (invoke "64.load8u o1" (i32.const 0)) (i64.const 0)) +(assert_return (invoke "64.load16u o1" (i32.const 1)) (i64.const 0)) +(assert_return (invoke "64.load32u o1" (i32.const 3)) (i64.const 0)) +(assert_return (invoke "64.load64u o1" (i32.const 7)) (i64.const 0)) + +;; misaligned loads +(assert_return (invoke "32.load8u" (i32.const 1)) (i32.const 0)) +(assert_trap (invoke "32.load16u" (i32.const 1)) "misaligned memory access") +(assert_trap (invoke "32.load32u" (i32.const 1)) "misaligned memory access") +(assert_return (invoke "64.load8u" (i32.const 1)) (i64.const 0)) +(assert_trap (invoke "64.load16u" (i32.const 1)) "misaligned memory access") +(assert_trap (invoke "64.load32u" (i32.const 1)) "misaligned memory access") +(assert_trap (invoke "64.load64u" (i32.const 1)) "misaligned memory access") +(assert_return (invoke "32.load8u o1" (i32.const 0)) (i32.const 0)) +(assert_trap (invoke "32.load16u o1" (i32.const 0)) "misaligned memory access") +(assert_trap (invoke "32.load32u o1" (i32.const 0)) "misaligned memory access") +(assert_return (invoke "64.load8u o1" (i32.const 0)) (i64.const 0)) +(assert_trap (invoke "64.load16u o1" (i32.const 0)) "misaligned memory access") +(assert_trap (invoke "64.load32u o1" (i32.const 0)) "misaligned memory access") +(assert_trap (invoke "64.load64u o1" (i32.const 0)) "misaligned memory access") + +;; aligned stores +(assert_return (invoke "32.store8" (i32.const 0))) +(assert_return (invoke "32.store16" (i32.const 0))) +(assert_return (invoke "32.store32" (i32.const 0))) +(assert_return (invoke "64.store8" (i32.const 0))) +(assert_return (invoke "64.store16" (i32.const 0))) +(assert_return (invoke "64.store64" (i32.const 0))) +(assert_return (invoke "32.store8 o1" (i32.const 0))) +(assert_return (invoke "32.store16 o1" (i32.const 1))) +(assert_return (invoke "32.store32 o1" (i32.const 3))) +(assert_return (invoke "64.store8 o1" (i32.const 0))) +(assert_return (invoke "64.store16 o1" (i32.const 1))) +(assert_return (invoke "64.store32 o1" (i32.const 3))) +(assert_return (invoke "64.store64 o1" (i32.const 7))) + +;; misaligned stores +(assert_return (invoke "32.store8" (i32.const 1)) (i32.const 0)) +(assert_trap (invoke "32.store16" (i32.const 1)) "misaligned memory access") +(assert_trap (invoke "32.store32" (i32.const 1)) "misaligned memory access") +(assert_return (invoke "64.store8" (i32.const 1)) (i64.const 0)) +(assert_trap (invoke "64.store16" (i32.const 1)) "misaligned memory access") +(assert_trap (invoke "64.store32" (i32.const 1)) "misaligned memory access") +(assert_trap (invoke "64.store64" (i32.const 1)) "misaligned memory access") +(assert_return (invoke "32.store8 o1" (i32.const 0)) (i32.const 0)) +(assert_trap (invoke "32.store16 o1" (i32.const 0)) "misaligned memory access") +(assert_trap (invoke "32.store32 o1" (i32.const 0)) "misaligned memory access") +(assert_return (invoke "64.store8 o1" (i32.const 0)) (i64.const 0)) +(assert_trap (invoke "64.store16 o1" (i32.const 0)) "misaligned memory access") +(assert_trap (invoke "64.store32 o1" (i32.const 0)) "misaligned memory access") +(assert_trap (invoke "64.store64 o1" (i32.const 0)) "misaligned memory access")
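Not part of the patch above, but a small sketch of its last bullet ("Guarantee misalignment checks happen before out-of-bounds"), reusing the exports of the module in the new test file and assuming its 1-page (65536-byte) memory and the trap messages used by the existing asserts. Because `prepare_atomic_addr` emits the alignment trap before the `heap_addr` bounds check, an atomic access that is both misaligned and out of bounds should report the misalignment first.

;; hypothetical asserts, not present in load-store-alignment.wast: address
;; 65537 is both misaligned and out of bounds for a 1-page memory, so the
;; alignment check is expected to trap before the bounds check.
(assert_trap (invoke "32.load16u" (i32.const 65537)) "misaligned memory access")
(assert_trap (invoke "64.store64" (i32.const 65537)) "misaligned memory access")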