diff --git a/CHANGELOG.md b/CHANGELOG.md index c7df09c1f47..b07db20936b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,15 @@ ## **[Unreleased]** -- [#1153](https://github.com/wasmerio/wasmer/pull/1969) Added D integration to the README +### Added + +- [#1969](https://github.com/wasmerio/wasmer/pull/1969) Added D integration to the README + +### Changed +- [#1979](https://github.com/wasmerio/wasmer/pull/1979) `WasmPtr::get_utf8_string` was renamed to `WasmPtr::get_utf8_str` and made `unsafe`. + +### Fixed +- [#1979](https://github.com/wasmerio/wasmer/pull/1979) `WasmPtr::get_utf8_string` now returns a `String`, fixing a soundness issue in certain circumstances. The old functionality is available under a new `unsafe` function, `WasmPtr::get_utf8_str`. ## 1.0.0-rc1 - 2020-12-23 diff --git a/lib/api/src/ptr.rs b/lib/api/src/ptr.rs index 0db7189d0e6..d7d8b6f8061 100644 --- a/lib/api/src/ptr.rs +++ b/lib/api/src/ptr.rs @@ -185,10 +185,22 @@ impl WasmPtr { /// Get a UTF-8 string from the `WasmPtr` with the given length. /// - /// Note that this method returns a reference to Wasm linear memory. The + /// Note that . The /// underlying data can be mutated if the Wasm is allowed to execute or /// an aliasing `WasmPtr` is used to mutate memory. - pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option<&str> { + /// + /// # Safety + /// This method returns a reference to Wasm linear memory. The underlying + /// data can be mutated if the Wasm is allowed to execute or an aliasing + /// `WasmPtr` is used to mutate memory. + /// + /// `str` has invariants that must not be broken by mutating Wasm memory. + /// Thus the caller must ensure that the backing memory is not modified + /// while the reference is held. + /// + /// Additionally, if `memory` is dynamic, the caller must also ensure that `memory` + /// is not grown while the reference is held. + pub unsafe fn get_utf8_str<'a>(self, memory: &'a Memory, str_len: u32) -> Option<&'a str> { let memory_size = memory.size().bytes().0; if self.offset as usize + str_len as usize > memory.size().bytes().0 @@ -196,25 +208,58 @@ impl WasmPtr { { return None; } - let ptr = unsafe { memory.view::().as_ptr().add(self.offset as usize) as *const u8 }; - let slice: &[u8] = unsafe { std::slice::from_raw_parts(ptr, str_len as usize) }; + let ptr = memory.view::().as_ptr().add(self.offset as usize) as *const u8; + let slice: &[u8] = std::slice::from_raw_parts(ptr, str_len as usize); std::str::from_utf8(slice).ok() } + /// Get a UTF-8 `String` from the `WasmPtr` with the given length. + /// + /// an aliasing `WasmPtr` is used to mutate memory. + pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { + let memory_size = memory.size().bytes().0; + if self.offset as usize + str_len as usize > memory.size().bytes().0 + || self.offset as usize >= memory_size + { + return None; + } + + // TODO: benchmark the internals of this function: there is likely room for + // micro-optimization here and this may be a fairly common function in user code. + let view = memory.view::(); + + let mut vec: Vec = Vec::with_capacity(str_len as usize); + let base = self.offset as usize; + for i in 0..(str_len as usize) { + let byte = view[base + i].get(); + vec.push(byte); + } + + String::from_utf8(vec).ok() + } + /// Get a UTF-8 string from the `WasmPtr`, where the string is nul-terminated. /// /// Note that this does not account for UTF-8 strings that _contain_ nul themselves, - /// [`WasmPtr::get_utf8_string`] has to be used for those. + /// [`WasmPtr::get_utf8_str`] has to be used for those. /// - /// Also note that this method returns a reference to Wasm linear memory. The - /// underlying data can be mutated if the Wasm is allowed to execute or - /// an aliasing `WasmPtr` is used to mutate memory. - pub fn get_utf8_string_with_nul(self, memory: &Memory) -> Option<&str> { + /// # Safety + /// This method behaves similarly to [`WasmPtr::get_utf8_str`], all safety invariants on + /// that method must also be upheld here. + pub unsafe fn get_utf8_str_with_nul<'a>(self, memory: &'a Memory) -> Option<&'a str> { memory.view::()[(self.offset as usize)..] .iter() .map(|cell| cell.get()) .position(|byte| byte == 0) - .and_then(|length| self.get_utf8_string(memory, length as u32)) + .and_then(|length| self.get_utf8_str(memory, length as u32)) + } + + /// Get a UTF-8 `String` from the `WasmPtr`, where the string is nul-terminated. + /// + /// Note that this does not account for UTF-8 strings that _contain_ nul themselves, + /// [`WasmPtr::get_utf8_string`] has to be used for those. + pub fn get_utf8_string_with_nul(self, memory: &Memory) -> Option { + unsafe { self.get_utf8_str_with_nul(memory) }.map(|s| s.to_owned()) } } @@ -280,6 +325,7 @@ mod test { assert!(start_wasm_ptr.deref(&memory).is_some()); assert!(unsafe { start_wasm_ptr.deref_mut(&memory).is_some() }); assert!(start_wasm_ptr_array.deref(&memory, 0, 0).is_some()); + assert!(unsafe { start_wasm_ptr_array.get_utf8_str(&memory, 0).is_some() }); assert!(start_wasm_ptr_array.get_utf8_string(&memory, 0).is_some()); assert!(unsafe { start_wasm_ptr_array.deref_mut(&memory, 0, 0).is_some() }); assert!(start_wasm_ptr_array.deref(&memory, 0, 1).is_some()); @@ -301,6 +347,7 @@ mod test { assert!(end_wasm_ptr_array.deref(&memory, idx, len).is_none()); assert!(unsafe { end_wasm_ptr_array.deref_mut(&memory, idx, len).is_none() }); } + assert!(unsafe { end_wasm_ptr_array.get_utf8_str(&memory, 2).is_none() }); assert!(end_wasm_ptr_array.get_utf8_string(&memory, 2).is_none()); // test that accesing the last valid memory address for a u32 is valid diff --git a/lib/emscripten/src/ptr.rs b/lib/emscripten/src/ptr.rs index 54d5fada657..a24f530c652 100644 --- a/lib/emscripten/src/ptr.rs +++ b/lib/emscripten/src/ptr.rs @@ -102,7 +102,16 @@ impl WasmPtr { } #[inline(always)] - pub fn get_utf8_string<'a>(self, memory: &'a Memory, str_len: u32) -> Option<&'a str> { + pub unsafe fn get_utf8_str<'a>(self, memory: &'a Memory, str_len: u32) -> Option<&'a str> { + if self.0.offset() == 0 { + None + } else { + self.0.get_utf8_str(memory, str_len) + } + } + + #[inline(always)] + pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { if self.0.offset() == 0 { None } else { diff --git a/lib/wasi/src/macros.rs b/lib/wasi/src/macros.rs index 278b31a6bf2..07f480ffe82 100644 --- a/lib/wasi/src/macros.rs +++ b/lib/wasi/src/macros.rs @@ -24,8 +24,13 @@ macro_rules! wasi_try { /// Reads a string from Wasm memory and returns the invalid argument error /// code if it fails. +/// +/// # Safety +/// See the safety docs for [`wasmer::WasmPtr::get_utf8_str`]: the returned value +/// points into Wasm memory and care must be taken that it does not get +/// corrupted. macro_rules! get_input_str { ($memory:expr, $data:expr, $len:expr) => {{ - wasi_try!($data.get_utf8_string($memory, $len), __WASI_EINVAL) + wasi_try!($data.get_utf8_str($memory, $len), __WASI_EINVAL) }}; } diff --git a/lib/wasi/src/ptr.rs b/lib/wasi/src/ptr.rs index 430c2b6ec38..4d5ca86a8e0 100644 --- a/lib/wasi/src/ptr.rs +++ b/lib/wasi/src/ptr.rs @@ -80,7 +80,12 @@ impl WasmPtr { } #[inline(always)] - pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option<&str> { + pub unsafe fn get_utf8_str(self, memory: &Memory, str_len: u32) -> Option<&str> { + self.0.get_utf8_str(memory, str_len) + } + + #[inline(always)] + pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { self.0.get_utf8_string(memory, str_len) } } diff --git a/lib/wasi/src/syscalls/mod.rs b/lib/wasi/src/syscalls/mod.rs index d174af20caf..7bb53919ad4 100644 --- a/lib/wasi/src/syscalls/mod.rs +++ b/lib/wasi/src/syscalls/mod.rs @@ -1348,7 +1348,7 @@ pub fn path_create_directory( if !has_rights(working_dir.rights, __WASI_RIGHT_PATH_CREATE_DIRECTORY) { return __WASI_EACCES; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> fd: {}, path: {}", fd, &path_string); let path = std::path::PathBuf::from(path_string); @@ -1451,7 +1451,7 @@ pub fn path_filestat_get( if !has_rights(root_dir.rights, __WASI_RIGHT_PATH_FILESTAT_GET) { return __WASI_EACCES; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> base_fd: {}, path: {}", fd, &path_string); @@ -1516,7 +1516,7 @@ pub fn path_filestat_set_times( return __WASI_EINVAL; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> base_fd: {}, path: {}", fd, &path_string); let file_inode = wasi_try!(state.fs.get_inode_at_path( @@ -1595,8 +1595,8 @@ pub fn path_link( debug!(" - will follow symlinks when opening path"); } let (memory, mut state) = env.get_memory_and_wasi_state(0); - let old_path_str = get_input_str!(memory, old_path, old_path_len); - let new_path_str = get_input_str!(memory, new_path, new_path_len); + let old_path_str = unsafe { get_input_str!(memory, old_path, old_path_len) }; + let new_path_str = unsafe { get_input_str!(memory, new_path, new_path_len) }; let source_fd = wasi_try!(state.fs.get_fd(old_fd)); let target_fd = wasi_try!(state.fs.get_fd(new_fd)); debug!( @@ -1700,7 +1700,7 @@ pub fn path_open( if !has_rights(working_dir.rights, __WASI_RIGHT_PATH_OPEN) { return __WASI_EACCES; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> fd: {}, path: {}", dirfd, &path_string); @@ -1943,7 +1943,7 @@ pub fn path_readlink( if !has_rights(base_dir.rights, __WASI_RIGHT_PATH_READLINK) { return __WASI_EACCES; } - let path_str = get_input_str!(memory, path, path_len); + let path_str = unsafe { get_input_str!(memory, path, path_len) }; let inode = wasi_try!(state.fs.get_inode_at_path(dir_fd, path_str, false)); if let Kind::Symlink { relative_path, .. } = &state.fs.inodes[inode].kind { @@ -1983,7 +1983,7 @@ pub fn path_remove_directory( let (memory, mut state) = env.get_memory_and_wasi_state(0); let base_dir = wasi_try!(state.fs.fd_map.get(&fd), __WASI_EBADF); - let path_str = get_input_str!(memory, path, path_len); + let path_str = unsafe { get_input_str!(memory, path, path_len) }; let inode = wasi_try!(state.fs.get_inode_at_path(fd, path_str, false)); let (parent_inode, childs_name) = @@ -2062,9 +2062,9 @@ pub fn path_rename( old_fd, new_fd ); let (memory, mut state) = env.get_memory_and_wasi_state(0); - let source_str = get_input_str!(memory, old_path, old_path_len); + let source_str = unsafe { get_input_str!(memory, old_path, old_path_len) }; let source_path = std::path::Path::new(source_str); - let target_str = get_input_str!(memory, new_path, new_path_len); + let target_str = unsafe { get_input_str!(memory, new_path, new_path_len) }; let target_path = std::path::Path::new(target_str); debug!("=> rename from {} to {}", source_str, target_str); @@ -2169,8 +2169,8 @@ pub fn path_symlink( ) -> __wasi_errno_t { debug!("wasi::path_symlink"); let (memory, mut state) = env.get_memory_and_wasi_state(0); - let old_path_str = get_input_str!(memory, old_path, old_path_len); - let new_path_str = get_input_str!(memory, new_path, new_path_len); + let old_path_str = unsafe { get_input_str!(memory, old_path, old_path_len) }; + let new_path_str = unsafe { get_input_str!(memory, new_path, new_path_len) }; let base_fd = wasi_try!(state.fs.get_fd(fd)); if !has_rights(base_fd.rights, __WASI_RIGHT_PATH_SYMLINK) { return __WASI_EACCES; @@ -2251,7 +2251,7 @@ pub fn path_unlink_file( if !has_rights(base_dir.rights, __WASI_RIGHT_PATH_UNLINK_FILE) { return __WASI_EACCES; } - let path_str = get_input_str!(memory, path, path_len); + let path_str = unsafe { get_input_str!(memory, path, path_len) }; debug!("Requested file: {}", path_str); let inode = wasi_try!(state.fs.get_inode_at_path(fd, path_str, false));