From 34072cbf6f91adcc2e3912effbcfda367ba550ff Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Mon, 4 Jan 2021 07:23:08 -0800 Subject: [PATCH 1/3] Make `WasmPtr::get_utf8_string` return `String` The old functionality is readded as an `unsafe` method, `WasmPtr::get_utf8_str`. It's important to note that `WasmPtr::get_utf8_string` isn't _entirely_ sound and technically should be marked `unsafe` as well, but this change is a massive improvement over what we had before. For future reference, the reason `WasmPtr::get_utf8_string` still has some soundness issues is that we can't guarantee exclusive access to the memory while parsing in the string, we temporarily hold a `&[u8]` and hope it doesn't get mutated. It's possible to implement this method in a more correct way by copying each byte as we read it into a `Vec` and converting that into a String. --- lib/api/src/ptr.rs | 47 +++++++++++++++++++++++++++++++----- lib/emscripten/src/ptr.rs | 11 ++++++++- lib/wasi/src/macros.rs | 7 +++++- lib/wasi/src/ptr.rs | 7 +++++- lib/wasi/src/syscalls/mod.rs | 26 ++++++++++---------- 5 files changed, 76 insertions(+), 22 deletions(-) diff --git a/lib/api/src/ptr.rs b/lib/api/src/ptr.rs index 0db7189d0e6..e70a6e76167 100644 --- a/lib/api/src/ptr.rs +++ b/lib/api/src/ptr.rs @@ -188,7 +188,15 @@ impl WasmPtr { /// Note that this method returns a reference to Wasm linear memory. The /// underlying data can be mutated if the Wasm is allowed to execute or /// an aliasing `WasmPtr` is used to mutate memory. - pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option<&str> { + /// + /// # Safety + /// `str` has invariants that must not be broken by mutating Wasm memory. + /// Thus the caller must ensure exclusive access to memory or otherwise ensure + /// that the backing memory is not modified while the reference is held. + /// + /// Additionally, if `memory` is dynamic, the caller must also ensure that `memory` + /// is not grown while the reference is held. + pub unsafe fn get_utf8_str<'a>(self, memory: &'a Memory, str_len: u32) -> Option<&'a str> { let memory_size = memory.size().bytes().0; if self.offset as usize + str_len as usize > memory.size().bytes().0 @@ -196,25 +204,50 @@ impl WasmPtr { { return None; } - let ptr = unsafe { memory.view::().as_ptr().add(self.offset as usize) as *const u8 }; - let slice: &[u8] = unsafe { std::slice::from_raw_parts(ptr, str_len as usize) }; + let ptr = memory.view::().as_ptr().add(self.offset as usize) as *const u8; + let slice: &[u8] = std::slice::from_raw_parts(ptr, str_len as usize); std::str::from_utf8(slice).ok() } + /// Get a UTF-8 string from the `WasmPtr` with the given length. + /// + /// Note that this method returns a reference to Wasm linear memory. The + /// underlying data can be mutated if the Wasm is allowed to execute or + /// an aliasing `WasmPtr` is used to mutate memory. + pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { + unsafe { self.get_utf8_str(memory, str_len).map(|s| s.to_owned()) } + } + /// Get a UTF-8 string from the `WasmPtr`, where the string is nul-terminated. /// /// Note that this does not account for UTF-8 strings that _contain_ nul themselves, - /// [`WasmPtr::get_utf8_string`] has to be used for those. + /// [`WasmPtr::get_utf8_str`] has to be used for those. /// /// Also note that this method returns a reference to Wasm linear memory. The /// underlying data can be mutated if the Wasm is allowed to execute or /// an aliasing `WasmPtr` is used to mutate memory. - pub fn get_utf8_string_with_nul(self, memory: &Memory) -> Option<&str> { + /// + /// # Safety + /// This method behaves similarly to [`WasmPtr::get_utf8_str`], all safety invariants on + /// that method must also be upheld here. + pub unsafe fn get_utf8_str_with_nul<'a>(self, memory: &'a Memory) -> Option<&'a str> { memory.view::()[(self.offset as usize)..] .iter() .map(|cell| cell.get()) .position(|byte| byte == 0) - .and_then(|length| self.get_utf8_string(memory, length as u32)) + .and_then(|length| self.get_utf8_str(memory, length as u32)) + } + + /// Get a UTF-8 string from the `WasmPtr`, where the string is nul-terminated. + /// + /// Note that this does not account for UTF-8 strings that _contain_ nul themselves, + /// [`WasmPtr::get_utf8_string`] has to be used for those. + /// + /// Also note that this method returns a reference to Wasm linear memory. The + /// underlying data can be mutated if the Wasm is allowed to execute or + /// an aliasing `WasmPtr` is used to mutate memory. + pub fn get_utf8_string_with_nul(self, memory: &Memory) -> Option { + unsafe { self.get_utf8_str_with_nul(memory) }.map(|s| s.to_owned()) } } @@ -280,6 +313,7 @@ mod test { assert!(start_wasm_ptr.deref(&memory).is_some()); assert!(unsafe { start_wasm_ptr.deref_mut(&memory).is_some() }); assert!(start_wasm_ptr_array.deref(&memory, 0, 0).is_some()); + assert!(unsafe { start_wasm_ptr_array.get_utf8_str(&memory, 0).is_some() }); assert!(start_wasm_ptr_array.get_utf8_string(&memory, 0).is_some()); assert!(unsafe { start_wasm_ptr_array.deref_mut(&memory, 0, 0).is_some() }); assert!(start_wasm_ptr_array.deref(&memory, 0, 1).is_some()); @@ -301,6 +335,7 @@ mod test { assert!(end_wasm_ptr_array.deref(&memory, idx, len).is_none()); assert!(unsafe { end_wasm_ptr_array.deref_mut(&memory, idx, len).is_none() }); } + assert!(unsafe { end_wasm_ptr_array.get_utf8_str(&memory, 2).is_none() }); assert!(end_wasm_ptr_array.get_utf8_string(&memory, 2).is_none()); // test that accesing the last valid memory address for a u32 is valid diff --git a/lib/emscripten/src/ptr.rs b/lib/emscripten/src/ptr.rs index 54d5fada657..a24f530c652 100644 --- a/lib/emscripten/src/ptr.rs +++ b/lib/emscripten/src/ptr.rs @@ -102,7 +102,16 @@ impl WasmPtr { } #[inline(always)] - pub fn get_utf8_string<'a>(self, memory: &'a Memory, str_len: u32) -> Option<&'a str> { + pub unsafe fn get_utf8_str<'a>(self, memory: &'a Memory, str_len: u32) -> Option<&'a str> { + if self.0.offset() == 0 { + None + } else { + self.0.get_utf8_str(memory, str_len) + } + } + + #[inline(always)] + pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { if self.0.offset() == 0 { None } else { diff --git a/lib/wasi/src/macros.rs b/lib/wasi/src/macros.rs index 278b31a6bf2..07f480ffe82 100644 --- a/lib/wasi/src/macros.rs +++ b/lib/wasi/src/macros.rs @@ -24,8 +24,13 @@ macro_rules! wasi_try { /// Reads a string from Wasm memory and returns the invalid argument error /// code if it fails. +/// +/// # Safety +/// See the safety docs for [`wasmer::WasmPtr::get_utf8_str`]: the returned value +/// points into Wasm memory and care must be taken that it does not get +/// corrupted. macro_rules! get_input_str { ($memory:expr, $data:expr, $len:expr) => {{ - wasi_try!($data.get_utf8_string($memory, $len), __WASI_EINVAL) + wasi_try!($data.get_utf8_str($memory, $len), __WASI_EINVAL) }}; } diff --git a/lib/wasi/src/ptr.rs b/lib/wasi/src/ptr.rs index 430c2b6ec38..4d5ca86a8e0 100644 --- a/lib/wasi/src/ptr.rs +++ b/lib/wasi/src/ptr.rs @@ -80,7 +80,12 @@ impl WasmPtr { } #[inline(always)] - pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option<&str> { + pub unsafe fn get_utf8_str(self, memory: &Memory, str_len: u32) -> Option<&str> { + self.0.get_utf8_str(memory, str_len) + } + + #[inline(always)] + pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { self.0.get_utf8_string(memory, str_len) } } diff --git a/lib/wasi/src/syscalls/mod.rs b/lib/wasi/src/syscalls/mod.rs index d174af20caf..7bb53919ad4 100644 --- a/lib/wasi/src/syscalls/mod.rs +++ b/lib/wasi/src/syscalls/mod.rs @@ -1348,7 +1348,7 @@ pub fn path_create_directory( if !has_rights(working_dir.rights, __WASI_RIGHT_PATH_CREATE_DIRECTORY) { return __WASI_EACCES; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> fd: {}, path: {}", fd, &path_string); let path = std::path::PathBuf::from(path_string); @@ -1451,7 +1451,7 @@ pub fn path_filestat_get( if !has_rights(root_dir.rights, __WASI_RIGHT_PATH_FILESTAT_GET) { return __WASI_EACCES; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> base_fd: {}, path: {}", fd, &path_string); @@ -1516,7 +1516,7 @@ pub fn path_filestat_set_times( return __WASI_EINVAL; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> base_fd: {}, path: {}", fd, &path_string); let file_inode = wasi_try!(state.fs.get_inode_at_path( @@ -1595,8 +1595,8 @@ pub fn path_link( debug!(" - will follow symlinks when opening path"); } let (memory, mut state) = env.get_memory_and_wasi_state(0); - let old_path_str = get_input_str!(memory, old_path, old_path_len); - let new_path_str = get_input_str!(memory, new_path, new_path_len); + let old_path_str = unsafe { get_input_str!(memory, old_path, old_path_len) }; + let new_path_str = unsafe { get_input_str!(memory, new_path, new_path_len) }; let source_fd = wasi_try!(state.fs.get_fd(old_fd)); let target_fd = wasi_try!(state.fs.get_fd(new_fd)); debug!( @@ -1700,7 +1700,7 @@ pub fn path_open( if !has_rights(working_dir.rights, __WASI_RIGHT_PATH_OPEN) { return __WASI_EACCES; } - let path_string = get_input_str!(memory, path, path_len); + let path_string = unsafe { get_input_str!(memory, path, path_len) }; debug!("=> fd: {}, path: {}", dirfd, &path_string); @@ -1943,7 +1943,7 @@ pub fn path_readlink( if !has_rights(base_dir.rights, __WASI_RIGHT_PATH_READLINK) { return __WASI_EACCES; } - let path_str = get_input_str!(memory, path, path_len); + let path_str = unsafe { get_input_str!(memory, path, path_len) }; let inode = wasi_try!(state.fs.get_inode_at_path(dir_fd, path_str, false)); if let Kind::Symlink { relative_path, .. } = &state.fs.inodes[inode].kind { @@ -1983,7 +1983,7 @@ pub fn path_remove_directory( let (memory, mut state) = env.get_memory_and_wasi_state(0); let base_dir = wasi_try!(state.fs.fd_map.get(&fd), __WASI_EBADF); - let path_str = get_input_str!(memory, path, path_len); + let path_str = unsafe { get_input_str!(memory, path, path_len) }; let inode = wasi_try!(state.fs.get_inode_at_path(fd, path_str, false)); let (parent_inode, childs_name) = @@ -2062,9 +2062,9 @@ pub fn path_rename( old_fd, new_fd ); let (memory, mut state) = env.get_memory_and_wasi_state(0); - let source_str = get_input_str!(memory, old_path, old_path_len); + let source_str = unsafe { get_input_str!(memory, old_path, old_path_len) }; let source_path = std::path::Path::new(source_str); - let target_str = get_input_str!(memory, new_path, new_path_len); + let target_str = unsafe { get_input_str!(memory, new_path, new_path_len) }; let target_path = std::path::Path::new(target_str); debug!("=> rename from {} to {}", source_str, target_str); @@ -2169,8 +2169,8 @@ pub fn path_symlink( ) -> __wasi_errno_t { debug!("wasi::path_symlink"); let (memory, mut state) = env.get_memory_and_wasi_state(0); - let old_path_str = get_input_str!(memory, old_path, old_path_len); - let new_path_str = get_input_str!(memory, new_path, new_path_len); + let old_path_str = unsafe { get_input_str!(memory, old_path, old_path_len) }; + let new_path_str = unsafe { get_input_str!(memory, new_path, new_path_len) }; let base_fd = wasi_try!(state.fs.get_fd(fd)); if !has_rights(base_fd.rights, __WASI_RIGHT_PATH_SYMLINK) { return __WASI_EACCES; @@ -2251,7 +2251,7 @@ pub fn path_unlink_file( if !has_rights(base_dir.rights, __WASI_RIGHT_PATH_UNLINK_FILE) { return __WASI_EACCES; } - let path_str = get_input_str!(memory, path, path_len); + let path_str = unsafe { get_input_str!(memory, path, path_len) }; debug!("Requested file: {}", path_str); let inode = wasi_try!(state.fs.get_inode_at_path(fd, path_str, false)); From 316bfa08e960086e0e0bef0b229c0f6b03508db1 Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Mon, 4 Jan 2021 07:37:39 -0800 Subject: [PATCH 2/3] Improve docs, update cl --- CHANGELOG.md | 10 +++++++++- lib/api/src/ptr.rs | 24 +++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7df09c1f47..b07db20936b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,15 @@ ## **[Unreleased]** -- [#1153](https://github.com/wasmerio/wasmer/pull/1969) Added D integration to the README +### Added + +- [#1969](https://github.com/wasmerio/wasmer/pull/1969) Added D integration to the README + +### Changed +- [#1979](https://github.com/wasmerio/wasmer/pull/1979) `WasmPtr::get_utf8_string` was renamed to `WasmPtr::get_utf8_str` and made `unsafe`. + +### Fixed +- [#1979](https://github.com/wasmerio/wasmer/pull/1979) `WasmPtr::get_utf8_string` now returns a `String`, fixing a soundness issue in certain circumstances. The old functionality is available under a new `unsafe` function, `WasmPtr::get_utf8_str`. ## 1.0.0-rc1 - 2020-12-23 diff --git a/lib/api/src/ptr.rs b/lib/api/src/ptr.rs index e70a6e76167..cf34c8fa80f 100644 --- a/lib/api/src/ptr.rs +++ b/lib/api/src/ptr.rs @@ -185,14 +185,18 @@ impl WasmPtr { /// Get a UTF-8 string from the `WasmPtr` with the given length. /// - /// Note that this method returns a reference to Wasm linear memory. The + /// Note that . The /// underlying data can be mutated if the Wasm is allowed to execute or /// an aliasing `WasmPtr` is used to mutate memory. /// /// # Safety + /// This method returns a reference to Wasm linear memory. The underlying + /// data can be mutated if the Wasm is allowed to execute or an aliasing + /// `WasmPtr` is used to mutate memory. + /// /// `str` has invariants that must not be broken by mutating Wasm memory. - /// Thus the caller must ensure exclusive access to memory or otherwise ensure - /// that the backing memory is not modified while the reference is held. + /// Thus the caller must ensure that the backing memory is not modified + /// while the reference is held. /// /// Additionally, if `memory` is dynamic, the caller must also ensure that `memory` /// is not grown while the reference is held. @@ -209,10 +213,8 @@ impl WasmPtr { std::str::from_utf8(slice).ok() } - /// Get a UTF-8 string from the `WasmPtr` with the given length. + /// Get a UTF-8 `String` from the `WasmPtr` with the given length. /// - /// Note that this method returns a reference to Wasm linear memory. The - /// underlying data can be mutated if the Wasm is allowed to execute or /// an aliasing `WasmPtr` is used to mutate memory. pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { unsafe { self.get_utf8_str(memory, str_len).map(|s| s.to_owned()) } @@ -223,10 +225,6 @@ impl WasmPtr { /// Note that this does not account for UTF-8 strings that _contain_ nul themselves, /// [`WasmPtr::get_utf8_str`] has to be used for those. /// - /// Also note that this method returns a reference to Wasm linear memory. The - /// underlying data can be mutated if the Wasm is allowed to execute or - /// an aliasing `WasmPtr` is used to mutate memory. - /// /// # Safety /// This method behaves similarly to [`WasmPtr::get_utf8_str`], all safety invariants on /// that method must also be upheld here. @@ -238,14 +236,10 @@ impl WasmPtr { .and_then(|length| self.get_utf8_str(memory, length as u32)) } - /// Get a UTF-8 string from the `WasmPtr`, where the string is nul-terminated. + /// Get a UTF-8 `String` from the `WasmPtr`, where the string is nul-terminated. /// /// Note that this does not account for UTF-8 strings that _contain_ nul themselves, /// [`WasmPtr::get_utf8_string`] has to be used for those. - /// - /// Also note that this method returns a reference to Wasm linear memory. The - /// underlying data can be mutated if the Wasm is allowed to execute or - /// an aliasing `WasmPtr` is used to mutate memory. pub fn get_utf8_string_with_nul(self, memory: &Memory) -> Option { unsafe { self.get_utf8_str_with_nul(memory) }.map(|s| s.to_owned()) } From b592ed83afb9742853cc6d9458fa28457c0290a4 Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Mon, 4 Jan 2021 07:45:57 -0800 Subject: [PATCH 3/3] Reimplement `WasmPtr::get_utf8_string` to be fully sound --- lib/api/src/ptr.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/api/src/ptr.rs b/lib/api/src/ptr.rs index cf34c8fa80f..d7d8b6f8061 100644 --- a/lib/api/src/ptr.rs +++ b/lib/api/src/ptr.rs @@ -217,7 +217,25 @@ impl WasmPtr { /// /// an aliasing `WasmPtr` is used to mutate memory. pub fn get_utf8_string(self, memory: &Memory, str_len: u32) -> Option { - unsafe { self.get_utf8_str(memory, str_len).map(|s| s.to_owned()) } + let memory_size = memory.size().bytes().0; + if self.offset as usize + str_len as usize > memory.size().bytes().0 + || self.offset as usize >= memory_size + { + return None; + } + + // TODO: benchmark the internals of this function: there is likely room for + // micro-optimization here and this may be a fairly common function in user code. + let view = memory.view::(); + + let mut vec: Vec = Vec::with_capacity(str_len as usize); + let base = self.offset as usize; + for i in 0..(str_len as usize) { + let byte = view[base + i].get(); + vec.push(byte); + } + + String::from_utf8(vec).ok() } /// Get a UTF-8 string from the `WasmPtr`, where the string is nul-terminated.