diff --git a/Cargo.lock b/Cargo.lock index 0bff336d784..3b77b0e6fc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3421,6 +3421,12 @@ dependencies = [ "bytecheck", ] +[[package]] +name = "replace_with" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a8614ee435691de62bcffcf4a66d91b3594bf1428a5722e79103249a095690" + [[package]] name = "reqwest" version = "0.11.18" @@ -4995,6 +5001,7 @@ dependencies = [ "libc", "pin-project-lite", "pretty_assertions", + "replace_with", "slab", "tempfile", "thiserror", @@ -6423,9 +6430,9 @@ dependencies = [ [[package]] name = "webc" -version = "5.0.0" +version = "5.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06bee486f9207604f99bfa3c95afcd03272d95db5872c6c1b11470be4390d514" +checksum = "42af14e63ed784e4f813bd5fb35bc84016fa8b245879809547247da6051107f0" dependencies = [ "anyhow", "base64", diff --git a/lib/cli/src/commands/run_unstable.rs b/lib/cli/src/commands/run_unstable.rs index 9a48d84fb93..9ced59358f2 100644 --- a/lib/cli/src/commands/run_unstable.rs +++ b/lib/cli/src/commands/run_unstable.rs @@ -20,7 +20,7 @@ use sha2::{Digest, Sha256}; use tempfile::NamedTempFile; use tokio::runtime::Handle; use url::Url; -use wapm_targz_to_pirita::FileMap; +use wapm_targz_to_pirita::{FileMap, TransformManifestFunctions}; use wasmer::{ DeserializeError, Engine, Function, Imports, Instance, Module, Store, Type, TypedFunction, Value, @@ -176,7 +176,7 @@ impl RunUnstable { } } - #[tracing::instrument(skip_all)] + #[tracing::instrument(level = "debug", skip_all)] fn load_injected_packages(&self, runtime: &dyn Runtime) -> Result, Error> { let mut dependencies = Vec::new(); @@ -494,26 +494,10 @@ enum ExecutableTarget { impl ExecutableTarget { /// Try to load a Wasmer package from a directory containing a `wasmer.toml` /// file. - #[tracing::instrument(skip_all)] fn from_dir(dir: &Path, runtime: &dyn Runtime) -> Result { - let mut files = BTreeMap::new(); - load_files_from_disk(&mut files, dir, dir)?; - - let wasmer_toml = DirOrFile::File("wasmer.toml".into()); - if let Some(toml_data) = files.remove(&wasmer_toml) { - // HACK(Michael-F-Bryan): The version of wapm-targz-to-pirita we are - // using doesn't know we renamed "wapm.toml" to "wasmer.toml", so we - // manually patch things up if people have already migrated their - // projects. - files - .entry(DirOrFile::File("wapm.toml".into())) - .or_insert(toml_data); - } - - let functions = wapm_targz_to_pirita::TransformManifestFunctions::default(); - let webc = wapm_targz_to_pirita::generate_webc_file(files, dir, None, &functions)?; - + let webc = construct_webc_in_memory(dir)?; let container = Container::from_bytes(webc)?; + let pkg = runtime .task_manager() .block_on(BinaryPackage::from_webc(&container, runtime))?; @@ -554,6 +538,28 @@ impl ExecutableTarget { } } +#[tracing::instrument(level = "debug", skip_all)] +fn construct_webc_in_memory(dir: &Path) -> Result, Error> { + let mut files = BTreeMap::new(); + load_files_from_disk(&mut files, dir, dir)?; + + let wasmer_toml = DirOrFile::File("wasmer.toml".into()); + if let Some(toml_data) = files.remove(&wasmer_toml) { + // HACK(Michael-F-Bryan): The version of wapm-targz-to-pirita we are + // using doesn't know we renamed "wapm.toml" to "wasmer.toml", so we + // manually patch things up if people have already migrated their + // projects. + files + .entry(DirOrFile::File("wapm.toml".into())) + .or_insert(toml_data); + } + + let functions = TransformManifestFunctions::default(); + let webc = wapm_targz_to_pirita::generate_webc_file(files, dir, None, &functions)?; + + Ok(webc) +} + fn load_files_from_disk(files: &mut FileMap, dir: &Path, base: &Path) -> Result<(), Error> { let entries = dir .read_dir() diff --git a/lib/emscripten/src/env/mod.rs b/lib/emscripten/src/env/mod.rs index a560a416912..5513b7a691e 100644 --- a/lib/emscripten/src/env/mod.rs +++ b/lib/emscripten/src/env/mod.rs @@ -26,7 +26,7 @@ pub fn call_malloc(mut ctx: &mut FunctionEnvMut, size: u32) -> u32 { malloc_ref.call(&mut ctx, size).unwrap() } -#[warn(dead_code)] +#[allow(dead_code)] pub fn call_malloc_with_cast(ctx: &mut FunctionEnvMut, size: u32) -> WasmPtr { WasmPtr::new(call_malloc(ctx, size)) } diff --git a/lib/virtual-fs/Cargo.toml b/lib/virtual-fs/Cargo.toml index 8b532aca15f..e1b408f2423 100644 --- a/lib/virtual-fs/Cargo.toml +++ b/lib/virtual-fs/Cargo.toml @@ -26,6 +26,7 @@ bytes = "1" tokio = { version = "1", features = ["io-util", "sync", "macros"], default_features = false } pin-project-lite = "0.2.9" indexmap = "1.9.2" +replace_with = "0.1.7" [target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dependencies] getrandom = { version = "0.2" } diff --git a/lib/virtual-fs/src/buffer_file.rs b/lib/virtual-fs/src/buffer_file.rs new file mode 100644 index 00000000000..9af66811cbf --- /dev/null +++ b/lib/virtual-fs/src/buffer_file.rs @@ -0,0 +1,103 @@ +//! Used for /dev/zero - infinitely returns zero +//! which is useful for commands like `dd if=/dev/zero of=bigfile.img size=1G` + +use std::io::{self, *}; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite}; + +use crate::VirtualFile; + +#[derive(Debug, Default)] +pub struct BufferFile { + pub(crate) data: Cursor>, +} + +impl AsyncSeek for BufferFile { + fn start_seek(mut self: Pin<&mut Self>, position: io::SeekFrom) -> io::Result<()> { + let data = Pin::new(&mut self.data); + data.start_seek(position) + } + + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_complete(cx) + } +} + +impl AsyncWrite for BufferFile { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_write(cx, buf) + } + + fn poll_write_vectored( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[io::IoSlice<'_>], + ) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_write_vectored(cx, bufs) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_flush(cx) + } + + fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_shutdown(cx) + } +} + +impl AsyncRead for BufferFile { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_read(cx, buf) + } +} + +impl VirtualFile for BufferFile { + fn last_accessed(&self) -> u64 { + 0 + } + fn last_modified(&self) -> u64 { + 0 + } + fn created_time(&self) -> u64 { + 0 + } + fn size(&self) -> u64 { + self.data.get_ref().len() as u64 + } + fn set_len(&mut self, new_size: u64) -> crate::Result<()> { + self.data.get_mut().resize(new_size as usize, 0); + Ok(()) + } + fn unlink(&mut self) -> crate::Result<()> { + Ok(()) + } + fn poll_read_ready(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + let cur = self.data.stream_position().unwrap_or_default(); + let len = self.data.seek(SeekFrom::End(0)).unwrap_or_default(); + if cur < len { + Poll::Ready(Ok((len - cur) as usize)) + } else { + Poll::Ready(Ok(0)) + } + } + + fn poll_write_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(8192)) + } +} diff --git a/lib/virtual-fs/src/cow_file.rs b/lib/virtual-fs/src/cow_file.rs new file mode 100644 index 00000000000..fd213936408 --- /dev/null +++ b/lib/virtual-fs/src/cow_file.rs @@ -0,0 +1,229 @@ +//! Used for /dev/zero - infinitely returns zero +//! which is useful for commands like `dd if=/dev/zero of=bigfile.img size=1G` + +use replace_with::replace_with_or_abort; +use std::io::{self, *}; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; + +use crate::{BufferFile, VirtualFile}; + +#[derive(Debug)] +enum CowState { + ReadOnly(Box), + Copying { + pos: u64, + inner: Box, + }, + Copied, +} +impl CowState { + fn as_ref(&self) -> Option<&(dyn VirtualFile + Send + Sync)> { + match self { + Self::ReadOnly(inner) => Some(inner.as_ref()), + Self::Copying { inner, .. } => Some(inner.as_ref()), + _ => None, + } + } + fn as_mut(&mut self) -> Option<&mut Box> { + match self { + Self::ReadOnly(inner) => Some(inner), + Self::Copying { inner, .. } => Some(inner), + _ => None, + } + } +} + +#[derive(Debug)] +pub struct CopyOnWriteFile { + last_accessed: u64, + last_modified: u64, + created_time: u64, + state: CowState, + buf: BufferFile, +} + +impl CopyOnWriteFile { + pub fn new(inner: Box) -> Self { + Self { + last_accessed: inner.last_accessed(), + last_modified: inner.last_modified(), + created_time: inner.created_time(), + state: CowState::ReadOnly(inner), + buf: BufferFile::default(), + } + } + fn poll_copy_progress(&mut self, cx: &mut Context) -> Poll> { + if let CowState::Copying { ref mut inner, pos } = &mut self.state { + let mut temp = [0u8; 8192]; + + while *pos < inner.size() { + let mut read_temp = ReadBuf::new(&mut temp); + + if let Err(err) = Pin::new(inner.as_mut()).start_seek(SeekFrom::Start(*pos)) { + return Poll::Ready(Err(err)); + } + match Pin::new(inner.as_mut()).poll_complete(cx).map_ok(|_| ()) { + Poll::Ready(Ok(())) => {} + p => return p, + } + match Pin::new(inner.as_mut()).poll_read(cx, &mut read_temp) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + if read_temp.remaining() == 0 { + return Poll::Pending; + } + *pos += read_temp.remaining() as u64; + + self.buf.data.write_all(read_temp.filled()).unwrap(); + } + self.state = CowState::Copied; + } + Poll::Ready(Ok(())) + } + fn poll_copy_start_and_progress(&mut self, cx: &mut Context) -> Poll> { + replace_with_or_abort(&mut self.state, |state| match state { + CowState::ReadOnly(inner) => CowState::Copying { pos: 0, inner }, + state => state, + }); + self.poll_copy_progress(cx) + } +} + +impl AsyncSeek for CopyOnWriteFile { + fn start_seek(mut self: Pin<&mut Self>, position: io::SeekFrom) -> io::Result<()> { + let data = Pin::new(&mut self.buf); + data.start_seek(position)?; + + if let Some(inner) = self.state.as_mut() { + Pin::new(inner.as_mut()).start_seek(position)?; + } + + Ok(()) + } + + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.state.as_mut() { + Some(inner) => Pin::new(inner.as_mut()).poll_complete(cx), + None => { + let data = Pin::new(&mut self.buf); + data.poll_complete(cx) + } + } + } +} + +impl AsyncWrite for CopyOnWriteFile { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + match self.poll_copy_start_and_progress(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + let data = Pin::new(&mut self.buf); + data.poll_write(cx, buf) + } + + fn poll_write_vectored( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[io::IoSlice<'_>], + ) -> Poll> { + match self.poll_copy_start_and_progress(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + let data = Pin::new(&mut self.buf); + data.poll_write_vectored(cx, bufs) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.poll_copy_start_and_progress(cx) { + Poll::Ready(Ok(())) => {} + p => return p, + } + let data = Pin::new(&mut self.buf); + data.poll_flush(cx) + } + + fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.poll_copy_start_and_progress(cx) { + Poll::Ready(Ok(())) => {} + p => return p, + } + let data = Pin::new(&mut self.buf); + data.poll_shutdown(cx) + } +} + +impl AsyncRead for CopyOnWriteFile { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + match self.poll_copy_progress(cx) { + Poll::Ready(Ok(())) => {} + p => return p, + } + match self.state.as_mut() { + Some(inner) => Pin::new(inner.as_mut()).poll_read(cx, buf), + None => { + let data = Pin::new(&mut self.buf); + data.poll_read(cx, buf) + } + } + } +} + +impl VirtualFile for CopyOnWriteFile { + fn last_accessed(&self) -> u64 { + self.last_accessed + } + fn last_modified(&self) -> u64 { + self.last_modified + } + fn created_time(&self) -> u64 { + self.created_time + } + fn size(&self) -> u64 { + match self.state.as_ref() { + Some(inner) => inner.size(), + None => self.buf.size(), + } + } + fn set_len(&mut self, new_size: u64) -> crate::Result<()> { + self.buf.set_len(new_size) + } + fn unlink(&mut self) -> crate::Result<()> { + self.buf.set_len(0)?; + Ok(()) + } + fn poll_read_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.poll_copy_progress(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + match self.state.as_mut() { + Some(inner) => Pin::new(inner.as_mut()).poll_read_ready(cx), + None => { + let data: Pin<&mut BufferFile> = Pin::new(&mut self.buf); + data.poll_read_ready(cx) + } + } + } + + fn poll_write_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.poll_copy_progress(cx).map_ok(|_| 8192) + } +} diff --git a/lib/virtual-fs/src/lib.rs b/lib/virtual-fs/src/lib.rs index f39a05b15de..ea72de475ff 100644 --- a/lib/virtual-fs/src/lib.rs +++ b/lib/virtual-fs/src/lib.rs @@ -5,6 +5,7 @@ extern crate pretty_assertions; use std::any::Any; use std::ffi::OsString; use std::fmt; +use std::future::Future; use std::io; use std::ops::Deref; use std::path::{Path, PathBuf}; @@ -16,8 +17,10 @@ use thiserror::Error; pub mod arc_box_file; pub mod arc_file; pub mod arc_fs; +pub mod buffer_file; pub mod builder; pub mod combine_file; +pub mod cow_file; pub mod dual_write_file; pub mod empty_fs; #[cfg(feature = "host-fs")] @@ -48,8 +51,10 @@ pub mod limiter; pub use arc_box_file::*; pub use arc_file::*; pub use arc_fs::*; +pub use buffer_file::*; pub use builder::*; pub use combine_file::*; +pub use cow_file::*; pub use dual_write_file::*; pub use empty_fs::*; pub use filesystems::FileSystems; @@ -343,6 +348,20 @@ pub trait VirtualFile: None } + /// This method will copy a file from a source to this destination where + /// the default is to do a straight byte copy however file system implementors + /// may optimize this to do a zero copy + fn copy_reference<'a>( + &'a mut self, + mut src: Box, + ) -> Pin> + 'a>> { + Box::pin(async move { + let bytes_written = tokio::io::copy(&mut src, self).await?; + tracing::trace!(bytes_written, "Copying file into host filesystem",); + Ok(()) + }) + } + /// Polls the file for when there is data to be read fn poll_read_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; diff --git a/lib/virtual-fs/src/mem_fs/file.rs b/lib/virtual-fs/src/mem_fs/file.rs index cc439ce71af..cf18889ca68 100644 --- a/lib/virtual-fs/src/mem_fs/file.rs +++ b/lib/virtual-fs/src/mem_fs/file.rs @@ -7,7 +7,7 @@ use tokio::io::{AsyncSeek, AsyncWrite}; use super::*; use crate::limiter::TrackedVec; -use crate::{FsError, Result, VirtualFile}; +use crate::{CopyOnWriteFile, FsError, Result, VirtualFile}; use std::borrow::Cow; use std::cmp; use std::convert::TryInto; @@ -276,6 +276,29 @@ impl VirtualFile for FileHandle { } } + fn copy_reference<'a>( + &'a mut self, + src: Box, + ) -> Pin> + 'a>> { + let inner = self.filesystem.inner.clone(); + Box::pin(async move { + let mut fs = inner.write().unwrap(); + let inode = fs.storage.get_mut(self.inode); + match inode { + Some(inode) => { + *inode = Node::CustomFile(CustomFileNode { + inode: inode.inode(), + name: inode.name().to_string_lossy().to_string().into(), + file: Mutex::new(Box::new(CopyOnWriteFile::new(src))), + metadata: inode.metadata().clone(), + }); + Ok(()) + } + None => Err(std::io::ErrorKind::InvalidInput.into()), + } + }) + } + fn poll_read_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { if !self.readable { return Poll::Ready(Err(io::Error::new( diff --git a/lib/wasi/Cargo.toml b/lib/wasi/Cargo.toml index 5b6826533b8..e3fd39cf7d1 100644 --- a/lib/wasi/Cargo.toml +++ b/lib/wasi/Cargo.toml @@ -29,7 +29,7 @@ bincode = { version = "1.3" } chrono = { version = "^0.4", default-features = false, features = [ "wasmbind", "std", "clock" ], optional = true } derivative = { version = "^2" } bytes = "1" -webc = { version = "5.0", default-features = false } +webc = { version = "5.0.2", default-features = false } serde_cbor = { version = "0.11.2", optional = true } anyhow = { version = "1.0.66" } lazy_static = "1.4" diff --git a/lib/wasi/src/fs/mod.rs b/lib/wasi/src/fs/mod.rs index 72fc2ed9a98..9de78400925 100644 --- a/lib/wasi/src/fs/mod.rs +++ b/lib/wasi/src/fs/mod.rs @@ -4,7 +4,7 @@ mod notification; use std::{ borrow::{Borrow, Cow}, - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, ops::{Deref, DerefMut}, path::{Component, Path, PathBuf}, sync::{ @@ -14,6 +14,7 @@ use std::{ }; use crate::state::{Stderr, Stdin, Stdout}; +use futures::TryStreamExt; #[cfg(feature = "enable-serde")] use serde_derive::{Deserialize, Serialize}; use tokio::io::AsyncWriteExt; @@ -268,6 +269,25 @@ pub enum WasiFsRoot { Backing(Arc>), } +impl WasiFsRoot { + /// Merge the contents of a filesystem into this one. + pub(crate) async fn merge( + &self, + other: &Arc, + ) -> Result<(), virtual_fs::FsError> { + match self { + WasiFsRoot::Sandbox(fs) => { + fs.union(other); + Ok(()) + } + WasiFsRoot::Backing(fs) => { + merge_filesystems(other, fs).await?; + Ok(()) + } + } + } +} + impl FileSystem for WasiFsRoot { fn read_dir(&self, path: &Path) -> virtual_fs::Result { match self { @@ -319,6 +339,78 @@ impl FileSystem for WasiFsRoot { } } +/// Merge the contents of one filesystem into another. +/// +#[tracing::instrument(level = "debug", skip_all)] +async fn merge_filesystems( + source: &dyn FileSystem, + destination: &dyn FileSystem, +) -> Result<(), virtual_fs::FsError> { + tracing::debug!("Falling back to a recursive copy to merge filesystems"); + let files = futures::stream::FuturesUnordered::new(); + + let mut to_check = VecDeque::new(); + to_check.push_back(PathBuf::from("/")); + + while let Some(path) = to_check.pop_front() { + let metadata = source.metadata(&path)?; + + if metadata.is_dir() { + create_dir_all(destination, &path)?; + + for entry in source.read_dir(&path)? { + let entry = entry?; + to_check.push_back(entry.path); + } + } else if metadata.is_file() { + files.push(async move { + copy_reference(source, destination, &path) + .await + .map_err(virtual_fs::FsError::from) + }); + } else { + tracing::debug!( + path=%path.display(), + ?metadata, + "Skipping unknown file type while merging" + ); + } + } + + files.try_collect().await +} + +#[tracing::instrument(level = "trace", skip_all, fields(path=%path.display()))] +async fn copy_reference( + source: &dyn FileSystem, + destination: &dyn FileSystem, + path: &Path, +) -> Result<(), std::io::Error> { + let src = source.new_open_options().read(true).open(path)?; + let mut dst = destination + .new_open_options() + .create(true) + .write(true) + .truncate(true) + .open(path)?; + + dst.copy_reference(src).await +} + +fn create_dir_all(fs: &dyn FileSystem, path: &Path) -> Result<(), virtual_fs::FsError> { + if fs.metadata(path).is_ok() { + return Ok(()); + } + + if let Some(parent) = path.parent() { + create_dir_all(fs, parent)?; + } + + fs.create_dir(path)?; + + Ok(()) +} + /// Warning, modifying these fields directly may cause invariants to break and /// should be considered unsafe. These fields may be made private in a future release #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] diff --git a/lib/wasi/src/runtime/package_loader/builtin_loader.rs b/lib/wasi/src/runtime/package_loader/builtin_loader.rs index a1077e33819..ea1f62da2c5 100644 --- a/lib/wasi/src/runtime/package_loader/builtin_loader.rs +++ b/lib/wasi/src/runtime/package_loader/builtin_loader.rs @@ -89,13 +89,20 @@ impl BuiltinPackageLoader { async fn download(&self, dist: &DistributionInfo) -> Result { if dist.webc.scheme() == "file" { - // Note: The Url::to_file_path() method is platform-specific - #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] - if let Ok(path) = dist.webc.to_file_path() { - // FIXME: This will block the thread - let bytes = std::fs::read(&path) - .with_context(|| format!("Unable to read \"{}\"", path.display()))?; - return Ok(bytes.into()); + match crate::runtime::resolver::utils::file_path_from_url(&dist.webc) { + Ok(path) => { + // FIXME: This will block the thread + let bytes = std::fs::read(&path) + .with_context(|| format!("Unable to read \"{}\"", path.display()))?; + return Ok(bytes.into()); + } + Err(e) => { + tracing::debug!( + url=%dist.webc, + error=&*e, + "Unable to convert the file:// URL to a path", + ); + } } } diff --git a/lib/wasi/src/runtime/resolver/inputs.rs b/lib/wasi/src/runtime/resolver/inputs.rs index b835b6e7fac..e289277830b 100644 --- a/lib/wasi/src/runtime/resolver/inputs.rs +++ b/lib/wasi/src/runtime/resolver/inputs.rs @@ -47,6 +47,21 @@ impl FromStr for PackageSpecifier { type Err = anyhow::Error; fn from_str(s: &str) -> Result { + // There is no function in std for checking if a string is a valid path + // and we can't do Path::new(s).exists() because that assumes the + // package being specified is on the local filesystem, so let's make a + // best-effort guess. + if s.starts_with('.') || s.starts_with('/') { + return Ok(PackageSpecifier::Path(s.into())); + } + #[cfg(windows)] + if s.contains('\\') { + return Ok(PackageSpecifier::Path(s.into())); + } + if Path::new(s).exists() { + return Ok(PackageSpecifier::Path(s.into())); + } + if let Ok(url) = Url::parse(s) { if url.has_host() { return Ok(PackageSpecifier::Url(url)); @@ -351,6 +366,16 @@ pub(crate) mod tests { "https://wapm/io/namespace/package@1.0.0", PackageSpecifier::Url("https://wapm/io/namespace/package@1.0.0".parse().unwrap()), ), + ( + "/path/to/some/file.webc", + PackageSpecifier::Path("/path/to/some/file.webc".into()), + ), + ("./file.webc", PackageSpecifier::Path("./file.webc".into())), + #[cfg(windows)] + ( + r"C:\Path\to\some\file.webc", + PackageSpecifier::Path(r"C:\Path\to\some\file.webc".into()), + ), ]; for (src, expected) in inputs { diff --git a/lib/wasi/src/runtime/resolver/utils.rs b/lib/wasi/src/runtime/resolver/utils.rs index 521215b2cb2..03c018cc5f4 100644 --- a/lib/wasi/src/runtime/resolver/utils.rs +++ b/lib/wasi/src/runtime/resolver/utils.rs @@ -1,6 +1,6 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; -use anyhow::Error; +use anyhow::{Context, Error}; use http::{HeaderMap, StatusCode}; use url::Url; @@ -56,13 +56,39 @@ pub(crate) fn http_error(response: &HttpResponse) -> Error { Error::msg(status) } +pub(crate) fn file_path_from_url(url: &Url) -> Result { + debug_assert_eq!(url.scheme(), "file"); + + // Note: The Url::to_file_path() method is platform-specific + cfg_if::cfg_if! { + if #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] { + if let Ok(path) = url.to_file_path() { + return Ok(path); + } + + // Sometimes we'll get a UNC-like path (e.g. + // "file:///?\\C:/\\/path/to/file.txt") and Url::to_file_path() + // won't be able to handle the "\\?" so we try to "massage" the URL + // a bit. + // See for more. + let modified = url.as_str().replace(r"\\?", "").replace("//?", "").replace('\\', "/"); + Url::parse(&modified) + .ok() + .and_then(|url| url.to_file_path().ok()) + .context("Unable to extract the file path") + } else { + anyhow::bail!("Url::to_file_path() is not supported on this platform"); + } + } +} + #[cfg(test)] mod tests { use super::*; #[test] #[cfg(unix)] - fn behaviour_is_identical() { + fn from_file_path_behaviour_is_identical() { let inputs = [ "/", "/path", @@ -78,4 +104,17 @@ mod tests { assert_eq!(got, expected, "Mismatch for \"{path}\""); } } + + #[test] + #[cfg(windows)] + fn to_file_path_can_handle_unc_paths() { + let path = Path::new(env!("CARGO_MANIFEST_DIR")) + .canonicalize() + .unwrap(); + let url = Url::from_file_path(&path).unwrap(); + + let got = file_path_from_url(&url).unwrap(); + + assert_eq!(got.canonicalize().unwrap(), path); + } } diff --git a/lib/wasi/src/state/builder.rs b/lib/wasi/src/state/builder.rs index ee6208c21e3..7fbb0dc223f 100644 --- a/lib/wasi/src/state/builder.rs +++ b/lib/wasi/src/state/builder.rs @@ -269,19 +269,28 @@ impl WasiEnvBuilder { &mut self.args } - /// Adds a container this module inherits from + /// Adds a container this module inherits from. + /// + /// This will make all of the container's files and commands available to the + /// resulting WASI instance. pub fn use_webc(mut self, pkg: BinaryPackage) -> Self { self.add_webc(pkg); self } - /// Adds a container this module inherits from + /// Adds a container this module inherits from. + /// + /// This will make all of the container's files and commands available to the + /// resulting WASI instance. pub fn add_webc(&mut self, pkg: BinaryPackage) -> &mut Self { self.uses.push(pkg); self } - /// Adds a list of other containers this module inherits from + /// Adds a list of other containers this module inherits from. + /// + /// This will make all of the container's files and commands available to the + /// resulting WASI instance. pub fn uses(mut self, uses: I) -> Self where I: IntoIterator, diff --git a/lib/wasi/src/state/env.rs b/lib/wasi/src/state/env.rs index fa3f8010ec7..44b17b6c103 100644 --- a/lib/wasi/src/state/env.rs +++ b/lib/wasi/src/state/env.rs @@ -849,18 +849,15 @@ impl WasiEnv { /// [cmd-atom]: crate::bin_factory::BinaryPackageCommand::atom() /// [pkg-fs]: crate::bin_factory::BinaryPackage::webc_fs pub fn use_package(&self, pkg: &BinaryPackage) -> Result<(), WasiStateCreationError> { - // PERF: We should avoid all these copies in the WasiFsRoot::Backing case. - let root_fs = &self.state.fs.root_fs; + // We first need to copy any files in the package over to the - // temporary file system - match root_fs { - WasiFsRoot::Sandbox(root_fs) => { - root_fs.union(&pkg.webc_fs); - } - WasiFsRoot::Backing(_fs) => { - tracing::warn!("TODO: Manually copy each file across one-by-one"); - } + // main file system + if let Err(e) = self.tasks().block_on(root_fs.merge(&pkg.webc_fs)) { + warn!( + error = &e as &dyn std::error::Error, + "Unable to merge the package's filesystem into the main one", + ); } // Next, make sure all commands will be available diff --git a/lib/wasi/tests/runners.rs b/lib/wasi/tests/runners.rs index 71d5926c405..790f36b3cdf 100644 --- a/lib/wasi/tests/runners.rs +++ b/lib/wasi/tests/runners.rs @@ -49,9 +49,8 @@ mod wasi { .with_args(["--version"]) .run_command("wat2wasm", &pkg, Arc::new(rt)) }); - let result = handle.join().unwrap(); - assert!(result.is_ok()); + handle.join().unwrap().expect("Runner failed"); } #[tokio::test] @@ -68,13 +67,11 @@ mod wasi { }); let err = handle.join().unwrap().unwrap_err(); - let runtime_error = err - .chain() - .find_map(|e| e.downcast_ref::()) - .unwrap(); + let runtime_error = err.chain().find_map(|e| e.downcast_ref::()); let exit_code = match runtime_error { - WasiError::Exit(code) => *code, - other => unreachable!("Something else went wrong: {:?}", other), + Some(WasiError::Exit(code)) => *code, + Some(other) => panic!("Something else went wrong: {:?}", other), + None => panic!("Not a WasiError: {:?}", err), }; assert_eq!(exit_code.raw(), 42); } diff --git a/tests/integration/cli/tests/run.rs b/tests/integration/cli/tests/run.rs index 9f7c7d16f65..102d3cde405 100644 --- a/tests/integration/cli/tests/run.rs +++ b/tests/integration/cli/tests/run.rs @@ -704,9 +704,11 @@ fn run_test_caching_works_for_packages_with_versions() -> anyhow::Result<()> { .arg("test.py") .output()?; - if output.stdout != b"hello\n".to_vec() { - panic!("failed to run https://wapm.io/python/python for the first time"); - } + assert_eq!( + String::from_utf8_lossy(&output.stdout), + "hello\n", + "failed to run https://wapm.io/python/python for the first time" + ); let time = std::time::Instant::now(); @@ -718,9 +720,11 @@ fn run_test_caching_works_for_packages_with_versions() -> anyhow::Result<()> { dbg!(&output); - if output.stdout != b"hello\n".to_vec() { - panic!("failed to run https://wapm.io/python/python for the second time"); - } + assert_eq!( + String::from_utf8_lossy(&output.stdout), + "hello\n", + "failed to run https://wapm.io/python/python for the second time" + ); // package should be cached assert!(std::time::Instant::now() - time < std::time::Duration::from_secs(1)); diff --git a/tests/integration/cli/tests/run_unstable.rs b/tests/integration/cli/tests/run_unstable.rs index 9d4ea6e9c5e..8d418f3dad7 100644 --- a/tests/integration/cli/tests/run_unstable.rs +++ b/tests/integration/cli/tests/run_unstable.rs @@ -245,6 +245,34 @@ mod webc_on_disk { assert.success().stdout(contains("Hello, World!")); } + + #[test] + #[cfg_attr( + all(target_env = "musl", target_os = "linux"), + ignore = "wasmer run-unstable segfaults on musl" + )] + #[cfg_attr( + windows, + ignore = "FIXME(Michael-F-Bryan): Temporarily broken on Windows - https://github.com/wasmerio/wasmer/issues/3929" + )] + fn merged_filesystem_contains_all_files() { + let assert = wasmer_run_unstable() + .arg(fixtures::bash()) + .arg("--entrypoint=bash") + .arg("--use") + .arg(fixtures::coreutils()) + .arg("--use") + .arg(fixtures::python()) + .arg("--") + .arg("-c") + .arg("ls -l /usr/coreutils/*.md && ls -l /lib/python3.6/*.py") + .assert(); + + assert + .success() + .stdout(contains("/usr/coreutils/README.md")) + .stdout(contains("/lib/python3.6/this.py")); + } } mod wasm_on_disk { @@ -324,24 +352,28 @@ mod wasm_on_disk { } } -#[test] -#[cfg_attr( - all(target_env = "musl", target_os = "linux"), - ignore = "wasmer run-unstable segfaults on musl" -)] -fn wasmer_package_directory() { - let temp = TempDir::new().unwrap(); - std::fs::copy(fixtures::qjs(), temp.path().join("qjs.wasm")).unwrap(); - std::fs::copy(fixtures::qjs_wasmer_toml(), temp.path().join("wasmer.toml")).unwrap(); - - let assert = wasmer_run_unstable() - .arg(temp.path()) - .arg("--") - .arg("--eval") - .arg("console.log('Hello, World!')") - .assert(); +mod local_directory { + use super::*; - assert.success().stdout(contains("Hello, World!")); + #[test] + #[cfg_attr( + all(target_env = "musl", target_os = "linux"), + ignore = "wasmer run-unstable segfaults on musl" + )] + fn wasmer_package_directory() { + let temp = TempDir::new().unwrap(); + std::fs::copy(fixtures::qjs(), temp.path().join("qjs.wasm")).unwrap(); + std::fs::copy(fixtures::qjs_wasmer_toml(), temp.path().join("wasmer.toml")).unwrap(); + + let assert = wasmer_run_unstable() + .arg(temp.path()) + .arg("--") + .arg("--eval") + .arg("console.log('Hello, World!')") + .assert(); + + assert.success().stdout(contains("Hello, World!")); + } } mod remote_webc { @@ -402,6 +434,9 @@ mod remote_webc { .arg("ls /bin") .assert(); + // Note: the resulting filesystem should contain the main command as + // well as the commands from all the --use packages + let some_expected_binaries = [ "arch", "base32", "base64", "baseenc", "basename", "bash", "cat", ] @@ -427,6 +462,13 @@ mod fixtures { .join("coreutils-1.0.16-e27dbb4f-2ef2-4b44-b46a-ddd86497c6d7.webc") } + pub fn bash() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("webc") + .join("bash-1.0.16-f097441a-a80b-4e0d-87d7-684918ef4bb6.webc") + } + /// A WEBC file containing `wat2wasm`, `wasm-validate`, and other helpful /// WebAssembly-related commands. pub fn wabt() -> PathBuf { diff --git a/tests/lib/compiler-test-derive/src/lib.rs b/tests/lib/compiler-test-derive/src/lib.rs index 0948f3e26bb..aa45272cc13 100644 --- a/tests/lib/compiler-test-derive/src/lib.rs +++ b/tests/lib/compiler-test-derive/src/lib.rs @@ -5,7 +5,7 @@ use proc_macro::TokenStream; #[cfg(test)] use proc_macro2::TokenStream; use quote::quote; -use std::path::PathBuf; +use std::path::Path; #[cfg(not(test))] use syn::parse; #[cfg(test)] @@ -45,9 +45,11 @@ pub fn compiler_test(attrs: TokenStream, input: TokenStream) -> TokenStream { // Let's build the ignores to append an `#[ignore]` macro to the // autogenerated tests in case the test appears in the `ignores.txt` path; - let mut ignores_txt_path = PathBuf::new(); - ignores_txt_path.push(env!("CARGO_MANIFEST_DIR")); - ignores_txt_path.push("../../ignores.txt"); + let tests_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .ancestors() + .nth(2) + .unwrap(); + let ignores_txt_path = tests_dir.join("ignores.txt"); let ignores = crate::ignores::Ignores::build_from_path(ignores_txt_path);