From 12b007229f21e8ea2cd64ca91d183231c53c00fb Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sat, 27 May 2023 04:33:14 +0800 Subject: [PATCH 01/19] Implement unioning for WasiEnv::use_package() when the root fs is WasiFsRoot::Backing(..) --- lib/cli/src/commands/run_unstable.rs | 46 ++++++------ lib/wasi/src/fs/mod.rs | 103 ++++++++++++++++++++++++++- lib/wasi/src/state/builder.rs | 15 +++- lib/wasi/src/state/env.rs | 17 ++--- 4 files changed, 147 insertions(+), 34 deletions(-) diff --git a/lib/cli/src/commands/run_unstable.rs b/lib/cli/src/commands/run_unstable.rs index 9a48d84fb93..9ced59358f2 100644 --- a/lib/cli/src/commands/run_unstable.rs +++ b/lib/cli/src/commands/run_unstable.rs @@ -20,7 +20,7 @@ use sha2::{Digest, Sha256}; use tempfile::NamedTempFile; use tokio::runtime::Handle; use url::Url; -use wapm_targz_to_pirita::FileMap; +use wapm_targz_to_pirita::{FileMap, TransformManifestFunctions}; use wasmer::{ DeserializeError, Engine, Function, Imports, Instance, Module, Store, Type, TypedFunction, Value, @@ -176,7 +176,7 @@ impl RunUnstable { } } - #[tracing::instrument(skip_all)] + #[tracing::instrument(level = "debug", skip_all)] fn load_injected_packages(&self, runtime: &dyn Runtime) -> Result, Error> { let mut dependencies = Vec::new(); @@ -494,26 +494,10 @@ enum ExecutableTarget { impl ExecutableTarget { /// Try to load a Wasmer package from a directory containing a `wasmer.toml` /// file. - #[tracing::instrument(skip_all)] fn from_dir(dir: &Path, runtime: &dyn Runtime) -> Result { - let mut files = BTreeMap::new(); - load_files_from_disk(&mut files, dir, dir)?; - - let wasmer_toml = DirOrFile::File("wasmer.toml".into()); - if let Some(toml_data) = files.remove(&wasmer_toml) { - // HACK(Michael-F-Bryan): The version of wapm-targz-to-pirita we are - // using doesn't know we renamed "wapm.toml" to "wasmer.toml", so we - // manually patch things up if people have already migrated their - // projects. - files - .entry(DirOrFile::File("wapm.toml".into())) - .or_insert(toml_data); - } - - let functions = wapm_targz_to_pirita::TransformManifestFunctions::default(); - let webc = wapm_targz_to_pirita::generate_webc_file(files, dir, None, &functions)?; - + let webc = construct_webc_in_memory(dir)?; let container = Container::from_bytes(webc)?; + let pkg = runtime .task_manager() .block_on(BinaryPackage::from_webc(&container, runtime))?; @@ -554,6 +538,28 @@ impl ExecutableTarget { } } +#[tracing::instrument(level = "debug", skip_all)] +fn construct_webc_in_memory(dir: &Path) -> Result, Error> { + let mut files = BTreeMap::new(); + load_files_from_disk(&mut files, dir, dir)?; + + let wasmer_toml = DirOrFile::File("wasmer.toml".into()); + if let Some(toml_data) = files.remove(&wasmer_toml) { + // HACK(Michael-F-Bryan): The version of wapm-targz-to-pirita we are + // using doesn't know we renamed "wapm.toml" to "wasmer.toml", so we + // manually patch things up if people have already migrated their + // projects. + files + .entry(DirOrFile::File("wapm.toml".into())) + .or_insert(toml_data); + } + + let functions = TransformManifestFunctions::default(); + let webc = wapm_targz_to_pirita::generate_webc_file(files, dir, None, &functions)?; + + Ok(webc) +} + fn load_files_from_disk(files: &mut FileMap, dir: &Path, base: &Path) -> Result<(), Error> { let entries = dir .read_dir() diff --git a/lib/wasi/src/fs/mod.rs b/lib/wasi/src/fs/mod.rs index 72fc2ed9a98..383fec86c22 100644 --- a/lib/wasi/src/fs/mod.rs +++ b/lib/wasi/src/fs/mod.rs @@ -4,7 +4,7 @@ mod notification; use std::{ borrow::{Borrow, Cow}, - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, ops::{Deref, DerefMut}, path::{Component, Path, PathBuf}, sync::{ @@ -14,6 +14,7 @@ use std::{ }; use crate::state::{Stderr, Stdin, Stdout}; +use futures::TryStreamExt; #[cfg(feature = "enable-serde")] use serde_derive::{Deserialize, Serialize}; use tokio::io::AsyncWriteExt; @@ -268,6 +269,25 @@ pub enum WasiFsRoot { Backing(Arc>), } +impl WasiFsRoot { + /// Merge the contents of a filesystem into this one. + pub(crate) async fn merge( + &self, + other: &Arc, + ) -> Result<(), virtual_fs::FsError> { + match self { + WasiFsRoot::Sandbox(fs) => { + fs.union(other); + Ok(()) + } + WasiFsRoot::Backing(fs) => { + merge_filesystems_expensive(other, fs).await?; + Ok(()) + } + } + } +} + impl FileSystem for WasiFsRoot { fn read_dir(&self, path: &Path) -> virtual_fs::Result { match self { @@ -319,6 +339,87 @@ impl FileSystem for WasiFsRoot { } } +/// Merge the contents of one filesystem into another. +/// +/// This is a pretty heavy-weight operation because it will copy the contents of +/// each file from `source` into `destination`. +#[tracing::instrument(level = "debug", skip_all)] +async fn merge_filesystems_expensive( + source: &dyn FileSystem, + destination: &dyn FileSystem, +) -> Result<(), virtual_fs::FsError> { + tracing::debug!("Falling back to a recursive copy to merge filesystems"); + let files = futures::stream::FuturesUnordered::new(); + + let mut to_check = VecDeque::new(); + to_check.push_back(PathBuf::from("/")); + + while let Some(path) = to_check.pop_front() { + let metadata = source.metadata(&path)?; + + if metadata.is_dir() { + create_dir_all(destination, &path)?; + + for entry in source.read_dir(&path)? { + let entry = entry?; + to_check.push_back(entry.path); + } + } else if metadata.is_file() { + files.push(async move { + copy_file(source, destination, &path) + .await + .map_err(virtual_fs::FsError::from) + }); + } else { + tracing::debug!( + path=%path.display(), + ?metadata, + "Skipping unknown file type while merging" + ); + } + } + + files.try_collect().await +} + +#[tracing::instrument(level = "trace", skip_all, fields(path=%path.display()))] +async fn copy_file( + source: &dyn FileSystem, + destination: &dyn FileSystem, + path: &Path, +) -> Result<(), std::io::Error> { + let mut src = source.new_open_options().read(true).open(path)?; + let mut dst = destination + .new_open_options() + .create(true) + .write(true) + .truncate(true) + .open(path)?; + let bytes_written = tokio::io::copy(&mut src, &mut dst).await?; + + tracing::trace!( + path=%path.display(), + bytes_written, + "Copying file into host filesystem", + ); + + Ok(()) +} + +fn create_dir_all(fs: &dyn FileSystem, path: &Path) -> Result<(), virtual_fs::FsError> { + if fs.metadata(path).is_ok() { + return Ok(()); + } + + if let Some(parent) = path.parent() { + create_dir_all(fs, parent)?; + } + + fs.create_dir(path)?; + + Ok(()) +} + /// Warning, modifying these fields directly may cause invariants to break and /// should be considered unsafe. These fields may be made private in a future release #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] diff --git a/lib/wasi/src/state/builder.rs b/lib/wasi/src/state/builder.rs index ee6208c21e3..7fbb0dc223f 100644 --- a/lib/wasi/src/state/builder.rs +++ b/lib/wasi/src/state/builder.rs @@ -269,19 +269,28 @@ impl WasiEnvBuilder { &mut self.args } - /// Adds a container this module inherits from + /// Adds a container this module inherits from. + /// + /// This will make all of the container's files and commands available to the + /// resulting WASI instance. pub fn use_webc(mut self, pkg: BinaryPackage) -> Self { self.add_webc(pkg); self } - /// Adds a container this module inherits from + /// Adds a container this module inherits from. + /// + /// This will make all of the container's files and commands available to the + /// resulting WASI instance. pub fn add_webc(&mut self, pkg: BinaryPackage) -> &mut Self { self.uses.push(pkg); self } - /// Adds a list of other containers this module inherits from + /// Adds a list of other containers this module inherits from. + /// + /// This will make all of the container's files and commands available to the + /// resulting WASI instance. pub fn uses(mut self, uses: I) -> Self where I: IntoIterator, diff --git a/lib/wasi/src/state/env.rs b/lib/wasi/src/state/env.rs index fa3f8010ec7..44b17b6c103 100644 --- a/lib/wasi/src/state/env.rs +++ b/lib/wasi/src/state/env.rs @@ -849,18 +849,15 @@ impl WasiEnv { /// [cmd-atom]: crate::bin_factory::BinaryPackageCommand::atom() /// [pkg-fs]: crate::bin_factory::BinaryPackage::webc_fs pub fn use_package(&self, pkg: &BinaryPackage) -> Result<(), WasiStateCreationError> { - // PERF: We should avoid all these copies in the WasiFsRoot::Backing case. - let root_fs = &self.state.fs.root_fs; + // We first need to copy any files in the package over to the - // temporary file system - match root_fs { - WasiFsRoot::Sandbox(root_fs) => { - root_fs.union(&pkg.webc_fs); - } - WasiFsRoot::Backing(_fs) => { - tracing::warn!("TODO: Manually copy each file across one-by-one"); - } + // main file system + if let Err(e) = self.tasks().block_on(root_fs.merge(&pkg.webc_fs)) { + warn!( + error = &e as &dyn std::error::Error, + "Unable to merge the package's filesystem into the main one", + ); } // Next, make sure all commands will be available From e32537d1698a86e4482403a2bf37745b4a4d4945 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sat, 27 May 2023 06:53:16 +0800 Subject: [PATCH 02/19] Bump the webc version to pick up wasmerio/pirita#115 --- Cargo.lock | 4 ++-- lib/cli/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0bff336d784..2c673029d72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6423,9 +6423,9 @@ dependencies = [ [[package]] name = "webc" -version = "5.0.0" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06bee486f9207604f99bfa3c95afcd03272d95db5872c6c1b11470be4390d514" +checksum = "e90441ab5f53d68200c4b38f614b6653bc32240dbcebca32e44173030759b3b6" dependencies = [ "anyhow", "base64", diff --git a/lib/cli/Cargo.toml b/lib/cli/Cargo.toml index 27b45d74ba9..f107c040c3e 100644 --- a/lib/cli/Cargo.toml +++ b/lib/cli/Cargo.toml @@ -45,7 +45,7 @@ virtual-fs = { version = "0.3.0", path = "../virtual-fs", default-features = fa virtual-net = { version = "0.2.0", path = "../virtual-net" } # Wasmer-owned dependencies. -webc = { version = "5.0" } +webc = { version = "5.0.1" } wapm-targz-to-pirita = "0.2.1" wasmer-deploy-cli = { version = "0.1.9", default-features = false } From e9eab9b1cca7ec0797705717fbc859b1e22ea953 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sat, 27 May 2023 06:59:40 +0800 Subject: [PATCH 03/19] Add tests for the resulting filesystem --- lib/wasi/src/runtime/resolver/inputs.rs | 14 ++++ lib/wasi/tests/runners.rs | 5 +- tests/integration/cli/tests/run_unstable.rs | 72 ++++++++++++++++----- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/lib/wasi/src/runtime/resolver/inputs.rs b/lib/wasi/src/runtime/resolver/inputs.rs index b835b6e7fac..b2a1626ddb5 100644 --- a/lib/wasi/src/runtime/resolver/inputs.rs +++ b/lib/wasi/src/runtime/resolver/inputs.rs @@ -47,6 +47,10 @@ impl FromStr for PackageSpecifier { type Err = anyhow::Error; fn from_str(s: &str) -> Result { + if s.starts_with('.') || s.starts_with('/') || s.starts_with('\\') { + return Ok(PackageSpecifier::Path(s.into())); + } + if let Ok(url) = Url::parse(s) { if url.has_host() { return Ok(PackageSpecifier::Url(url)); @@ -351,6 +355,16 @@ pub(crate) mod tests { "https://wapm/io/namespace/package@1.0.0", PackageSpecifier::Url("https://wapm/io/namespace/package@1.0.0".parse().unwrap()), ), + ( + "/path/to/some/file.webc", + PackageSpecifier::Path("/path/to/some/file.webc".into()), + ), + ("./file.webc", PackageSpecifier::Path("./file.webc".into())), + #[cfg(windows)] + ( + r"C:\Path\to\some\file.webc", + PackageSpecifier::Path(r"C:\Path\to\some\file.webc".into()), + ), ]; for (src, expected) in inputs { diff --git a/lib/wasi/tests/runners.rs b/lib/wasi/tests/runners.rs index 71d5926c405..e20ca3a4290 100644 --- a/lib/wasi/tests/runners.rs +++ b/lib/wasi/tests/runners.rs @@ -49,9 +49,8 @@ mod wasi { .with_args(["--version"]) .run_command("wat2wasm", &pkg, Arc::new(rt)) }); - let result = handle.join().unwrap(); - assert!(result.is_ok()); + handle.join().unwrap().expect("Runner failed"); } #[tokio::test] @@ -68,6 +67,8 @@ mod wasi { }); let err = handle.join().unwrap().unwrap_err(); + dbg!(&err); + let runtime_error = err .chain() .find_map(|e| e.downcast_ref::()) diff --git a/tests/integration/cli/tests/run_unstable.rs b/tests/integration/cli/tests/run_unstable.rs index 9d4ea6e9c5e..942a01b34bf 100644 --- a/tests/integration/cli/tests/run_unstable.rs +++ b/tests/integration/cli/tests/run_unstable.rs @@ -245,6 +245,30 @@ mod webc_on_disk { assert.success().stdout(contains("Hello, World!")); } + + #[test] + #[cfg_attr( + all(target_env = "musl", target_os = "linux"), + ignore = "wasmer run-unstable segfaults on musl" + )] + fn merged_filesystem_contains_all_files() { + let assert = wasmer_run_unstable() + .arg(fixtures::bash()) + .arg("--entrypoint=bash") + .arg("--use") + .arg(fixtures::coreutils()) + .arg("--use") + .arg(fixtures::python()) + .arg("--") + .arg("-c") + .arg("ls -l /usr/coreutils/*.md && ls -l /lib/python3.6/") + .assert(); + + assert + .success() + .stdout(contains("/usr/coreutils/README.md")) + .stdout(contains("/lib/python3.6/")); + } } mod wasm_on_disk { @@ -324,24 +348,28 @@ mod wasm_on_disk { } } -#[test] -#[cfg_attr( - all(target_env = "musl", target_os = "linux"), - ignore = "wasmer run-unstable segfaults on musl" -)] -fn wasmer_package_directory() { - let temp = TempDir::new().unwrap(); - std::fs::copy(fixtures::qjs(), temp.path().join("qjs.wasm")).unwrap(); - std::fs::copy(fixtures::qjs_wasmer_toml(), temp.path().join("wasmer.toml")).unwrap(); - - let assert = wasmer_run_unstable() - .arg(temp.path()) - .arg("--") - .arg("--eval") - .arg("console.log('Hello, World!')") - .assert(); +mod local_directory { + use super::*; - assert.success().stdout(contains("Hello, World!")); + #[test] + #[cfg_attr( + all(target_env = "musl", target_os = "linux"), + ignore = "wasmer run-unstable segfaults on musl" + )] + fn wasmer_package_directory() { + let temp = TempDir::new().unwrap(); + std::fs::copy(fixtures::qjs(), temp.path().join("qjs.wasm")).unwrap(); + std::fs::copy(fixtures::qjs_wasmer_toml(), temp.path().join("wasmer.toml")).unwrap(); + + let assert = wasmer_run_unstable() + .arg(temp.path()) + .arg("--") + .arg("--eval") + .arg("console.log('Hello, World!')") + .assert(); + + assert.success().stdout(contains("Hello, World!")); + } } mod remote_webc { @@ -402,6 +430,9 @@ mod remote_webc { .arg("ls /bin") .assert(); + // Note: the resulting filesystem should contain the main command as + // well as the commands from all the --use packages + let some_expected_binaries = [ "arch", "base32", "base64", "baseenc", "basename", "bash", "cat", ] @@ -427,6 +458,13 @@ mod fixtures { .join("coreutils-1.0.16-e27dbb4f-2ef2-4b44-b46a-ddd86497c6d7.webc") } + pub fn bash() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("webc") + .join("bash-1.0.16-f097441a-a80b-4e0d-87d7-684918ef4bb6.webc") + } + /// A WEBC file containing `wat2wasm`, `wasm-validate`, and other helpful /// WebAssembly-related commands. pub fn wabt() -> PathBuf { From a428565ebf7231f77ed2a09bc3fcacf710b0ed75 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sun, 28 May 2023 18:10:23 +0800 Subject: [PATCH 04/19] Fixed the PackageSpecifier::from_str() test on Windows --- lib/wasi/src/runtime/resolver/inputs.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/wasi/src/runtime/resolver/inputs.rs b/lib/wasi/src/runtime/resolver/inputs.rs index b2a1626ddb5..6b2498835fc 100644 --- a/lib/wasi/src/runtime/resolver/inputs.rs +++ b/lib/wasi/src/runtime/resolver/inputs.rs @@ -47,7 +47,15 @@ impl FromStr for PackageSpecifier { type Err = anyhow::Error; fn from_str(s: &str) -> Result { - if s.starts_with('.') || s.starts_with('/') || s.starts_with('\\') { + // There is no function in std for checking if a string is a valid path + // and we can't do Path::new(s).exists() because that assumes the + // package being specified is on the local filesystem, so let's make a + // best-effort guess. + if s.starts_with('.') || s.starts_with('/') { + return Ok(PackageSpecifier::Path(s.into())); + } + #[cfg(windows)] + if s.contains('\\') { return Ok(PackageSpecifier::Path(s.into())); } From 16cf0e04b450f4d1c2d8feaf1ca7596f46d28df8 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sun, 28 May 2023 18:10:41 +0800 Subject: [PATCH 05/19] Move a dbg!() into the test's assertion --- lib/wasi/tests/runners.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/wasi/tests/runners.rs b/lib/wasi/tests/runners.rs index e20ca3a4290..790f36b3cdf 100644 --- a/lib/wasi/tests/runners.rs +++ b/lib/wasi/tests/runners.rs @@ -67,15 +67,11 @@ mod wasi { }); let err = handle.join().unwrap().unwrap_err(); - dbg!(&err); - - let runtime_error = err - .chain() - .find_map(|e| e.downcast_ref::()) - .unwrap(); + let runtime_error = err.chain().find_map(|e| e.downcast_ref::()); let exit_code = match runtime_error { - WasiError::Exit(code) => *code, - other => unreachable!("Something else went wrong: {:?}", other), + Some(WasiError::Exit(code)) => *code, + Some(other) => panic!("Something else went wrong: {:?}", other), + None => panic!("Not a WasiError: {:?}", err), }; assert_eq!(exit_code.raw(), 42); } From fe244a3fdcbcd41a533b85aee0c154cc9cd965f1 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sun, 28 May 2023 19:01:26 +0800 Subject: [PATCH 06/19] Add Path::exists() to PackageSpecifier::from_str() just in case --- lib/wasi/src/runtime/resolver/inputs.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/wasi/src/runtime/resolver/inputs.rs b/lib/wasi/src/runtime/resolver/inputs.rs index 6b2498835fc..e289277830b 100644 --- a/lib/wasi/src/runtime/resolver/inputs.rs +++ b/lib/wasi/src/runtime/resolver/inputs.rs @@ -58,6 +58,9 @@ impl FromStr for PackageSpecifier { if s.contains('\\') { return Ok(PackageSpecifier::Path(s.into())); } + if Path::new(s).exists() { + return Ok(PackageSpecifier::Path(s.into())); + } if let Ok(url) = Url::parse(s) { if url.has_host() { From 3fa4ed7170810544c5d2e386457f1e7287c162b6 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sun, 28 May 2023 19:01:46 +0800 Subject: [PATCH 07/19] Look for a specific *.py file --- tests/integration/cli/tests/run_unstable.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/cli/tests/run_unstable.rs b/tests/integration/cli/tests/run_unstable.rs index 942a01b34bf..cabb3b86e73 100644 --- a/tests/integration/cli/tests/run_unstable.rs +++ b/tests/integration/cli/tests/run_unstable.rs @@ -261,13 +261,13 @@ mod webc_on_disk { .arg(fixtures::python()) .arg("--") .arg("-c") - .arg("ls -l /usr/coreutils/*.md && ls -l /lib/python3.6/") + .arg("ls -l /usr/coreutils/*.md && ls -l /lib/python3.6/*.py") .assert(); assert .success() .stdout(contains("/usr/coreutils/README.md")) - .stdout(contains("/lib/python3.6/")); + .stdout(contains("/lib/python3.6/this.py")); } } From 3c8d14beb6b20f606817e0c4b920b56507ca9040 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sun, 28 May 2023 19:02:34 +0800 Subject: [PATCH 08/19] Use assert_eq!() instead of panic!() when testing python/python --- tests/integration/cli/tests/run.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/integration/cli/tests/run.rs b/tests/integration/cli/tests/run.rs index 9f7c7d16f65..102d3cde405 100644 --- a/tests/integration/cli/tests/run.rs +++ b/tests/integration/cli/tests/run.rs @@ -704,9 +704,11 @@ fn run_test_caching_works_for_packages_with_versions() -> anyhow::Result<()> { .arg("test.py") .output()?; - if output.stdout != b"hello\n".to_vec() { - panic!("failed to run https://wapm.io/python/python for the first time"); - } + assert_eq!( + String::from_utf8_lossy(&output.stdout), + "hello\n", + "failed to run https://wapm.io/python/python for the first time" + ); let time = std::time::Instant::now(); @@ -718,9 +720,11 @@ fn run_test_caching_works_for_packages_with_versions() -> anyhow::Result<()> { dbg!(&output); - if output.stdout != b"hello\n".to_vec() { - panic!("failed to run https://wapm.io/python/python for the second time"); - } + assert_eq!( + String::from_utf8_lossy(&output.stdout), + "hello\n", + "failed to run https://wapm.io/python/python for the second time" + ); // package should be cached assert!(std::time::Instant::now() - time < std::time::Duration::from_secs(1)); From 7a6ed96a117815de89b97641057402a0390f1672 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Sun, 28 May 2023 19:10:49 +0800 Subject: [PATCH 09/19] Update wasmer-wasix to webc v5.0.2 --- Cargo.lock | 4 ++-- lib/cli/Cargo.toml | 2 +- lib/wasi/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c673029d72..0c3bbe14c09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6423,9 +6423,9 @@ dependencies = [ [[package]] name = "webc" -version = "5.0.1" +version = "5.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e90441ab5f53d68200c4b38f614b6653bc32240dbcebca32e44173030759b3b6" +checksum = "42af14e63ed784e4f813bd5fb35bc84016fa8b245879809547247da6051107f0" dependencies = [ "anyhow", "base64", diff --git a/lib/cli/Cargo.toml b/lib/cli/Cargo.toml index f107c040c3e..27b45d74ba9 100644 --- a/lib/cli/Cargo.toml +++ b/lib/cli/Cargo.toml @@ -45,7 +45,7 @@ virtual-fs = { version = "0.3.0", path = "../virtual-fs", default-features = fa virtual-net = { version = "0.2.0", path = "../virtual-net" } # Wasmer-owned dependencies. -webc = { version = "5.0.1" } +webc = { version = "5.0" } wapm-targz-to-pirita = "0.2.1" wasmer-deploy-cli = { version = "0.1.9", default-features = false } diff --git a/lib/wasi/Cargo.toml b/lib/wasi/Cargo.toml index 5b6826533b8..e3fd39cf7d1 100644 --- a/lib/wasi/Cargo.toml +++ b/lib/wasi/Cargo.toml @@ -29,7 +29,7 @@ bincode = { version = "1.3" } chrono = { version = "^0.4", default-features = false, features = [ "wasmbind", "std", "clock" ], optional = true } derivative = { version = "^2" } bytes = "1" -webc = { version = "5.0", default-features = false } +webc = { version = "5.0.2", default-features = false } serde_cbor = { version = "0.11.2", optional = true } anyhow = { version = "1.0.66" } lazy_static = "1.4" From c4d0528e7dab2d4ed61d622146621f5f9cb225cf Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Mon, 29 May 2023 13:41:00 +0800 Subject: [PATCH 10/19] Added a polyfill for Url::to_file_path() --- .../runtime/package_loader/builtin_loader.rs | 21 ++++++--- lib/wasi/src/runtime/resolver/utils.rs | 45 +++++++++++++++++-- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/lib/wasi/src/runtime/package_loader/builtin_loader.rs b/lib/wasi/src/runtime/package_loader/builtin_loader.rs index a1077e33819..ea1f62da2c5 100644 --- a/lib/wasi/src/runtime/package_loader/builtin_loader.rs +++ b/lib/wasi/src/runtime/package_loader/builtin_loader.rs @@ -89,13 +89,20 @@ impl BuiltinPackageLoader { async fn download(&self, dist: &DistributionInfo) -> Result { if dist.webc.scheme() == "file" { - // Note: The Url::to_file_path() method is platform-specific - #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] - if let Ok(path) = dist.webc.to_file_path() { - // FIXME: This will block the thread - let bytes = std::fs::read(&path) - .with_context(|| format!("Unable to read \"{}\"", path.display()))?; - return Ok(bytes.into()); + match crate::runtime::resolver::utils::file_path_from_url(&dist.webc) { + Ok(path) => { + // FIXME: This will block the thread + let bytes = std::fs::read(&path) + .with_context(|| format!("Unable to read \"{}\"", path.display()))?; + return Ok(bytes.into()); + } + Err(e) => { + tracing::debug!( + url=%dist.webc, + error=&*e, + "Unable to convert the file:// URL to a path", + ); + } } } diff --git a/lib/wasi/src/runtime/resolver/utils.rs b/lib/wasi/src/runtime/resolver/utils.rs index 521215b2cb2..152c93ead21 100644 --- a/lib/wasi/src/runtime/resolver/utils.rs +++ b/lib/wasi/src/runtime/resolver/utils.rs @@ -1,7 +1,7 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; -use anyhow::Error; use http::{HeaderMap, StatusCode}; +use anyhow::{Context, Error}; use url::Url; use crate::http::{HttpResponse, USER_AGENT}; @@ -56,13 +56,39 @@ pub(crate) fn http_error(response: &HttpResponse) -> Error { Error::msg(status) } +pub(crate) fn file_path_from_url(url: &Url) -> Result { + debug_assert_eq!(url.scheme(), "file"); + + // Note: The Url::to_file_path() method is platform-specific + cfg_if::cfg_if! { + if #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] { + if let Ok(path) = url.to_file_path() { + return Ok(path); + } + + // Sometimes we'll get a UNC-like path (e.g. + // "file:///?\\C:/\\/path/to/file.txt") and Url::to_file_path() + // won't be able to handle the "\\?" so we try to "massage" the URL + // a bit. + // See for more. + let modified = url.as_str().replace(r"\\?", "").replace("//?", "").replace('\\', "/"); + Url::parse(&modified) + .ok() + .and_then(|url| url.to_file_path().ok()) + .context("Unable to extract the file path") + } else { + anyhow::bail!("Url::to_file_path() is not supported on this platform"); + } + } +} + #[cfg(test)] mod tests { use super::*; #[test] #[cfg(unix)] - fn behaviour_is_identical() { + fn from_file_path_behaviour_is_identical() { let inputs = [ "/", "/path", @@ -78,4 +104,17 @@ mod tests { assert_eq!(got, expected, "Mismatch for \"{path}\""); } } + + #[test] + #[cfg(windows)] + fn to_file_path_can_handle_unc_paths() { + let path = Path::new(env!("CARGO_MANIFEST_DIR")) + .canonicalize() + .unwrap(); + let url = Url::from_file_path(&path).unwrap(); + + let got = file_path_from_url(&url).unwrap(); + + assert_eq!(got.canonicalize().unwrap(), path); + } } From 8a4c5666003925c32042739cf7bbd40c5b8bb793 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Mon, 29 May 2023 17:33:51 +0800 Subject: [PATCH 11/19] Rustfmt --- lib/wasi/src/runtime/resolver/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/wasi/src/runtime/resolver/utils.rs b/lib/wasi/src/runtime/resolver/utils.rs index 152c93ead21..03c018cc5f4 100644 --- a/lib/wasi/src/runtime/resolver/utils.rs +++ b/lib/wasi/src/runtime/resolver/utils.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; -use http::{HeaderMap, StatusCode}; use anyhow::{Context, Error}; +use http::{HeaderMap, StatusCode}; use url::Url; use crate::http::{HttpResponse, USER_AGENT}; From d829cd4746279015ef0baf46260fefe037469564 Mon Sep 17 00:00:00 2001 From: Johnathan Sharratt Date: Sun, 28 May 2023 21:48:47 +1000 Subject: [PATCH 12/19] Added a copy on write system for faster union and merge operations --- lib/virtual-fs/src/buffer_file.rs | 103 ++++++++++++++++ lib/virtual-fs/src/cow_file.rs | 188 ++++++++++++++++++++++++++++++ lib/virtual-fs/src/lib.rs | 19 +++ lib/virtual-fs/src/mem_fs/file.rs | 25 +++- lib/wasi/src/fs/mod.rs | 21 +--- 5 files changed, 340 insertions(+), 16 deletions(-) create mode 100644 lib/virtual-fs/src/buffer_file.rs create mode 100644 lib/virtual-fs/src/cow_file.rs diff --git a/lib/virtual-fs/src/buffer_file.rs b/lib/virtual-fs/src/buffer_file.rs new file mode 100644 index 00000000000..9c89fe53e64 --- /dev/null +++ b/lib/virtual-fs/src/buffer_file.rs @@ -0,0 +1,103 @@ +//! Used for /dev/zero - infinitely returns zero +//! which is useful for commands like `dd if=/dev/zero of=bigfile.img size=1G` + +use std::io::{self, *}; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite}; + +use crate::VirtualFile; + +#[derive(Debug, Default)] +pub struct BufferFile { + pub(crate) data: Cursor>, +} + +impl AsyncSeek for BufferFile { + fn start_seek(mut self: Pin<&mut Self>, position: io::SeekFrom) -> io::Result<()> { + let data = Pin::new(&mut self.data); + data.start_seek(position) + } + + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_complete(cx) + } +} + +impl AsyncWrite for BufferFile { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_write(cx, buf) + } + + fn poll_write_vectored( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[io::IoSlice<'_>], + ) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_write_vectored(cx, bufs) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_flush(cx) + } + + fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_shutdown(cx) + } +} + +impl AsyncRead for BufferFile { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + let data = Pin::new(&mut self.data); + data.poll_read(cx, buf) + } +} + +impl VirtualFile for BufferFile { + fn last_accessed(&self) -> u64 { + 0 + } + fn last_modified(&self) -> u64 { + 0 + } + fn created_time(&self) -> u64 { + 0 + } + fn size(&self) -> u64 { + self.data.get_ref().len() as u64 + } + fn set_len(&mut self, new_size: u64) -> crate::Result<()> { + self.data.get_mut().resize(new_size as usize, 0); + Ok(()) + } + fn unlink(&mut self) -> crate::Result<()> { + Ok(()) + } + fn poll_read_ready(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + let cur = self.data.seek(SeekFrom::Current(0)).unwrap_or_default(); + let len = self.data.seek(SeekFrom::End(0)).unwrap_or_default(); + if cur < len { + Poll::Ready(Ok((len - cur) as usize)) + } else { + Poll::Ready(Ok(0)) + } + } + + fn poll_write_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(8192)) + } +} diff --git a/lib/virtual-fs/src/cow_file.rs b/lib/virtual-fs/src/cow_file.rs new file mode 100644 index 00000000000..7f6672625fe --- /dev/null +++ b/lib/virtual-fs/src/cow_file.rs @@ -0,0 +1,188 @@ +//! Used for /dev/zero - infinitely returns zero +//! which is useful for commands like `dd if=/dev/zero of=bigfile.img size=1G` + +use std::io::{self, *}; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; + +use crate::{BufferFile, VirtualFile}; + +#[derive(Debug)] +pub struct CopyOnWriteFile { + inner: Option>, + buf: BufferFile, +} + +impl CopyOnWriteFile { + pub fn new(inner: Box) -> Self { + Self { + inner: Some(inner), + buf: BufferFile::default(), + } + } + fn poll_copy(&mut self, cx: &mut Context) -> Poll> { + if let Some(inner) = self.inner.as_mut() { + let mut temp = [0u8; 8192]; + while self.buf.size() < inner.size() { + let mut read_temp = ReadBuf::new(&mut temp); + + let inner = Pin::new(inner.as_mut()); + match inner.poll_read(cx, &mut read_temp) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + if read_temp.remaining() <= 0 { + return Poll::Pending; + } + + self.buf.data.write_all(read_temp.filled()).unwrap(); + } + + drop(inner); + self.inner.take(); + } + Poll::Ready(Ok(())) + } +} + +impl AsyncSeek for CopyOnWriteFile { + fn start_seek(mut self: Pin<&mut Self>, position: io::SeekFrom) -> io::Result<()> { + let data = Pin::new(&mut self.buf); + data.start_seek(position)?; + + if let Some(inner) = self.inner.as_mut() { + let data = Pin::new(inner.as_mut()); + data.start_seek(position)?; + } + + Ok(()) + } + + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.inner.as_mut() { + Some(inner) => { + let data = Pin::new(inner.as_mut()); + data.poll_complete(cx) + } + None => { + let data = Pin::new(&mut self.buf); + data.poll_complete(cx) + } + } + } +} + +impl AsyncWrite for CopyOnWriteFile { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + match self.poll_copy(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + let data = Pin::new(&mut self.buf); + data.poll_write(cx, buf) + } + + fn poll_write_vectored( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[io::IoSlice<'_>], + ) -> Poll> { + match self.poll_copy(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + let data = Pin::new(&mut self.buf); + data.poll_write_vectored(cx, bufs) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.poll_copy(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + let data = Pin::new(&mut self.buf); + data.poll_flush(cx) + } + + fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.poll_copy(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + let data = Pin::new(&mut self.buf); + data.poll_shutdown(cx) + } +} + +impl AsyncRead for CopyOnWriteFile { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + match self.inner.as_mut() { + Some(inner) => { + let data = Pin::new(inner.as_mut()); + data.poll_read(cx, buf) + } + None => { + let data = Pin::new(&mut self.buf); + data.poll_read(cx, buf) + } + } + } +} + +impl VirtualFile for CopyOnWriteFile { + fn last_accessed(&self) -> u64 { + 0 + } + fn last_modified(&self) -> u64 { + 0 + } + fn created_time(&self) -> u64 { + 0 + } + fn size(&self) -> u64 { + match self.inner.as_ref() { + Some(inner) => inner.size(), + None => self.buf.size(), + } + } + fn set_len(&mut self, new_size: u64) -> crate::Result<()> { + match self.inner.as_mut() { + Some(inner) => inner.set_len(new_size), + None => self.buf.set_len(new_size), + } + } + fn unlink(&mut self) -> crate::Result<()> { + Ok(()) + } + fn poll_read_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.inner.as_mut() { + Some(inner) => { + let data = Pin::new(inner.as_mut()); + data.poll_read_ready(cx) + } + None => { + let data: Pin<&mut BufferFile> = Pin::new(&mut self.buf); + data.poll_read_ready(cx) + } + } + } + + fn poll_write_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(8192)) + } +} diff --git a/lib/virtual-fs/src/lib.rs b/lib/virtual-fs/src/lib.rs index f39a05b15de..ea72de475ff 100644 --- a/lib/virtual-fs/src/lib.rs +++ b/lib/virtual-fs/src/lib.rs @@ -5,6 +5,7 @@ extern crate pretty_assertions; use std::any::Any; use std::ffi::OsString; use std::fmt; +use std::future::Future; use std::io; use std::ops::Deref; use std::path::{Path, PathBuf}; @@ -16,8 +17,10 @@ use thiserror::Error; pub mod arc_box_file; pub mod arc_file; pub mod arc_fs; +pub mod buffer_file; pub mod builder; pub mod combine_file; +pub mod cow_file; pub mod dual_write_file; pub mod empty_fs; #[cfg(feature = "host-fs")] @@ -48,8 +51,10 @@ pub mod limiter; pub use arc_box_file::*; pub use arc_file::*; pub use arc_fs::*; +pub use buffer_file::*; pub use builder::*; pub use combine_file::*; +pub use cow_file::*; pub use dual_write_file::*; pub use empty_fs::*; pub use filesystems::FileSystems; @@ -343,6 +348,20 @@ pub trait VirtualFile: None } + /// This method will copy a file from a source to this destination where + /// the default is to do a straight byte copy however file system implementors + /// may optimize this to do a zero copy + fn copy_reference<'a>( + &'a mut self, + mut src: Box, + ) -> Pin> + 'a>> { + Box::pin(async move { + let bytes_written = tokio::io::copy(&mut src, self).await?; + tracing::trace!(bytes_written, "Copying file into host filesystem",); + Ok(()) + }) + } + /// Polls the file for when there is data to be read fn poll_read_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; diff --git a/lib/virtual-fs/src/mem_fs/file.rs b/lib/virtual-fs/src/mem_fs/file.rs index cc439ce71af..cf18889ca68 100644 --- a/lib/virtual-fs/src/mem_fs/file.rs +++ b/lib/virtual-fs/src/mem_fs/file.rs @@ -7,7 +7,7 @@ use tokio::io::{AsyncSeek, AsyncWrite}; use super::*; use crate::limiter::TrackedVec; -use crate::{FsError, Result, VirtualFile}; +use crate::{CopyOnWriteFile, FsError, Result, VirtualFile}; use std::borrow::Cow; use std::cmp; use std::convert::TryInto; @@ -276,6 +276,29 @@ impl VirtualFile for FileHandle { } } + fn copy_reference<'a>( + &'a mut self, + src: Box, + ) -> Pin> + 'a>> { + let inner = self.filesystem.inner.clone(); + Box::pin(async move { + let mut fs = inner.write().unwrap(); + let inode = fs.storage.get_mut(self.inode); + match inode { + Some(inode) => { + *inode = Node::CustomFile(CustomFileNode { + inode: inode.inode(), + name: inode.name().to_string_lossy().to_string().into(), + file: Mutex::new(Box::new(CopyOnWriteFile::new(src))), + metadata: inode.metadata().clone(), + }); + Ok(()) + } + None => Err(std::io::ErrorKind::InvalidInput.into()), + } + }) + } + fn poll_read_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { if !self.readable { return Poll::Ready(Err(io::Error::new( diff --git a/lib/wasi/src/fs/mod.rs b/lib/wasi/src/fs/mod.rs index 383fec86c22..9de78400925 100644 --- a/lib/wasi/src/fs/mod.rs +++ b/lib/wasi/src/fs/mod.rs @@ -281,7 +281,7 @@ impl WasiFsRoot { Ok(()) } WasiFsRoot::Backing(fs) => { - merge_filesystems_expensive(other, fs).await?; + merge_filesystems(other, fs).await?; Ok(()) } } @@ -341,10 +341,8 @@ impl FileSystem for WasiFsRoot { /// Merge the contents of one filesystem into another. /// -/// This is a pretty heavy-weight operation because it will copy the contents of -/// each file from `source` into `destination`. #[tracing::instrument(level = "debug", skip_all)] -async fn merge_filesystems_expensive( +async fn merge_filesystems( source: &dyn FileSystem, destination: &dyn FileSystem, ) -> Result<(), virtual_fs::FsError> { @@ -366,7 +364,7 @@ async fn merge_filesystems_expensive( } } else if metadata.is_file() { files.push(async move { - copy_file(source, destination, &path) + copy_reference(source, destination, &path) .await .map_err(virtual_fs::FsError::from) }); @@ -383,27 +381,20 @@ async fn merge_filesystems_expensive( } #[tracing::instrument(level = "trace", skip_all, fields(path=%path.display()))] -async fn copy_file( +async fn copy_reference( source: &dyn FileSystem, destination: &dyn FileSystem, path: &Path, ) -> Result<(), std::io::Error> { - let mut src = source.new_open_options().read(true).open(path)?; + let src = source.new_open_options().read(true).open(path)?; let mut dst = destination .new_open_options() .create(true) .write(true) .truncate(true) .open(path)?; - let bytes_written = tokio::io::copy(&mut src, &mut dst).await?; - tracing::trace!( - path=%path.display(), - bytes_written, - "Copying file into host filesystem", - ); - - Ok(()) + dst.copy_reference(src).await } fn create_dir_all(fs: &dyn FileSystem, path: &Path) -> Result<(), virtual_fs::FsError> { From 964fa4f8b2ebb5add443fef219e796d8e823b576 Mon Sep 17 00:00:00 2001 From: Johnathan Sharratt Date: Tue, 30 May 2023 10:31:00 +1000 Subject: [PATCH 13/19] Fixes for the copy on write file --- Cargo.lock | 7 ++ lib/virtual-fs/Cargo.toml | 1 + lib/virtual-fs/src/cow_file.rs | 137 ++++++++++++++++++++++----------- 3 files changed, 99 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c3bbe14c09..3b77b0e6fc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3421,6 +3421,12 @@ dependencies = [ "bytecheck", ] +[[package]] +name = "replace_with" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a8614ee435691de62bcffcf4a66d91b3594bf1428a5722e79103249a095690" + [[package]] name = "reqwest" version = "0.11.18" @@ -4995,6 +5001,7 @@ dependencies = [ "libc", "pin-project-lite", "pretty_assertions", + "replace_with", "slab", "tempfile", "thiserror", diff --git a/lib/virtual-fs/Cargo.toml b/lib/virtual-fs/Cargo.toml index 8b532aca15f..e1b408f2423 100644 --- a/lib/virtual-fs/Cargo.toml +++ b/lib/virtual-fs/Cargo.toml @@ -26,6 +26,7 @@ bytes = "1" tokio = { version = "1", features = ["io-util", "sync", "macros"], default_features = false } pin-project-lite = "0.2.9" indexmap = "1.9.2" +replace_with = "0.1.7" [target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dependencies] getrandom = { version = "0.2" } diff --git a/lib/virtual-fs/src/cow_file.rs b/lib/virtual-fs/src/cow_file.rs index 7f6672625fe..9b4087eba53 100644 --- a/lib/virtual-fs/src/cow_file.rs +++ b/lib/virtual-fs/src/cow_file.rs @@ -1,6 +1,7 @@ //! Used for /dev/zero - infinitely returns zero //! which is useful for commands like `dd if=/dev/zero of=bigfile.img size=1G` +use replace_with::replace_with_or_abort; use std::io::{self, *}; use std::pin::Pin; use std::task::{Context, Poll}; @@ -9,27 +10,70 @@ use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; use crate::{BufferFile, VirtualFile}; +#[derive(Debug)] +enum CowState { + ReadOnly(Box), + Copying { + pos: u64, + inner: Box, + }, + Copied, +} +impl CowState { + fn as_ref(&self) -> Option<&Box> { + match self { + Self::ReadOnly(inner) => Some(inner), + Self::Copying { inner, .. } => Some(inner), + _ => None, + } + } + fn as_mut(&mut self) -> Option<&mut Box> { + match self { + Self::ReadOnly(inner) => Some(inner), + Self::Copying { inner, .. } => Some(inner), + _ => None, + } + } +} + #[derive(Debug)] pub struct CopyOnWriteFile { - inner: Option>, + last_accessed: u64, + last_modified: u64, + created_time: u64, + state: CowState, buf: BufferFile, } impl CopyOnWriteFile { pub fn new(inner: Box) -> Self { Self { - inner: Some(inner), + last_accessed: inner.last_accessed(), + last_modified: inner.last_modified(), + created_time: inner.created_time(), + state: CowState::ReadOnly(inner), buf: BufferFile::default(), } } - fn poll_copy(&mut self, cx: &mut Context) -> Poll> { - if let Some(inner) = self.inner.as_mut() { + fn poll_copy_progress(&mut self, cx: &mut Context) -> Poll> { + replace_with_or_abort(&mut self.state, |state| match state { + CowState::ReadOnly(inner) => CowState::Copying { pos: 0, inner }, + state => state, + }); + if let CowState::Copying { ref mut inner, pos } = &mut self.state { let mut temp = [0u8; 8192]; - while self.buf.size() < inner.size() { + + while *pos < inner.size() { let mut read_temp = ReadBuf::new(&mut temp); - let inner = Pin::new(inner.as_mut()); - match inner.poll_read(cx, &mut read_temp) { + if let Err(err) = Pin::new(inner.as_mut()).start_seek(SeekFrom::Start(*pos)) { + return Poll::Ready(Err(err)); + } + match Pin::new(inner.as_mut()).poll_complete(cx).map_ok(|_| ()) { + Poll::Ready(Ok(())) => {} + p => return p, + } + match Pin::new(inner.as_mut()).poll_read(cx, &mut read_temp) { Poll::Pending => return Poll::Pending, Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), Poll::Ready(Ok(())) => {} @@ -37,15 +81,21 @@ impl CopyOnWriteFile { if read_temp.remaining() <= 0 { return Poll::Pending; } + *pos += read_temp.remaining() as u64; self.buf.data.write_all(read_temp.filled()).unwrap(); } - - drop(inner); - self.inner.take(); + self.state = CowState::Copied; } Poll::Ready(Ok(())) } + fn poll_copy_start_and_progress(&mut self, cx: &mut Context) -> Poll> { + replace_with_or_abort(&mut self.state, |state| match state { + CowState::ReadOnly(inner) => CowState::Copying { pos: 0, inner }, + state => state, + }); + self.poll_copy_progress(cx) + } } impl AsyncSeek for CopyOnWriteFile { @@ -53,20 +103,16 @@ impl AsyncSeek for CopyOnWriteFile { let data = Pin::new(&mut self.buf); data.start_seek(position)?; - if let Some(inner) = self.inner.as_mut() { - let data = Pin::new(inner.as_mut()); - data.start_seek(position)?; + if let Some(inner) = self.state.as_mut() { + Pin::new(inner.as_mut()).start_seek(position)?; } Ok(()) } fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - match self.inner.as_mut() { - Some(inner) => { - let data = Pin::new(inner.as_mut()); - data.poll_complete(cx) - } + match self.state.as_mut() { + Some(inner) => Pin::new(inner.as_mut()).poll_complete(cx), None => { let data = Pin::new(&mut self.buf); data.poll_complete(cx) @@ -81,7 +127,7 @@ impl AsyncWrite for CopyOnWriteFile { cx: &mut Context<'_>, buf: &[u8], ) -> Poll> { - match self.poll_copy(cx) { + match self.poll_copy_start_and_progress(cx) { Poll::Pending => return Poll::Pending, Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), Poll::Ready(Ok(())) => {} @@ -95,7 +141,7 @@ impl AsyncWrite for CopyOnWriteFile { cx: &mut Context<'_>, bufs: &[io::IoSlice<'_>], ) -> Poll> { - match self.poll_copy(cx) { + match self.poll_copy_start_and_progress(cx) { Poll::Pending => return Poll::Pending, Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), Poll::Ready(Ok(())) => {} @@ -105,20 +151,18 @@ impl AsyncWrite for CopyOnWriteFile { } fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - match self.poll_copy(cx) { - Poll::Pending => return Poll::Pending, - Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + match self.poll_copy_start_and_progress(cx) { Poll::Ready(Ok(())) => {} + p => return p, } let data = Pin::new(&mut self.buf); data.poll_flush(cx) } fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - match self.poll_copy(cx) { - Poll::Pending => return Poll::Pending, - Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + match self.poll_copy_start_and_progress(cx) { Poll::Ready(Ok(())) => {} + p => return p, } let data = Pin::new(&mut self.buf); data.poll_shutdown(cx) @@ -131,11 +175,12 @@ impl AsyncRead for CopyOnWriteFile { cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>, ) -> Poll> { - match self.inner.as_mut() { - Some(inner) => { - let data = Pin::new(inner.as_mut()); - data.poll_read(cx, buf) - } + match self.poll_copy_progress(cx) { + Poll::Ready(Ok(())) => {} + p => return p, + } + match self.state.as_mut() { + Some(inner) => Pin::new(inner.as_mut()).poll_read(cx, buf), None => { let data = Pin::new(&mut self.buf); data.poll_read(cx, buf) @@ -146,35 +191,35 @@ impl AsyncRead for CopyOnWriteFile { impl VirtualFile for CopyOnWriteFile { fn last_accessed(&self) -> u64 { - 0 + self.last_accessed } fn last_modified(&self) -> u64 { - 0 + self.last_modified } fn created_time(&self) -> u64 { - 0 + self.created_time } fn size(&self) -> u64 { - match self.inner.as_ref() { + match self.state.as_ref() { Some(inner) => inner.size(), None => self.buf.size(), } } fn set_len(&mut self, new_size: u64) -> crate::Result<()> { - match self.inner.as_mut() { - Some(inner) => inner.set_len(new_size), - None => self.buf.set_len(new_size), - } + self.buf.set_len(new_size) } fn unlink(&mut self) -> crate::Result<()> { + self.buf.set_len(0)?; Ok(()) } fn poll_read_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - match self.inner.as_mut() { - Some(inner) => { - let data = Pin::new(inner.as_mut()); - data.poll_read_ready(cx) - } + match self.poll_copy_progress(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), + Poll::Ready(Ok(())) => {} + } + match self.state.as_mut() { + Some(inner) => Pin::new(inner.as_mut()).poll_read_ready(cx), None => { let data: Pin<&mut BufferFile> = Pin::new(&mut self.buf); data.poll_read_ready(cx) @@ -182,7 +227,7 @@ impl VirtualFile for CopyOnWriteFile { } } - fn poll_write_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { - Poll::Ready(Ok(8192)) + fn poll_write_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.poll_copy_progress(cx).map_ok(|_| 8192) } } From c023c54b65f3ca24385cc562d79bcbbdc5d0756d Mon Sep 17 00:00:00 2001 From: Johnathan Sharratt Date: Tue, 30 May 2023 11:08:38 +1000 Subject: [PATCH 14/19] Fixed some lint issues --- lib/virtual-fs/src/cow_file.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/virtual-fs/src/cow_file.rs b/lib/virtual-fs/src/cow_file.rs index 9b4087eba53..fd213936408 100644 --- a/lib/virtual-fs/src/cow_file.rs +++ b/lib/virtual-fs/src/cow_file.rs @@ -20,10 +20,10 @@ enum CowState { Copied, } impl CowState { - fn as_ref(&self) -> Option<&Box> { + fn as_ref(&self) -> Option<&(dyn VirtualFile + Send + Sync)> { match self { - Self::ReadOnly(inner) => Some(inner), - Self::Copying { inner, .. } => Some(inner), + Self::ReadOnly(inner) => Some(inner.as_ref()), + Self::Copying { inner, .. } => Some(inner.as_ref()), _ => None, } } @@ -56,10 +56,6 @@ impl CopyOnWriteFile { } } fn poll_copy_progress(&mut self, cx: &mut Context) -> Poll> { - replace_with_or_abort(&mut self.state, |state| match state { - CowState::ReadOnly(inner) => CowState::Copying { pos: 0, inner }, - state => state, - }); if let CowState::Copying { ref mut inner, pos } = &mut self.state { let mut temp = [0u8; 8192]; @@ -78,7 +74,7 @@ impl CopyOnWriteFile { Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), Poll::Ready(Ok(())) => {} } - if read_temp.remaining() <= 0 { + if read_temp.remaining() == 0 { return Poll::Pending; } *pos += read_temp.remaining() as u64; From 92d00c7b16ed48d19b8d0bab34d92ea6faabe28e Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Tue, 30 May 2023 11:23:52 +0800 Subject: [PATCH 15/19] Stop the compile-test-derive crate from panicking on Windows --- tests/lib/compiler-test-derive/src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/lib/compiler-test-derive/src/lib.rs b/tests/lib/compiler-test-derive/src/lib.rs index 0948f3e26bb..aa45272cc13 100644 --- a/tests/lib/compiler-test-derive/src/lib.rs +++ b/tests/lib/compiler-test-derive/src/lib.rs @@ -5,7 +5,7 @@ use proc_macro::TokenStream; #[cfg(test)] use proc_macro2::TokenStream; use quote::quote; -use std::path::PathBuf; +use std::path::Path; #[cfg(not(test))] use syn::parse; #[cfg(test)] @@ -45,9 +45,11 @@ pub fn compiler_test(attrs: TokenStream, input: TokenStream) -> TokenStream { // Let's build the ignores to append an `#[ignore]` macro to the // autogenerated tests in case the test appears in the `ignores.txt` path; - let mut ignores_txt_path = PathBuf::new(); - ignores_txt_path.push(env!("CARGO_MANIFEST_DIR")); - ignores_txt_path.push("../../ignores.txt"); + let tests_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .ancestors() + .nth(2) + .unwrap(); + let ignores_txt_path = tests_dir.join("ignores.txt"); let ignores = crate::ignores::Ignores::build_from_path(ignores_txt_path); From e218585f0a6e874e60e4b786689d0ce18192508d Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Tue, 30 May 2023 13:55:04 +0800 Subject: [PATCH 16/19] Made an emscripten warning go away on Windows --- lib/emscripten/src/env/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/emscripten/src/env/mod.rs b/lib/emscripten/src/env/mod.rs index a560a416912..5513b7a691e 100644 --- a/lib/emscripten/src/env/mod.rs +++ b/lib/emscripten/src/env/mod.rs @@ -26,7 +26,7 @@ pub fn call_malloc(mut ctx: &mut FunctionEnvMut, size: u32) -> u32 { malloc_ref.call(&mut ctx, size).unwrap() } -#[warn(dead_code)] +#[allow(dead_code)] pub fn call_malloc_with_cast(ctx: &mut FunctionEnvMut, size: u32) -> WasmPtr { WasmPtr::new(call_malloc(ctx, size)) } From 26c7146455b17e978a5db79cbba2f490ee593ea0 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Tue, 30 May 2023 22:47:01 +0800 Subject: [PATCH 17/19] We can fix this broken test in #3929 --- tests/integration/cli/tests/run_unstable.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/cli/tests/run_unstable.rs b/tests/integration/cli/tests/run_unstable.rs index cabb3b86e73..e647fe6c5e1 100644 --- a/tests/integration/cli/tests/run_unstable.rs +++ b/tests/integration/cli/tests/run_unstable.rs @@ -251,6 +251,7 @@ mod webc_on_disk { all(target_env = "musl", target_os = "linux"), ignore = "wasmer run-unstable segfaults on musl" )] + #[cfg_attr(windows, ignore = "FIXME(Michael-F-Bryan): Temporarily broken on Windows - https://github.com/wasmerio/wasmer/issues/3929")] fn merged_filesystem_contains_all_files() { let assert = wasmer_run_unstable() .arg(fixtures::bash()) From 3bfd3288650167fdd2c307c4b25b0a84a7a0e35e Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Tue, 30 May 2023 22:54:38 +0800 Subject: [PATCH 18/19] Rustfmt --- tests/integration/cli/tests/run_unstable.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/cli/tests/run_unstable.rs b/tests/integration/cli/tests/run_unstable.rs index e647fe6c5e1..8d418f3dad7 100644 --- a/tests/integration/cli/tests/run_unstable.rs +++ b/tests/integration/cli/tests/run_unstable.rs @@ -251,7 +251,10 @@ mod webc_on_disk { all(target_env = "musl", target_os = "linux"), ignore = "wasmer run-unstable segfaults on musl" )] - #[cfg_attr(windows, ignore = "FIXME(Michael-F-Bryan): Temporarily broken on Windows - https://github.com/wasmerio/wasmer/issues/3929")] + #[cfg_attr( + windows, + ignore = "FIXME(Michael-F-Bryan): Temporarily broken on Windows - https://github.com/wasmerio/wasmer/issues/3929" + )] fn merged_filesystem_contains_all_files() { let assert = wasmer_run_unstable() .arg(fixtures::bash()) From 42054235f3e015eeda8fae3a048a4d9709099427 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Tue, 30 May 2023 23:04:47 +0800 Subject: [PATCH 19/19] Clippy --- lib/virtual-fs/src/buffer_file.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/virtual-fs/src/buffer_file.rs b/lib/virtual-fs/src/buffer_file.rs index 9c89fe53e64..9af66811cbf 100644 --- a/lib/virtual-fs/src/buffer_file.rs +++ b/lib/virtual-fs/src/buffer_file.rs @@ -88,7 +88,7 @@ impl VirtualFile for BufferFile { Ok(()) } fn poll_read_ready(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { - let cur = self.data.seek(SeekFrom::Current(0)).unwrap_or_default(); + let cur = self.data.stream_position().unwrap_or_default(); let len = self.data.seek(SeekFrom::End(0)).unwrap_or_default(); if cur < len { Poll::Ready(Ok((len - cur) as usize))