diff --git a/.gitignore b/.gitignore index 9878bb1b4cf..d9c8c3e33f1 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ api-docs-repo/ .xwin-cache wapm.toml wasmer.toml +*.snap.new # Generated by tests on Android /avd /core @@ -25,4 +26,4 @@ build-capi.tar.gz build-wasmer.tar.gz lcov.info link/ -link.tar.gz \ No newline at end of file +link.tar.gz diff --git a/Cargo.lock b/Cargo.lock index c9feca47dfe..6e0ebe0b61c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5562,6 +5562,7 @@ name = "wasmer-cli" version = "3.3.0" dependencies = [ "anyhow", + "async-trait", "atty", "bytes", "bytesize", @@ -5581,6 +5582,7 @@ dependencies = [ "libc", "log", "object 0.30.3", + "once_cell", "pathdiff", "prettytable-rs", "regex", @@ -5595,6 +5597,7 @@ dependencies = [ "tempfile", "thiserror", "tldextract", + "tokio", "toml 0.5.11", "tracing", "tracing-subscriber 0.3.17", @@ -5856,10 +5859,12 @@ dependencies = [ "derivative", "dirs", "flate2", + "futures", "hex", "insta", "md5", "object 0.30.3", + "once_cell", "predicates 2.1.5", "pretty_assertions", "rand", @@ -6077,6 +6082,7 @@ dependencies = [ "linked_hash_set", "once_cell", "pin-project", + "pretty_assertions", "rand", "reqwest", "semver 1.0.17", diff --git a/lib/cli/Cargo.toml b/lib/cli/Cargo.toml index 24a4246fd05..5459dd4caf7 100644 --- a/lib/cli/Cargo.toml +++ b/lib/cli/Cargo.toml @@ -91,6 +91,9 @@ wasm-coredump-builder = { version = "0.1.11", optional = true } tracing = { version = "0.1" } tracing-subscriber = { version = "0.3", features = [ "env-filter", "fmt" ] } clap-verbosity-flag = "2" +async-trait = "0.1.68" +tokio = { version = "1.28.1", features = ["macros", "rt-multi-thread"] } +once_cell = "1.17.1" # NOTE: Must use different features for clap because the "color" feature does not # work on wasi due to the anstream dependency not compiling. diff --git a/lib/cli/src/commands/run.rs b/lib/cli/src/commands/run.rs index f4749caf0c9..1f7c5888b75 100644 --- a/lib/cli/src/commands/run.rs +++ b/lib/cli/src/commands/run.rs @@ -9,14 +9,16 @@ use anyhow::{anyhow, Context, Result}; use clap::Parser; #[cfg(feature = "coredump")] use std::fs::File; -use std::io::Write; use std::net::SocketAddr; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::str::FromStr; +use std::{io::Write, sync::Arc}; +use tokio::runtime::Handle; use wasmer::FunctionEnv; use wasmer::*; use wasmer_cache::{Cache, FileSystemCache, Hash}; +use wasmer_registry::WasmerConfig; use wasmer_types::Type as ValueType; use wasmer_wasix::runners::Runner; @@ -246,10 +248,15 @@ impl RunWithPathBuf { } fn inner_execute(&self) -> Result<()> { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + let handle = runtime.handle().clone(); + #[cfg(feature = "webc_runner")] { if let Ok(pf) = webc::Container::from_disk(self.path.clone()) { - return self.run_container(pf, self.command_name.as_deref(), &self.args); + return self.run_container(pf, self.command_name.as_deref(), &self.args, handle); } } let (mut store, module) = self.get_store_module()?; @@ -342,9 +349,13 @@ impl RunWithPathBuf { .map(|f| f.to_string_lossy().to_string()) }) .unwrap_or_default(); + + let wasmer_dir = WasmerConfig::get_wasmer_dir().map_err(anyhow::Error::msg)?; + let runtime = Arc::new(self.wasi.prepare_runtime(store.engine().clone(), &wasmer_dir, handle)?); + let (ctx, instance) = self .wasi - .instantiate(&mut store, &module, program_name, self.args.clone()) + .instantiate(&module, program_name, self.args.clone(), runtime, &mut store) .with_context(|| "failed to instantiate WASI module")?; let capable_of_deep_sleep = unsafe { ctx.data(&store).capable_of_deep_sleep() }; @@ -415,7 +426,16 @@ impl RunWithPathBuf { container: webc::Container, id: Option<&str>, args: &[String], + handle: Handle, ) -> Result<(), anyhow::Error> { + use wasmer_wasix::{ + bin_factory::BinaryPackage, + runners::{emscripten::EmscriptenRunner, wasi::WasiRunner, wcgi::WcgiRunner}, + WasiRuntime, + }; + + let wasmer_dir = WasmerConfig::get_wasmer_dir().map_err(anyhow::Error::msg)?; + let id = id .or_else(|| container.manifest().entrypoint.as_deref()) .context("No command specified")?; @@ -426,35 +446,45 @@ impl RunWithPathBuf { .with_context(|| format!("No metadata found for the command, \"{id}\""))?; let (store, _compiler_type) = self.store.get_store()?; - let mut runner = wasmer_wasix::runners::wasi::WasiRunner::new(store); - runner.set_args(args.to_vec()); - if runner.can_run_command(id, command).unwrap_or(false) { - return runner.run_cmd(&container, id).context("WASI runner failed"); + let runtime = self + .wasi + .prepare_runtime(store.engine().clone(), &wasmer_dir, handle)?; + let runtime = Arc::new(runtime); + let pkg = runtime + .task_manager() + .block_on(BinaryPackage::from_webc(&container, &*runtime))?; + + if WasiRunner::can_run_command(command).unwrap_or(false) { + let mut runner = WasiRunner::new(); + runner.set_args(args.to_vec()); + return runner + .run_command(id, &pkg, runtime) + .context("WASI runner failed"); } - let (store, _compiler_type) = self.store.get_store()?; - let mut runner = wasmer_wasix::runners::emscripten::EmscriptenRunner::new(store); - runner.set_args(args.to_vec()); - if runner.can_run_command(id, command).unwrap_or(false) { + if EmscriptenRunner::can_run_command(command).unwrap_or(false) { + let mut runner = EmscriptenRunner::new(); + runner.set_args(args.to_vec()); return runner - .run_cmd(&container, id) + .run_command(id, &pkg, runtime) .context("Emscripten runner failed"); } - let mut runner = wasmer_wasix::runners::wcgi::WcgiRunner::new(id); - let (store, _compiler_type) = self.store.get_store()?; - runner - .config() - .args(args) - .store(store) - .addr(self.wcgi.addr) - .envs(self.wasi.env_vars.clone()) - .map_directories(self.wasi.mapped_dirs.clone()); - if self.wasi.forward_host_env { - runner.config().forward_host_env(); - } - if runner.can_run_command(id, command).unwrap_or(false) { - return runner.run_cmd(&container, id).context("WCGI runner failed"); + if WcgiRunner::can_run_command(command).unwrap_or(false) { + let mut runner = WcgiRunner::new(); + runner + .config() + .args(args) + .addr(self.wcgi.addr) + .envs(self.wasi.env_vars.clone()) + .map_directories(self.wasi.mapped_dirs.clone()); + if self.wasi.forward_host_env { + runner.config().forward_host_env(); + } + + return runner + .run_command(id, &pkg, runtime) + .context("WCGI runner failed"); } anyhow::bail!( diff --git a/lib/cli/src/commands/run/wasi.rs b/lib/cli/src/commands/run/wasi.rs index 799ffb12de7..772508c8bb0 100644 --- a/lib/cli/src/commands/run/wasi.rs +++ b/lib/cli/src/commands/run/wasi.rs @@ -1,35 +1,39 @@ -use crate::anyhow::Context; -use crate::utils::{parse_envvar, parse_mapdir}; -use anyhow::Result; -use bytes::Bytes; use std::{ collections::{BTreeSet, HashMap}, path::{Path, PathBuf}, sync::{mpsc::Sender, Arc}, }; + +use anyhow::{Context, Result}; +use bytes::Bytes; +use clap::Parser; +use tokio::runtime::Handle; +use url::Url; use virtual_fs::{DeviceFile, FileSystem, PassthruFileSystem, RootFileSystemBuilder}; -use wasmer::{ - AsStoreMut, Engine, Function, Instance, Memory32, Memory64, Module, RuntimeError, Store, Value, -}; -use wasmer_registry::WasmerConfig; +use wasmer::{Engine, Function, Instance, Memory32, Memory64, Module, RuntimeError, Store, Value}; use wasmer_wasix::{ bin_factory::BinaryPackage, default_fs_backing, get_wasi_versions, + http::HttpClient, os::{tty_sys::SysTty, TtyBridge}, rewind_ext, runners::MappedDirectory, runtime::{ module_cache::{FileSystemCache, ModuleCache}, - resolver::{PackageResolver, RegistryResolver}, + package_loader::{BuiltinPackageLoader, PackageLoader}, + resolver::{ + FileSystemSource, InMemorySource, MultiSource, PackageSpecifier, Source, WapmSource, + WebSource, + }, task_manager::tokio::TokioTaskManager, }, types::__WASI_STDIN_FILENO, wasmer_wasix_types::wasi::Errno, PluggableRuntime, RewindState, WasiEnv, WasiEnvBuilder, WasiError, WasiFunctionEnv, - WasiVersion, + WasiRuntime, WasiVersion, }; -use clap::Parser; +use crate::utils::{parse_envvar, parse_mapdir}; use super::RunWithPathBuf; @@ -62,7 +66,7 @@ pub struct Wasi { /// List of other containers this module depends on #[clap(long = "use", name = "USE")] - uses: Vec, + pub(crate) uses: Vec, /// List of webc packages that are explicitly included for execution /// Note: these packages will be used instead of those in the registry @@ -104,6 +108,10 @@ pub struct Wasi { /// Require WASI modules to only import 1 version of WASI. #[clap(long = "deny-multiple-wasi-versions")] pub deny_multiple_wasi_versions: bool, + + /// The registry to use. + #[clap(long, env = "WASMER_REGISTRY", value_parser = parse_registry)] + pub registry: Option, } pub struct RunProperties { @@ -144,10 +152,10 @@ impl Wasi { pub fn prepare( &self, - store: &mut impl AsStoreMut, module: &Module, program_name: String, args: Vec, + rt: Arc, ) -> Result { let args = args.into_iter().map(|arg| arg.into_bytes()); @@ -158,17 +166,22 @@ impl Wasi { .map(|(a, b)| (a.to_string(), b.to_string())) .collect::>(); - let engine = store.as_store_mut().engine().clone(); - - let rt = self - .prepare_runtime(engine) - .context("Unable to prepare the wasi runtime")?; + let mut uses = Vec::new(); + for name in &self.uses { + let specifier = PackageSpecifier::parse(name) + .with_context(|| format!("Unable to parse \"{name}\" as a package specifier"))?; + let pkg = rt + .task_manager() + .block_on(BinaryPackage::from_registry(&specifier, &*rt)) + .with_context(|| format!("Unable to load \"{name}\""))?; + uses.push(pkg); + } let builder = WasiEnv::builder(program_name) - .runtime(Arc::new(rt)) + .runtime(Arc::clone(&rt)) .args(args) .envs(self.env_vars.clone()) - .uses(self.uses.clone()) + .uses(uses) .map_commands(map_commands); let mut builder = if wasmer_wasix::is_wasix_module(module) { @@ -227,8 +240,13 @@ impl Wasi { Ok(builder) } - fn prepare_runtime(&self, engine: Engine) -> Result { - let mut rt = PluggableRuntime::new(Arc::new(TokioTaskManager::shared())); + pub fn prepare_runtime( + &self, + engine: Engine, + wasmer_dir: &Path, + handle: Handle, + ) -> Result { + let mut rt = PluggableRuntime::new(Arc::new(TokioTaskManager::new(handle))); if self.networking { rt.set_networking_implementation(virtual_net::host::LocalNetworking::default()); @@ -242,16 +260,23 @@ impl Wasi { rt.set_tty(tty); } - let wasmer_home = WasmerConfig::get_wasmer_dir().map_err(anyhow::Error::msg)?; + let client = + wasmer_wasix::http::default_http_client().context("No HTTP client available")?; + let client = Arc::new(client); - let resolver = self - .prepare_resolver(&wasmer_home) - .context("Unable to prepare the package resolver")?; + let package_loader = self + .prepare_package_loader(wasmer_dir, client.clone()) + .context("Unable to prepare the package loader")?; + + let registry = self.prepare_source(wasmer_dir, client)?; + + let cache_dir = FileSystemCache::default_cache_dir(wasmer_dir); let module_cache = wasmer_wasix::runtime::module_cache::in_memory() - .and_then(FileSystemCache::new(wasmer_home.join("compiled"))); + .with_fallback(FileSystemCache::new(cache_dir)); - rt.set_resolver(resolver) + rt.set_package_loader(package_loader) .set_module_cache(module_cache) + .set_source(registry) .set_engine(Some(engine)); Ok(rt) @@ -260,13 +285,15 @@ impl Wasi { /// Helper function for instantiating a module with Wasi imports for the `Run` command. pub fn instantiate( &self, - store: &mut impl AsStoreMut, module: &Module, program_name: String, args: Vec, + runtime: Arc, + store: &mut Store, ) -> Result<(WasiFunctionEnv, Instance)> { - let builder = self.prepare(store, module, program_name, args)?; + let builder = self.prepare(module, program_name, args, runtime)?; let (instance, wasi_env) = builder.instantiate(module.clone(), store)?; + Ok((wasi_env, instance)) } @@ -439,37 +466,61 @@ impl Wasi { }) } - fn prepare_resolver(&self, wasmer_home: &Path) -> Result { - let mut resolver = wapm_resolver(wasmer_home)?; + fn prepare_package_loader( + &self, + wasmer_dir: &Path, + client: Arc, + ) -> Result { + let checkout_dir = BuiltinPackageLoader::default_cache_dir(wasmer_dir); + let loader = BuiltinPackageLoader::new_with_client(checkout_dir, Arc::new(client)); + Ok(loader) + } + fn prepare_source( + &self, + wasmer_dir: &Path, + client: Arc, + ) -> Result { + let mut source = MultiSource::new(); + + // Note: This should be first so our "preloaded" sources get a chance to + // override the main registry. + let mut preloaded = InMemorySource::new(); for path in &self.include_webcs { - let pkg = preload_webc(path) + preloaded + .add_webc(path) .with_context(|| format!("Unable to load \"{}\"", path.display()))?; - resolver.add_preload(pkg); } + source.add_source(preloaded); - Ok(resolver.with_cache()) + let graphql_endpoint = self.graphql_endpoint(wasmer_dir)?; + source.add_source(WapmSource::new(graphql_endpoint, Arc::clone(&client))); + + let cache_dir = WebSource::default_cache_dir(wasmer_dir); + source.add_source(WebSource::new(cache_dir, client)); + + source.add_source(FileSystemSource::default()); + + Ok(source) } -} -fn wapm_resolver(wasmer_home: &Path) -> Result { - // FIXME(Michael-F-Bryan): Ideally, all of this would in the - // RegistryResolver::from_env() constructor, but we don't want to add - // wasmer-registry as a dependency of wasmer-wasix just yet. - let cache_dir = wasmer_registry::get_webc_dir(wasmer_home); - let config = - wasmer_registry::WasmerConfig::from_file(wasmer_home).map_err(anyhow::Error::msg)?; + fn graphql_endpoint(&self, wasmer_dir: &Path) -> Result { + if let Some(endpoint) = &self.registry { + return Ok(endpoint.clone()); + } - let registry = config.registry.get_graphql_url(); - let registry = registry - .parse() - .with_context(|| format!("Unable to parse \"{registry}\" as a URL"))?; + let config = + wasmer_registry::WasmerConfig::from_file(wasmer_dir).map_err(anyhow::Error::msg)?; + let graphql_endpoint = config.registry.get_graphql_url(); + let graphql_endpoint = graphql_endpoint + .parse() + .with_context(|| format!("Unable to parse \"{graphql_endpoint}\" as a URL"))?; - Ok(RegistryResolver::new(cache_dir, registry)) + Ok(graphql_endpoint) + } } -fn preload_webc(path: &Path) -> Result { - let bytes = std::fs::read(path)?; - let webc = wasmer_wasix::wapm::parse_static_webc(bytes)?; - Ok(webc) +fn parse_registry(r: &str) -> Result { + let url = wasmer_registry::format_graphql(r).parse()?; + Ok(url) } diff --git a/lib/cli/src/commands/run_unstable.rs b/lib/cli/src/commands/run_unstable.rs index d9d5cbdcb25..d3817d55ae4 100644 --- a/lib/cli/src/commands/run_unstable.rs +++ b/lib/cli/src/commands/run_unstable.rs @@ -2,47 +2,64 @@ use std::{ collections::BTreeMap, - fmt::Display, + fmt::{Binary, Display}, fs::File, io::{ErrorKind, LineWriter, Read, Write}, net::SocketAddr, path::{Path, PathBuf}, str::FromStr, - sync::Mutex, + sync::{Arc, Mutex}, time::{Duration, SystemTime}, }; use anyhow::{Context, Error}; use clap::Parser; use clap_verbosity_flag::WarnLevel; +use once_cell::sync::Lazy; use sha2::{Digest, Sha256}; use tempfile::NamedTempFile; +use tokio::runtime::Handle; use url::Url; use wapm_targz_to_pirita::FileMap; use wasmer::{ DeserializeError, Engine, Function, Imports, Instance, Module, Store, Type, TypedFunction, Value, }; -use wasmer_cache::Cache; #[cfg(feature = "compiler")] use wasmer_compiler::ArtifactBuild; use wasmer_registry::Package; -use wasmer_wasix::runners::wcgi::AbortHandle; -use wasmer_wasix::runners::{MappedDirectory, Runner}; +use wasmer_wasix::{ + bin_factory::BinaryPackage, + runners::{MappedDirectory, Runner}, + runtime::resolver::PackageSpecifier, +}; +use wasmer_wasix::{ + runners::{ + emscripten::EmscriptenRunner, + wasi::WasiRunner, + wcgi::{AbortHandle, WcgiRunner}, + }, + WasiRuntime, +}; use webc::{metadata::Manifest, v1::DirOrFile, Container}; -use crate::{ - store::StoreOptions, - wasmer_home::{DownloadCached, ModuleCache, WasmerHome}, -}; +use crate::store::StoreOptions; + +static WASMER_HOME: Lazy = Lazy::new(|| { + wasmer_registry::WasmerConfig::get_wasmer_dir() + .ok() + .or_else(|| dirs::home_dir().map(|home| home.join(".wasmer"))) + .unwrap_or_else(|| PathBuf::from(".wasmer")) +}); /// The unstable `wasmer run` subcommand. #[derive(Debug, Parser)] pub struct RunUnstable { #[clap(flatten)] verbosity: clap_verbosity_flag::Verbosity, - #[clap(flatten)] - wasmer_home: WasmerHome, + /// The Wasmer home directory. + #[clap(long = "wasmer-dir", env = "WASMER_DIR", default_value = WASMER_HOME.as_os_str())] + wasmer_dir: PathBuf, #[clap(flatten)] store: StoreOptions, #[clap(flatten)] @@ -69,52 +86,61 @@ pub struct RunUnstable { impl RunUnstable { pub fn execute(&self) -> Result<(), Error> { crate::logging::set_up_logging(self.verbosity.log_level_filter()); + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + let handle = runtime.handle().clone(); + + #[cfg(feature = "sys")] + if self.stack_size.is_some() { + wasmer_vm::set_stack_size(self.stack_size.unwrap()); + } + + let (mut store, _) = self.store.get_store()?; + let runtime = + self.wasi + .prepare_runtime(store.engine().clone(), &self.wasmer_dir, handle)?; let target = self .input - .resolve_target(&self.wasmer_home) + .resolve_target(&runtime) .with_context(|| format!("Unable to resolve \"{}\"", self.input))?; - let (mut store, _) = self.store.get_store()?; - - let mut cache = self.wasmer_home.module_cache(); - let result = match target.load(&mut cache, &store)? { - ExecutableTarget::WebAssembly(wasm) => self.execute_wasm(&target, &wasm, &mut store), - ExecutableTarget::Webc(container) => { - self.execute_webc(&target, container, cache, &mut store) - } - }; + let result = self.execute_target(target, Arc::new(runtime), &mut store); if let Err(e) = &result { - #[cfg(feature = "coredump")] - if let Some(coredump) = &self.coredump_on_trap { - if let Err(e) = generate_coredump(e, target.path(), coredump) { - tracing::warn!( - error = &*e as &dyn std::error::Error, - coredump_path=%coredump.display(), - "Unable to generate a coredump", - ); - } - } + self.maybe_save_coredump(e); } result } + fn execute_target( + &self, + executable_target: ExecutableTarget, + runtime: Arc, + store: &mut Store, + ) -> Result<(), Error> { + match executable_target { + ExecutableTarget::WebAssembly { module, path } => { + self.execute_wasm(&path, &module, store, runtime) + } + ExecutableTarget::Package(pkg) => self.execute_webc(&pkg, runtime), + } + } + + #[tracing::instrument(skip_all)] fn execute_wasm( &self, - target: &TargetOnDisk, + path: &Path, module: &Module, store: &mut Store, + runtime: Arc, ) -> Result<(), Error> { - #[cfg(feature = "sys")] - if self.stack_size.is_some() { - wasmer_vm::set_stack_size(self.stack_size.unwrap()); - } if wasmer_emscripten::is_emscripten_module(module) { self.execute_emscripten_module() } else if wasmer_wasix::is_wasi_module(module) || wasmer_wasix::is_wasix_module(module) { - self.execute_wasi_module(target.path(), module, store) + self.execute_wasi_module(path, module, runtime, store) } else { self.execute_pure_wasm_module(module, store) } @@ -123,93 +149,106 @@ impl RunUnstable { #[tracing::instrument(skip_all)] fn execute_webc( &self, - target: &TargetOnDisk, - container: Container, - mut cache: ModuleCache, - store: &mut Store, + pkg: &BinaryPackage, + runtime: Arc, ) -> Result<(), Error> { - #[cfg(feature = "sys")] - if self.stack_size.is_some() { - wasmer_vm::set_stack_size(self.stack_size.unwrap()); - } let id = match self.entrypoint.as_deref() { Some(cmd) => cmd, - None => infer_webc_entrypoint(container.manifest())?, + None => infer_webc_entrypoint(pkg)?, }; - let command = container - .manifest() - .commands - .get(id) + let cmd = pkg + .get_command(id) .with_context(|| format!("Unable to get metadata for the \"{id}\" command"))?; - let (store, _compiler_type) = self.store.get_store()?; - let runner_base = command - .runner - .as_str() - .split_once('@') - .map(|(base, version)| base) - .unwrap_or_else(|| command.runner.as_str()); - - let cache = Mutex::new(cache); - - match runner_base { - webc::metadata::annotations::EMSCRIPTEN_RUNNER_URI => { - let mut runner = wasmer_wasix::runners::emscripten::EmscriptenRunner::new(store); - runner.set_args(self.args.clone()); - if runner.can_run_command(id, command).unwrap_or(false) { - return runner - .run_cmd(&container, id) - .context("Emscripten runner failed"); - } - } - webc::metadata::annotations::WCGI_RUNNER_URI => { - let mut runner = wasmer_wasix::runners::wcgi::WcgiRunner::new(id).with_compile( - move |engine, bytes| { - let mut cache = cache.lock().unwrap(); - compile_wasm_cached("".to_string(), bytes, &mut cache, engine) - }, - ); + let uses = self.load_injected_packages(&*runtime)?; - runner - .config() - .args(self.args.clone()) - .store(store) - .addr(self.wcgi.addr) - .envs(self.wasi.env_vars.clone()) - .map_directories(self.wasi.mapped_dirs.clone()) - .callbacks(Callbacks::new(self.wcgi.addr)); - if self.wasi.forward_host_env { - runner.config().forward_host_env(); - } - if runner.can_run_command(id, command).unwrap_or(false) { - return runner.run_cmd(&container, id).context("WCGI runner failed"); - } - } - // TODO: Add this on the webc annotation itself - "https://webc.org/runner/wasi/command" - | webc::metadata::annotations::WASI_RUNNER_URI => { - let mut runner = wasmer_wasix::runners::wasi::WasiRunner::new(store) - .with_compile(move |engine, bytes| { - let mut cache = cache.lock().unwrap(); - compile_wasm_cached("".to_string(), bytes, &mut cache, engine) - }) - .with_args(self.args.clone()) - .with_envs(self.wasi.env_vars.clone()) - .with_mapped_directories(self.wasi.mapped_dirs.clone()); - if self.wasi.forward_host_env { - runner.set_forward_host_env(); - } - if runner.can_run_command(id, command).unwrap_or(false) { - return runner.run_cmd(&container, id).context("WASI runner failed"); - } - } - _ => {} + if WcgiRunner::can_run_command(cmd.metadata())? { + self.run_wcgi(id, pkg, uses, runtime) + } else if WasiRunner::can_run_command(cmd.metadata())? { + self.run_wasi(id, pkg, uses, runtime) + } else if EmscriptenRunner::can_run_command(cmd.metadata())? { + self.run_emscripten(id, pkg, runtime) + } else { + anyhow::bail!( + "Unable to find a runner that supports \"{}\"", + cmd.metadata().runner + ); } + } - anyhow::bail!( - "Unable to find a runner that supports \"{}\"", - command.runner - ); + #[tracing::instrument(skip_all)] + fn load_injected_packages( + &self, + runtime: &dyn WasiRuntime, + ) -> Result, Error> { + let mut dependencies = Vec::new(); + + for name in &self.wasi.uses { + let specifier = PackageSpecifier::parse(name) + .with_context(|| format!("Unable to parse \"{name}\" as a package specifier"))?; + let pkg = runtime + .task_manager() + .block_on(BinaryPackage::from_registry(&specifier, runtime)) + .with_context(|| format!("Unable to load \"{name}\""))?; + dependencies.push(pkg); + } + + Ok(dependencies) + } + + fn run_wasi( + &self, + command_name: &str, + pkg: &BinaryPackage, + uses: Vec, + runtime: Arc, + ) -> Result<(), Error> { + let mut runner = wasmer_wasix::runners::wasi::WasiRunner::new() + .with_args(self.args.clone()) + .with_envs(self.wasi.env_vars.clone()) + .with_mapped_directories(self.wasi.mapped_dirs.clone()) + .with_injected_packages(uses); + if self.wasi.forward_host_env { + runner.set_forward_host_env(); + } + + runner.run_command(command_name, pkg, runtime) + } + + fn run_wcgi( + &self, + command_name: &str, + pkg: &BinaryPackage, + uses: Vec, + runtime: Arc, + ) -> Result<(), Error> { + let mut runner = wasmer_wasix::runners::wcgi::WcgiRunner::new(); + + runner + .config() + .args(self.args.clone()) + .addr(self.wcgi.addr) + .envs(self.wasi.env_vars.clone()) + .map_directories(self.wasi.mapped_dirs.clone()) + .callbacks(Callbacks::new(self.wcgi.addr)) + .inject_packages(uses); + if self.wasi.forward_host_env { + runner.config().forward_host_env(); + } + + runner.run_command(command_name, pkg, runtime) + } + + fn run_emscripten( + &self, + command_name: &str, + pkg: &BinaryPackage, + runtime: Arc, + ) -> Result<(), Error> { + let mut runner = wasmer_wasix::runners::emscripten::EmscriptenRunner::new(); + runner.set_args(self.args.clone()); + + runner.run_command(command_name, pkg, runtime) } #[tracing::instrument(skip_all)] @@ -249,20 +288,37 @@ impl RunUnstable { &self, wasm_path: &Path, module: &Module, + runtime: Arc, store: &mut Store, ) -> Result<(), Error> { let program_name = wasm_path.display().to_string(); + let builder = self .wasi - .prepare(store, module, program_name, self.args.clone())?; + .prepare(module, program_name, self.args.clone(), runtime)?; builder.run_with_store(module.clone(), store)?; + Ok(()) } #[tracing::instrument(skip_all)] fn execute_emscripten_module(&self) -> Result<(), Error> { - todo!() + anyhow::bail!("Emscripten packages are not currently supported") + } + + #[allow(unused_variables)] + fn maybe_save_coredump(&self, e: &Error) { + #[cfg(feature = "coredump")] + if let Some(coredump) = &self.coredump_on_trap { + if let Err(e) = generate_coredump(e, self.input.to_string(), coredump) { + tracing::warn!( + error = &*e as &dyn std::error::Error, + coredump_path=%coredump.display(), + "Unable to generate a coredump", + ); + } + } } } @@ -309,70 +365,34 @@ fn parse_value(s: &str, ty: wasmer_types::Type) -> Result { Ok(value) } -fn infer_webc_entrypoint(manifest: &Manifest) -> Result<&str, Error> { - if let Some(entrypoint) = manifest.entrypoint.as_deref() { +fn infer_webc_entrypoint(pkg: &BinaryPackage) -> Result<&str, Error> { + if let Some(entrypoint) = pkg.entrypoint_cmd.as_deref() { return Ok(entrypoint); } - let commands: Vec<_> = manifest.commands.keys().collect(); - - match commands.as_slice() { + match pkg.commands.as_slice() { [] => anyhow::bail!("The WEBC file doesn't contain any executable commands"), - [one] => Ok(one.as_str()), + [one] => Ok(one.name()), [..] => { + let mut commands: Vec<_> = pkg.commands.iter().map(|cmd| cmd.name()).collect(); + commands.sort(); anyhow::bail!( - "Unable to determine the WEBC file's entrypoint. Please choose one of {commands:?}" + "Unable to determine the WEBC file's entrypoint. Please choose one of {:?}", + commands, ); } } } -fn compile_directory_to_webc(dir: &Path) -> Result, Error> { - let mut files = BTreeMap::new(); - load_files_from_disk(&mut files, dir, dir)?; - - let wasmer_toml = DirOrFile::File("wasmer.toml".into()); - if let Some(toml_data) = files.remove(&wasmer_toml) { - // HACK(Michael-F-Bryan): The version of wapm-targz-to-pirita we are - // using doesn't know we renamed "wapm.toml" to "wasmer.toml", so we - // manually patch things up if people have already migrated their - // projects. - files - .entry(DirOrFile::File("wapm.toml".into())) - .or_insert(toml_data); - } - - let functions = wapm_targz_to_pirita::TransformManifestFunctions::default(); - wapm_targz_to_pirita::generate_webc_file(files, dir, None, &functions) -} - -fn load_files_from_disk(files: &mut FileMap, dir: &Path, base: &Path) -> Result<(), Error> { - let entries = dir - .read_dir() - .with_context(|| format!("Unable to read the contents of \"{}\"", dir.display()))?; - - for entry in entries { - let path = entry?.path(); - let relative_path = path.strip_prefix(base)?.to_path_buf(); - - if path.is_dir() { - load_files_from_disk(files, &path, base)?; - files.insert(DirOrFile::Dir(relative_path), Vec::new()); - } else if path.is_file() { - let data = std::fs::read(&path) - .with_context(|| format!("Unable to read \"{}\"", path.display()))?; - files.insert(DirOrFile::File(relative_path), data); - } - } - Ok(()) -} - +/// The input that was passed in via the command-line. #[derive(Debug, Clone, PartialEq)] enum PackageSource { + /// A file on disk (`*.wasm`, `*.webc`, etc.). File(PathBuf), + /// A directory containing a `wasmer.toml` file Dir(PathBuf), - Package(Package), - Url(Url), + /// A package to be downloaded (a URL, package name, etc.) + Package(PackageSpecifier), } impl PackageSource { @@ -384,11 +404,7 @@ impl PackageSource { return Ok(PackageSource::Dir(path.to_path_buf())); } - if let Ok(url) = Url::parse(s) { - return Ok(PackageSource::Url(url)); - } - - if let Ok(pkg) = Package::from_str(s) { + if let Ok(pkg) = PackageSpecifier::parse(s) { return Ok(PackageSource::Package(pkg)); } @@ -397,21 +413,19 @@ impl PackageSource { )) } - /// Try to resolve the [`PackageSource`] to an artifact on disk. + /// Try to resolve the [`PackageSource`] to an executable artifact. /// /// This will try to automatically download and cache any resources from the /// internet. - fn resolve_target(&self, home: &impl DownloadCached) -> Result { + fn resolve_target(&self, rt: &dyn WasiRuntime) -> Result { match self { - PackageSource::File(path) => TargetOnDisk::from_file(path.clone()), - PackageSource::Dir(d) => Ok(TargetOnDisk::Directory(d.clone())), + PackageSource::File(path) => ExecutableTarget::from_file(path, rt), + PackageSource::Dir(d) => ExecutableTarget::from_dir(d, rt), PackageSource::Package(pkg) => { - let cached = home.download_package(pkg)?; - Ok(TargetOnDisk::Webc(cached)) - } - PackageSource::Url(url) => { - let cached = home.download_url(url)?; - Ok(TargetOnDisk::Webc(cached)) + let pkg = rt + .task_manager() + .block_on(BinaryPackage::from_registry(pkg, rt))?; + Ok(ExecutableTarget::Package(pkg)) } } } @@ -422,176 +436,150 @@ impl Display for PackageSource { match self { PackageSource::File(path) | PackageSource::Dir(path) => write!(f, "{}", path.display()), PackageSource::Package(p) => write!(f, "{p}"), - PackageSource::Url(u) => write!(f, "{u}"), } } } -/// A file/directory on disk that will be executed. -/// -/// Depending on the type of target and the command-line arguments, this might -/// be something the user passed in manually or something that was automatically -/// saved to `$WASMER_HOME` for caching purposes. +/// We've been given the path for a file... What does it contain and how should +/// that be run? #[derive(Debug, Clone)] enum TargetOnDisk { - WebAssemblyBinary(PathBuf), - Wat(PathBuf), - Webc(PathBuf), - Directory(PathBuf), - Artifact(PathBuf), + WebAssemblyBinary, + Wat, + LocalWebc, + Artifact, } impl TargetOnDisk { - fn from_file(path: PathBuf) -> Result { + fn from_file(path: &Path) -> Result { // Normally the first couple hundred bytes is enough to figure // out what type of file this is. let mut buffer = [0_u8; 512]; - let mut f = File::open(&path) + let mut f = File::open(path) .with_context(|| format!("Unable to open \"{}\" for reading", path.display(),))?; let bytes_read = f.read(&mut buffer)?; let leading_bytes = &buffer[..bytes_read]; if wasmer::is_wasm(leading_bytes) { - return Ok(TargetOnDisk::WebAssemblyBinary(path)); + return Ok(TargetOnDisk::WebAssemblyBinary); } if webc::detect(leading_bytes).is_ok() { - return Ok(TargetOnDisk::Webc(path)); + return Ok(TargetOnDisk::LocalWebc); } #[cfg(feature = "compiler")] if ArtifactBuild::is_deserializable(leading_bytes) { - return Ok(TargetOnDisk::Artifact(path)); + return Ok(TargetOnDisk::Artifact); } // If we can't figure out the file type based on its content, fall back // to checking the extension. match path.extension().and_then(|s| s.to_str()) { - Some("wat") => Ok(TargetOnDisk::Wat(path)), + Some("wat") => Ok(TargetOnDisk::Wat), + Some("wasm") => Ok(TargetOnDisk::WebAssemblyBinary), + Some("webc") => Ok(TargetOnDisk::LocalWebc), + Some("wasmu") => Ok(TargetOnDisk::WebAssemblyBinary), _ => anyhow::bail!("Unable to determine how to execute \"{}\"", path.display()), } } +} - fn path(&self) -> &Path { - match self { - TargetOnDisk::WebAssemblyBinary(p) - | TargetOnDisk::Webc(p) - | TargetOnDisk::Wat(p) - | TargetOnDisk::Directory(p) - | TargetOnDisk::Artifact(p) => p, +#[derive(Debug, Clone)] +enum ExecutableTarget { + WebAssembly { module: Module, path: PathBuf }, + Package(BinaryPackage), +} + +impl ExecutableTarget { + /// Try to load a Wasmer package from a directory containing a `wasmer.toml` + /// file. + #[tracing::instrument(skip_all)] + fn from_dir(dir: &Path, runtime: &dyn WasiRuntime) -> Result { + let mut files = BTreeMap::new(); + load_files_from_disk(&mut files, dir, dir)?; + + let wasmer_toml = DirOrFile::File("wasmer.toml".into()); + if let Some(toml_data) = files.remove(&wasmer_toml) { + // HACK(Michael-F-Bryan): The version of wapm-targz-to-pirita we are + // using doesn't know we renamed "wapm.toml" to "wasmer.toml", so we + // manually patch things up if people have already migrated their + // projects. + files + .entry(DirOrFile::File("wapm.toml".into())) + .or_insert(toml_data); } + + let functions = wapm_targz_to_pirita::TransformManifestFunctions::default(); + let webc = wapm_targz_to_pirita::generate_webc_file(files, dir, None, &functions)?; + + let container = Container::from_bytes(webc)?; + let pkg = runtime + .task_manager() + .block_on(BinaryPackage::from_webc(&container, runtime))?; + + Ok(ExecutableTarget::Package(pkg)) } - fn load(&self, cache: &mut ModuleCache, store: &Store) -> Result { - match self { - TargetOnDisk::Webc(webc) => { - // As an optimisation, try to use the mmapped version first. - if let Ok(container) = Container::from_disk(webc.clone()) { - return Ok(ExecutableTarget::Webc(container)); - } - - // Otherwise, fall back to the version that reads everything - // into memory. - let bytes = std::fs::read(webc) - .with_context(|| format!("Unable to read \"{}\"", webc.display()))?; - let container = Container::from_bytes(bytes)?; - - Ok(ExecutableTarget::Webc(container)) - } - TargetOnDisk::Directory(dir) => { - // FIXME: Runners should be able to load directories directly - // instead of needing to compile to a WEBC file. - let webc = compile_directory_to_webc(dir).with_context(|| { - format!("Unable to bundle \"{}\" as a WEBC package", dir.display()) - })?; - let container = Container::from_bytes(webc) - .context("Unable to parse the generated WEBC file")?; - - Ok(ExecutableTarget::Webc(container)) - } - TargetOnDisk::WebAssemblyBinary(path) => { - let wasm = std::fs::read(path) - .with_context(|| format!("Unable to read \"{}\"", path.display()))?; - let module = - compile_wasm_cached(path.display().to_string(), &wasm, cache, store.engine())?; - Ok(ExecutableTarget::WebAssembly(module)) + /// Try to load a file into something that can be used to run it. + #[tracing::instrument(skip_all)] + fn from_file(path: &Path, runtime: &dyn WasiRuntime) -> Result { + match TargetOnDisk::from_file(path)? { + TargetOnDisk::WebAssemblyBinary | TargetOnDisk::Wat => { + let wasm = std::fs::read(path)?; + let engine = runtime.engine().context("No engine available")?; + let module = Module::new(&engine, &wasm)?; + Ok(ExecutableTarget::WebAssembly { + module, + path: path.to_path_buf(), + }) } - TargetOnDisk::Wat(path) => { - let wat = std::fs::read(path) - .with_context(|| format!("Unable to read \"{}\"", path.display()))?; - let wasm = - wasmer::wat2wasm(&wat).context("Unable to convert the WAT to WebAssembly")?; - - let module = - compile_wasm_cached(path.display().to_string(), &wasm, cache, store.engine())?; - Ok(ExecutableTarget::WebAssembly(module)) + TargetOnDisk::Artifact => { + let engine = runtime.engine().context("No engine available")?; + let module = unsafe { Module::deserialize_from_file(&engine, path)? }; + + Ok(ExecutableTarget::WebAssembly { + module, + path: path.to_path_buf(), + }) } - TargetOnDisk::Artifact(artifact) => { - let module = unsafe { - Module::deserialize_from_file(store, artifact) - .context("Unable to deserialize the pre-compiled module")? - }; - Ok(ExecutableTarget::WebAssembly(module)) + TargetOnDisk::LocalWebc => { + let container = Container::from_disk(path)?; + let pkg = runtime + .task_manager() + .block_on(BinaryPackage::from_webc(&container, runtime))?; + Ok(ExecutableTarget::Package(pkg)) } } } } -fn compile_wasm_cached( - name: String, - wasm: &[u8], - cache: &mut ModuleCache, - engine: &Engine, -) -> Result { - tracing::debug!("Trying to retrieve module from cache"); - - let hash = wasmer_cache::Hash::generate(wasm); - tracing::debug!("Generated hash: {}", hash); - - unsafe { - match cache.load(engine, hash) { - Ok(m) => { - tracing::debug!(%hash, "Module loaded from cache"); - return Ok(m); - } - Err(DeserializeError::Io(e)) if e.kind() == ErrorKind::NotFound => {} - Err(error) => { - tracing::warn!( - %hash, - error=&error as &dyn std::error::Error, - name=%name, - "Unable to deserialize the cached module", - ); - } - } - } +fn load_files_from_disk(files: &mut FileMap, dir: &Path, base: &Path) -> Result<(), Error> { + let entries = dir + .read_dir() + .with_context(|| format!("Unable to read the contents of \"{}\"", dir.display()))?; - let mut module = Module::new(engine, wasm).context("Unable to load the module from a file")?; - module.set_name(&name); + for entry in entries { + let path = entry?.path(); + let relative_path = path.strip_prefix(base)?.to_path_buf(); - if let Err(e) = cache.store(hash, &module) { - tracing::warn!( - error=&e as &dyn std::error::Error, - wat=%name, - key=%hash, - "Unable to cache the compiled module", - ); + if path.is_dir() { + load_files_from_disk(files, &path, base)?; + files.insert(DirOrFile::Dir(relative_path), Vec::new()); + } else if path.is_file() { + let data = std::fs::read(&path) + .with_context(|| format!("Unable to read \"{}\"", path.display()))?; + files.insert(DirOrFile::File(relative_path), data); + } } - - Ok(module) -} - -#[derive(Debug, Clone)] -enum ExecutableTarget { - WebAssembly(Module), - Webc(Container), + Ok(()) } #[cfg(feature = "coredump")] -fn generate_coredump(err: &Error, source: &Path, coredump_path: &Path) -> Result<(), Error> { +fn generate_coredump(err: &Error, source_name: String, coredump_path: &Path) -> Result<(), Error> { let err: &wasmer::RuntimeError = match err.downcast_ref() { Some(e) => e, None => { @@ -600,7 +588,6 @@ fn generate_coredump(err: &Error, source: &Path, coredump_path: &Path) -> Result } }; - let source_name = source.display().to_string(); let mut coredump_builder = wasm_coredump_builder::CoredumpBuilder::new().executable_name(&source_name); diff --git a/lib/cli/src/lib.rs b/lib/cli/src/lib.rs index 12c0f8f95e8..5e9d4597aad 100644 --- a/lib/cli/src/lib.rs +++ b/lib/cli/src/lib.rs @@ -27,7 +27,6 @@ pub mod package_source; pub mod store; pub mod suggestions; pub mod utils; -pub mod wasmer_home; /// Version number of this crate. pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/lib/cli/src/wasmer_home.rs b/lib/cli/src/wasmer_home.rs deleted file mode 100644 index 786b1d2343d..00000000000 --- a/lib/cli/src/wasmer_home.rs +++ /dev/null @@ -1,399 +0,0 @@ -#![allow(missing_docs)] - -use std::{ - io::Write, - path::{Path, PathBuf}, - time::{Duration, SystemTime}, -}; - -use anyhow::{Context, Error}; -use reqwest::{blocking::Client, Url}; -use tempfile::NamedTempFile; -use wasmer::{AsEngineRef, DeserializeError, Module, SerializeError}; -use wasmer_cache::Hash; -use wasmer_registry::Package; - -const DEFAULT_REGISTRY: &str = "https://wapm.io/"; -const CACHE_INVALIDATION_THRESHOLD: Duration = Duration::from_secs(5 * 60); - -/// Something which can fetch resources from the internet and will cache them -/// locally. -pub trait DownloadCached { - fn download_url(&self, url: &Url) -> Result; - fn download_package(&self, pkg: &Package) -> Result; -} - -#[derive(Debug, clap::Parser)] -pub struct WasmerHome { - /// The Wasmer home directory. - #[clap(long = "wasmer-dir", env = "WASMER_DIR")] - pub home: Option, - /// Override the registry packages are downloaded from. - #[clap(long, env = "WASMER_REGISTRY")] - registry: Option, - /// Skip all caching. - #[clap(long)] - pub disable_cache: bool, -} - -impl WasmerHome { - pub fn wasmer_home(&self) -> Result { - if let Some(wasmer_home) = &self.home { - return Ok(wasmer_home.clone()); - } - - if let Some(user_home) = dirs::home_dir() { - return Ok(user_home.join(".wasmer")); - } - - anyhow::bail!("Unable to determine the Wasmer directory"); - } - - pub fn module_cache(&self) -> ModuleCache { - if self.disable_cache { - return ModuleCache::Disabled; - }; - - self.wasmer_home() - .ok() - .and_then(|home| wasmer_cache::FileSystemCache::new(home.join("cache")).ok()) - .map(ModuleCache::Enabled) - .unwrap_or(ModuleCache::Disabled) - } -} - -impl DownloadCached for WasmerHome { - #[tracing::instrument(skip_all)] - fn download_url(&self, url: &Url) -> Result { - tracing::debug!(%url, "Downloading"); - - let home = self.wasmer_home()?; - let checkouts = wasmer_registry::get_checkouts_dir(&home); - - // This function is a bit tricky because we go to great lengths to avoid - // unnecessary downloads. - - let cache_key = Hash::generate(url.to_string().as_bytes()); - - // First, we figure out some basic information about the item - let cache_info = CacheInfo::for_url(&cache_key, &checkouts, self.disable_cache); - - // Next we check if we definitely got a cache hit - let state = match classify_cache_using_mtime(cache_info) { - Ok(path) => { - tracing::debug!(path=%path.display(), "Cache hit"); - return Ok(path); - } - Err(s) => s, - }; - - // Okay, looks like we're going to have to download the item - tracing::debug!(%url, "Sending a GET request"); - - let client = Client::new(); - - let request = client.get(url.clone()).header("Accept", "application/webc"); - - let mut response = match request.send() { - Ok(r) => r - .error_for_status() - .with_context(|| format!("The GET request to \"{url}\" was unsuccessful"))?, - Err(e) => { - // Something went wrong. If it was a connection issue and we've - // got a cached file, let's use that and emit a warning. - if e.is_connect() { - if let Some(path) = state.take_path() { - tracing::warn!( - path=%path.display(), - error=&e as &dyn std::error::Error, - "An error occurred while connecting to {}. Falling back to a cached version.", - url.host_str().unwrap_or(url.as_str()), - ); - return Ok(path); - } - } - - // Oh well, we tried. - let msg = format!("Unable to send a GET request to \"{url}\""); - return Err(Error::from(e).context(msg)); - } - }; - - tracing::debug!( - status_code=%response.status(), - url=%response.url(), - content_length=response.content_length(), - "Download started", - ); - tracing::trace!(headers=?response.headers()); - - // Now there is one last chance to avoid downloading the full file. If - // it has an ETag header, we can use that to see whether the (possibly) - // cached file is outdated. - let etag = response - .headers() - .get("Etag") - .and_then(|v| v.to_str().ok()) - .map(|etag| etag.trim().to_string()); - - if let Some(cached) = state.use_etag_to_resolve_cached_file(etag.as_deref()) { - tracing::debug!( - path=%cached.display(), - "Reusing the cached file because the ETag header is still valid", - ); - return Ok(cached); - } - - std::fs::create_dir_all(&checkouts) - .with_context(|| format!("Unable to make sure \"{}\" exists", checkouts.display()))?; - - // Note: we want to copy directly into a file so we don't hold - // everything in memory. - let (mut f, path) = if self.disable_cache { - // Leave the temporary file where it is. The OS will clean it up - // for us later, and hopefully the caller will open it before the - // temp file cleaner comes along. - let temp = NamedTempFile::new().context("Unable to create a temporary file")?; - temp.keep() - .context("Unable to persist the temporary file")? - } else { - let cached_path = checkouts.join(cache_key.to_string()); - let f = std::fs::File::create(&cached_path).with_context(|| { - format!("Unable to open \"{}\" for writing", cached_path.display()) - })?; - - (f, cached_path) - }; - - let bytes_read = std::io::copy(&mut response, &mut f) - .and_then(|bytes_read| f.flush().map(|_| bytes_read)) - .with_context(|| format!("Unable to save the response to \"{}\"", path.display()))?; - tracing::debug!(bytes_read, path=%path.display(), "Saved to disk"); - - if !self.disable_cache { - if let Some(etag) = etag { - let etag_path = path.with_extension("etag"); - tracing::debug!( - path=%etag_path.display(), - %etag, - "Saving the ETag to disk", - ); - - if let Err(e) = std::fs::write(&etag_path, etag.as_bytes()) { - tracing::warn!( - error=&e as &dyn std::error::Error, - path=%etag_path.display(), - %etag, - "Unable to save the ETag to disk", - ); - } - } - } - - Ok(path) - } - - fn download_package(&self, pkg: &Package) -> Result { - let registry = self.registry.as_deref().unwrap_or(DEFAULT_REGISTRY); - let url = package_url(registry, pkg)?; - - self.download_url(&url) - } -} - -#[derive(Debug, Clone, PartialEq)] -enum CacheInfo { - /// Caching has been disabled. - Disabled, - /// An item isn't in the cache, but could be cached later on. - Miss, - /// An item in the cache. - Hit { - path: PathBuf, - etag: Option, - last_modified: Option, - }, -} - -impl CacheInfo { - fn for_url(key: &Hash, checkout_dir: &Path, disabled: bool) -> CacheInfo { - if disabled { - return CacheInfo::Disabled; - } - - let path = checkout_dir.join(key.to_string()); - - if !path.exists() { - return CacheInfo::Miss; - } - - let etag = std::fs::read_to_string(path.with_extension("etag")).ok(); - let last_modified = path.metadata().and_then(|m| m.modified()).ok(); - - CacheInfo::Hit { - etag, - last_modified, - path, - } - } -} - -fn classify_cache_using_mtime(info: CacheInfo) -> Result { - let (path, last_modified, etag) = match info { - CacheInfo::Hit { - path, - last_modified: Some(last_modified), - etag, - .. - } => (path, last_modified, etag), - CacheInfo::Hit { - path, - last_modified: None, - etag: Some(etag), - .. - } => return Err(CacheState::PossiblyDirty { etag, path }), - CacheInfo::Hit { - etag: None, - last_modified: None, - path, - .. - } => { - return Err(CacheState::UnableToVerify { path }); - } - CacheInfo::Disabled | CacheInfo::Miss { .. } => return Err(CacheState::Miss), - }; - - if let Ok(time_since_last_modified) = last_modified.elapsed() { - if time_since_last_modified <= CACHE_INVALIDATION_THRESHOLD { - return Ok(path); - } - } - - match etag { - Some(etag) => Err(CacheState::PossiblyDirty { etag, path }), - None => Err(CacheState::UnableToVerify { path }), - } -} - -/// Classification of how valid an item is based on filesystem metadata. -#[derive(Debug)] -enum CacheState { - /// The item isn't in the cache. - Miss, - /// The cached item might be invalid, but it has an ETag we can use for - /// further validation. - PossiblyDirty { etag: String, path: PathBuf }, - /// The cached item exists on disk, but we weren't able to tell whether it is still - /// valid, and there aren't any other ways to validate it further. You can - /// probably reuse this if you are having internet issues. - UnableToVerify { path: PathBuf }, -} - -impl CacheState { - fn take_path(self) -> Option { - match self { - CacheState::PossiblyDirty { path, .. } | CacheState::UnableToVerify { path } => { - Some(path) - } - _ => None, - } - } - - fn use_etag_to_resolve_cached_file(self, new_etag: Option<&str>) -> Option { - match (new_etag, self) { - ( - Some(new_etag), - CacheState::PossiblyDirty { - etag: cached_etag, - path, - }, - ) if cached_etag == new_etag => Some(path), - _ => None, - } - } -} - -fn package_url(registry: &str, pkg: &Package) -> Result { - let registry: Url = registry - .parse() - .with_context(|| format!("Unable to parse \"{registry}\" as a URL"))?; - - let Package { - name, - namespace, - version, - } = pkg; - - let mut path = format!("{namespace}/{name}"); - if let Some(version) = version { - path.push('@'); - path.push_str(version); - } - - let url = registry - .join(&path) - .context("Unable to construct the package URL")?; - Ok(url) -} - -#[derive(Debug, Clone)] -pub enum ModuleCache { - Enabled(wasmer_cache::FileSystemCache), - Disabled, -} - -impl wasmer_cache::Cache for ModuleCache { - type SerializeError = SerializeError; - type DeserializeError = DeserializeError; - - unsafe fn load( - &self, - engine: &impl AsEngineRef, - key: Hash, - ) -> Result { - match self { - ModuleCache::Enabled(f) => f.load(engine, key), - ModuleCache::Disabled => Err(DeserializeError::Io(std::io::ErrorKind::NotFound.into())), - } - } - - fn store(&mut self, key: Hash, module: &Module) -> Result<(), Self::SerializeError> { - match self { - ModuleCache::Enabled(f) => f.store(key, module), - ModuleCache::Disabled => Ok(()), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn construct_package_urls() { - let inputs = [ - ( - "https://wapm.io/", - "syrusakbary/python", - "https://wapm.io/syrusakbary/python", - ), - ( - "https://wapm.dev", - "syrusakbary/python@1.2.3", - "https://wapm.dev/syrusakbary/python@1.2.3", - ), - ( - "https://localhost:8000/path/to/nested/dir/", - "syrusakbary/python", - "https://localhost:8000/path/to/nested/dir/syrusakbary/python", - ), - ]; - - for (registry, package, expected) in inputs { - let package: Package = package.parse().unwrap(); - - let got = package_url(registry, &package).unwrap(); - assert_eq!(got.to_string(), expected); - } - } -} diff --git a/lib/wasi/Cargo.toml b/lib/wasi/Cargo.toml index ecddf831dc4..e6159cc2aef 100644 --- a/lib/wasi/Cargo.toml +++ b/lib/wasi/Cargo.toml @@ -93,6 +93,7 @@ wasm-bindgen = ">= 0.2.74, < 0.2.85" [dev-dependencies] wasmer = { path = "../api", version = "=3.3.0", default-features = false, features = ["wat", "js-serializable-module"] } tokio = { version = "1", features = [ "sync", "macros", "rt" ], default_features = false } +pretty_assertions = "1.3.0" [target.'cfg(target_arch = "wasm32")'.dev-dependencies] wasm-bindgen-test = "0.3.0" diff --git a/lib/wasi/src/bin_factory/binary_package.rs b/lib/wasi/src/bin_factory/binary_package.rs index 8a3e05b7cc9..9df530639e4 100644 --- a/lib/wasi/src/bin_factory/binary_package.rs +++ b/lib/wasi/src/bin_factory/binary_package.rs @@ -1,26 +1,34 @@ -use std::sync::{Arc, RwLock}; +use std::sync::Arc; use derivative::*; use once_cell::sync::OnceCell; use semver::Version; use virtual_fs::FileSystem; -use webc::compat::SharedBytes; +use webc::{compat::SharedBytes, Container}; -use crate::runtime::module_cache::ModuleHash; +use crate::{ + runtime::{ + module_cache::ModuleHash, + resolver::{PackageId, PackageInfo, PackageSpecifier}, + }, + WasiRuntime, +}; #[derive(Derivative, Clone)] #[derivative(Debug)] pub struct BinaryPackageCommand { name: String, + metadata: webc::metadata::Command, #[derivative(Debug = "ignore")] pub(crate) atom: SharedBytes, hash: OnceCell, } impl BinaryPackageCommand { - pub fn new(name: String, atom: SharedBytes) -> Self { + pub fn new(name: String, metadata: webc::metadata::Command, atom: SharedBytes) -> Self { Self { name, + metadata, atom, hash: OnceCell::new(), } @@ -30,6 +38,10 @@ impl BinaryPackageCommand { &self.name } + pub fn metadata(&self) -> &webc::metadata::Command { + &self.metadata + } + /// Get a reference to this [`BinaryPackageCommand`]'s atom. /// /// The address of the returned slice is guaranteed to be stable and live as @@ -44,20 +56,17 @@ impl BinaryPackageCommand { } /// A WebAssembly package that has been loaded into memory. -/// -/// You can crate a [`BinaryPackage`] using a -/// [`crate::runtime::resolver::PackageResolver`] or -/// [`crate::wapm::parse_static_webc()`]. #[derive(Derivative, Clone)] #[derivative(Debug)] pub struct BinaryPackage { pub package_name: String, pub when_cached: Option, - #[derivative(Debug = "ignore")] - pub entry: Option, + /// The name of the [`BinaryPackageCommand`] which is this package's + /// entrypoint. + pub entrypoint_cmd: Option, pub hash: OnceCell, - pub webc_fs: Option>, - pub commands: Arc>>, + pub webc_fs: Arc, + pub commands: Vec, pub uses: Vec, pub version: Version, pub module_memory_footprint: u64, @@ -65,9 +74,64 @@ pub struct BinaryPackage { } impl BinaryPackage { + /// Load a [`webc::Container`] and all its dependencies into a + /// [`BinaryPackage`]. + pub async fn from_webc( + container: &Container, + rt: &dyn WasiRuntime, + ) -> Result { + let source = rt.source(); + let root = PackageInfo::from_manifest(container.manifest())?; + let root_id = PackageId { + package_name: root.name.clone(), + version: root.version.clone(), + }; + + let resolution = crate::runtime::resolver::resolve(&root_id, &root, &*source).await?; + let pkg = rt + .package_loader() + .load_package_tree(container, &resolution) + .await + .map_err(|e| anyhow::anyhow!(e))?; + + Ok(pkg) + } + + /// Load a [`BinaryPackage`] and all its dependencies from a registry. + pub async fn from_registry( + specifier: &PackageSpecifier, + runtime: &dyn WasiRuntime, + ) -> Result { + let source = runtime.source(); + let root_summary = source.latest(specifier).await?; + let root = runtime.package_loader().load(&root_summary).await?; + let id = root_summary.package_id(); + + let resolution = crate::runtime::resolver::resolve(&id, &root_summary.pkg, &source).await?; + let pkg = runtime + .package_loader() + .load_package_tree(&root, &resolution) + .await + .map_err(|e| anyhow::anyhow!(e))?; + + Ok(pkg) + } + + pub fn get_command(&self, name: &str) -> Option<&BinaryPackageCommand> { + self.commands.iter().find(|cmd| cmd.name() == name) + } + + /// Get the bytes for the entrypoint command. + pub fn entrypoint_bytes(&self) -> Option<&[u8]> { + self.entrypoint_cmd + .as_deref() + .and_then(|name| self.get_command(name)) + .map(|entry| entry.atom()) + } + pub fn hash(&self) -> ModuleHash { *self.hash.get_or_init(|| { - if let Some(entry) = self.entry.as_ref() { + if let Some(entry) = self.entrypoint_bytes() { ModuleHash::sha256(entry) } else { ModuleHash::sha256(self.package_name.as_bytes()) diff --git a/lib/wasi/src/bin_factory/exec.rs b/lib/wasi/src/bin_factory/exec.rs index ed6d9e1dd8a..cb8c6e6444f 100644 --- a/lib/wasi/src/bin_factory/exec.rs +++ b/lib/wasi/src/bin_factory/exec.rs @@ -28,10 +28,10 @@ pub async fn spawn_exec( let compiled_modules = runtime.module_cache(); let module = compiled_modules.load(key, store.engine()).await.ok(); - let module = match (module, binary.entry.as_ref()) { + let module = match (module, binary.entrypoint_bytes()) { (Some(a), _) => a, (None, Some(entry)) => { - let module = Module::new(&store, &entry[..]).map_err(|err| { + let module = Module::new(&store, entry).map_err(|err| { error!( "failed to compile module [{}, len={}] - {}", name, diff --git a/lib/wasi/src/bin_factory/mod.rs b/lib/wasi/src/bin_factory/mod.rs index 54dcce4a4e5..307cf86ff79 100644 --- a/lib/wasi/src/bin_factory/mod.rs +++ b/lib/wasi/src/bin_factory/mod.rs @@ -7,6 +7,7 @@ use std::{ use anyhow::Context; use virtual_fs::{AsyncReadExt, FileSystem}; +use webc::Container; mod binary_package; mod exec; @@ -71,7 +72,7 @@ impl BinFactory { // Check the filesystem for the file if name.starts_with('/') { if let Some(fs) = fs { - match load_package_from_filesystem(fs, name.as_ref()).await { + match load_package_from_filesystem(fs, name.as_ref(), self.runtime()).await { Ok(pkg) => { cache.insert(name, Some(pkg.clone())); return Some(pkg); @@ -96,6 +97,7 @@ impl BinFactory { async fn load_package_from_filesystem( fs: &dyn FileSystem, path: &Path, + rt: &dyn WasiRuntime, ) -> Result { let mut f = fs .new_open_options() @@ -105,7 +107,11 @@ async fn load_package_from_filesystem( let mut data = Vec::with_capacity(f.size() as usize); f.read_to_end(&mut data).await.context("Read failed")?; - let pkg = crate::wapm::parse_static_webc(data).context("Unable to parse the package")?; + + let container = Container::from_bytes(data).context("Unable to parse the WEBC file")?; + let pkg = BinaryPackage::from_webc(&container, rt) + .await + .context("Unable to load the package")?; Ok(pkg) } diff --git a/lib/wasi/src/fs/mod.rs b/lib/wasi/src/fs/mod.rs index 2f65502da33..f1f5e3a40a0 100644 --- a/lib/wasi/src/fs/mod.rs +++ b/lib/wasi/src/fs/mod.rs @@ -6,7 +6,7 @@ use std::{ borrow::{Borrow, Cow}, collections::{HashMap, HashSet}, ops::{Deref, DerefMut}, - path::{Path, PathBuf}, + path::{Component, Path, PathBuf}, sync::{ atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}, Arc, Mutex, RwLock, Weak, @@ -409,10 +409,7 @@ impl WasiFs { let mut guard = self.has_unioned.lock().unwrap(); if !guard.contains(&package_name) { guard.insert(package_name); - - if let Some(fs) = binary.webc_fs.clone() { - sandbox_fs.union(&fs); - } + sandbox_fs.union(&binary.webc_fs); } true } @@ -1127,17 +1124,17 @@ impl WasiFs { } } Kind::Root { entries } => { - match component.as_os_str().to_string_lossy().borrow() { + match component { // the root's parent is the root - ".." => continue 'path_iter, + Component::ParentDir => continue 'path_iter, // the root's current directory is the root - "." => continue 'path_iter, - _ => (), + Component::CurDir => continue 'path_iter, + _ => {} } - if let Some(entry) = - entries.get(component.as_os_str().to_string_lossy().as_ref()) - { + let component = component.as_os_str().to_string_lossy(); + + if let Some(entry) = entries.get(component.as_ref()) { cur_inode = entry.clone(); } else { // Root is not capable of having something other then preopenned folders diff --git a/lib/wasi/src/http/mod.rs b/lib/wasi/src/http/mod.rs index 6cd101c705a..5d6c02dba31 100644 --- a/lib/wasi/src/http/mod.rs +++ b/lib/wasi/src/http/mod.rs @@ -6,6 +6,8 @@ pub mod reqwest; pub use self::client::*; +pub(crate) const USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "-", env!("CARGO_PKG_VERSION")); + /// Try to instantiate a HTTP client that is suitable for the current platform. pub fn default_http_client() -> Option { cfg_if::cfg_if! { diff --git a/lib/wasi/src/lib.rs b/lib/wasi/src/lib.rs index 12227a40a8c..c41260f82a2 100644 --- a/lib/wasi/src/lib.rs +++ b/lib/wasi/src/lib.rs @@ -29,6 +29,10 @@ compile_error!( "The `js` feature must be enabled only for the `wasm32` target (either `wasm32-unknown-unknown` or `wasm32-wasi`)." ); +#[cfg(test)] +#[macro_use] +extern crate pretty_assertions; + #[macro_use] mod macros; pub mod bin_factory; @@ -36,6 +40,7 @@ pub mod os; // TODO: should this be pub? pub mod net; // TODO: should this be pub? +pub mod capabilities; pub mod fs; pub mod http; mod rewind; @@ -45,10 +50,6 @@ pub mod runtime; mod state; mod syscalls; mod utils; -pub mod wapm; - -pub mod capabilities; -pub use rewind::*; /// WAI based bindings. mod bindings; @@ -88,21 +89,17 @@ pub use crate::{ }, WasiTtyState, }, + rewind::*, runtime::{ task_manager::{VirtualTaskManager, VirtualTaskManagerExt}, PluggableRuntime, WasiRuntime, }, - wapm::parse_static_webc, -}; - -pub use crate::utils::is_wasix_module; - -pub use crate::{ state::{ WasiEnv, WasiEnvBuilder, WasiEnvInit, WasiFunctionEnv, WasiInstanceHandles, WasiStateCreationError, ALL_RIGHTS, }, syscalls::{rewind, rewind_ext, types, unwind}, + utils::is_wasix_module, utils::{ get_wasi_version, get_wasi_versions, is_wasi_module, store::{capture_snapshot, restore_snapshot, InstanceSnapshot}, diff --git a/lib/wasi/src/os/command/builtins/cmd_wasmer.rs b/lib/wasi/src/os/command/builtins/cmd_wasmer.rs index 532409a00be..fd7a0cbbf35 100644 --- a/lib/wasi/src/os/command/builtins/cmd_wasmer.rs +++ b/lib/wasi/src/os/command/builtins/cmd_wasmer.rs @@ -71,7 +71,7 @@ impl CmdWasmer { state.args = args; env.state = Arc::new(state); - if let Some(binary) = self.get_package(what.clone()).await { + if let Ok(binary) = self.get_package(&what).await { // Now run the module spawn_exec(binary, name, store, env, &self.runtime).await } else { @@ -93,11 +93,9 @@ impl CmdWasmer { } } - pub async fn get_package(&self, name: String) -> Option { - let resolver = self.runtime.package_resolver(); - let client = self.runtime.http_client()?; - let pkg = name.parse().ok()?; - resolver.resolve_package(&pkg, &client).await.ok() + pub async fn get_package(&self, name: &str) -> Result { + let specifier = name.parse()?; + BinaryPackage::from_registry(&specifier, &*self.runtime).await } } diff --git a/lib/wasi/src/os/console/mod.rs b/lib/wasi/src/os/console/mod.rs index a806ed3195f..1329814d6fe 100644 --- a/lib/wasi/src/os/console/mod.rs +++ b/lib/wasi/src/os/console/mod.rs @@ -26,10 +26,10 @@ use wasmer_wasix_types::{types::__WASI_STDIN_FILENO, wasi::Errno}; use super::{cconst::ConsoleConst, common::*, task::TaskJoinHandle}; use crate::{ - bin_factory::{spawn_exec, BinFactory}, + bin_factory::{spawn_exec, BinFactory, BinaryPackage}, capabilities::Capabilities, os::task::{control_plane::WasiControlPlane, process::WasiProcess}, - runtime::resolver::WebcIdentifier, + runtime::resolver::PackageSpecifier, SpawnError, VirtualTaskManagerExt, WasiEnv, WasiRuntime, }; @@ -222,35 +222,37 @@ impl Console { tasks.block_on(self.draw_welcome()); } - let webc_ident: WebcIdentifier = match webc.parse() { + let webc_ident: PackageSpecifier = match webc.parse() { Ok(ident) => ident, Err(e) => { tracing::debug!(webc, error = &*e, "Unable to parse the WEBC identifier"); return Err(SpawnError::BadRequest); } }; - let client = self.runtime.http_client().ok_or(SpawnError::UnknownError)?; - let resolved_package = tasks.block_on( - self.runtime - .package_resolver() - .resolve_package(&webc_ident, &client), - ); + let resolved_package = + tasks.block_on(BinaryPackage::from_registry(&webc_ident, env.runtime())); - let binary = if let Ok(binary) = resolved_package { - binary - } else { - let mut stderr = self.stderr.clone(); - tasks.block_on(async { - virtual_fs::AsyncWriteExt::write_all( - &mut stderr, - format!("package not found [{}]\r\n", webc).as_bytes(), - ) - .await - .ok(); - }); - tracing::debug!("failed to get webc dependency - {}", webc); - return Err(SpawnError::NotFound); + let binary = match resolved_package { + Ok(pkg) => pkg, + Err(e) => { + let mut stderr = self.stderr.clone(); + tasks.block_on(async { + let mut buffer = Vec::new(); + writeln!(buffer, "Error: {e}").ok(); + let mut source = e.source(); + while let Some(s) = source { + writeln!(buffer, " Caused by: {s}").ok(); + source = s.source(); + } + + virtual_fs::AsyncWriteExt::write_all(&mut stderr, &buffer) + .await + .ok(); + }); + tracing::debug!("failed to get webc dependency - {}", webc); + return Err(SpawnError::NotFound); + } }; let wasi_process = env.process.clone(); diff --git a/lib/wasi/src/runners/emscripten.rs b/lib/wasi/src/runners/emscripten.rs index 4a4b0f8cd9e..960308ee3fd 100644 --- a/lib/wasi/src/runners/emscripten.rs +++ b/lib/wasi/src/runners/emscripten.rs @@ -1,5 +1,7 @@ //! WebC container support for running Emscripten modules +use std::sync::Arc; + use anyhow::{anyhow, Context, Error}; use serde::{Deserialize, Serialize}; use wasmer::{FunctionEnv, Instance, Module, Store}; @@ -7,25 +9,19 @@ use wasmer_emscripten::{ generate_emscripten_env, is_emscripten_module, run_emscripten_instance, EmEnv, EmscriptenGlobals, }; -use webc::{ - metadata::{annotations::Emscripten, Command}, - Container, -}; +use webc::metadata::{annotations::Emscripten, Command}; + +use crate::{bin_factory::BinaryPackage, WasiRuntime}; -#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct EmscriptenRunner { args: Vec, - #[serde(skip, default)] - store: Store, } impl EmscriptenRunner { /// Constructs a new `EmscriptenRunner` given an `Store` - pub fn new(store: Store) -> Self { - Self { - args: Vec::new(), - store, - } + pub fn new() -> Self { + EmscriptenRunner::default() } /// Returns the current arguments for this `EmscriptenRunner` @@ -46,9 +42,7 @@ impl EmscriptenRunner { } impl crate::runners::Runner for EmscriptenRunner { - type Output = (); - - fn can_run_command(&self, _: &str, command: &Command) -> Result { + fn can_run_command(command: &Command) -> Result { Ok(command .runner .starts_with(webc::metadata::annotations::EMSCRIPTEN_RUNNER_URI)) @@ -58,31 +52,27 @@ impl crate::runners::Runner for EmscriptenRunner { fn run_command( &mut self, command_name: &str, - command: &Command, - container: &Container, - ) -> Result { - let Emscripten { - atom: atom_name, - main_args, - .. - } = command.annotation("emscripten")?.unwrap_or_default(); - let atom_name = atom_name.context("The atom name is required")?; - let atoms = container.atoms(); - let atom_bytes = atoms - .get(&atom_name) - .with_context(|| format!("Unable to read the \"{atom_name}\" atom"))?; - - let mut module = Module::new(&self.store, atom_bytes)?; - module.set_name(&atom_name); - - let (mut globals, env) = prepare_emscripten_env(&mut self.store, &module, &atom_name)?; + pkg: &BinaryPackage, + runtime: Arc, + ) -> Result<(), Error> { + let cmd = pkg + .get_command(command_name) + .with_context(|| format!("The package doesn't contain a \"{command_name}\" command"))?; + let Emscripten { main_args, .. } = + cmd.metadata().annotation("emscripten")?.unwrap_or_default(); + + let mut module = crate::runners::compile_module(cmd.atom(), &*runtime)?; + module.set_name(command_name); + + let mut store = runtime.new_store(); + let (mut globals, env) = prepare_emscripten_env(&mut store, &module, command_name)?; exec_module( - &mut self.store, + &mut store, &module, &mut globals, env, - &atom_name, + command_name, main_args.unwrap_or_default(), )?; diff --git a/lib/wasi/src/runners/mod.rs b/lib/wasi/src/runners/mod.rs index 3aca293256e..e83b9d91902 100644 --- a/lib/wasi/src/runners/mod.rs +++ b/lib/wasi/src/runners/mod.rs @@ -11,18 +11,56 @@ pub mod wcgi; pub use self::runner::Runner; -use anyhow::Error; -use wasmer::{Engine, Module}; +use anyhow::{Context, Error}; +use wasmer::Module; -pub type CompileModule = dyn Fn(&Engine, &[u8]) -> Result + Send + Sync; +use crate::runtime::{ + module_cache::{CacheError, ModuleHash}, + WasiRuntime, +}; +/// A directory that should be mapped from the host filesystem into a WASI +/// instance (the "guest"). #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct MappedDirectory { pub host: std::path::PathBuf, pub guest: String, } -pub(crate) fn default_compile(engine: &Engine, wasm: &[u8]) -> Result { - let module = Module::new(engine, wasm)?; +/// Compile a module, trying to use a pre-compiled version if possible. +pub(crate) fn compile_module(wasm: &[u8], runtime: &dyn WasiRuntime) -> Result { + // TODO(Michael-F-Bryan,theduke): This should be abstracted out into some + // sort of ModuleResolver component that is attached to the runtime and + // encapsulates finding a WebAssembly binary, compiling it, and caching. + + let engine = runtime.engine().context("No engine provided")?; + let task_manager = runtime.task_manager().clone(); + let module_cache = runtime.module_cache(); + + let hash = ModuleHash::sha256(wasm); + let result = task_manager.block_on(module_cache.load(hash, &engine)); + + match result { + Ok(module) => return Ok(module), + Err(CacheError::NotFound) => {} + Err(other) => { + tracing::warn!( + %hash, + error=&other as &dyn std::error::Error, + "Unable to load the cached module", + ); + } + } + + let module = Module::new(&engine, wasm)?; + + if let Err(e) = task_manager.block_on(module_cache.save(hash, &engine, &module)) { + tracing::warn!( + %hash, + error=&e as &dyn std::error::Error, + "Unable to cache the compiled module", + ); + } + Ok(module) } diff --git a/lib/wasi/src/runners/runner.rs b/lib/wasi/src/runners/runner.rs index 44248a8fdf3..04e5905fd6b 100644 --- a/lib/wasi/src/runners/runner.rs +++ b/lib/wasi/src/runners/runner.rs @@ -1,61 +1,22 @@ +use std::sync::Arc; + use anyhow::Error; -use webc::{metadata::Command, Container}; +use webc::metadata::Command; + +use crate::{bin_factory::BinaryPackage, WasiRuntime}; /// Trait that all runners have to implement pub trait Runner { - /// The return value of the output of the runner - type Output; - /// Returns whether the Runner will be able to run the `Command` - fn can_run_command(&self, command_name: &str, command: &Command) -> Result; + fn can_run_command(command: &Command) -> Result + where + Self: Sized; - /// Implementation to run the given command - /// - /// - use `cmd.annotations` to get the metadata for the given command - /// - use `container.get_atom()` to get the + /// Run a command. fn run_command( &mut self, command_name: &str, - cmd: &Command, - container: &Container, - ) -> Result; - - /// Runs the container if the container has an `entrypoint` in the manifest - fn run(&mut self, container: &Container) -> Result { - let cmd = match container.manifest().entrypoint.as_ref() { - Some(s) => s, - None => { - anyhow::bail!("Cannot run the package: not executable (no entrypoint in manifest)"); - } - }; - - self.run_cmd(container, cmd) - } - - /// Runs the given `cmd` on the container - fn run_cmd(&mut self, container: &Container, cmd: &str) -> Result { - let command_to_exec = container - .manifest() - .commands - .get(cmd) - .ok_or_else(|| anyhow::anyhow!("command {cmd:?} not found in manifest"))?; - - match self.can_run_command(cmd, command_to_exec) { - Ok(true) => {} - Ok(false) => { - anyhow::bail!( - "Cannot run command {cmd:?} with runner {:?}", - command_to_exec.runner - ); - } - Err(e) => { - anyhow::bail!( - "Cannot run command {cmd:?} with runner {:?}: {e}", - command_to_exec.runner - ); - } - } - - self.run_command(cmd, command_to_exec, container) - } + pkg: &BinaryPackage, + runtime: Arc, + ) -> Result<(), Error>; } diff --git a/lib/wasi/src/runners/wasi.rs b/lib/wasi/src/runners/wasi.rs index 0c4d08f70d4..1e330821c5f 100644 --- a/lib/wasi/src/runners/wasi.rs +++ b/lib/wasi/src/runners/wasi.rs @@ -3,48 +3,23 @@ use std::sync::Arc; use anyhow::{Context, Error}; -use serde::{Deserialize, Serialize}; -use virtual_fs::WebcVolumeFileSystem; -use wasmer::{Engine, Module, Store}; -use webc::{ - metadata::{annotations::Wasi, Command}, - Container, -}; +use webc::metadata::{annotations::Wasi, Command}; use crate::{ - runners::{wasi_common::CommonWasiOptions, CompileModule, MappedDirectory}, - PluggableRuntime, VirtualTaskManager, WasiEnvBuilder, + bin_factory::BinaryPackage, + runners::{wasi_common::CommonWasiOptions, MappedDirectory}, + WasiEnvBuilder, WasiRuntime, }; -#[derive(Serialize, Deserialize)] +#[derive(Debug, Default, Clone)] pub struct WasiRunner { wasi: CommonWasiOptions, - #[serde(skip, default)] - store: Store, - #[serde(skip, default)] - pub(crate) tasks: Option>, - #[serde(skip, default)] - compile: Option>, } impl WasiRunner { - /// Constructs a new `WasiRunner` given an `Store` - pub fn new(store: Store) -> Self { - Self { - store, - wasi: CommonWasiOptions::default(), - tasks: None, - compile: None, - } - } - - /// Sets the compile function - pub fn with_compile( - mut self, - compile: impl Fn(&Engine, &[u8]) -> Result + Send + Sync + 'static, - ) -> Self { - self.compile = Some(Box::new(compile)); - self + /// Constructs a new `WasiRunner`. + pub fn new() -> Self { + WasiRunner::default() } /// Returns the current arguments for this `WasiRunner` @@ -123,69 +98,82 @@ impl WasiRunner { self } - pub fn with_task_manager(mut self, tasks: impl VirtualTaskManager) -> Self { - self.set_task_manager(tasks); + /// Add a package that should be available to the instance at runtime. + pub fn add_injected_package(&mut self, pkg: BinaryPackage) -> &mut Self { + self.wasi.injected_packages.push(pkg); self } - pub fn set_task_manager(&mut self, tasks: impl VirtualTaskManager) { - self.tasks = Some(Arc::new(tasks)); + /// Add a package that should be available to the instance at runtime. + pub fn with_injected_package(mut self, pkg: BinaryPackage) -> Self { + self.add_injected_package(pkg); + self + } + + /// Add packages that should be available to the instance at runtime. + pub fn add_injected_packages( + &mut self, + packages: impl IntoIterator, + ) -> &mut Self { + self.wasi.injected_packages.extend(packages); + self + } + + /// Add packages that should be available to the instance at runtime. + pub fn with_injected_packages( + mut self, + packages: impl IntoIterator, + ) -> Self { + self.add_injected_packages(packages); + self } fn prepare_webc_env( &self, - container: &Container, program_name: &str, wasi: &Wasi, + pkg: &BinaryPackage, + runtime: Arc, ) -> Result { let mut builder = WasiEnvBuilder::new(program_name); - let container_fs = Arc::new(WebcVolumeFileSystem::mount_all(container)); + let container_fs = Arc::clone(&pkg.webc_fs); self.wasi .prepare_webc_env(&mut builder, container_fs, wasi)?; - if let Some(tasks) = &self.tasks { - let rt = PluggableRuntime::new(Arc::clone(tasks)); - builder.set_runtime(Arc::new(rt)); - } + builder.add_webc(pkg.clone()); + builder.set_runtime(runtime); Ok(builder) } } impl crate::runners::Runner for WasiRunner { - type Output = (); - - fn can_run_command(&self, _command_name: &str, command: &Command) -> Result { + fn can_run_command(command: &Command) -> Result { Ok(command .runner .starts_with(webc::metadata::annotations::WASI_RUNNER_URI)) } - #[tracing::instrument(skip(self, command, container))] + #[tracing::instrument(skip_all)] fn run_command( &mut self, command_name: &str, - command: &Command, - container: &Container, - ) -> Result { - let wasi = command + pkg: &BinaryPackage, + runtime: Arc, + ) -> Result<(), Error> { + let cmd = pkg + .get_command(command_name) + .with_context(|| format!("The package doesn't contain a \"{command_name}\" command"))?; + let wasi = cmd + .metadata() .annotation("wasi")? .unwrap_or_else(|| Wasi::new(command_name)); - let atom_name = &wasi.atom; - let atoms = container.atoms(); - let atom = atoms - .get(atom_name) - .with_context(|| format!("Unable to get the \"{atom_name}\" atom"))?; - - let compile = self - .compile - .as_deref() - .unwrap_or(&crate::runners::default_compile); - let mut module = compile(self.store.engine(), atom)?; - module.set_name(atom_name); - - self.prepare_webc_env(container, atom_name, &wasi)? - .run(module)?; + + let module = crate::runners::compile_module(cmd.atom(), &*runtime)?; + let mut store = runtime.new_store(); + + self.prepare_webc_env(command_name, &wasi, pkg, runtime)? + .run_with_store(module, &mut store)?; Ok(()) } diff --git a/lib/wasi/src/runners/wasi_common.rs b/lib/wasi/src/runners/wasi_common.rs index c41696f233e..f644d24b4eb 100644 --- a/lib/wasi/src/runners/wasi_common.rs +++ b/lib/wasi/src/runners/wasi_common.rs @@ -8,21 +8,22 @@ use anyhow::{Context, Error}; use virtual_fs::{FileSystem, FsError, OverlayFileSystem, RootFileSystemBuilder}; use webc::metadata::annotations::Wasi as WasiAnnotation; -use crate::{runners::MappedDirectory, WasiEnvBuilder}; +use crate::{bin_factory::BinaryPackage, runners::MappedDirectory, WasiEnvBuilder}; -#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Default, Clone)] pub(crate) struct CommonWasiOptions { pub(crate) args: Vec, pub(crate) env: HashMap, pub(crate) forward_host_env: bool, pub(crate) mapped_dirs: Vec, + pub(crate) injected_packages: Vec, } impl CommonWasiOptions { pub(crate) fn prepare_webc_env( &self, builder: &mut WasiEnvBuilder, - container_fs: Arc, + container_fs: Arc, wasi: &WasiAnnotation, ) -> Result<(), anyhow::Error> { let fs = prepare_filesystem(&self.mapped_dirs, container_fs, |path| { @@ -37,6 +38,10 @@ impl CommonWasiOptions { builder.set_fs(fs); + for pkg in &self.injected_packages { + builder.add_webc(pkg.clone()); + } + self.populate_env(wasi, builder); self.populate_args(wasi, builder); diff --git a/lib/wasi/src/runners/wcgi/handler.rs b/lib/wasi/src/runners/wcgi/handler.rs index 0957a086f9a..55d4a9ecfc3 100644 --- a/lib/wasi/src/runners/wcgi/handler.rs +++ b/lib/wasi/src/runners/wcgi/handler.rs @@ -14,7 +14,7 @@ use wcgi_host::CgiDialect; use crate::{ capabilities::Capabilities, http::HttpClientCapabilityV1, runners::wcgi::Callbacks, Pipe, - PluggableRuntime, VirtualTaskManager, WasiEnvBuilder, + VirtualTaskManager, WasiEnvBuilder, WasiRuntime, }; /// The shared object that manages the instantiaion of WASI executables and @@ -51,8 +51,6 @@ impl Handler { .prepare_environment_variables(parts, &mut request_specific_env); builder.add_envs(request_specific_env); - let rt = PluggableRuntime::new(Arc::clone(&self.task_manager)); - let builder = builder .stdin(Box::new(req_body_receiver)) .stdout(Box::new(res_body_sender)) @@ -61,8 +59,7 @@ impl Handler { insecure_allow_all: true, http_client: HttpClientCapabilityV1::new_allow_all(), threading: Default::default(), - }) - .runtime(Arc::new(rt)); + }); let module = self.module.clone(); @@ -71,14 +68,16 @@ impl Handler { "Calling into the WCGI executable", ); - let done = self - .task_manager + let task_manager = self.runtime.task_manager(); + let mut store = self.runtime.new_store(); + + let done = task_manager .runtime() - .spawn_blocking(move || builder.run(module)) + .spawn_blocking(move || builder.run_with_store(module, &mut store)) .map_err(Error::from) .and_then(|r| async { r.map_err(Error::from) }); - let handle = self.task_manager.runtime().clone(); + let handle = task_manager.runtime().clone(); let callbacks = Arc::clone(&self.callbacks); handle.spawn( @@ -88,7 +87,7 @@ impl Handler { .in_current_span(), ); - self.task_manager.runtime().spawn( + task_manager.runtime().spawn( async move { if let Err(e) = drive_request_to_completion(&handle, done, body, req_body_sender).await @@ -220,7 +219,7 @@ pub(crate) struct SharedState { #[derivative(Debug = "ignore")] pub(crate) callbacks: Arc, #[derivative(Debug = "ignore")] - pub(crate) task_manager: Arc, + pub(crate) runtime: Arc, } impl Service> for Handler { diff --git a/lib/wasi/src/runners/wcgi/runner.rs b/lib/wasi/src/runners/wcgi/runner.rs index b75c733946b..5a610ae001c 100644 --- a/lib/wasi/src/runners/wcgi/runner.rs +++ b/lib/wasi/src/runners/wcgi/runner.rs @@ -7,57 +7,97 @@ use hyper::Body; use tower::{make::Shared, ServiceBuilder}; use tower_http::{catch_panic::CatchPanicLayer, cors::CorsLayer, trace::TraceLayer}; use tracing::Span; -use virtual_fs::{FileSystem, WebcVolumeFileSystem}; -use wasmer::{Engine, Module, Store}; use wcgi_host::CgiDialect; -use webc::{ - compat::SharedBytes, - metadata::{ - annotations::{Wasi, Wcgi}, - Command, Manifest, - }, - Container, +use webc::metadata::{ + annotations::{Wasi, Wcgi}, + Command, }; use crate::{ + bin_factory::BinaryPackage, runners::{ wasi_common::CommonWasiOptions, wcgi::handler::{Handler, SharedState}, - CompileModule, MappedDirectory, + MappedDirectory, }, - runtime::task_manager::tokio::TokioTaskManager, - PluggableRuntime, VirtualTaskManager, WasiEnvBuilder, + WasiEnvBuilder, WasiRuntime, }; +#[derive(Debug, Default)] pub struct WcgiRunner { - program_name: String, config: Config, - compile: Option>, } -// TODO(Michael-F-Bryan): When we rewrite the existing runner infrastructure, -// make the "Runner" trait contain just these two methods. impl WcgiRunner { - fn supports(cmd: &Command) -> Result { - Ok(cmd - .runner - .starts_with(webc::metadata::annotations::WCGI_RUNNER_URI)) + pub fn new() -> Self { + WcgiRunner::default() + } + + pub fn config(&mut self) -> &mut Config { + &mut self.config } - #[tracing::instrument(skip(self, ctx))] - fn run(&mut self, command_name: &str, ctx: &RunnerContext<'_>) -> Result<(), Error> { - let wasi: Wasi = ctx - .command() - .annotation("wasi") - .context("Unable to retrieve the WASI metadata")? + #[tracing::instrument(skip_all)] + fn prepare_handler( + &mut self, + command_name: &str, + pkg: &BinaryPackage, + runtime: Arc, + ) -> Result { + let cmd = pkg + .get_command(command_name) + .with_context(|| format!("The package doesn't contain a \"{command_name}\" command"))?; + let metadata = cmd.metadata(); + let wasi = metadata + .annotation("wasi")? .unwrap_or_else(|| Wasi::new(command_name)); - let module = self - .load_module(&wasi, ctx) - .context("Couldn't load the module")?; + let module = crate::runners::compile_module(cmd.atom(), &*runtime)?; + + let Wcgi { dialect, .. } = metadata.annotation("wcgi")?.unwrap_or_default(); + let dialect = match dialect { + Some(d) => d.parse().context("Unable to parse the CGI dialect")?, + None => CgiDialect::Wcgi, + }; + + let container_fs = Arc::clone(&pkg.webc_fs); + + let wasi_common = self.config.wasi.clone(); + let rt = Arc::clone(&runtime); + let setup_builder = move |builder: &mut WasiEnvBuilder| { + wasi_common.prepare_webc_env(builder, Arc::clone(&container_fs), &wasi)?; + builder.set_runtime(Arc::clone(&rt)); + + Ok(()) + }; + + let shared = SharedState { + module, + dialect, + program_name: command_name.to_string(), + setup_builder: Box::new(setup_builder), + callbacks: Arc::clone(&self.config.callbacks), + runtime, + }; + + Ok(Handler::new(shared)) + } +} + +impl crate::runners::Runner for WcgiRunner { + fn can_run_command(command: &Command) -> Result { + Ok(command + .runner + .starts_with(webc::metadata::annotations::WCGI_RUNNER_URI)) + } - let handler = self.create_handler(module, &wasi, ctx)?; - let task_manager = Arc::clone(&handler.task_manager); + fn run_command( + &mut self, + command_name: &str, + pkg: &BinaryPackage, + runtime: Arc, + ) -> Result<(), Error> { + let handler = self.prepare_handler(command_name, pkg, Arc::clone(&runtime))?; let callbacks = Arc::clone(&self.config.callbacks); let service = ServiceBuilder::new() @@ -83,7 +123,8 @@ impl WcgiRunner { let address = self.config.addr; tracing::info!(%address, "Starting the server"); - task_manager + runtime + .task_manager() .block_on(async { let (shutdown, abort_handle) = futures::future::abortable(futures::future::pending::<()>()); @@ -104,176 +145,16 @@ impl WcgiRunner { } } -impl WcgiRunner { - pub fn new(program_name: impl Into) -> Self { - WcgiRunner { - program_name: program_name.into(), - config: Config::default(), - compile: None, - } - } - - pub fn config(&mut self) -> &mut Config { - &mut self.config - } - - /// Sets the compile function - pub fn with_compile( - mut self, - compile: impl Fn(&Engine, &[u8]) -> Result + Send + Sync + 'static, - ) -> Self { - self.compile = Some(Arc::new(compile)); - self - } - - fn load_module(&mut self, wasi: &Wasi, ctx: &RunnerContext<'_>) -> Result { - let atom_name = &wasi.atom; - let atom = ctx - .get_atom(atom_name) - .with_context(|| format!("Unable to retrieve the \"{atom_name}\" atom"))?; - - let module = ctx.compile(&atom).context("Unable to compile the atom")?; - - Ok(module) - } - - fn create_handler( - &self, - module: Module, - wasi: &Wasi, - ctx: &RunnerContext<'_>, - ) -> Result { - let Wcgi { dialect, .. } = ctx.command().annotation("wcgi")?.unwrap_or_default(); - - let dialect = match dialect { - Some(d) => d.parse().context("Unable to parse the CGI dialect")?, - None => CgiDialect::Wcgi, - }; - - let shared = SharedState { - module, - dialect, - program_name: self.program_name.clone(), - setup_builder: Box::new(self.setup_builder(ctx, wasi)), - callbacks: Arc::clone(&self.config.callbacks), - task_manager: self - .config - .task_manager - .clone() - .unwrap_or_else(|| Arc::new(TokioTaskManager::default())), - }; - - Ok(Handler::new(shared)) - } - - fn setup_builder( - &self, - ctx: &RunnerContext<'_>, - wasi: &Wasi, - ) -> impl Fn(&mut WasiEnvBuilder) -> Result<(), Error> + Send + Sync { - let container_fs = ctx.container_fs(); - let wasi_common = self.config.wasi.clone(); - let wasi = wasi.clone(); - let tasks = self.config.task_manager.clone(); - - move |builder| { - wasi_common.prepare_webc_env(builder, Arc::clone(&container_fs), &wasi)?; - - if let Some(tasks) = &tasks { - let rt = PluggableRuntime::new(Arc::clone(tasks)); - builder.set_runtime(Arc::new(rt)); - } - - Ok(()) - } - } -} - -// TODO(Michael-F-Bryan): Pass this to Runner::run() as a "&dyn RunnerContext" -// when we rewrite the "Runner" trait. -struct RunnerContext<'a> { - container: &'a Container, - command: &'a Command, - compile: Option>, - engine: Engine, - store: Arc, -} - -#[allow(dead_code)] -impl RunnerContext<'_> { - fn command(&self) -> &Command { - self.command - } - - fn manifest(&self) -> &Manifest { - self.container.manifest() - } - - fn store(&self) -> &Store { - &self.store - } - - fn get_atom(&self, name: &str) -> Option { - self.container.atoms().remove(name) - } - - fn container_fs(&self) -> Arc { - Arc::new(WebcVolumeFileSystem::mount_all(self.container)) - } - - fn compile(&self, wasm: &[u8]) -> Result { - let compile = self - .compile - .as_deref() - .unwrap_or(&crate::runners::default_compile); - compile(&self.engine, wasm) - } -} - -impl crate::runners::Runner for WcgiRunner { - type Output = (); - - fn can_run_command(&self, _: &str, command: &Command) -> Result { - WcgiRunner::supports(command) - } - - fn run_command( - &mut self, - command_name: &str, - command: &Command, - container: &Container, - ) -> Result { - let store = self.config.store.clone().unwrap_or_default(); - - let ctx = RunnerContext { - container, - command, - engine: store.engine().clone(), - store, - compile: self.compile.clone(), - }; - - WcgiRunner::run(self, command_name, &ctx) - } -} - #[derive(derivative::Derivative)] #[derivative(Debug)] pub struct Config { - task_manager: Option>, wasi: CommonWasiOptions, addr: SocketAddr, #[derivative(Debug = "ignore")] callbacks: Arc, - store: Option>, } impl Config { - pub fn task_manager(&mut self, task_manager: impl VirtualTaskManager) -> &mut Self { - self.task_manager = Some(Arc::new(task_manager)); - self - } - pub fn addr(&mut self, addr: SocketAddr) -> &mut Self { self.addr = addr; self @@ -340,8 +221,18 @@ impl Config { self } - pub fn store(&mut self, store: Store) -> &mut Self { - self.store = Some(Arc::new(store)); + /// Add a package that should be available to the instance at runtime. + pub fn inject_package(&mut self, pkg: BinaryPackage) -> &mut Self { + self.wasi.injected_packages.push(pkg); + self + } + + /// Add packages that should be available to the instance at runtime. + pub fn inject_packages( + &mut self, + packages: impl IntoIterator, + ) -> &mut Self { + self.wasi.injected_packages.extend(packages); self } } @@ -349,11 +240,9 @@ impl Config { impl Default for Config { fn default() -> Self { Self { - task_manager: None, addr: ([127, 0, 0, 1], 8000).into(), wasi: CommonWasiOptions::default(), callbacks: Arc::new(NoopCallbacks), - store: None, } } } diff --git a/lib/wasi/src/runtime/mod.rs b/lib/wasi/src/runtime/mod.rs index f1ddb77cca9..b8fa9726496 100644 --- a/lib/wasi/src/runtime/mod.rs +++ b/lib/wasi/src/runtime/mod.rs @@ -1,9 +1,8 @@ pub mod module_cache; +pub mod package_loader; pub mod resolver; pub mod task_manager; -use crate::{http::DynHttpClient, os::TtyBridge, WasiTtyState}; - pub use self::task_manager::{SpawnMemoryType, VirtualTaskManager}; use std::{ @@ -14,9 +13,15 @@ use std::{ use derivative::Derivative; use virtual_net::{DynVirtualNetworking, VirtualNetworking}; -use crate::runtime::{ - module_cache::ModuleCache, - resolver::{PackageResolver, RegistryResolver}, +use crate::{ + http::DynHttpClient, + os::TtyBridge, + runtime::{ + module_cache::ModuleCache, + package_loader::{BuiltinPackageLoader, PackageLoader}, + resolver::{MultiSource, Source, WapmSource}, + }, + WasiTtyState, }; /// Represents an implementation of the WASI runtime - by default everything is @@ -24,7 +29,7 @@ use crate::runtime::{ #[allow(unused_variables)] pub trait WasiRuntime where - Self: fmt::Debug + Sync, + Self: fmt::Debug, { /// Provides access to all the networking related functions such as sockets. /// By default networking is not implemented. @@ -33,11 +38,15 @@ where /// Retrieve the active [`VirtualTaskManager`]. fn task_manager(&self) -> &Arc; - fn package_resolver(&self) -> Arc; + /// A package loader. + fn package_loader(&self) -> Arc; /// A cache for compiled modules. fn module_cache(&self) -> Arc; + /// The package registry. + fn source(&self) -> Arc; + /// Get a [`wasmer::Engine`] for module compilation. fn engine(&self) -> Option { None @@ -99,7 +108,8 @@ pub struct PluggableRuntime { pub rt: Arc, pub networking: DynVirtualNetworking, pub http_client: Option, - pub resolver: Arc, + pub package_loader: Arc, + pub source: Arc, pub engine: Option, pub module_cache: Arc, #[derivative(Debug = "ignore")] @@ -119,8 +129,16 @@ impl PluggableRuntime { let http_client = crate::http::default_http_client().map(|client| Arc::new(client) as DynHttpClient); - let resolver = - RegistryResolver::from_env().expect("Loading the builtin resolver should never fail"); + let loader = BuiltinPackageLoader::from_env() + .expect("Loading the builtin resolver should never fail"); + + let mut source = MultiSource::new(); + if let Some(client) = &http_client { + source.add_source(WapmSource::new( + WapmSource::WAPM_PROD_ENDPOINT.parse().unwrap(), + client.clone(), + )); + } Self { rt, @@ -128,7 +146,8 @@ impl PluggableRuntime { http_client, engine: None, tty: None, - resolver: Arc::new(resolver), + source: Arc::new(source), + package_loader: Arc::new(loader), module_cache: Arc::new(module_cache::in_memory()), } } @@ -151,19 +170,24 @@ impl PluggableRuntime { self } - pub fn set_module_cache(&mut self, module_cache: M) -> &mut Self - where - M: ModuleCache + Send + Sync + 'static, - { + pub fn set_module_cache( + &mut self, + module_cache: impl ModuleCache + Send + Sync + 'static, + ) -> &mut Self { self.module_cache = Arc::new(module_cache); self } - pub fn set_resolver( + pub fn set_source(&mut self, source: impl Source + Send + Sync + 'static) -> &mut Self { + self.source = Arc::new(source); + self + } + + pub fn set_package_loader( &mut self, - resolver: impl PackageResolver + Send + Sync + 'static, + package_loader: impl PackageLoader + Send + Sync + 'static, ) -> &mut Self { - self.resolver = Arc::new(resolver); + self.package_loader = Arc::new(package_loader); self } } @@ -177,8 +201,16 @@ impl WasiRuntime for PluggableRuntime { self.http_client.as_ref() } - fn package_resolver(&self) -> Arc { - Arc::clone(&self.resolver) + fn package_loader(&self) -> Arc { + Arc::clone(&self.package_loader) + } + + fn source(&self) -> Arc { + Arc::clone(&self.source) + } + + fn engine(&self) -> Option { + self.engine.clone() } fn new_store(&self) -> wasmer::Store { diff --git a/lib/wasi/src/runtime/module_cache/and_then.rs b/lib/wasi/src/runtime/module_cache/fallback.rs similarity index 60% rename from lib/wasi/src/runtime/module_cache/and_then.rs rename to lib/wasi/src/runtime/module_cache/fallback.rs index e6a9669192e..3c1ffa28fa3 100644 --- a/lib/wasi/src/runtime/module_cache/and_then.rs +++ b/lib/wasi/src/runtime/module_cache/fallback.rs @@ -2,19 +2,36 @@ use wasmer::{Engine, Module}; use crate::runtime::module_cache::{CacheError, ModuleCache, ModuleHash}; -/// A [`ModuleCache`] combinator which will try operations on one cache -/// and fall back to a secondary cache if they fail. +/// [`FallbackCache`] is a combinator for the [`ModuleCache`] trait that enables +/// the chaining of two caching strategies together, typically via +/// [`ModuleCache::with_fallback()`]. /// -/// Constructed via [`ModuleCache::and_then()`]. +/// All operations are attempted using primary cache first, and if that fails, +/// falls back to using the fallback cache. By chaining different caches +/// together with [`FallbackCache`], you can create a caching hierarchy tailored +/// to your application's specific needs, balancing performance, resource usage, +/// and persistence. +/// +/// A key assumption of [`FallbackCache`] is that **all operations on the +/// fallback implementation will be significantly slower than the primary one**. +/// +/// ## Cache Promotion +/// +/// Whenever there is a cache miss on the primary cache and the fallback is +/// able to load a module, that module is automatically added to the primary +/// cache to improve the speed of future lookups. +/// +/// This "cache promotion" strategy helps keep frequently accessed modules in +/// the faster primary cache. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct AndThen { +pub struct FallbackCache { primary: Primary, - secondary: Secondary, + fallback: Fallback, } -impl AndThen { - pub(crate) fn new(primary: Primary, secondary: Secondary) -> Self { - AndThen { primary, secondary } +impl FallbackCache { + pub(crate) fn new(primary: Primary, fallback: Fallback) -> Self { + FallbackCache { primary, fallback } } pub fn primary(&self) -> &Primary { @@ -25,25 +42,25 @@ impl AndThen { &mut self.primary } - pub fn secondary(&self) -> &Secondary { - &self.secondary + pub fn fallback(&self) -> &Fallback { + &self.fallback } - pub fn secondary_mut(&mut self) -> &mut Secondary { - &mut self.secondary + pub fn fallback_mut(&mut self) -> &mut Fallback { + &mut self.fallback } - pub fn into_inner(self) -> (Primary, Secondary) { - let AndThen { primary, secondary } = self; - (primary, secondary) + pub fn into_inner(self) -> (Primary, Fallback) { + let FallbackCache { primary, fallback } = self; + (primary, fallback) } } #[async_trait::async_trait] -impl ModuleCache for AndThen +impl ModuleCache for FallbackCache where Primary: ModuleCache + Send + Sync, - Secondary: ModuleCache + Send + Sync, + Fallback: ModuleCache + Send + Sync, { async fn load(&self, key: ModuleHash, engine: &Engine) -> Result { let primary_error = match self.primary.load(key, engine).await { @@ -51,14 +68,14 @@ where Err(e) => e, }; - if let Ok(m) = self.secondary.load(key, engine).await { - // Now we've got a module, let's make sure it ends up in the primary - // cache too. + if let Ok(m) = self.fallback.load(key, engine).await { + // Now we've got a module, let's make sure it is promoted to the + // primary cache. if let Err(e) = self.primary.save(key, engine, &m).await { tracing::warn!( %key, error = &e as &dyn std::error::Error, - "Unable to save a module to the primary cache", + "Unable to promote a module to the primary cache", ); } @@ -76,7 +93,7 @@ where ) -> Result<(), CacheError> { futures::try_join!( self.primary.save(key, engine, module), - self.secondary.save(key, engine, module) + self.fallback.save(key, engine, module) )?; Ok(()) } @@ -162,13 +179,13 @@ mod tests { async fn load_from_primary() { let engine = Engine::default(); let module = Module::new(&engine, ADD_WAT).unwrap(); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); let primary = SharedCache::default(); - let secondary = SharedCache::default(); + let fallback = SharedCache::default(); primary.save(key, &engine, &module).await.unwrap(); let primary = Spy::new(primary); - let secondary = Spy::new(secondary); - let cache = AndThen::new(&primary, &secondary); + let fallback = Spy::new(fallback); + let cache = FallbackCache::new(&primary, &fallback); let got = cache.load(key, &engine).await.unwrap(); @@ -176,32 +193,32 @@ mod tests { assert_eq!(module, got); assert_eq!(primary.success(), 1); assert_eq!(primary.failures(), 0); - // but the secondary wasn't touched at all - assert_eq!(secondary.success(), 0); - assert_eq!(secondary.failures(), 0); - // And the secondary still doesn't have our module - assert!(secondary.load(key, &engine).await.is_err()); + // but the fallback wasn't touched at all + assert_eq!(fallback.success(), 0); + assert_eq!(fallback.failures(), 0); + // And the fallback still doesn't have our module + assert!(fallback.load(key, &engine).await.is_err()); } #[tokio::test] - async fn loading_from_secondary_also_populates_primary() { + async fn loading_from_fallback_also_populates_primary() { let engine = Engine::default(); let module = Module::new(&engine, ADD_WAT).unwrap(); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); let primary = SharedCache::default(); - let secondary = SharedCache::default(); - secondary.save(key, &engine, &module).await.unwrap(); + let fallback = SharedCache::default(); + fallback.save(key, &engine, &module).await.unwrap(); let primary = Spy::new(primary); - let secondary = Spy::new(secondary); - let cache = AndThen::new(&primary, &secondary); + let fallback = Spy::new(fallback); + let cache = FallbackCache::new(&primary, &fallback); let got = cache.load(key, &engine).await.unwrap(); // We should have received the same module assert_eq!(module, got); - // We got a hit on the secondary - assert_eq!(secondary.success(), 1); - assert_eq!(secondary.failures(), 0); + // We got a hit on the fallback + assert_eq!(fallback.success(), 1); + assert_eq!(fallback.failures(), 0); // the load() on our primary failed assert_eq!(primary.failures(), 1); // but afterwards, we updated the primary cache with our module @@ -213,14 +230,14 @@ mod tests { async fn saving_will_update_both() { let engine = Engine::default(); let module = Module::new(&engine, ADD_WAT).unwrap(); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); let primary = SharedCache::default(); - let secondary = SharedCache::default(); - let cache = AndThen::new(&primary, &secondary); + let fallback = SharedCache::default(); + let cache = FallbackCache::new(&primary, &fallback); cache.save(key, &engine, &module).await.unwrap(); assert_eq!(primary.load(key, &engine).await.unwrap(), module); - assert_eq!(secondary.load(key, &engine).await.unwrap(), module); + assert_eq!(fallback.load(key, &engine).await.unwrap(), module); } } diff --git a/lib/wasi/src/runtime/module_cache/filesystem.rs b/lib/wasi/src/runtime/module_cache/filesystem.rs index 9cabf9bd2d6..bfbbb188966 100644 --- a/lib/wasi/src/runtime/module_cache/filesystem.rs +++ b/lib/wasi/src/runtime/module_cache/filesystem.rs @@ -19,6 +19,12 @@ impl FileSystemCache { } } + /// Get the directory that is typically used when caching compiled + /// packages inside `$WASMER_DIR`. + pub fn default_cache_dir(wasmer_dir: impl AsRef) -> PathBuf { + wasmer_dir.as_ref().join("compiled") + } + pub fn cache_dir(&self) -> &Path { &self.cache_dir } @@ -168,7 +174,7 @@ mod tests { let engine = Engine::default(); let module = Module::new(&engine, ADD_WAT).unwrap(); let cache = FileSystemCache::new(temp.path()); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); let expected_path = cache.path(key, engine.deterministic_id()); cache.save(key, &engine, &module).await.unwrap(); @@ -184,7 +190,7 @@ mod tests { let cache_dir = temp.path().join("this").join("doesn't").join("exist"); assert!(!cache_dir.exists()); let cache = FileSystemCache::new(&cache_dir); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); cache.save(key, &engine, &module).await.unwrap(); @@ -195,7 +201,7 @@ mod tests { async fn missing_file() { let temp = TempDir::new().unwrap(); let engine = Engine::default(); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); let cache = FileSystemCache::new(temp.path()); let err = cache.load(key, &engine).await.unwrap_err(); @@ -208,7 +214,7 @@ mod tests { let temp = TempDir::new().unwrap(); let engine = Engine::default(); let module = Module::new(&engine, ADD_WAT).unwrap(); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); let cache = FileSystemCache::new(temp.path()); let expected_path = cache.path(key, engine.deterministic_id()); std::fs::create_dir_all(expected_path.parent().unwrap()).unwrap(); diff --git a/lib/wasi/src/runtime/module_cache/mod.rs b/lib/wasi/src/runtime/module_cache/mod.rs index 928dd05dc8e..6537c059073 100644 --- a/lib/wasi/src/runtime/module_cache/mod.rs +++ b/lib/wasi/src/runtime/module_cache/mod.rs @@ -1,11 +1,44 @@ -mod and_then; +//! Cache pre-compiled [`wasmer::Module`]s. +//! +//! The core of this module is the [`ModuleCache`] trait, which is designed to +//! be implemented by different cache storage strategies, such as in-memory +//! caches ([`SharedCache`] and [`ThreadLocalCache`]), file-based caches +//! ([`FileSystemCache`]), or distributed caches. Implementing custom caching +//! strategies allows you to optimize for your specific use case. +//! +//! ## Assumptions and Requirements +//! +//! The `module_cache` module makes several assumptions: +//! +//! - Cache keys are unique, typically derived from the original `*.wasm` or +//! `*.wat` file, and using the same key to load or save will always result in +//! the "same" module. +//! - The [`ModuleCache::load()`] method will be called more often than the +//! [`ModuleCache::save()`] method, allowing for cache implementations to +//! optimize their strategy accordingly. +//! +//! Cache implementations are encouraged to take +//! [`wasmer::Engine::deterministic_id()`] into account when saving and loading +//! cached modules to ensure correct module retrieval. +//! +//! Cache implementations should choose a suitable eviction policy and implement +//! invalidation transparently as part of [`ModuleCache::load()`] or +//! [`ModuleCache::save()`]. +//! +//! ## Combinators +//! +//! The `module_cache` module provides combinators for extending and combining +//! caching strategies. For example, you could use the [`FallbackCache`] to +//! chain a fast in-memory cache with a slower file-based cache as a fallback. + +mod fallback; mod filesystem; mod shared; mod thread_local; mod types; pub use self::{ - and_then::AndThen, + fallback::FallbackCache, filesystem::FileSystemCache, shared::SharedCache, thread_local::ThreadLocalCache, diff --git a/lib/wasi/src/runtime/module_cache/shared.rs b/lib/wasi/src/runtime/module_cache/shared.rs index 05476e26a55..cec47cfde0b 100644 --- a/lib/wasi/src/runtime/module_cache/shared.rs +++ b/lib/wasi/src/runtime/module_cache/shared.rs @@ -57,7 +57,7 @@ mod tests { let engine = Engine::default(); let module = Module::new(&engine, ADD_WAT).unwrap(); let cache = SharedCache::default(); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); cache.save(key, &engine, &module).await.unwrap(); let round_tripped = cache.load(key, &engine).await.unwrap(); diff --git a/lib/wasi/src/runtime/module_cache/thread_local.rs b/lib/wasi/src/runtime/module_cache/thread_local.rs index f7212c11f8f..ca8abf3d430 100644 --- a/lib/wasi/src/runtime/module_cache/thread_local.rs +++ b/lib/wasi/src/runtime/module_cache/thread_local.rs @@ -63,7 +63,7 @@ mod tests { let engine = Engine::default(); let module = Module::new(&engine, ADD_WAT).unwrap(); let cache = ThreadLocalCache::default(); - let key = ModuleHash::from_raw([0; 32]); + let key = ModuleHash::from_bytes([0; 32]); cache.save(key, &engine, &module).await.unwrap(); let round_tripped = cache.load(key, &engine).await.unwrap(); diff --git a/lib/wasi/src/runtime/module_cache/types.rs b/lib/wasi/src/runtime/module_cache/types.rs index 8ed0355ec4a..804f8992ca1 100644 --- a/lib/wasi/src/runtime/module_cache/types.rs +++ b/lib/wasi/src/runtime/module_cache/types.rs @@ -7,7 +7,7 @@ use std::{ use sha2::{Digest, Sha256}; use wasmer::{Engine, Module}; -use crate::runtime::module_cache::AndThen; +use crate::runtime::module_cache::FallbackCache; /// A cache for compiled WebAssembly modules. /// @@ -45,7 +45,8 @@ pub trait ModuleCache: Debug { module: &Module, ) -> Result<(), CacheError>; - /// Chain a second cache onto this one. + /// Chain a second [`ModuleCache`] that will be used as a fallback if + /// lookups on the primary cache fail. /// /// The general assumption is that each subsequent cache in the chain will /// be significantly slower than the previous one. @@ -56,14 +57,14 @@ pub trait ModuleCache: Debug { /// }; /// /// let cache = SharedCache::default() - /// .and_then(FileSystemCache::new("~/.local/cache")); + /// .with_fallback(FileSystemCache::new("~/.local/cache")); /// ``` - fn and_then(self, other: C) -> AndThen + fn with_fallback(self, other: C) -> FallbackCache where Self: Sized, C: ModuleCache, { - AndThen::new(self, other) + FallbackCache::new(self, other) } } @@ -126,7 +127,7 @@ pub struct ModuleHash([u8; 32]); impl ModuleHash { /// Create a new [`ModuleHash`] from the raw SHA-256 hash. - pub fn from_raw(key: [u8; 32]) -> Self { + pub fn from_bytes(key: [u8; 32]) -> Self { ModuleHash(key) } @@ -136,11 +137,11 @@ impl ModuleHash { let mut hasher = Sha256::default(); hasher.update(wasm); - ModuleHash::from_raw(hasher.finalize().into()) + ModuleHash::from_bytes(hasher.finalize().into()) } /// Get the raw SHA-256 hash. - pub fn as_raw(self) -> [u8; 32] { + pub fn as_bytes(self) -> [u8; 32] { self.0 } } @@ -166,7 +167,7 @@ mod tests { #[test] fn key_is_displayed_as_hex() { - let key = ModuleHash::from_raw([ + let key = ModuleHash::from_bytes([ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, @@ -191,6 +192,6 @@ mod tests { let hash = ModuleHash::sha256(wasm); - assert_eq!(hash.as_raw(), raw); + assert_eq!(hash.as_bytes(), raw); } } diff --git a/lib/wasi/src/runtime/package_loader/builtin_loader.rs b/lib/wasi/src/runtime/package_loader/builtin_loader.rs new file mode 100644 index 00000000000..20b4da26a78 --- /dev/null +++ b/lib/wasi/src/runtime/package_loader/builtin_loader.rs @@ -0,0 +1,399 @@ +use std::{ + collections::HashMap, + fmt::Write as _, + io::{ErrorKind, Write as _}, + path::{Path, PathBuf}, + sync::{Arc, RwLock}, +}; + +use anyhow::{Context, Error}; +use bytes::Bytes; +use tempfile::NamedTempFile; +use webc::{ + compat::{Container, ContainerError}, + DetectError, +}; + +use crate::{ + bin_factory::BinaryPackage, + http::{HttpClient, HttpRequest, HttpResponse, USER_AGENT}, + runtime::{ + package_loader::PackageLoader, + resolver::{DistributionInfo, PackageSummary, Resolution, WebcHash}, + }, +}; + +/// The builtin [`PackageLoader`] that is used by the `wasmer` CLI and +/// respects `$WASMER_DIR`. +#[derive(Debug)] +pub struct BuiltinPackageLoader { + client: Arc, + in_memory: InMemoryCache, + fs: FileSystemCache, +} + +impl BuiltinPackageLoader { + pub fn new(cache_dir: impl Into) -> Self { + let client = crate::http::default_http_client().unwrap(); + BuiltinPackageLoader::new_with_client(cache_dir, Arc::new(client)) + } + + pub fn new_with_client( + cache_dir: impl Into, + client: Arc, + ) -> Self { + BuiltinPackageLoader { + fs: FileSystemCache { + cache_dir: cache_dir.into(), + }, + in_memory: InMemoryCache::default(), + client, + } + } + + /// Get the directory that is typically used when caching downloaded + /// packages inside `$WASMER_DIR`. + pub fn default_cache_dir(wasmer_dir: impl AsRef) -> PathBuf { + wasmer_dir.as_ref().join("checkouts") + } + + /// Create a new [`BuiltinPackageLoader`] based on `$WASMER_DIR` and the + /// global Wasmer config. + pub fn from_env() -> Result { + let wasmer_dir = discover_wasmer_dir().context("Unable to determine $WASMER_DIR")?; + let client = crate::http::default_http_client().context("No HTTP client available")?; + let cache_dir = BuiltinPackageLoader::default_cache_dir(&wasmer_dir); + + Ok(BuiltinPackageLoader::new_with_client( + cache_dir, + Arc::new(client), + )) + } + + #[tracing::instrument(level = "debug", skip_all, fields(pkg.hash=%hash))] + async fn get_cached(&self, hash: &WebcHash) -> Result, Error> { + if let Some(cached) = self.in_memory.lookup(hash) { + return Ok(Some(cached)); + } + + if let Some(cached) = self.fs.lookup(hash).await? { + // Note: We want to propagate it to the in-memory cache, too + tracing::debug!("Copying from the filesystem cache to the in-memory cache",); + self.in_memory.save(&cached, *hash); + return Ok(Some(cached)); + } + + Ok(None) + } + + async fn download(&self, dist: &DistributionInfo) -> Result { + if dist.webc.scheme() == "file" { + // Note: The Url::to_file_path() method is platform-specific + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] + if let Ok(path) = dist.webc.to_file_path() { + // FIXME: This will block the thread + let bytes = std::fs::read(&path) + .with_context(|| format!("Unable to read \"{}\"", path.display()))?; + return Ok(bytes.into()); + } + } + + let request = HttpRequest { + url: dist.webc.to_string(), + method: "GET".to_string(), + headers: vec![ + ("Accept".to_string(), "application/webc".to_string()), + ("User-Agent".to_string(), USER_AGENT.to_string()), + ], + body: None, + options: Default::default(), + }; + + let HttpResponse { + body, + ok, + status, + status_text, + .. + } = self.client.request(request).await?; + + if !ok { + anyhow::bail!("{status} {status_text}"); + } + + let body = body.context("The response didn't contain a body")?; + + Ok(body.into()) + } + + async fn save_and_load_as_mmapped( + &self, + webc: &[u8], + dist: &DistributionInfo, + ) -> Result { + // First, save it to disk + self.fs.save(webc, dist).await?; + + // Now try to load it again. The resulting container should use + // a memory-mapped file rather than an in-memory buffer. + match self.fs.lookup(&dist.webc_sha256).await? { + Some(container) => { + // we also want to make sure it's in the in-memory cache + self.in_memory.save(&container, dist.webc_sha256); + + Ok(container) + } + None => { + // Something really weird has occurred and we can't see the + // saved file. Just error out and let the fallback code do its + // thing. + Err(Error::msg("Unable to load the downloaded memory from disk")) + } + } + } +} + +#[async_trait::async_trait] +impl PackageLoader for BuiltinPackageLoader { + #[tracing::instrument( + level="debug", + skip_all, + fields( + pkg.name=summary.pkg.name.as_str(), + pkg.version=%summary.pkg.version, + ), + )] + async fn load(&self, summary: &PackageSummary) -> Result { + if let Some(container) = self.get_cached(&summary.dist.webc_sha256).await? { + tracing::debug!("Cache hit!"); + return Ok(container); + } + + // looks like we had a cache miss and need to download it manually + let bytes = self + .download(&summary.dist) + .await + .with_context(|| format!("Unable to download \"{}\"", summary.dist.webc))?; + + // We want to cache the container we downloaded, but we want to do it + // in a smart way to keep memory usage down. + + match self.save_and_load_as_mmapped(&bytes, &summary.dist).await { + Ok(container) => { + tracing::debug!("Cached to disk"); + // The happy path - we've saved to both caches and loaded the + // container from disk (hopefully using mmap) so we're done. + return Ok(container); + } + Err(e) => { + tracing::warn!( + error=&*e, + pkg.name=%summary.pkg.name, + pkg.version=%summary.pkg.version, + pkg.hash=%summary.dist.webc_sha256, + pkg.url=%summary.dist.webc, + "Unable to save the downloaded package to disk", + ); + // The sad path - looks like we'll need to keep the whole thing + // in memory. + let container = Container::from_bytes(bytes)?; + // We still want to cache it, of course + self.in_memory.save(&container, summary.dist.webc_sha256); + Ok(container) + } + } + } + + async fn load_package_tree( + &self, + root: &Container, + resolution: &Resolution, + ) -> Result { + super::load_package_tree(root, self, resolution).await + } +} + +fn discover_wasmer_dir() -> Option { + // TODO: We should reuse the same logic from the wasmer CLI. + std::env::var("WASMER_DIR") + .map(PathBuf::from) + .ok() + .or_else(|| { + #[allow(deprecated)] + std::env::home_dir().map(|home| home.join(".wasmer")) + }) +} + +// FIXME: This implementation will block the async runtime and should use +// some sort of spawn_blocking() call to run it in the background. +#[derive(Debug)] +struct FileSystemCache { + cache_dir: PathBuf, +} + +impl FileSystemCache { + async fn lookup(&self, hash: &WebcHash) -> Result, Error> { + let path = self.path(hash); + + match Container::from_disk(&path) { + Ok(c) => Ok(Some(c)), + Err(ContainerError::Open { error, .. }) + | Err(ContainerError::Read { error, .. }) + | Err(ContainerError::Detect(DetectError::Io(error))) + if error.kind() == ErrorKind::NotFound => + { + Ok(None) + } + Err(e) => { + let msg = format!("Unable to read \"{}\"", path.display()); + Err(Error::new(e).context(msg)) + } + } + } + + async fn save(&self, webc: &[u8], dist: &DistributionInfo) -> Result<(), Error> { + let path = self.path(&dist.webc_sha256); + + let parent = path.parent().expect("Always within cache_dir"); + + std::fs::create_dir_all(parent) + .with_context(|| format!("Unable to create \"{}\"", parent.display()))?; + + let mut temp = NamedTempFile::new_in(parent)?; + temp.write_all(webc)?; + temp.flush()?; + temp.as_file_mut().sync_all()?; + temp.persist(&path)?; + + tracing::debug!( + pkg.hash=%dist.webc_sha256, + pkg.url=%dist.webc, + path=%path.display(), + num_bytes=webc.len(), + "Saved to disk", + ); + + Ok(()) + } + + fn path(&self, hash: &WebcHash) -> PathBuf { + let hash = hash.as_bytes(); + let mut filename = String::with_capacity(hash.len() * 2); + for b in hash { + write!(filename, "{b:02x}").unwrap(); + } + filename.push_str(".bin"); + + self.cache_dir.join(filename) + } +} + +#[derive(Debug, Default)] +struct InMemoryCache(RwLock>); + +impl InMemoryCache { + fn lookup(&self, hash: &WebcHash) -> Option { + self.0.read().unwrap().get(hash).cloned() + } + + fn save(&self, container: &Container, hash: WebcHash) { + let mut cache = self.0.write().unwrap(); + cache.entry(hash).or_insert_with(|| container.clone()); + } +} + +#[cfg(test)] +mod tests { + use std::{collections::VecDeque, sync::Mutex}; + + use futures::future::BoxFuture; + use tempfile::TempDir; + + use crate::{ + http::{HttpRequest, HttpResponse}, + runtime::resolver::PackageInfo, + }; + + use super::*; + + const PYTHON: &[u8] = include_bytes!("../../../../c-api/examples/assets/python-0.1.0.wasmer"); + + #[derive(Debug)] + pub(crate) struct DummyClient { + requests: Mutex>, + responses: Mutex>, + } + + impl DummyClient { + pub fn with_responses(responses: impl IntoIterator) -> Self { + DummyClient { + requests: Mutex::new(Vec::new()), + responses: Mutex::new(responses.into_iter().collect()), + } + } + } + + impl HttpClient for DummyClient { + fn request( + &self, + request: HttpRequest, + ) -> BoxFuture<'_, Result> { + let response = self.responses.lock().unwrap().pop_front().unwrap(); + self.requests.lock().unwrap().push(request); + Box::pin(async { Ok(response) }) + } + } + + #[tokio::test] + async fn cache_misses_will_trigger_a_download() { + let temp = TempDir::new().unwrap(); + let client = Arc::new(DummyClient::with_responses([HttpResponse { + pos: 0, + body: Some(PYTHON.to_vec()), + ok: true, + redirected: false, + status: 200, + status_text: "OK".to_string(), + headers: Vec::new(), + }])); + let loader = BuiltinPackageLoader::new_with_client(temp.path(), client.clone()); + let summary = PackageSummary { + pkg: PackageInfo { + name: "python/python".to_string(), + version: "0.1.0".parse().unwrap(), + dependencies: Vec::new(), + commands: Vec::new(), + entrypoint: Some("asdf".to_string()), + }, + dist: DistributionInfo { + webc: "https://wapm.io/python/python".parse().unwrap(), + webc_sha256: [0xaa; 32].into(), + }, + }; + + let container = loader.load(&summary).await.unwrap(); + + // A HTTP request was sent + let requests = client.requests.lock().unwrap(); + let request = &requests[0]; + assert_eq!(request.url, summary.dist.webc.to_string()); + assert_eq!(request.method, "GET"); + assert_eq!( + request.headers, + [ + ("Accept".to_string(), "application/webc".to_string()), + ("User-Agent".to_string(), USER_AGENT.to_string()), + ] + ); + // Make sure we got the right package + let manifest = container.manifest(); + assert_eq!(manifest.entrypoint.as_deref(), Some("python")); + // it should have been automatically saved to disk + let path = loader.fs.path(&summary.dist.webc_sha256); + assert!(path.exists()); + assert_eq!(std::fs::read(&path).unwrap(), PYTHON); + // and cached in memory for next time + let in_memory = loader.in_memory.0.read().unwrap(); + assert!(in_memory.contains_key(&summary.dist.webc_sha256)); + } +} diff --git a/lib/wasi/src/runtime/package_loader/load_package_tree.rs b/lib/wasi/src/runtime/package_loader/load_package_tree.rs new file mode 100644 index 00000000000..36199b8dea2 --- /dev/null +++ b/lib/wasi/src/runtime/package_loader/load_package_tree.rs @@ -0,0 +1,263 @@ +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + path::Path, + sync::Arc, +}; + +use anyhow::{Context, Error}; +use futures::{stream::FuturesUnordered, TryStreamExt}; +use once_cell::sync::OnceCell; +use virtual_fs::{FileSystem, WebcVolumeFileSystem}; +use webc::compat::Container; + +use crate::{ + bin_factory::{BinaryPackage, BinaryPackageCommand}, + runtime::{ + package_loader::PackageLoader, + resolver::{ + DependencyGraph, ItemLocation, PackageId, PackageSummary, Resolution, ResolvedPackage, + }, + }, +}; + +/// Given a fully resolved package, load it into memory for execution. +#[tracing::instrument(level = "debug", skip_all)] +pub async fn load_package_tree( + root: &Container, + loader: &dyn PackageLoader, + resolution: &Resolution, +) -> Result { + let mut containers = fetch_dependencies(loader, &resolution.package, &resolution.graph).await?; + containers.insert(resolution.package.root_package.clone(), root.clone()); + let fs = filesystem(&containers, &resolution.package)?; + + let root = &resolution.package.root_package; + let commands: Vec = commands(&resolution.package.commands, &containers)?; + + let file_system_memory_footprint = count_file_system(&fs, Path::new("/")); + let atoms_in_use: HashSet<_> = commands.iter().map(|cmd| cmd.atom()).collect(); + let module_memory_footprint = atoms_in_use + .iter() + .fold(0, |footprint, atom| footprint + atom.len() as u64); + + let loaded = BinaryPackage { + package_name: root.package_name.clone(), + version: root.version.clone(), + when_cached: crate::syscalls::platform_clock_time_get( + wasmer_wasix_types::wasi::Snapshot0Clockid::Monotonic, + 1_000_000, + ) + .ok() + .map(|ts| ts as u128), + hash: OnceCell::new(), + entrypoint_cmd: resolution.package.entrypoint.clone(), + webc_fs: Arc::new(fs), + commands, + uses: Vec::new(), + module_memory_footprint, + file_system_memory_footprint, + }; + + Ok(loaded) +} + +fn commands( + commands: &BTreeMap, + containers: &HashMap, +) -> Result, Error> { + let mut pkg_commands = Vec::new(); + + for ( + name, + ItemLocation { + name: original_name, + package, + }, + ) in commands + { + let webc = &containers[package]; + let manifest = webc.manifest(); + let command_metadata = &manifest.commands[original_name]; + + if let Some(cmd) = load_binary_command(webc, name, command_metadata)? { + pkg_commands.push(cmd); + } + } + + Ok(pkg_commands) +} + +fn load_binary_command( + webc: &Container, + name: &str, + cmd: &webc::metadata::Command, +) -> Result, anyhow::Error> { + let atom_name = match atom_name_for_command(name, cmd)? { + Some(name) => name, + None => { + tracing::warn!( + cmd.name=name, + cmd.runner=%cmd.runner, + "Skipping unsupported command", + ); + return Ok(None); + } + }; + + let atom = webc.get_atom(&atom_name); + + if atom.is_none() && cmd.annotations.is_empty() { + return Ok(legacy_atom_hack(webc, name, cmd)); + } + + let atom = atom + .with_context(|| format!("The '{name}' command uses the '{atom_name}' atom, but it isn't present in the WEBC file"))?; + + let cmd = BinaryPackageCommand::new(name.to_string(), cmd.clone(), atom); + + Ok(Some(cmd)) +} + +fn atom_name_for_command( + command_name: &str, + cmd: &webc::metadata::Command, +) -> Result, anyhow::Error> { + use webc::metadata::annotations::{ + Emscripten, Wasi, EMSCRIPTEN_RUNNER_URI, WASI_RUNNER_URI, WCGI_RUNNER_URI, + }; + + // FIXME: command metadata should include an "atom: Option" field + // because it's so common, rather than relying on each runner to include + // annotations where "atom" just so happens to contain the atom's name + // (like in Wasi and Emscripten) + + if let Some(Wasi { atom, .. }) = cmd + .annotation("wasi") + .context("Unable to deserialize 'wasi' annotations")? + { + return Ok(Some(atom)); + } + + if let Some(Emscripten { + atom: Some(atom), .. + }) = cmd + .annotation("emscripten") + .context("Unable to deserialize 'emscripten' annotations")? + { + return Ok(Some(atom)); + } + + if [WASI_RUNNER_URI, WCGI_RUNNER_URI, EMSCRIPTEN_RUNNER_URI] + .iter() + .any(|uri| cmd.runner.starts_with(uri)) + { + // Note: We use the command name as the atom name as a special case + // for known runner types because sometimes people will construct + // a manifest by hand instead of using wapm2pirita. + tracing::debug!( + command = command_name, + "No annotations specifying the atom name found. Falling back to the command name" + ); + return Ok(Some(command_name.to_string())); + } + + Ok(None) +} + +/// HACK: Some older packages like `sharrattj/bash` and `sharrattj/coreutils` +/// contain commands with no annotations. When this happens, you can just assume +/// it wants to use the first atom in the WEBC file. +/// +/// That works because most of these packages only have a single atom (e.g. in +/// `sharrattj/coreutils` there are commands for `ls`, `pwd`, and so on, but +/// under the hood they all use the `coreutils` atom). +/// +/// See +/// for more. +fn legacy_atom_hack( + webc: &Container, + command_name: &str, + metadata: &webc::metadata::Command, +) -> Option { + let (name, atom) = webc.atoms().into_iter().next()?; + + tracing::debug!( + command_name, + atom.name = name.as_str(), + atom.len = atom.len(), + "(hack) The command metadata is malformed. Falling back to the first atom in the WEBC file", + ); + + Some(BinaryPackageCommand::new( + command_name.to_string(), + metadata.clone(), + atom, + )) +} + +async fn fetch_dependencies( + loader: &dyn PackageLoader, + pkg: &ResolvedPackage, + graph: &DependencyGraph, +) -> Result, Error> { + let mut packages = HashSet::new(); + + for loc in pkg.commands.values() { + packages.insert(loc.package.clone()); + } + + for mapping in &pkg.filesystem { + packages.insert(mapping.package.clone()); + } + + // We don't need to download the root package + packages.remove(&pkg.root_package); + + let packages: FuturesUnordered<_> = packages + .into_iter() + .map(|id| async { + let summary = PackageSummary { + pkg: graph.package_info[&id].clone(), + dist: graph.distribution[&id].clone(), + }; + loader.load(&summary).await.map(|webc| (id, webc)) + }) + .collect(); + + let packages: HashMap = packages.try_collect().await?; + + Ok(packages) +} + +fn filesystem( + packages: &HashMap, + pkg: &ResolvedPackage, +) -> Result { + // FIXME: Take the [fs] table into account + // See for more + let root = &packages[&pkg.root_package]; + let fs = WebcVolumeFileSystem::mount_all(root); + Ok(fs) +} + +fn count_file_system(fs: &dyn FileSystem, path: &Path) -> u64 { + let mut total = 0; + + let dir = match fs.read_dir(path) { + Ok(d) => d, + Err(_err) => { + return 0; + } + }; + + for entry in dir.flatten() { + if let Ok(meta) = entry.metadata() { + total += meta.len(); + if meta.is_dir() { + total += count_file_system(fs, entry.path.as_path()); + } + } + } + + total +} diff --git a/lib/wasi/src/runtime/package_loader/mod.rs b/lib/wasi/src/runtime/package_loader/mod.rs new file mode 100644 index 00000000000..aa9f9392b22 --- /dev/null +++ b/lib/wasi/src/runtime/package_loader/mod.rs @@ -0,0 +1,8 @@ +mod builtin_loader; +mod load_package_tree; +mod types; + +pub use self::{ + builtin_loader::BuiltinPackageLoader, load_package_tree::load_package_tree, + types::PackageLoader, +}; diff --git a/lib/wasi/src/runtime/package_loader/types.rs b/lib/wasi/src/runtime/package_loader/types.rs new file mode 100644 index 00000000000..afb3e924828 --- /dev/null +++ b/lib/wasi/src/runtime/package_loader/types.rs @@ -0,0 +1,43 @@ +use std::{fmt::Debug, ops::Deref}; + +use anyhow::Error; +use webc::compat::Container; + +use crate::{ + bin_factory::BinaryPackage, + runtime::resolver::{PackageSummary, Resolution}, +}; + +#[async_trait::async_trait] +pub trait PackageLoader: Send + Sync + Debug { + async fn load(&self, summary: &PackageSummary) -> Result; + + /// Load a resolved package into memory so it can be executed. + /// + /// A good default implementation is to just call + /// [`load_package_tree()`][super::load_package_tree()]. + async fn load_package_tree( + &self, + root: &Container, + resolution: &Resolution, + ) -> Result; +} + +#[async_trait::async_trait] +impl PackageLoader for D +where + D: Deref + Debug + Send + Sync, + P: PackageLoader + ?Sized + 'static, +{ + async fn load(&self, summary: &PackageSummary) -> Result { + (**self).load(summary).await + } + + async fn load_package_tree( + &self, + root: &Container, + resolution: &Resolution, + ) -> Result { + (**self).load_package_tree(root, resolution).await + } +} diff --git a/lib/wasi/src/runtime/resolver/cache.rs b/lib/wasi/src/runtime/resolver/cache.rs deleted file mode 100644 index f0b41777a32..00000000000 --- a/lib/wasi/src/runtime/resolver/cache.rs +++ /dev/null @@ -1,201 +0,0 @@ -use std::{collections::HashMap, sync::RwLock}; - -use semver::VersionReq; - -use crate::{ - bin_factory::BinaryPackage, - http::HttpClient, - runtime::resolver::{PackageResolver, ResolverError, WebcIdentifier}, -}; - -/// A resolver that wraps a [`PackageResolver`] with an in-memory cache. -#[derive(Debug)] -pub struct InMemoryCache { - resolver: R, - packages: RwLock>>, -} - -impl InMemoryCache { - pub fn new(resolver: R) -> Self { - InMemoryCache { - resolver, - packages: RwLock::new(HashMap::new()), - } - } - - pub fn get_ref(&self) -> &R { - &self.resolver - } - - pub fn get_mut(&mut self) -> &mut R { - &mut self.resolver - } - - pub fn into_inner(self) -> R { - self.resolver - } - - fn lookup(&self, package_name: &str, version_constraint: &VersionReq) -> Option { - let packages = self.packages.read().unwrap(); - let candidates = packages.get(package_name)?; - - let pkg = candidates - .iter() - .find(|pkg| version_constraint.matches(&pkg.version))?; - - Some(pkg.clone()) - } - - fn save(&self, pkg: BinaryPackage) { - let mut packages = self.packages.write().unwrap(); - let candidates = packages.entry(pkg.package_name.clone()).or_default(); - candidates.push(pkg); - // Note: We want to sort in descending order so lookups will always - // yield the most recent compatible version. - candidates.sort_by(|left, right| right.version.cmp(&left.version)); - } -} - -#[async_trait::async_trait] -impl PackageResolver for InMemoryCache -where - R: PackageResolver + Send + Sync, -{ - async fn resolve_package( - &self, - ident: &WebcIdentifier, - client: &(dyn HttpClient + Send + Sync), - ) -> Result { - if let Some(cached) = self.lookup(&ident.full_name, &ident.version) { - // Cache hit! - tracing::debug!(package=?ident, "The resolved package was already cached"); - return Ok(cached); - } - - // the slow path - let pkg = self.resolver.resolve_package(ident, client).await?; - - tracing::debug!( - request.name = ident.full_name.as_str(), - request.version = %ident.version, - resolved.name = pkg.package_name.as_str(), - resolved.version = %pkg.version, - "Adding resolved package to the cache", - ); - self.save(pkg.clone()); - - Ok(pkg) - } -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; - - use once_cell::sync::OnceCell; - - use super::*; - - #[derive(Debug, Default)] - struct DummyResolver { - calls: Mutex>, - } - - #[async_trait::async_trait] - impl PackageResolver for DummyResolver { - async fn resolve_package( - &self, - ident: &WebcIdentifier, - _client: &(dyn HttpClient + Send + Sync), - ) -> Result { - self.calls.lock().unwrap().push(ident.clone()); - Err(ResolverError::UnknownPackage(ident.clone())) - } - } - - fn dummy_pkg(name: &str, version: &str) -> BinaryPackage { - BinaryPackage { - package_name: name.into(), - version: version.parse().unwrap(), - when_cached: None, - entry: None, - hash: OnceCell::new(), - webc_fs: None, - commands: Arc::default(), - uses: Vec::new(), - module_memory_footprint: 0, - file_system_memory_footprint: 0, - } - } - - #[derive(Debug)] - struct DummyHttpClient; - - impl HttpClient for DummyHttpClient { - fn request( - &self, - _request: crate::http::HttpRequest, - ) -> futures::future::BoxFuture<'_, Result> - { - unreachable!() - } - } - - #[tokio::test] - async fn cache_hit() { - let resolver = DummyResolver::default(); - let cache = InMemoryCache::new(resolver); - let ident: WebcIdentifier = "python/python".parse().unwrap(); - cache.save(dummy_pkg("python/python", "0.0.0")); - - let pkg = cache - .resolve_package(&ident, &DummyHttpClient) - .await - .unwrap(); - - assert_eq!(pkg.version.to_string(), "0.0.0"); - } - - #[tokio::test] - async fn semver_allows_wiggle_room_with_version_numbers() { - let resolver = DummyResolver::default(); - let cache = InMemoryCache::new(resolver); - cache.save(dummy_pkg("python/python", "1.0.0")); - cache.save(dummy_pkg("python/python", "1.1.0")); - cache.save(dummy_pkg("python/python", "2.0.0")); - - let pkg = cache - .resolve_package(&"python/python@^1.0.5".parse().unwrap(), &DummyHttpClient) - .await - .unwrap(); - assert_eq!(pkg.version.to_string(), "1.1.0"); - - let pkg = cache - .resolve_package(&"python/python@1".parse().unwrap(), &DummyHttpClient) - .await - .unwrap(); - assert_eq!(pkg.version.to_string(), "1.1.0"); - - let result = cache - .resolve_package(&"python/python@=2.0.1".parse().unwrap(), &DummyHttpClient) - .await; - assert!(result.is_err()); - } - - #[tokio::test] - async fn cache_miss() { - let resolver = DummyResolver::default(); - let cache = InMemoryCache::new(resolver); - let ident: WebcIdentifier = "python/python".parse().unwrap(); - - let expected_err = cache - .resolve_package(&ident, &DummyHttpClient) - .await - .unwrap_err(); - - assert!(matches!(expected_err, ResolverError::UnknownPackage(_))); - // there should have been one call to the wrapped resolver - let calls = cache.get_ref().calls.lock().unwrap(); - assert_eq!(&*calls, &[ident]); - } -} diff --git a/lib/wasi/src/runtime/resolver/filesystem_source.rs b/lib/wasi/src/runtime/resolver/filesystem_source.rs new file mode 100644 index 00000000000..60952e33844 --- /dev/null +++ b/lib/wasi/src/runtime/resolver/filesystem_source.rs @@ -0,0 +1,45 @@ +use anyhow::{Context, Error}; +use webc::compat::Container; + +use crate::runtime::resolver::{ + DistributionInfo, PackageInfo, PackageSpecifier, PackageSummary, Source, WebcHash, +}; + +/// A [`Source`] that knows how to query files on the filesystem. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct FileSystemSource {} + +#[async_trait::async_trait] +impl Source for FileSystemSource { + #[tracing::instrument(level = "debug", skip_all, fields(%package))] + async fn query(&self, package: &PackageSpecifier) -> Result, Error> { + let path = match package { + PackageSpecifier::Path(path) => path.canonicalize().with_context(|| { + format!( + "Unable to get the canonical form for \"{}\"", + path.display() + ) + })?, + _ => return Ok(Vec::new()), + }; + + // FIXME: These two operations will block + let webc_sha256 = WebcHash::for_file(&path) + .with_context(|| format!("Unable to hash \"{}\"", path.display()))?; + let container = Container::from_disk(&path) + .with_context(|| format!("Unable to parse \"{}\"", path.display()))?; + + let url = crate::runtime::resolver::polyfills::url_from_file_path(&path) + .ok_or_else(|| anyhow::anyhow!("Unable to turn \"{}\" into a URL", path.display()))?; + + let summary = PackageSummary { + pkg: PackageInfo::from_manifest(container.manifest())?, + dist: DistributionInfo { + webc: url, + webc_sha256, + }, + }; + + Ok(vec![summary]) + } +} diff --git a/lib/wasi/src/runtime/resolver/in_memory_source.rs b/lib/wasi/src/runtime/resolver/in_memory_source.rs new file mode 100644 index 00000000000..a8bb600ae1e --- /dev/null +++ b/lib/wasi/src/runtime/resolver/in_memory_source.rs @@ -0,0 +1,185 @@ +use std::{ + collections::{BTreeMap, VecDeque}, + fs::File, + path::{Path, PathBuf}, +}; + +use anyhow::{Context, Error}; +use semver::Version; + +use crate::runtime::resolver::{PackageSpecifier, PackageSummary, Source}; + +/// A [`Source`] that tracks packages in memory. +/// +/// Primarily used during testing. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct InMemorySource { + packages: BTreeMap>, +} + +impl InMemorySource { + pub fn new() -> Self { + InMemorySource::default() + } + + /// Recursively walk a directory, adding all valid WEBC files to the source. + pub fn from_directory_tree(dir: impl Into) -> Result { + let mut source = InMemorySource::default(); + + let mut to_check: VecDeque = VecDeque::new(); + to_check.push_back(dir.into()); + + fn process_entry( + path: &Path, + source: &mut InMemorySource, + to_check: &mut VecDeque, + ) -> Result<(), Error> { + let metadata = std::fs::metadata(path).context("Unable to get filesystem metadata")?; + + if metadata.is_dir() { + for entry in path.read_dir().context("Unable to read the directory")? { + to_check.push_back(entry?.path()); + } + } else if metadata.is_file() { + let f = File::open(path).context("Unable to open the file")?; + if webc::detect(f).is_ok() { + source + .add_webc(path) + .with_context(|| format!("Unable to load \"{}\"", path.display()))?; + } + } + + Ok(()) + } + + while let Some(path) = to_check.pop_front() { + process_entry(&path, &mut source, &mut to_check) + .with_context(|| format!("Unable to add entries from \"{}\"", path.display()))?; + } + + Ok(source) + } + + /// Add a new [`PackageSummary`] to the [`InMemorySource`]. + pub fn add(&mut self, summary: PackageSummary) { + let summaries = self.packages.entry(summary.pkg.name.clone()).or_default(); + summaries.push(summary); + summaries.sort_by(|left, right| left.pkg.version.cmp(&right.pkg.version)); + summaries.dedup_by(|left, right| left.pkg.version == right.pkg.version); + } + + pub fn add_webc(&mut self, path: impl AsRef) -> Result<(), Error> { + let summary = PackageSummary::from_webc_file(path)?; + self.add(summary); + + Ok(()) + } + + pub fn packages(&self) -> &BTreeMap> { + &self.packages + } + + pub fn get(&self, package_name: &str, version: &Version) -> Option<&PackageSummary> { + let summaries = self.packages.get(package_name)?; + summaries.iter().find(|s| s.pkg.version == *version) + } +} + +#[async_trait::async_trait] +impl Source for InMemorySource { + #[tracing::instrument(level = "debug", skip_all, fields(%package))] + async fn query(&self, package: &PackageSpecifier) -> Result, Error> { + match package { + PackageSpecifier::Registry { full_name, version } => { + match self.packages.get(full_name) { + Some(summaries) => { + let matches: Vec<_> = summaries + .iter() + .filter(|summary| version.matches(&summary.pkg.version)) + .cloned() + .collect(); + + tracing::debug!( + matches = ?matches + .iter() + .map(|summary| summary.package_id().to_string()) + .collect::>(), + ); + + Ok(matches) + } + None => Ok(Vec::new()), + } + } + PackageSpecifier::Url(_) | PackageSpecifier::Path(_) => Ok(Vec::new()), + } + } +} + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use crate::runtime::resolver::{ + inputs::{DistributionInfo, PackageInfo}, + Dependency, WebcHash, + }; + + use super::*; + + const PYTHON: &[u8] = include_bytes!("../../../../c-api/examples/assets/python-0.1.0.wasmer"); + const COREUTILS_16: &[u8] = include_bytes!("../../../../../tests/integration/cli/tests/webc/coreutils-1.0.16-e27dbb4f-2ef2-4b44-b46a-ddd86497c6d7.webc"); + const COREUTILS_11: &[u8] = include_bytes!("../../../../../tests/integration/cli/tests/webc/coreutils-1.0.11-9d7746ca-694f-11ed-b932-dead3543c068.webc"); + const BASH: &[u8] = include_bytes!("../../../../../tests/integration/cli/tests/webc/bash-1.0.16-f097441a-a80b-4e0d-87d7-684918ef4bb6.webc"); + + #[test] + fn load_a_directory_tree() { + let temp = TempDir::new().unwrap(); + std::fs::write(temp.path().join("python-0.1.0.webc"), PYTHON).unwrap(); + std::fs::write(temp.path().join("coreutils-1.0.16.webc"), COREUTILS_16).unwrap(); + std::fs::write(temp.path().join("coreutils-1.0.11.webc"), COREUTILS_11).unwrap(); + let nested = temp.path().join("nested"); + std::fs::create_dir(&nested).unwrap(); + let bash = nested.join("bash-1.0.12.webc"); + std::fs::write(&bash, BASH).unwrap(); + + let source = InMemorySource::from_directory_tree(temp.path()).unwrap(); + + assert_eq!( + source + .packages + .keys() + .map(|k| k.as_str()) + .collect::>(), + ["python", "sharrattj/bash", "sharrattj/coreutils"] + ); + assert_eq!(source.packages["sharrattj/coreutils"].len(), 2); + assert_eq!( + source.packages["sharrattj/bash"][0], + PackageSummary { + pkg: PackageInfo { + name: "sharrattj/bash".to_string(), + version: "1.0.16".parse().unwrap(), + dependencies: vec![Dependency { + alias: "coreutils".to_string(), + pkg: "sharrattj/coreutils@^1.0.16".parse().unwrap() + }], + commands: vec![crate::runtime::resolver::Command { + name: "bash".to_string(), + }], + entrypoint: Some("bash".to_string()), + }, + dist: DistributionInfo { + webc: crate::runtime::resolver::polyfills::url_from_file_path( + bash.canonicalize().unwrap() + ) + .unwrap(), + webc_sha256: WebcHash::from_bytes([ + 161, 101, 23, 194, 244, 92, 186, 213, 143, 33, 200, 128, 238, 23, 185, 174, + 180, 195, 144, 145, 78, 17, 227, 159, 118, 64, 83, 153, 0, 205, 253, 215, + ]), + }, + } + ); + } +} diff --git a/lib/wasi/src/runtime/resolver/inputs.rs b/lib/wasi/src/runtime/resolver/inputs.rs new file mode 100644 index 00000000000..9c236fed848 --- /dev/null +++ b/lib/wasi/src/runtime/resolver/inputs.rs @@ -0,0 +1,362 @@ +use std::{ + fmt::{self, Display, Formatter}, + fs::File, + io::{BufRead, BufReader}, + path::{Path, PathBuf}, + str::FromStr, +}; + +use anyhow::{Context, Error}; +use semver::{Version, VersionReq}; +use sha2::{Digest, Sha256}; +use url::Url; +use webc::{ + metadata::{annotations::Wapm as WapmAnnotations, Manifest, UrlOrManifest}, + Container, +}; + +use crate::runtime::resolver::PackageId; + +/// A reference to *some* package somewhere that the user wants to run. +/// +/// # Security Considerations +/// +/// The [`PackageSpecifier::Path`] variant doesn't specify which filesystem a +/// [`Source`][source] will eventually query. Consumers of [`PackageSpecifier`] +/// should be wary of sandbox escapes. +/// +/// [source]: crate::runtime::resolver::Source +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PackageSpecifier { + Registry { + full_name: String, + version: VersionReq, + }, + Url(Url), + /// A `*.webc` file on disk. + Path(PathBuf), +} + +impl PackageSpecifier { + pub fn parse(s: &str) -> Result { + s.parse() + } +} + +impl FromStr for PackageSpecifier { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + if let Ok(url) = Url::parse(s) { + if url.has_host() { + return Ok(PackageSpecifier::Url(url)); + } + } + + // TODO: Replace this with something more rigorous that can also handle + // the locator field + let (full_name, version) = match s.split_once('@') { + Some((n, v)) => (n, v), + None => (s, "*"), + }; + + let invalid_character = full_name + .char_indices() + .find(|(_, c)| !matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '.'| '-'|'_' | '/')); + if let Some((index, c)) = invalid_character { + anyhow::bail!("Invalid character, {c:?}, at offset {index}"); + } + + let version = if version == "latest" { + // let people write "some/package@latest" + VersionReq::STAR + } else { + version + .parse() + .with_context(|| format!("Invalid version number, \"{version}\""))? + }; + + Ok(PackageSpecifier::Registry { + full_name: full_name.to_string(), + version, + }) + } +} + +impl Display for PackageSpecifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PackageSpecifier::Registry { full_name, version } => write!(f, "{full_name}@{version}"), + PackageSpecifier::Url(url) => Display::fmt(url, f), + PackageSpecifier::Path(path) => write!(f, "{}", path.display()), + } + } +} + +/// A dependency constraint. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Dependency { + pub alias: String, + pub pkg: PackageSpecifier, +} + +impl Dependency { + pub fn package_name(&self) -> Option<&str> { + match &self.pkg { + PackageSpecifier::Registry { full_name, .. } => Some(full_name), + _ => None, + } + } + + pub fn alias(&self) -> &str { + &self.alias + } + + pub fn version(&self) -> Option<&VersionReq> { + match &self.pkg { + PackageSpecifier::Registry { version, .. } => Some(version), + _ => None, + } + } +} + +/// Some metadata a [`Source`][source] can provide about a package without +/// needing to download the entire `*.webc` file. +/// +/// [source]: crate::runtime::resolver::Source +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PackageSummary { + pub pkg: PackageInfo, + pub dist: DistributionInfo, +} + +impl PackageSummary { + pub fn package_id(&self) -> PackageId { + self.pkg.id() + } + + pub fn from_webc_file(path: impl AsRef) -> Result { + let path = path.as_ref().canonicalize()?; + let container = Container::from_disk(&path)?; + let webc_sha256 = WebcHash::for_file(&path)?; + let url = + crate::runtime::resolver::polyfills::url_from_file_path(&path).ok_or_else(|| { + anyhow::anyhow!("Unable to turn \"{}\" into a file:// URL", path.display()) + })?; + + let pkg = PackageInfo::from_manifest(container.manifest())?; + let dist = DistributionInfo { + webc: url, + webc_sha256, + }; + + Ok(PackageSummary { pkg, dist }) + } +} + +/// Information about a package's contents. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PackageInfo { + /// The package's full name (i.e. `wasmer/wapm2pirita`). + pub name: String, + /// The package version. + pub version: Version, + /// Commands this package exposes to the outside world. + pub commands: Vec, + /// The name of a [`Command`] that should be used as this package's + /// entrypoint. + pub entrypoint: Option, + /// Any dependencies this package may have. + pub dependencies: Vec, +} + +impl PackageInfo { + pub fn from_manifest(manifest: &Manifest) -> Result { + let WapmAnnotations { name, version, .. } = manifest + .package_annotation("wapm")? + .context("Unable to find the \"wapm\" annotations")?; + + let dependencies = manifest + .use_map + .iter() + .map(|(alias, value)| { + Ok(Dependency { + alias: alias.clone(), + pkg: url_or_manifest_to_specifier(value)?, + }) + }) + .collect::, Error>>()?; + + let commands = manifest + .commands + .iter() + .map(|(name, _value)| crate::runtime::resolver::Command { + name: name.to_string(), + }) + .collect(); + + Ok(PackageInfo { + name, + version: version.parse()?, + dependencies, + commands, + entrypoint: manifest.entrypoint.clone(), + }) + } + + pub fn id(&self) -> PackageId { + PackageId { + package_name: self.name.clone(), + version: self.version.clone(), + } + } +} + +fn url_or_manifest_to_specifier(value: &UrlOrManifest) -> Result { + match value { + UrlOrManifest::Url(url) => Ok(PackageSpecifier::Url(url.clone())), + UrlOrManifest::Manifest(manifest) => { + if let Ok(Some(WapmAnnotations { name, version, .. })) = + manifest.package_annotation("wapm") + { + let version = version.parse()?; + return Ok(PackageSpecifier::Registry { + full_name: name, + version, + }); + } + + if let Some(origin) = manifest + .origin + .as_deref() + .and_then(|origin| Url::parse(origin).ok()) + { + return Ok(PackageSpecifier::Url(origin)); + } + + Err(Error::msg( + "Unable to determine a package specifier for a vendored dependency", + )) + } + UrlOrManifest::RegistryDependentUrl(specifier) => specifier.parse(), + } +} + +/// Information used when retrieving a package. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DistributionInfo { + /// A URL that can be used to download the `*.webc` file. + pub webc: Url, + /// A SHA-256 checksum for the `*.webc` file. + pub webc_sha256: WebcHash, +} + +/// The SHA-256 hash of a `*.webc` file. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct WebcHash([u8; 32]); + +impl WebcHash { + pub fn from_bytes(bytes: [u8; 32]) -> Self { + WebcHash(bytes) + } + + pub fn for_file(path: impl AsRef) -> Result { + let mut hasher = Sha256::default(); + let mut reader = BufReader::new(File::open(path)?); + + loop { + let buffer = reader.fill_buf()?; + if buffer.is_empty() { + break; + } + hasher.update(buffer); + let bytes_read = buffer.len(); + reader.consume(bytes_read); + } + + let hash = hasher.finalize().into(); + Ok(WebcHash::from_bytes(hash)) + } + + /// Generate a new [`WebcHash`] based on the SHA-256 hash of some bytes. + pub fn sha256(webc: impl AsRef<[u8]>) -> Self { + let webc = webc.as_ref(); + + let mut hasher = Sha256::default(); + hasher.update(webc); + WebcHash::from_bytes(hasher.finalize().into()) + } + + pub fn as_bytes(self) -> [u8; 32] { + self.0 + } +} + +impl From<[u8; 32]> for WebcHash { + fn from(bytes: [u8; 32]) -> Self { + WebcHash::from_bytes(bytes) + } +} + +impl Display for WebcHash { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + for byte in self.0 { + write!(f, "{byte:02X}")?; + } + + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Command { + pub name: String, +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + + #[test] + fn parse_some_package_specifiers() { + let inputs = [ + ( + "first", + PackageSpecifier::Registry { + full_name: "first".to_string(), + version: VersionReq::STAR, + }, + ), + ( + "namespace/package", + PackageSpecifier::Registry { + full_name: "namespace/package".to_string(), + version: VersionReq::STAR, + }, + ), + ( + "namespace/package@1.0.0", + PackageSpecifier::Registry { + full_name: "namespace/package".to_string(), + version: "1.0.0".parse().unwrap(), + }, + ), + ( + "namespace/package@latest", + PackageSpecifier::Registry { + full_name: "namespace/package".to_string(), + version: VersionReq::STAR, + }, + ), + ( + "https://wapm/io/namespace/package@1.0.0", + PackageSpecifier::Url("https://wapm/io/namespace/package@1.0.0".parse().unwrap()), + ), + ]; + + for (src, expected) in inputs { + let parsed = PackageSpecifier::from_str(src).unwrap(); + assert_eq!(parsed, expected); + } + } +} diff --git a/lib/wasi/src/runtime/resolver/mod.rs b/lib/wasi/src/runtime/resolver/mod.rs index ad774a7e2f7..4cb65aa0b08 100644 --- a/lib/wasi/src/runtime/resolver/mod.rs +++ b/lib/wasi/src/runtime/resolver/mod.rs @@ -1,12 +1,27 @@ -mod cache; -mod registry; -mod types; +mod filesystem_source; +mod in_memory_source; +mod inputs; +mod multi_source_registry; +mod outputs; +pub(crate) mod polyfills; +mod resolve; +mod source; +mod wapm_source; +mod web_source; pub use self::{ - cache::InMemoryCache, - registry::RegistryResolver, - types::{ - FileSystemMapping, Locator, PackageResolver, ResolvedCommand, ResolvedPackage, - ResolverError, WebcIdentifier, + filesystem_source::FileSystemSource, + in_memory_source::InMemorySource, + inputs::{ + Command, Dependency, DistributionInfo, PackageInfo, PackageSpecifier, PackageSummary, + WebcHash, }, + multi_source_registry::MultiSource, + outputs::{ + DependencyGraph, FileSystemMapping, ItemLocation, PackageId, Resolution, ResolvedPackage, + }, + resolve::resolve, + source::Source, + wapm_source::WapmSource, + web_source::WebSource, }; diff --git a/lib/wasi/src/runtime/resolver/multi_source_registry.rs b/lib/wasi/src/runtime/resolver/multi_source_registry.rs new file mode 100644 index 00000000000..5d71315a9c1 --- /dev/null +++ b/lib/wasi/src/runtime/resolver/multi_source_registry.rs @@ -0,0 +1,50 @@ +use std::sync::Arc; + +use anyhow::Error; + +use crate::runtime::resolver::{PackageSpecifier, PackageSummary, Source}; + +/// A [`Source`] that works by querying multiple [`Source`]s in succession. +/// +/// The first [`Source`] to return one or more [`Summaries`][PackageSummary] +/// will be treated as the canonical source for that [`Dependency`][dep] and no +/// further [`Source`]s will be queried. +/// +/// [dep]: crate::runtime::resolver::Dependency +#[derive(Debug, Clone)] +pub struct MultiSource { + sources: Vec>, +} + +impl MultiSource { + pub const fn new() -> Self { + MultiSource { + sources: Vec::new(), + } + } + + pub fn add_source(&mut self, source: impl Source + Send + Sync + 'static) -> &mut Self { + self.add_shared_source(Arc::new(source)); + self + } + + pub fn add_shared_source(&mut self, source: Arc) -> &mut Self { + self.sources.push(source); + self + } +} + +#[async_trait::async_trait] +impl Source for MultiSource { + #[tracing::instrument(level = "debug", skip_all, fields(%package))] + async fn query(&self, package: &PackageSpecifier) -> Result, Error> { + for source in &self.sources { + let result = source.query(package).await?; + if !result.is_empty() { + return Ok(result); + } + } + + anyhow::bail!("Unable to find any packages that satisfy the query") + } +} diff --git a/lib/wasi/src/runtime/resolver/outputs.rs b/lib/wasi/src/runtime/resolver/outputs.rs new file mode 100644 index 00000000000..e2289531e7a --- /dev/null +++ b/lib/wasi/src/runtime/resolver/outputs.rs @@ -0,0 +1,79 @@ +use std::{ + collections::{BTreeMap, HashMap}, + fmt::{self, Display, Formatter}, + path::PathBuf, + unreachable, +}; + +use semver::Version; + +use crate::runtime::resolver::{DistributionInfo, PackageInfo}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Resolution { + pub package: ResolvedPackage, + pub graph: DependencyGraph, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ItemLocation { + /// The item's original name. + pub name: String, + /// The package this item comes from. + pub package: PackageId, +} + +/// An identifier for a package within a dependency graph. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PackageId { + pub package_name: String, + pub version: Version, +} + +impl Display for PackageId { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let PackageId { + package_name, + version, + } = self; + write!(f, "{package_name}@{version}") + } +} + +/// A dependency graph. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DependencyGraph { + pub root: PackageId, + pub dependencies: HashMap>, + pub package_info: HashMap, + pub distribution: HashMap, +} + +impl DependencyGraph { + pub fn root_info(&self) -> &PackageInfo { + match self.package_info.get(&self.root) { + Some(info) => info, + None => unreachable!( + "The dependency graph should always have package info for the root package, {}", + self.root + ), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FileSystemMapping { + pub mount_path: PathBuf, + pub volume_name: String, + pub package: PackageId, +} + +/// A package that has been resolved, but is not yet runnable. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ResolvedPackage { + pub root_package: PackageId, + pub commands: BTreeMap, + pub entrypoint: Option, + /// A mapping from paths to the volumes that should be mounted there. + pub filesystem: Vec, +} diff --git a/lib/wasi/src/runtime/resolver/polyfills.rs b/lib/wasi/src/runtime/resolver/polyfills.rs new file mode 100644 index 00000000000..7f7614ab6a1 --- /dev/null +++ b/lib/wasi/src/runtime/resolver/polyfills.rs @@ -0,0 +1,50 @@ +use std::path::Path; + +use url::Url; + +/// Polyfill for [`Url::from_file_path()`] that works on `wasm32-unknown-unknown`. +pub(crate) fn url_from_file_path(path: impl AsRef) -> Option { + let path = path.as_ref(); + + if !path.is_absolute() { + return None; + } + + let mut buffer = String::new(); + + for component in path { + if !buffer.ends_with('/') { + buffer.push('/'); + } + + buffer.push_str(component.to_str()?); + } + + buffer.insert_str(0, "file://"); + + buffer.parse().ok() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(unix)] + fn behaviour_is_identical() { + let inputs = [ + "/", + "/path", + "/path/to/file.txt", + "./path/to/file.txt", + ".", + "", + ]; + + for path in inputs { + let got = url_from_file_path(path); + let expected = Url::from_file_path(path).ok(); + assert_eq!(got, expected, "Mismatch for \"{path}\""); + } + } +} diff --git a/lib/wasi/src/runtime/resolver/registry.rs b/lib/wasi/src/runtime/resolver/registry.rs deleted file mode 100644 index f9761792b5e..00000000000 --- a/lib/wasi/src/runtime/resolver/registry.rs +++ /dev/null @@ -1,135 +0,0 @@ -use std::path::PathBuf; - -use anyhow::Context; -use url::Url; - -use crate::{ - bin_factory::BinaryPackage, - http::HttpClient, - runtime::resolver::{types::ResolverError, types::WebcIdentifier, PackageResolver}, -}; - -/// A [`PackageResolver`] that will resolve packages by fetching them from the -/// WAPM registry. -/// -/// Any downloaded assets will be cached on disk. -#[derive(Debug, Clone)] -pub struct RegistryResolver { - cache_dir: PathBuf, - registry_endpoint: Url, - /// A list of [`BinaryPackage`]s that have already been loaded into memory - /// by the user. - // TODO: Remove this "preload" hack and update the snapshot tests to - // use a local registry instead of "--include-webc" - preloaded: Vec, -} - -impl RegistryResolver { - pub const WAPM_DEV_ENDPOINT: &str = "https://registry.wapm.dev/graphql"; - pub const WAPM_PROD_ENDPOINT: &str = "https://registry.wapm.io/graphql"; - - pub fn new(cache_dir: impl Into, registry_endpoint: Url) -> Self { - RegistryResolver { - cache_dir: cache_dir.into(), - registry_endpoint, - preloaded: Vec::new(), - } - } - - /// Create a [`RegistryResolver`] using the current Wasmer toolchain - /// installation. - pub fn from_env() -> Result { - // FIXME: respect active registry setting in wasmer.toml... We currently - // do things the hard way because pulling in the wasmer-registry crate - // would add loads of extra dependencies and make it harder to build - // wasmer-wasix when "js" is enabled. - let wasmer_home = std::env::var_os("WASMER_HOME") - .map(PathBuf::from) - .or_else(|| { - #[allow(deprecated)] - std::env::home_dir().map(|home| home.join(".wasmer")) - }) - .context("Unable to determine Wasmer's home directory")?; - - let endpoint = RegistryResolver::WAPM_PROD_ENDPOINT.parse()?; - - Ok(RegistryResolver::new(wasmer_home, endpoint)) - } - - /// Add a preloaded [`BinaryPackage`] to the list of preloaded packages. - /// - /// The [`RegistryResolver`] adds a mechanism that allows you to "preload" a - /// [`BinaryPackage`] that already exists in memory. The - /// [`PackageResolver::resolve_package()`] method will first check this list - /// for a compatible package before checking WAPM. - /// - /// **This mechanism should only be used for testing**. Expect it to be - /// removed in future versions in favour of a local registry. - pub fn add_preload(&mut self, pkg: BinaryPackage) -> &mut Self { - self.preloaded.push(pkg); - self - } - - fn lookup_preloaded(&self, pkg: &WebcIdentifier) -> Option<&BinaryPackage> { - self.preloaded.iter().find(|candidate| { - candidate.package_name == pkg.full_name && pkg.version.matches(&candidate.version) - }) - } -} - -#[async_trait::async_trait] -impl PackageResolver for RegistryResolver { - async fn resolve_package( - &self, - pkg: &WebcIdentifier, - client: &(dyn HttpClient + Send + Sync), - ) -> Result { - if let Some(preloaded) = self.lookup_preloaded(pkg) { - return Ok(preloaded.clone()); - } - - crate::wapm::fetch_webc( - &self.cache_dir, - &pkg.full_name, - client, - &self.registry_endpoint, - ) - .await - .map_err(|e| ResolverError::Other(e.into())) - } -} - -#[cfg(test)] -mod tests { - use tempfile::TempDir; - - use super::*; - - #[tokio::test] - #[cfg_attr(not(feature = "host-reqwest"), ignore = "Requires a HTTP client")] - async fn resolved_webc_files_are_cached_locally() { - let temp = TempDir::new().unwrap(); - let resolver = RegistryResolver::new( - temp.path(), - RegistryResolver::WAPM_PROD_ENDPOINT.parse().unwrap(), - ); - let client = crate::http::default_http_client().expect("This test requires a HTTP client"); - let ident = WebcIdentifier::parse("wasmer/sha2@0.1.0").unwrap(); - - let pkg = resolver.resolve_package(&ident, &client).await.unwrap(); - - assert_eq!(pkg.package_name, "wasmer/sha2"); - assert_eq!(pkg.version.to_string(), "0.1.0"); - let filenames: Vec<_> = temp - .path() - .read_dir() - .unwrap() - .flatten() - .map(|entry| entry.file_name().to_str().unwrap().to_string()) - .collect(); - assert_eq!( - filenames, - ["wasmer_sha2_sha2-0.1.0-2ada887a-9bb8-11ed-82ff-b2315a79a72a.webc"] - ); - } -} diff --git a/lib/wasi/src/runtime/resolver/resolve.rs b/lib/wasi/src/runtime/resolver/resolve.rs new file mode 100644 index 00000000000..0b7650efab2 --- /dev/null +++ b/lib/wasi/src/runtime/resolver/resolve.rs @@ -0,0 +1,900 @@ +use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; + +use semver::Version; + +use crate::runtime::resolver::{ + DependencyGraph, ItemLocation, PackageId, PackageInfo, PackageSummary, Resolution, + ResolvedPackage, Source, +}; + +use super::FileSystemMapping; + +/// Given the [`PackageInfo`] for a root package, resolve its dependency graph +/// and figure out how it could be executed. +#[tracing::instrument(level = "debug", skip_all)] +pub async fn resolve( + root_id: &PackageId, + root: &PackageInfo, + source: &dyn Source, +) -> Result { + let graph = resolve_dependency_graph(root_id, root, source).await?; + let package = resolve_package(&graph)?; + + Ok(Resolution { graph, package }) +} + +#[derive(Debug, thiserror::Error)] +pub enum ResolveError { + #[error(transparent)] + Registry(anyhow::Error), + #[error("Dependency cycle detected: {}", print_cycle(_0))] + Cycle(Vec), + #[error( + "Multiple versions of {package_name} were found {}", + versions.iter().map(|v| v.to_string()).collect::>().join(", "), + )] + DuplicateVersions { + package_name: String, + versions: Vec, + }, +} + +impl ResolveError { + pub fn as_cycle(&self) -> Option<&[PackageId]> { + match self { + ResolveError::Cycle(cycle) => Some(cycle), + _ => None, + } + } +} + +fn print_cycle(packages: &[PackageId]) -> String { + packages + .iter() + .map(|pkg_id| { + let PackageId { + package_name, + version, + .. + } = pkg_id; + format!("{package_name}@{version}") + }) + .collect::>() + .join(" → ") +} + +async fn resolve_dependency_graph( + root_id: &PackageId, + root: &PackageInfo, + source: &dyn Source, +) -> Result { + let mut dependencies = HashMap::new(); + let mut package_info = HashMap::new(); + let mut distribution = HashMap::new(); + + package_info.insert(root_id.clone(), root.clone()); + + let mut to_visit = VecDeque::new(); + + to_visit.push_back((root_id.clone(), root.clone())); + + while let Some((id, info)) = to_visit.pop_front() { + let mut deps = HashMap::new(); + + for dep in &info.dependencies { + let dep_summary = source + .latest(&dep.pkg) + .await + .map_err(ResolveError::Registry)?; + deps.insert(dep.alias().to_string(), dep_summary.package_id()); + let dep_id = dep_summary.package_id(); + + if dependencies.contains_key(&dep_id) { + // We don't need to visit this dependency again + continue; + } + + let PackageSummary { pkg, dist } = dep_summary; + + to_visit.push_back((dep_id.clone(), pkg.clone())); + package_info.insert(dep_id.clone(), pkg); + distribution.insert(dep_id, dist); + } + + dependencies.insert(id, deps); + } + + let graph = DependencyGraph { + root: root_id.clone(), + dependencies, + package_info, + distribution, + }; + + check_for_cycles(&graph.dependencies, &graph.root)?; + check_for_duplicate_versions(graph.dependencies.keys())?; + log_dependencies(&graph); + + Ok(graph) +} + +#[tracing::instrument(level = "debug", name = "dependencies", skip_all)] +fn log_dependencies(graph: &DependencyGraph) { + let DependencyGraph { + root, dependencies, .. + } = graph; + + tracing::debug!( + %root, + dependency_count=dependencies.len(), + "Resolved dependencies", + ); + + if tracing::enabled!(tracing::Level::TRACE) { + let mut to_print = VecDeque::new(); + let mut visited = HashSet::new(); + to_print.push_back(root); + while let Some(next) = to_print.pop_front() { + visited.insert(next); + + let deps = &dependencies[next]; + let pretty: BTreeMap<_, _> = deps + .iter() + .map(|(name, pkg_id)| (name, pkg_id.to_string())) + .collect(); + + tracing::trace!( + package=%next, + dependencies=?pretty, + ); + + to_print.extend(deps.values().filter(|pkg| !visited.contains(pkg))); + } + } +} + +/// As a workaround for the lack of "proper" dependency merging, we'll make sure +/// only one copy of each package is in the dependency tree. If the same package +/// is included in the tree multiple times, they all need to use the exact same +/// version otherwise it's an error. +fn check_for_duplicate_versions<'a, I>(package_ids: I) -> Result<(), ResolveError> +where + I: Iterator, +{ + let mut package_versions: HashMap<&str, HashSet<&Version>> = HashMap::new(); + + for PackageId { + package_name, + version, + } in package_ids + { + package_versions + .entry(package_name) + .or_default() + .insert(version); + } + + for (package_name, versions) in package_versions { + if versions.len() > 1 { + let mut versions: Vec<_> = versions.into_iter().cloned().collect(); + versions.sort(); + return Err(ResolveError::DuplicateVersions { + package_name: package_name.to_string(), + versions, + }); + } + } + + Ok(()) +} + +/// Check for dependency cycles by doing a Depth First Search of the graph, +/// starting at the root. +fn check_for_cycles( + dependencies: &HashMap>, + root: &PackageId, +) -> Result<(), ResolveError> { + fn search<'a>( + dependencies: &'a HashMap>, + id: &'a PackageId, + visited: &mut HashSet<&'a PackageId>, + stack: &mut Vec<&'a PackageId>, + ) -> Result<(), ResolveError> { + if let Some(index) = stack.iter().position(|item| *item == id) { + // we've detected a cycle! + let mut cycle: Vec<_> = stack.drain(index..).cloned().collect(); + cycle.push(id.clone()); + return Err(ResolveError::Cycle(cycle)); + } + + if visited.contains(&id) { + // We already know this dependency is fine + return Ok(()); + } + + stack.push(id); + for dep in dependencies[id].values() { + search(dependencies, dep, visited, stack)?; + } + stack.pop(); + + Ok(()) + } + + let mut visited = HashSet::new(); + let mut stack = Vec::new(); + + search(dependencies, root, &mut visited, &mut stack) +} + +/// Given a [`DependencyGraph`], figure out how the resulting "package" would +/// look when loaded at runtime. +fn resolve_package(dependency_graph: &DependencyGraph) -> Result { + // FIXME: This code is all super naive and will break the moment there + // are any conflicts or duplicate names. + tracing::trace!("Resolving the package"); + + let mut commands = BTreeMap::new(); + + let filesystem = resolve_filesystem_mapping(dependency_graph)?; + + let mut to_check = VecDeque::new(); + let mut visited = HashSet::new(); + + to_check.push_back(&dependency_graph.root); + + let mut entrypoint = dependency_graph.root_info().entrypoint.clone(); + + while let Some(next) = to_check.pop_front() { + visited.insert(next); + let pkg = &dependency_graph.package_info[next]; + + // set the entrypoint, if necessary + if entrypoint.is_none() { + if let Some(entry) = &pkg.entrypoint { + tracing::trace!( + entrypoint = entry.as_str(), + parent.name=next.package_name.as_str(), + parent.version=%next.version, + "Inheriting the entrypoint", + ); + + entrypoint = Some(entry.clone()); + } + } + + // Blindly copy across all commands + for cmd in &pkg.commands { + let resolved = ItemLocation { + name: cmd.name.clone(), + package: next.clone(), + }; + tracing::trace!( + command.name=cmd.name.as_str(), + pkg.name=next.package_name.as_str(), + pkg.version=%next.version, + "Discovered command", + ); + commands.insert(cmd.name.clone(), resolved); + } + + let remaining_dependencies = dependency_graph.dependencies[next] + .values() + .filter(|id| !visited.contains(id)); + to_check.extend(remaining_dependencies); + } + + Ok(ResolvedPackage { + root_package: dependency_graph.root.clone(), + commands, + entrypoint, + filesystem, + }) +} + +fn resolve_filesystem_mapping( + _dependency_graph: &DependencyGraph, +) -> Result, ResolveError> { + // TODO: Add filesystem mappings to summary and figure out the final mapping + // for this dependency graph. + // See for more. + Ok(Vec::new()) +} + +#[cfg(test)] +mod tests { + use crate::runtime::resolver::{ + inputs::{DistributionInfo, PackageInfo}, + Dependency, InMemorySource, MultiSource, PackageSpecifier, + }; + + use super::*; + + struct RegistryBuilder(InMemorySource); + + impl RegistryBuilder { + fn new() -> Self { + RegistryBuilder(InMemorySource::new()) + } + + fn register(&mut self, name: &str, version: &str) -> AddPackageVersion<'_> { + let pkg = PackageInfo { + name: name.to_string(), + version: version.parse().unwrap(), + dependencies: Vec::new(), + commands: Vec::new(), + entrypoint: None, + }; + let dist = DistributionInfo { + webc: format!("http://localhost/{name}@{version}") + .parse() + .unwrap(), + webc_sha256: [0; 32].into(), + }; + let summary = PackageSummary { pkg, dist }; + + AddPackageVersion { + builder: &mut self.0, + summary, + } + } + + fn finish(&self) -> MultiSource { + let mut registry = MultiSource::new(); + registry.add_source(self.0.clone()); + registry + } + + fn get(&self, package: &str, version: &str) -> &PackageSummary { + let version = version.parse().unwrap(); + self.0.get(package, &version).unwrap() + } + + fn start_dependency_graph(&self) -> DependencyGraphBuilder<'_> { + DependencyGraphBuilder { + dependencies: HashMap::new(), + source: &self.0, + } + } + } + + #[derive(Debug)] + struct AddPackageVersion<'builder> { + builder: &'builder mut InMemorySource, + summary: PackageSummary, + } + + impl<'builder> AddPackageVersion<'builder> { + fn with_dependency(&mut self, name: &str, version_constraint: &str) -> &mut Self { + self.with_aliased_dependency(name, name, version_constraint) + } + + fn with_aliased_dependency( + &mut self, + alias: &str, + name: &str, + version_constraint: &str, + ) -> &mut Self { + let pkg = PackageSpecifier::Registry { + full_name: name.to_string(), + version: version_constraint.parse().unwrap(), + }; + + self.summary.pkg.dependencies.push(Dependency { + alias: alias.to_string(), + pkg, + }); + + self + } + + fn with_command(&mut self, name: &str) -> &mut Self { + self.summary + .pkg + .commands + .push(crate::runtime::resolver::Command { + name: name.to_string(), + }); + self + } + + fn with_entrypoint(&mut self, name: &str) -> &mut Self { + self.summary.pkg.entrypoint = Some(name.to_string()); + self + } + } + + impl<'builder> Drop for AddPackageVersion<'builder> { + fn drop(&mut self) { + let summary = self.summary.clone(); + self.builder.add(summary); + } + } + + #[derive(Debug)] + struct DependencyGraphBuilder<'source> { + dependencies: HashMap>, + source: &'source InMemorySource, + } + + impl<'source> DependencyGraphBuilder<'source> { + fn insert( + &mut self, + package: &str, + version: &str, + ) -> DependencyGraphEntryBuilder<'source, '_> { + let version = version.parse().unwrap(); + let pkg_id = self.source.get(package, &version).unwrap().package_id(); + DependencyGraphEntryBuilder { + builder: self, + pkg_id, + dependencies: HashMap::new(), + } + } + + fn finish(self) -> HashMap> { + self.dependencies + } + } + + #[derive(Debug)] + struct DependencyGraphEntryBuilder<'source, 'builder> { + builder: &'builder mut DependencyGraphBuilder<'source>, + pkg_id: PackageId, + dependencies: HashMap, + } + + impl<'source, 'builder> DependencyGraphEntryBuilder<'source, 'builder> { + fn with_dependency(&mut self, name: &str, version: &str) -> &mut Self { + self.with_aliased_dependency(name, name, version) + } + + fn with_aliased_dependency(&mut self, alias: &str, name: &str, version: &str) -> &mut Self { + let version = version.parse().unwrap(); + let dep_id = self + .builder + .source + .get(name, &version) + .unwrap() + .package_id(); + self.dependencies.insert(alias.to_string(), dep_id); + self + } + } + + impl<'source, 'builder> Drop for DependencyGraphEntryBuilder<'source, 'builder> { + fn drop(&mut self) { + self.builder + .dependencies + .insert(self.pkg_id.clone(), self.dependencies.clone()); + } + } + + macro_rules! map { + ( + $( + $key:expr => $value:expr + ),* + $(,)? + ) => { + vec![ + $( ($key.into(), $value.into()) ),* + ] + .into_iter() + .collect() + } + } + + #[tokio::test] + async fn no_deps_and_no_commands() { + let mut builder = RegistryBuilder::new(); + builder.register("root", "1.0.0"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph.insert("root", "1.0.0"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: BTreeMap::new(), + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + async fn no_deps_one_command() { + let mut builder = RegistryBuilder::new(); + builder.register("root", "1.0.0").with_command("asdf"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph.insert("root", "1.0.0"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: map! { + "asdf" => ItemLocation { + name: "asdf".to_string(), + package: root.package_id(), + }, + }, + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + async fn single_dependency() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("dep", "=1.0.0"); + builder.register("dep", "1.0.0"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph + .insert("root", "1.0.0") + .with_dependency("dep", "1.0.0"); + dependency_graph.insert("dep", "1.0.0"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: BTreeMap::new(), + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + async fn linear_dependency_chain() { + let mut builder = RegistryBuilder::new(); + builder + .register("first", "1.0.0") + .with_dependency("second", "=1.0.0"); + builder + .register("second", "1.0.0") + .with_dependency("third", "=1.0.0"); + builder.register("third", "1.0.0"); + let registry = builder.finish(); + let root = builder.get("first", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph + .insert("first", "1.0.0") + .with_dependency("second", "1.0.0"); + dependency_graph + .insert("second", "1.0.0") + .with_dependency("third", "1.0.0"); + dependency_graph.insert("third", "1.0.0"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: BTreeMap::new(), + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + async fn pick_the_latest_dependency_when_multiple_are_possible() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("dep", "^1.0.0"); + builder.register("dep", "1.0.0"); + builder.register("dep", "1.0.1"); + builder.register("dep", "1.0.2"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph + .insert("root", "1.0.0") + .with_dependency("dep", "1.0.2"); + dependency_graph.insert("dep", "1.0.2"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: BTreeMap::new(), + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + async fn version_merging_isnt_implemented_yet() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("first", "=1.0.0") + .with_dependency("second", "=1.0.0"); + builder + .register("first", "1.0.0") + .with_dependency("common", "^1.0.0"); + builder + .register("second", "1.0.0") + .with_dependency("common", ">1.1,<1.3"); + builder.register("common", "1.0.0"); + builder.register("common", "1.1.0"); + builder.register("common", "1.2.0"); + builder.register("common", "1.5.0"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let result = resolve(&root.package_id(), &root.pkg, ®istry).await; + + match result { + Err(ResolveError::DuplicateVersions { + package_name, + versions, + }) => { + assert_eq!(package_name, "common"); + assert_eq!( + versions, + [ + Version::parse("1.2.0").unwrap(), + Version::parse("1.5.0").unwrap(), + ] + ); + } + _ => unreachable!("Expected a duplicate versions error, found {:?}", result), + } + } + + #[tokio::test] + #[ignore = "Version merging isn't implemented"] + async fn merge_compatible_versions() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("first", "=1.0.0") + .with_dependency("second", "=1.0.0"); + builder + .register("first", "1.0.0") + .with_dependency("common", "^1.0.0"); + builder + .register("second", "1.0.0") + .with_dependency("common", ">1.1,<1.3"); + builder.register("common", "1.0.0"); + builder.register("common", "1.1.0"); + builder.register("common", "1.2.0"); + builder.register("common", "1.5.0"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph + .insert("root", "1.0.0") + .with_dependency("first", "1.0.0") + .with_dependency("second", "1.0.0"); + dependency_graph + .insert("first", "1.0.0") + .with_dependency("common", "1.2.0"); + dependency_graph + .insert("second", "1.0.0") + .with_dependency("common", "1.2.0"); + dependency_graph.insert("common", "1.2.0"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: BTreeMap::new(), + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + async fn commands_from_dependencies_end_up_in_the_package() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("first", "=1.0.0") + .with_dependency("second", "=1.0.0"); + builder + .register("first", "1.0.0") + .with_command("first-command"); + builder + .register("second", "1.0.0") + .with_command("second-command"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph + .insert("root", "1.0.0") + .with_dependency("first", "1.0.0") + .with_dependency("second", "1.0.0"); + dependency_graph.insert("first", "1.0.0"); + dependency_graph.insert("second", "1.0.0"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: map! { + "first-command" => ItemLocation { + name: "first-command".to_string(), + package: builder.get("first", "1.0.0").package_id(), + }, + "second-command" => ItemLocation { + name: "second-command".to_string(), + package: builder.get("second", "1.0.0").package_id(), + }, + }, + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + #[ignore = "TODO: Re-order the way commands are resolved"] + async fn commands_in_root_shadow_their_dependencies() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("dep", "=1.0.0") + .with_command("command"); + builder.register("dep", "1.0.0").with_command("command"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + let mut dependency_graph = builder.start_dependency_graph(); + dependency_graph + .insert("root", "1.0.0") + .with_dependency("dep", "1.0.0"); + dependency_graph.insert("dep", "1.0.0"); + assert_eq!(resolution.graph.dependencies, dependency_graph.finish()); + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: map! { + "command" => ItemLocation { + name: "command".to_string(), + package: builder.get("root", "1.0.0").package_id(), + }, + }, + entrypoint: None, + filesystem: Vec::new(), + } + ); + } + + #[tokio::test] + async fn cyclic_dependencies() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("dep", "=1.0.0"); + builder + .register("dep", "1.0.0") + .with_dependency("root", "=1.0.0"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let err = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap_err(); + + let cycle = err.as_cycle().unwrap().to_vec(); + assert_eq!( + cycle, + [ + builder.get("root", "1.0.0").package_id(), + builder.get("dep", "1.0.0").package_id(), + builder.get("root", "1.0.0").package_id(), + ] + ); + } + + #[tokio::test] + async fn entrypoint_is_inherited() { + let mut builder = RegistryBuilder::new(); + builder + .register("root", "1.0.0") + .with_dependency("dep", "=1.0.0"); + builder + .register("dep", "1.0.0") + .with_command("entry") + .with_entrypoint("entry"); + let registry = builder.finish(); + let root = builder.get("root", "1.0.0"); + + let resolution = resolve(&root.package_id(), &root.pkg, ®istry) + .await + .unwrap(); + + assert_eq!( + resolution.package, + ResolvedPackage { + root_package: root.package_id(), + commands: map! { + "entry" => ItemLocation { + name: "entry".to_string(), + package: builder.get("dep", "1.0.0").package_id(), + }, + }, + entrypoint: Some("entry".to_string()), + filesystem: Vec::new(), + } + ); + } + + #[test] + fn cyclic_error_message() { + let cycle = [ + PackageId { + package_name: "root".to_string(), + version: "1.0.0".parse().unwrap(), + }, + PackageId { + package_name: "dep".to_string(), + version: "1.0.0".parse().unwrap(), + }, + PackageId { + package_name: "root".to_string(), + version: "1.0.0".parse().unwrap(), + }, + ]; + + let message = print_cycle(&cycle); + + assert_eq!(message, "root@1.0.0 → dep@1.0.0 → root@1.0.0"); + } +} diff --git a/lib/wasi/src/runtime/resolver/source.rs b/lib/wasi/src/runtime/resolver/source.rs new file mode 100644 index 00000000000..bcaf8b1c03f --- /dev/null +++ b/lib/wasi/src/runtime/resolver/source.rs @@ -0,0 +1,43 @@ +use std::fmt::Debug; + +use anyhow::Error; + +use crate::runtime::resolver::{PackageSpecifier, PackageSummary}; + +/// Something that packages can be downloaded from. +#[async_trait::async_trait] +pub trait Source: Sync + Debug { + /// Ask this source which packages would satisfy a particular + /// [`Dependency`][dep] constraint. + /// + /// # Assumptions + /// + /// It is not an error if there are no package versions that may satisfy + /// the dependency, even if the [`Source`] doesn't know of a package + /// with that name. + /// + /// [dep]: crate::runtime::resolver::Dependency + /// [reg]: crate::runtime::resolver::Registry + async fn query(&self, package: &PackageSpecifier) -> Result, Error>; + + /// Run [`Source::query()`] and get the [`PackageSummary`] for the latest + /// version. + async fn latest(&self, pkg: &PackageSpecifier) -> Result { + let candidates = self.query(pkg).await?; + candidates + .into_iter() + .max_by(|left, right| left.pkg.version.cmp(&right.pkg.version)) + .ok_or_else(|| Error::msg("Couldn't find a package version satisfying that constraint")) + } +} + +#[async_trait::async_trait] +impl Source for D +where + D: std::ops::Deref + Debug + Send + Sync, + S: Source + ?Sized + Send + Sync + 'static, +{ + async fn query(&self, package: &PackageSpecifier) -> Result, Error> { + (**self).query(package).await + } +} diff --git a/lib/wasi/src/runtime/resolver/types.rs b/lib/wasi/src/runtime/resolver/types.rs deleted file mode 100644 index 19ef9380d52..00000000000 --- a/lib/wasi/src/runtime/resolver/types.rs +++ /dev/null @@ -1,200 +0,0 @@ -use std::{ - collections::BTreeMap, - fmt::{Debug, Display}, - ops::Deref, - path::PathBuf, - str::FromStr, -}; - -use anyhow::Context; -use semver::VersionReq; - -use crate::{bin_factory::BinaryPackage, http::HttpClient, runtime::resolver::InMemoryCache}; - -#[async_trait::async_trait] -pub trait PackageResolver: Debug { - /// Resolve a package, loading all dependencies. - async fn resolve_package( - &self, - pkg: &WebcIdentifier, - client: &(dyn HttpClient + Send + Sync), - ) -> Result; - - /// Wrap the [`PackageResolver`] in basic in-memory cache. - fn with_cache(self) -> InMemoryCache - where - Self: Sized, - { - InMemoryCache::new(self) - } -} - -#[async_trait::async_trait] -impl PackageResolver for D -where - D: Deref + Debug + Send + Sync, - R: PackageResolver + Send + Sync + ?Sized, -{ - /// Resolve a package, loading all dependencies. - async fn resolve_package( - &self, - pkg: &WebcIdentifier, - client: &(dyn HttpClient + Send + Sync), - ) -> Result { - (**self).resolve_package(pkg, client).await - } -} - -#[derive(Debug, PartialEq, Eq, Hash, Clone)] -pub struct WebcIdentifier { - /// The package's full name (i.e. `wasmer/wapm2pirita`). - pub full_name: String, - pub locator: Locator, - /// A semver-compliant version constraint. - pub version: VersionReq, -} - -impl WebcIdentifier { - pub fn parse(ident: &str) -> Result { - ident.parse() - } -} - -impl FromStr for WebcIdentifier { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - // TODO: Replace this with something more rigorous that can also handle - // the locator field - let (full_name, version) = match s.split_once('@') { - Some((n, v)) => (n, v), - None => (s, "*"), - }; - - let invalid_character = full_name - .char_indices() - .find(|(_, c)| !matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '.'| '-'|'_' | '/')); - if let Some((index, c)) = invalid_character { - anyhow::bail!("Invalid character, {c:?}, at offset {index}"); - } - - let version = version - .parse() - .with_context(|| format!("Invalid version number, \"{version}\""))?; - - Ok(WebcIdentifier { - full_name: full_name.to_string(), - locator: Locator::Registry, - version, - }) - } -} - -impl Display for WebcIdentifier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let WebcIdentifier { - full_name, - locator, - version, - } = self; - - write!(f, "{full_name}@{version}")?; - - match locator { - Locator::Registry => {} - Locator::Local(path) => write!(f, " ({})", path.display())?, - Locator::Url(url) => write!(f, " ({url})")?, - } - - Ok(()) - } -} - -#[derive(Debug, PartialEq, Eq, Hash, Clone)] -pub enum Locator { - /// The current registry. - Registry, - /// A package on the current machine. - Local(PathBuf), - /// An exact URL. - Url(url::Url), -} - -#[derive(Debug, thiserror::Error)] -pub enum ResolverError { - #[error("Unknown package, {_0}")] - UnknownPackage(WebcIdentifier), - #[error(transparent)] - Other(Box), -} - -#[derive(Debug, Clone)] -pub struct ResolvedPackage { - pub commands: BTreeMap, - pub entrypoint: Option, - /// A mapping from paths to the volumes that should be mounted there. - pub filesystem: Vec, -} - -impl From for BinaryPackage { - fn from(_: ResolvedPackage) -> Self { - todo!() - } -} - -impl From for ResolvedPackage { - fn from(_: BinaryPackage) -> Self { - todo!() - } -} - -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct ResolvedCommand { - pub metadata: webc::metadata::Command, -} - -#[derive(Debug, Clone)] -pub struct FileSystemMapping { - pub mount_path: PathBuf, - pub volume: webc::compat::Volume, -} - -#[cfg(test)] -pub(crate) mod tests { - use super::*; - - #[test] - fn parse_some_webc_identifiers() { - let inputs = [ - ( - "first", - WebcIdentifier { - full_name: "first".to_string(), - locator: Locator::Registry, - version: VersionReq::STAR, - }, - ), - ( - "namespace/package", - WebcIdentifier { - full_name: "namespace/package".to_string(), - locator: Locator::Registry, - version: VersionReq::STAR, - }, - ), - ( - "namespace/package@1.0.0", - WebcIdentifier { - full_name: "namespace/package".to_string(), - locator: Locator::Registry, - version: "1.0.0".parse().unwrap(), - }, - ), - ]; - - for (src, expected) in inputs { - let parsed = WebcIdentifier::from_str(src).unwrap(); - assert_eq!(parsed, expected); - } - } -} diff --git a/lib/wasi/src/runtime/resolver/wapm_source.rs b/lib/wasi/src/runtime/resolver/wapm_source.rs new file mode 100644 index 00000000000..d6e80c4345f --- /dev/null +++ b/lib/wasi/src/runtime/resolver/wapm_source.rs @@ -0,0 +1,296 @@ +use std::sync::Arc; + +use anyhow::{Context, Error}; +use semver::Version; +use url::Url; +use webc::metadata::Manifest; + +use crate::{ + http::{HttpClient, HttpRequest, HttpResponse}, + runtime::resolver::{ + DistributionInfo, PackageInfo, PackageSpecifier, PackageSummary, Source, WebcHash, + }, +}; + +/// A [`Source`] which will resolve dependencies by pinging a WAPM-like GraphQL +/// endpoint. +#[derive(Debug, Clone)] +pub struct WapmSource { + registry_endpoint: Url, + client: Arc, +} + +impl WapmSource { + pub const WAPM_DEV_ENDPOINT: &str = "https://registry.wapm.dev/graphql"; + pub const WAPM_PROD_ENDPOINT: &str = "https://registry.wapm.io/graphql"; + + pub fn new(registry_endpoint: Url, client: Arc) -> Self { + WapmSource { + registry_endpoint, + client, + } + } +} + +#[async_trait::async_trait] +impl Source for WapmSource { + #[tracing::instrument(level = "debug", skip_all, fields(%package))] + async fn query(&self, package: &PackageSpecifier) -> Result, Error> { + let (full_name, version_constraint) = match package { + PackageSpecifier::Registry { full_name, version } => (full_name, version), + _ => return Ok(Vec::new()), + }; + + #[derive(serde::Serialize)] + struct Body { + query: String, + } + + let body = Body { + query: WAPM_WEBC_QUERY_ALL.replace("$NAME", full_name), + }; + let body = serde_json::to_string(&body)?; + + let request = HttpRequest { + url: self.registry_endpoint.to_string(), + method: "POST".to_string(), + body: Some(body.into_bytes()), + headers: vec![ + ( + "User-Agent".to_string(), + crate::http::USER_AGENT.to_string(), + ), + ("Content-Type".to_string(), "application/json".to_string()), + ], + options: Default::default(), + }; + + let HttpResponse { + ok, + status, + status_text, + body, + .. + } = self.client.request(request).await?; + + if !ok { + let url = &self.registry_endpoint; + anyhow::bail!("\"{url}\" replied with {status} {status_text}"); + } + + let body = body.unwrap_or_default(); + let response: WapmWebQuery = + serde_json::from_slice(&body).context("Unable to deserialize the response")?; + + let mut summaries = Vec::new(); + + let versions = match response.data.get_package { + Some(WapmWebQueryGetPackage { versions }) => versions, + None => return Ok(Vec::new()), + }; + + for pkg_version in versions { + let version = Version::parse(&pkg_version.version)?; + if version_constraint.matches(&version) { + let summary = decode_summary(pkg_version)?; + summaries.push(summary); + } + } + + Ok(summaries) + } +} + +fn decode_summary(pkg_version: WapmWebQueryGetPackageVersion) -> Result { + let WapmWebQueryGetPackageVersion { + manifest, + distribution: + WapmWebQueryGetPackageVersionDistribution { + pirita_download_url, + pirita_sha256_hash, + }, + .. + } = pkg_version; + + let manifest: Manifest = serde_json::from_slice(manifest.as_bytes()) + .context("Unable to deserialize the manifest")?; + + let mut webc_sha256 = [0_u8; 32]; + hex::decode_to_slice(&pirita_sha256_hash, &mut webc_sha256)?; + let webc_sha256 = WebcHash::from_bytes(webc_sha256); + + Ok(PackageSummary { + pkg: PackageInfo::from_manifest(&manifest)?, + dist: DistributionInfo { + webc: pirita_download_url.parse()?, + webc_sha256, + }, + }) +} + +#[allow(dead_code)] +pub const WAPM_WEBC_QUERY_ALL: &str = r#"{ + getPackage(name: "$NAME") { + versions { + version + piritaManifest + distribution { + piritaDownloadUrl + piritaSha256Hash + } + } + } +}"#; + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct WapmWebQuery { + #[serde(rename = "data")] + pub data: WapmWebQueryData, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct WapmWebQueryData { + #[serde(rename = "getPackage")] + pub get_package: Option, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct WapmWebQueryGetPackage { + pub versions: Vec, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct WapmWebQueryGetPackageVersion { + pub version: String, + /// A JSON string containing a [`Manifest`] definition. + #[serde(rename = "piritaManifest")] + pub manifest: String, + pub distribution: WapmWebQueryGetPackageVersionDistribution, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] +pub struct WapmWebQueryGetPackageVersionDistribution { + #[serde(rename = "piritaDownloadUrl")] + pub pirita_download_url: String, + #[serde(rename = "piritaSha256Hash")] + pub pirita_sha256_hash: String, +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use crate::runtime::resolver::inputs::{DistributionInfo, PackageInfo}; + + use super::*; + + const WASMER_PACK_CLI_REQUEST: &[u8] = br#"{"query": "{\n getPackage(name: \"wasmer/wasmer-pack-cli\") {\n versions {\n version\n piritaManifest\n distribution {\n piritaDownloadUrl\n piritaSha256Hash\n }\n }\n }\n}"}"#; + const WASMER_PACK_CLI_RESPONSE: &[u8] = br#"{"data":{"getPackage":{"versions":[{"version":"0.7.0","piritaManifest":"{\"atoms\": {\"wasmer-pack\": {\"kind\": \"https://webc.org/kind/wasm\", \"signature\": \"sha256:FesCIAS6URjrIAAyy4G5u5HjJjGQBLGmnafjHPHRvqo=\"}}, \"package\": {\"wapm\": {\"name\": \"wasmer/wasmer-pack-cli\", \"readme\": {\"path\": \"/home/consulting/Documents/wasmer/wasmer-pack/crates/cli/../../README.md\", \"volume\": \"metadata\"}, \"license\": \"MIT\", \"version\": \"0.7.0\", \"homepage\": \"https://wasmer.io/\", \"repository\": \"https://github.com/wasmerio/wasmer-pack\", \"description\": \"A code generator that lets you treat WebAssembly modules like native dependencies.\"}}, \"commands\": {\"wasmer-pack\": {\"runner\": \"https://webc.org/runner/wasi/command@unstable_\", \"annotations\": {\"wasi\": {\"atom\": \"wasmer-pack\", \"package\": \"wasmer/wasmer-pack-cli\", \"main_args\": null}}}}, \"entrypoint\": \"wasmer-pack\"}","distribution":{"piritaDownloadUrl":"https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.7.0-0e384e88-ab70-11ed-b0ed-b22ba48456e7.webc","piritaSha256Hash":"d085869201aa602673f70abbd5e14e5a6936216fa93314c5b103cda3da56e29e"}},{"version":"0.6.0","piritaManifest":"{\"atoms\": {\"wasmer-pack\": {\"kind\": \"https://webc.org/kind/wasm\", \"signature\": \"sha256:CzzhNaav3gjBkCJECGbk7e+qAKurWbcIAzQvEqsr2Co=\"}}, \"package\": {\"wapm\": {\"name\": \"wasmer/wasmer-pack-cli\", \"readme\": {\"path\": \"/home/consulting/Documents/wasmer/wasmer-pack/crates/cli/../../README.md\", \"volume\": \"metadata\"}, \"license\": \"MIT\", \"version\": \"0.6.0\", \"homepage\": \"https://wasmer.io/\", \"repository\": \"https://github.com/wasmerio/wasmer-pack\", \"description\": \"A code generator that lets you treat WebAssembly modules like native dependencies.\"}}, \"commands\": {\"wasmer-pack\": {\"runner\": \"https://webc.org/runner/wasi/command@unstable_\", \"annotations\": {\"wasi\": {\"atom\": \"wasmer-pack\", \"package\": \"wasmer/wasmer-pack-cli\", \"main_args\": null}}}}, \"entrypoint\": \"wasmer-pack\"}","distribution":{"piritaDownloadUrl":"https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.6.0-654a2ed8-875f-11ed-90e2-c6aeb50490de.webc","piritaSha256Hash":"7e1add1640d0037ff6a726cd7e14ea36159ec2db8cb6debd0e42fa2739bea52b"}},{"version":"0.5.3","piritaManifest":"{\"atoms\": {\"wasmer-pack\": {\"kind\": \"https://webc.org/kind/wasm\", \"signature\": \"sha256:qdiJVfpi4icJXdR7Y5US/pJ4PjqbAq9PkU+obMZIMlE=\"}}, \"package\": {\"wapm\": {\"name\": \"wasmer/wasmer-pack-cli\", \"readme\": {\"path\": \"/home/runner/work/wasmer-pack/wasmer-pack/crates/cli/../../README.md\", \"volume\": \"metadata\"}, \"license\": \"MIT\", \"version\": \"0.5.3\", \"homepage\": \"https://wasmer.io/\", \"repository\": \"https://github.com/wasmerio/wasmer-pack\", \"description\": \"A code generator that lets you treat WebAssembly modules like native dependencies.\"}}, \"commands\": {\"wasmer-pack\": {\"runner\": \"https://webc.org/runner/wasi/command@unstable_\", \"annotations\": {\"wasi\": {\"atom\": \"wasmer-pack\", \"package\": \"wasmer/wasmer-pack-cli\", \"main_args\": null}}}}, \"entrypoint\": \"wasmer-pack\"}","distribution":{"piritaDownloadUrl":"https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.5.3-4a2b9764-728c-11ed-9fe4-86bf77232c64.webc","piritaSha256Hash":"44fdcdde23d34175887243d7c375e4e4a7e6e2cd1ae063ebffbede4d1f68f14a"}},{"version":"0.5.2","piritaManifest":"{\"atoms\": {\"wasmer-pack\": {\"kind\": \"https://webc.org/kind/wasm\", \"signature\": \"sha256:xiwrUFAo+cU1xW/IE6MVseiyjNGHtXooRlkYKiOKzQc=\"}}, \"package\": {\"wapm\": {\"name\": \"wasmer/wasmer-pack-cli\", \"readme\": {\"path\": \"/home/consulting/Documents/wasmer/wasmer-pack/crates/cli/../../README.md\", \"volume\": \"metadata\"}, \"license\": \"MIT\", \"version\": \"0.5.2\", \"homepage\": \"https://wasmer.io/\", \"repository\": \"https://github.com/wasmerio/wasmer-pack\", \"description\": \"A code generator that lets you treat WebAssembly modules like native dependencies.\"}}, \"commands\": {\"wasmer-pack\": {\"runner\": \"https://webc.org/runner/wasi/command@unstable_\", \"annotations\": {\"wasi\": {\"atom\": \"wasmer-pack\", \"package\": \"wasmer/wasmer-pack-cli\", \"main_args\": null}}}}, \"entrypoint\": \"wasmer-pack\"}","distribution":{"piritaDownloadUrl":"https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.5.2.webc","piritaSha256Hash":"d1dbc8168c3a2491a7158017a9c88df9e0c15bed88ebcd6d9d756e4b03adde95"}},{"version":"0.5.1","piritaManifest":"{\"atoms\": {\"wasmer-pack\": {\"kind\": \"https://webc.org/kind/wasm\", \"signature\": \"sha256:TliPwutfkFvRite/3/k3OpLqvV0EBKGwyp3L5UjCuEI=\"}}, \"package\": {\"wapm\": {\"name\": \"wasmer/wasmer-pack-cli\", \"readme\": {\"path\": \"/home/runner/work/wasmer-pack/wasmer-pack/crates/cli/../../README.md\", \"volume\": \"metadata\"}, \"license\": \"MIT\", \"version\": \"0.5.1\", \"homepage\": \"https://wasmer.io/\", \"repository\": \"https://github.com/wasmerio/wasmer-pack\", \"description\": \"A code generator that lets you treat WebAssembly modules like native dependencies.\"}}, \"commands\": {\"wasmer-pack\": {\"runner\": \"https://webc.org/runner/wasi/command@unstable_\", \"annotations\": {\"wasi\": {\"atom\": \"wasmer-pack\", \"package\": \"wasmer/wasmer-pack-cli\", \"main_args\": null}}}}, \"entrypoint\": \"wasmer-pack\"}","distribution":{"piritaDownloadUrl":"https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.5.1.webc","piritaSha256Hash":"c42924619660e2befd69b5c72729388985dcdcbf912d51a00015237fec3e1ade"}},{"version":"0.5.0","piritaManifest":"{\"atoms\": {\"wasmer-pack\": {\"kind\": \"https://webc.org/kind/wasm\", \"signature\": \"sha256:6UD7NS4KtyNYa3TcnKOvd+kd3LxBCw+JQ8UWRpMXeC0=\"}}, \"package\": {\"wapm\": {\"name\": \"wasmer/wasmer-pack-cli\", \"readme\": {\"path\": \"README.md\", \"volume\": \"metadata\"}, \"license\": \"MIT\", \"version\": \"0.5.0\", \"homepage\": \"https://wasmer.io/\", \"repository\": \"https://github.com/wasmerio/wasmer-pack\", \"description\": \"A code generator that lets you treat WebAssembly modules like native dependencies.\"}}, \"commands\": {\"wasmer-pack\": {\"runner\": \"https://webc.org/runner/wasi/command@unstable_\", \"annotations\": {\"wasi\": {\"atom\": \"wasmer-pack\", \"package\": \"wasmer/wasmer-pack-cli\", \"main_args\": null}}}}, \"entrypoint\": \"wasmer-pack\"}","distribution":{"piritaDownloadUrl":"https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.5.0.webc","piritaSha256Hash":"d30ca468372faa96469163d2d1546dd34be9505c680677e6ab86a528a268e5f5"}},{"version":"0.5.0-rc.1","piritaManifest":"{\"atoms\": {\"wasmer-pack\": {\"kind\": \"https://webc.org/kind/wasm\", \"signature\": \"sha256:ThybHIc2elJEcDdQiq5ffT1TVaNs70+WAqoKw4Tkh3E=\"}}, \"package\": {\"wapm\": {\"name\": \"wasmer/wasmer-pack-cli\", \"readme\": {\"path\": \"README.md\", \"volume\": \"metadata\"}, \"license\": \"MIT\", \"version\": \"0.5.0-rc.1\", \"homepage\": \"https://wasmer.io/\", \"repository\": \"https://github.com/wasmerio/wasmer-pack\", \"description\": \"A code generator that lets you treat WebAssembly modules like native dependencies.\"}}, \"commands\": {\"wasmer-pack\": {\"runner\": \"https://webc.org/runner/wasi/command@unstable_\", \"annotations\": {\"wasi\": {\"atom\": \"wasmer-pack\", \"package\": \"wasmer/wasmer-pack-cli\", \"main_args\": null}}}}, \"entrypoint\": \"wasmer-pack\"}","distribution":{"piritaDownloadUrl":"https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.5.0-rc.1.webc","piritaSha256Hash":"0cd5d6e4c33c92c52784afed3a60c056953104d719717948d4663ff2521fe2bb"}}]}}}"#; + + #[derive(Debug, Default)] + struct DummyClient; + + impl HttpClient for DummyClient { + fn request( + &self, + request: HttpRequest, + ) -> futures::future::BoxFuture<'_, Result> { + // You can check the response with: + // curl https://registry.wapm.io/graphql \ + // -H "Content-Type: application/json" \ + // -X POST \ + // -d '@wasmer_pack_cli_request.json' > wasmer_pack_cli_response.json + assert_eq!(request.method, "POST"); + assert_eq!(request.url, WapmSource::WAPM_PROD_ENDPOINT); + let headers: HashMap = request.headers.into_iter().collect(); + assert_eq!(headers.len(), 2); + assert_eq!(headers["User-Agent"], crate::http::USER_AGENT); + assert_eq!(headers["Content-Type"], "application/json"); + + let body: serde_json::Value = + serde_json::from_slice(request.body.as_deref().unwrap()).unwrap(); + let expected_body: serde_json::Value = + serde_json::from_slice(WASMER_PACK_CLI_REQUEST).unwrap(); + assert_eq!(body, expected_body); + + Box::pin(async { + Ok(HttpResponse { + pos: 0, + body: Some(WASMER_PACK_CLI_RESPONSE.to_vec()), + ok: true, + redirected: false, + status: 200, + status_text: "OK".to_string(), + headers: Vec::new(), + }) + }) + } + } + + #[tokio::test] + async fn run_known_query() { + let client = Arc::new(DummyClient::default()); + let registry_endpoint = WapmSource::WAPM_PROD_ENDPOINT.parse().unwrap(); + let request = PackageSpecifier::Registry { + full_name: "wasmer/wasmer-pack-cli".to_string(), + version: "^0.6".parse().unwrap(), + }; + let source = WapmSource::new(registry_endpoint, client); + + let summaries = source.query(&request).await.unwrap(); + + assert_eq!( + summaries, + [PackageSummary { + pkg: PackageInfo { + name: "wasmer/wasmer-pack-cli".to_string(), + version: Version::new(0, 6, 0), + dependencies: Vec::new(), + commands: vec![ + crate::runtime::resolver::Command { + name: "wasmer-pack".to_string(), + }, + ], + entrypoint: Some("wasmer-pack".to_string()), + }, + dist: DistributionInfo { + webc: "https://registry-cdn.wapm.io/packages/wasmer/wasmer-pack-cli/wasmer-pack-cli-0.6.0-654a2ed8-875f-11ed-90e2-c6aeb50490de.webc".parse().unwrap(), + webc_sha256: WebcHash::from_bytes([ + 126, + 26, + 221, + 22, + 64, + 208, + 3, + 127, + 246, + 167, + 38, + 205, + 126, + 20, + 234, + 54, + 21, + 158, + 194, + 219, + 140, + 182, + 222, + 189, + 14, + 66, + 250, + 39, + 57, + 190, + 165, + 43, + ]), + } + }] + ); + } +} diff --git a/lib/wasi/src/runtime/resolver/web_source.rs b/lib/wasi/src/runtime/resolver/web_source.rs new file mode 100644 index 00000000000..bc715e33e30 --- /dev/null +++ b/lib/wasi/src/runtime/resolver/web_source.rs @@ -0,0 +1,568 @@ +use std::{ + fmt::Write as _, + io::Write, + path::{Path, PathBuf}, + sync::Arc, + time::{Duration, SystemTime}, +}; + +use anyhow::{Context, Error}; +use sha2::{Digest, Sha256}; +use tempfile::NamedTempFile; +use url::Url; +use webc::compat::Container; + +use crate::{ + http::{HttpClient, HttpRequest, HttpResponse, USER_AGENT}, + runtime::resolver::{ + DistributionInfo, PackageInfo, PackageSpecifier, PackageSummary, Source, WebcHash, + }, +}; + +/// A [`Source`] which can query arbitrary packages on the internet. +/// +/// # Implementation Notes +/// +/// Unlike other [`Source`] implementations, this will need to download +/// a package if it is a [`PackageSpecifier::Url`]. Optionally, these downloaded +/// packages can be cached in a local directory. +/// +/// After a certain period ([`WebSource::with_retry_period()`]), the +/// [`WebSource`] will re-check the uploaded source to make sure the cached +/// package is still valid. This checking is done using the [ETag][ETag] header, +/// if available. +/// +/// [ETag]: https://en.wikipedia.org/wiki/HTTP_ETag +#[derive(Debug, Clone)] +pub struct WebSource { + cache_dir: PathBuf, + client: Arc, + retry_period: Duration, +} + +impl WebSource { + pub const DEFAULT_RETRY_PERIOD: Duration = Duration::from_secs(5 * 60); + + pub fn new(cache_dir: impl Into, client: Arc) -> Self { + WebSource { + cache_dir: cache_dir.into(), + client, + retry_period: WebSource::DEFAULT_RETRY_PERIOD, + } + } + + /// Set the period after which an item should be marked as "possibly dirty" + /// in the cache. + pub fn with_retry_period(self, retry_period: Duration) -> Self { + WebSource { + retry_period, + ..self + } + } + + /// Get the directory that is typically used when caching downloaded + /// packages inside `$WASMER_DIR`. + pub fn default_cache_dir(wasmer_dir: impl AsRef) -> PathBuf { + wasmer_dir.as_ref().join("downloads") + } + + /// Download a package and cache it locally. + #[tracing::instrument(skip_all, fields(%url))] + async fn get_locally_cached_file(&self, url: &Url) -> Result { + // This function is a bit tricky because we go to great lengths to avoid + // unnecessary downloads. + + let cache_key = sha256(url.as_str().as_bytes()); + + // First, we figure out some basic information about the item + let cache_info = CacheInfo::for_url(&cache_key, &self.cache_dir); + + // Next we check if we definitely got a cache hit + let state = match classify_cache_using_mtime(cache_info, self.retry_period) { + Ok(path) => { + tracing::debug!(path=%path.display(), "Cache hit"); + return Ok(path); + } + Err(s) => s, + }; + + // Let's check if the ETag is still valid + if let CacheState::PossiblyDirty { etag, path } = &state { + match self.get_etag(url).await { + Ok(new_etag) if new_etag == *etag => { + return Ok(path.clone()); + } + Ok(different_etag) => { + tracing::debug!( + original_etag=%etag, + new_etag=%different_etag, + path=%path.display(), + "File has been updated. Redownloading.", + ); + } + Err(e) => { + tracing::debug!( + error=&*e, + path=%path.display(), + original_etag=%etag, + "Unable to check if the etag is out of date", + ) + } + } + } + + // Oh well, looks like we'll need to download it again + let (bytes, etag) = match self.fetch(url).await { + Ok((bytes, etag)) => (bytes, etag), + Err(e) => { + tracing::warn!(error = &*e, "Download failed"); + match state.take_path() { + Some(path) => { + tracing::debug!( + path=%path.display(), + "Using a possibly stale cached file", + ); + return Ok(path); + } + None => { + return Err(e); + } + } + } + }; + + let path = self.cache_dir.join(&cache_key); + self.atomically_save_file(&path, &bytes) + .await + .with_context(|| { + format!( + "Unable to save the downloaded file to \"{}\"", + path.display() + ) + })?; + + if let Some(etag) = etag { + if let Err(e) = self + .atomically_save_file(path.with_extension("etag"), etag.as_bytes()) + .await + { + tracing::warn!( + error=&*e, + %etag, + %url, + path=%path.display(), + "Unable to save the etag file", + ) + } + } + + Ok(path) + } + + async fn atomically_save_file(&self, path: impl AsRef, data: &[u8]) -> Result<(), Error> { + // FIXME: This will all block the main thread + + let path = path.as_ref(); + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("Unable to create \"{}\"", parent.display()))?; + } + + let mut temp = NamedTempFile::new_in(&self.cache_dir)?; + temp.write_all(data)?; + temp.as_file().sync_all()?; + temp.persist(path)?; + + Ok(()) + } + + async fn get_etag(&self, url: &Url) -> Result { + let request = HttpRequest { + url: url.to_string(), + method: "HEAD".to_string(), + headers: headers(), + body: None, + options: Default::default(), + }; + let HttpResponse { + ok, + status, + status_text, + headers, + .. + } = self.client.request(request).await?; + + if !ok { + anyhow::bail!("HEAD request to \"{url}\" failed with {status} {status_text}"); + } + + let etag = headers + .into_iter() + .find(|(name, _)| name.to_string().to_lowercase() == "etag") + .map(|(_, value)| value) + .context("The HEAD request didn't contain an ETag header`")?; + + Ok(etag) + } + + async fn fetch(&self, url: &Url) -> Result<(Vec, Option), Error> { + let request = HttpRequest { + url: url.to_string(), + method: "GET".to_string(), + headers: headers(), + body: None, + options: Default::default(), + }; + let HttpResponse { + ok, + status, + status_text, + headers, + body, + .. + } = self.client.request(request).await?; + + if !ok { + anyhow::bail!("HEAD request to \"{url}\" failed with {status} {status_text}"); + } + + let body = body.context("Response didn't contain a body")?; + + let etag = headers + .into_iter() + .find(|(name, _)| name.to_string().to_lowercase() == "etag") + .map(|(_, value)| value); + + Ok((body, etag)) + } +} + +fn headers() -> Vec<(String, String)> { + vec![ + ("Accept".to_string(), "application/webc".to_string()), + ("User-Agent".to_string(), USER_AGENT.to_string()), + ] +} + +#[async_trait::async_trait] +impl Source for WebSource { + #[tracing::instrument(level = "debug", skip_all, fields(%package))] + async fn query(&self, package: &PackageSpecifier) -> Result, Error> { + let url = match package { + PackageSpecifier::Url(url) => url, + _ => return Ok(Vec::new()), + }; + + let local_path = self + .get_locally_cached_file(url) + .await + .context("Unable to get the locally cached file")?; + + // FIXME: this will block + let webc_sha256 = WebcHash::for_file(&local_path)?; + + // Note: We want to use Container::from_disk() rather than the bytes + // our HTTP client gave us because then we can use memory-mapped files + let container = Container::from_disk(&local_path)?; + let pkg = PackageInfo::from_manifest(container.manifest())?; + let dist = DistributionInfo { + webc: url.clone(), + webc_sha256, + }; + + Ok(vec![PackageSummary { pkg, dist }]) + } +} + +fn sha256(bytes: &[u8]) -> String { + let mut hasher = Sha256::default(); + hasher.update(bytes); + let hash = hasher.finalize(); + let mut buffer = String::with_capacity(hash.len() * 2); + for byte in hash { + write!(buffer, "{byte:02X}").expect("Unreachable"); + } + + buffer +} + +#[derive(Debug, Clone, PartialEq)] +enum CacheInfo { + /// An item isn't in the cache, but could be cached later on. + Miss, + /// An item in the cache. + Hit { + path: PathBuf, + etag: Option, + last_modified: Option, + }, +} + +impl CacheInfo { + fn for_url(key: &str, checkout_dir: &Path) -> CacheInfo { + let path = checkout_dir.join(key); + + if !path.exists() { + return CacheInfo::Miss; + } + + let etag = std::fs::read_to_string(path.with_extension("etag")).ok(); + let last_modified = path.metadata().and_then(|m| m.modified()).ok(); + + CacheInfo::Hit { + etag, + last_modified, + path, + } + } +} + +fn classify_cache_using_mtime( + info: CacheInfo, + invalidation_threshold: Duration, +) -> Result { + let (path, last_modified, etag) = match info { + CacheInfo::Hit { + path, + last_modified: Some(last_modified), + etag, + .. + } => (path, last_modified, etag), + CacheInfo::Hit { + path, + last_modified: None, + etag: Some(etag), + .. + } => return Err(CacheState::PossiblyDirty { etag, path }), + CacheInfo::Hit { + etag: None, + last_modified: None, + path, + .. + } => { + return Err(CacheState::UnableToVerify { path }); + } + CacheInfo::Miss { .. } => return Err(CacheState::Miss), + }; + + if let Ok(time_since_last_modified) = last_modified.elapsed() { + if time_since_last_modified <= invalidation_threshold { + return Ok(path); + } + } + + match etag { + Some(etag) => Err(CacheState::PossiblyDirty { etag, path }), + None => Err(CacheState::UnableToVerify { path }), + } +} + +/// Classification of how valid an item is based on filesystem metadata. +#[derive(Debug)] +enum CacheState { + /// The item isn't in the cache. + Miss, + /// The cached item might be invalid, but it has an ETag we can use for + /// further validation. + PossiblyDirty { etag: String, path: PathBuf }, + /// The cached item exists on disk, but we weren't able to tell whether it is still + /// valid, and there aren't any other ways to validate it further. You can + /// probably reuse this if you are having internet issues. + UnableToVerify { path: PathBuf }, +} + +impl CacheState { + fn take_path(self) -> Option { + match self { + CacheState::PossiblyDirty { path, .. } | CacheState::UnableToVerify { path } => { + Some(path) + } + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use std::{collections::VecDeque, sync::Mutex}; + + use futures::future::BoxFuture; + use tempfile::TempDir; + + use super::*; + + const PYTHON: &[u8] = include_bytes!("../../../../c-api/examples/assets/python-0.1.0.wasmer"); + const COREUTILS: &[u8] = include_bytes!("../../../../../tests/integration/cli/tests/webc/coreutils-1.0.16-e27dbb4f-2ef2-4b44-b46a-ddd86497c6d7.webc"); + const DUMMY_URL: &str = "http://my-registry.io/some/package"; + const DUMMY_URL_HASH: &str = "4D7481F44E1D971A8C60D3C7BD505E2727602CF9369ED623920E029C2BA2351D"; + + #[derive(Debug)] + pub(crate) struct DummyClient { + requests: Mutex>, + responses: Mutex>, + } + + impl DummyClient { + pub fn with_responses(responses: impl IntoIterator) -> Self { + DummyClient { + requests: Mutex::new(Vec::new()), + responses: Mutex::new(responses.into_iter().collect()), + } + } + } + + impl HttpClient for DummyClient { + fn request( + &self, + request: HttpRequest, + ) -> BoxFuture<'_, Result> { + let response = self.responses.lock().unwrap().pop_front().unwrap(); + self.requests.lock().unwrap().push(request); + Box::pin(async { Ok(response) }) + } + } + + struct ResponseBuilder(HttpResponse); + + impl ResponseBuilder { + pub fn new() -> Self { + ResponseBuilder(HttpResponse { + pos: 0, + body: None, + ok: true, + redirected: false, + status: 200, + status_text: "OK".to_string(), + headers: Vec::new(), + }) + } + + pub fn with_status(mut self, code: u16, text: impl Into) -> Self { + self.0.status = code; + self.0.status_text = text.into(); + self + } + + pub fn with_body(mut self, body: impl Into>) -> Self { + self.0.body = Some(body.into()); + self + } + + pub fn with_etag(self, value: impl Into) -> Self { + self.with_header("ETag", value) + } + + pub fn with_header(mut self, name: impl Into, value: impl Into) -> Self { + self.0.headers.push((name.into(), value.into())); + self + } + + pub fn build(self) -> HttpResponse { + self.0 + } + } + + #[tokio::test] + async fn empty_cache_does_a_full_download() { + let dummy_etag = "This is an etag"; + let temp = TempDir::new().unwrap(); + let client = DummyClient::with_responses([ResponseBuilder::new() + .with_body(PYTHON) + .with_etag(dummy_etag) + .build()]); + let source = WebSource::new(temp.path(), Arc::new(client)); + let spec = PackageSpecifier::Url(DUMMY_URL.parse().unwrap()); + + let summaries = source.query(&spec).await.unwrap(); + + // We got the right response, as expected + assert_eq!(summaries.len(), 1); + assert_eq!(summaries[0].pkg.name, "python"); + // But we should have also cached the file and etag + let path = temp.path().join(DUMMY_URL_HASH); + assert!(path.exists()); + let etag_path = path.with_extension("etag"); + assert!(etag_path.exists()); + // And they should contain the correct content + assert_eq!(std::fs::read_to_string(etag_path).unwrap(), dummy_etag); + assert_eq!(std::fs::read(path).unwrap(), PYTHON); + } + + #[tokio::test] + async fn cache_hit() { + let temp = TempDir::new().unwrap(); + let client = Arc::new(DummyClient::with_responses([])); + let source = WebSource::new(temp.path(), client.clone()); + let spec = PackageSpecifier::Url(DUMMY_URL.parse().unwrap()); + // Prime the cache + std::fs::write(temp.path().join(DUMMY_URL_HASH), PYTHON).unwrap(); + + let summaries = source.query(&spec).await.unwrap(); + + // We got the right response, as expected + assert_eq!(summaries.len(), 1); + assert_eq!(summaries[0].pkg.name, "python"); + // And no requests were sent + assert_eq!(client.requests.lock().unwrap().len(), 0); + } + + #[tokio::test] + async fn fall_back_to_stale_cache_if_request_fails() { + let temp = TempDir::new().unwrap(); + let client = Arc::new(DummyClient::with_responses([ResponseBuilder::new() + .with_status(500, "Internal Server Error") + .build()])); + // Add something to the cache + let python_path = temp.path().join(DUMMY_URL_HASH); + std::fs::write(&python_path, PYTHON).unwrap(); + let source = WebSource::new(temp.path(), client.clone()).with_retry_period(Duration::ZERO); + let spec = PackageSpecifier::Url(DUMMY_URL.parse().unwrap()); + + let summaries = source.query(&spec).await.unwrap(); + + // We got the right response, as expected + assert_eq!(summaries.len(), 1); + assert_eq!(summaries[0].pkg.name, "python"); + // And one request was sent + assert_eq!(client.requests.lock().unwrap().len(), 1); + // The etag file wasn't written + assert!(!python_path.with_extension("etag").exists()); + } + + #[tokio::test] + async fn download_again_if_etag_is_different() { + let temp = TempDir::new().unwrap(); + let client = Arc::new(DummyClient::with_responses([ + ResponseBuilder::new().with_etag("coreutils").build(), + ResponseBuilder::new() + .with_body(COREUTILS) + .with_etag("coreutils") + .build(), + ])); + // Add Python to the cache + let path = temp.path().join(DUMMY_URL_HASH); + std::fs::write(&path, PYTHON).unwrap(); + std::fs::write(path.with_extension("etag"), "python").unwrap(); + // but create a source that will always want to re-check the etags + let source = + WebSource::new(temp.path(), client.clone()).with_retry_period(Duration::new(0, 0)); + let spec = PackageSpecifier::Url(DUMMY_URL.parse().unwrap()); + + let summaries = source.query(&spec).await.unwrap(); + + // Instead of Python (the originally cached item), we should get coreutils + assert_eq!(summaries.len(), 1); + assert_eq!(summaries[0].pkg.name, "sharrattj/coreutils"); + // both a HEAD and GET request were sent + let requests = client.requests.lock().unwrap(); + assert_eq!(requests.len(), 2); + assert_eq!(requests[0].method, "HEAD"); + assert_eq!(requests[1].method, "GET"); + // The etag file was also updated + assert_eq!( + std::fs::read_to_string(path.with_extension("etag")).unwrap(), + "coreutils" + ); + } +} diff --git a/lib/wasi/src/state/builder.rs b/lib/wasi/src/state/builder.rs index d716706f4c2..a7125f28382 100644 --- a/lib/wasi/src/state/builder.rs +++ b/lib/wasi/src/state/builder.rs @@ -15,7 +15,7 @@ use wasmer_wasix_types::wasi::Errno; #[cfg(feature = "sys")] use crate::PluggableRuntime; use crate::{ - bin_factory::BinFactory, + bin_factory::{BinFactory, BinaryPackage}, capabilities::Capabilities, fs::{WasiFs, WasiFsRoot, WasiInodes}, os::task::control_plane::{ControlPlaneConfig, ControlPlaneError, WasiControlPlane}, @@ -62,7 +62,7 @@ pub struct WasiEnvBuilder { pub(super) runtime: Option>, /// List of webc dependencies to be injected. - pub(super) uses: Vec, + pub(super) uses: Vec, /// List of host commands to map into the WASI instance. pub(super) map_commands: HashMap, @@ -105,7 +105,7 @@ pub enum WasiStateCreationError { #[error("wasi filesystem setup error: `{0}`")] WasiFsSetupError(String), #[error(transparent)] - FileSystemError(FsError), + FileSystemError(#[from] FsError), #[error("wasi inherit error: `{0}`")] WasiInheritError(String), #[error("wasi include package: `{0}`")] @@ -270,22 +270,25 @@ impl WasiEnvBuilder { } /// Adds a container this module inherits from - pub fn use_webc(mut self, webc: Name) -> Self - where - Name: AsRef, - { - self.uses.push(webc.as_ref().to_string()); + pub fn use_webc(mut self, pkg: BinaryPackage) -> Self { + self.add_webc(pkg); + self + } + + /// Adds a container this module inherits from + pub fn add_webc(&mut self, pkg: BinaryPackage) -> &mut Self { + self.uses.push(pkg); self } /// Adds a list of other containers this module inherits from pub fn uses(mut self, uses: I) -> Self where - I: IntoIterator, + I: IntoIterator, { - uses.into_iter().for_each(|inherit| { - self.uses.push(inherit); - }); + for pkg in uses { + self.add_webc(pkg); + } self } @@ -785,7 +788,7 @@ impl WasiEnvBuilder { let start = instance.exports.get_function("_start")?; env.data(store).thread.set_status_running(); - let res = crate::run_wasi_func_start(start, store); + let mut res = crate::run_wasi_func_start(start, store); tracing::trace!( "wasi[{}:{}]::main exit (code = {:?})", @@ -796,7 +799,16 @@ impl WasiEnvBuilder { let exit_code = match &res { Ok(_) => Errno::Success.into(), - Err(err) => err.as_exit_code().unwrap_or_else(|| Errno::Noexec.into()), + Err(err) => match err.as_exit_code() { + Some(code) if code.is_success() => { + // This is actually not an error, so we need to fix up the + // result + res = Ok(()); + Errno::Success.into() + } + Some(other) => other, + None => Errno::Noexec.into(), + }, }; env.cleanup(store, Some(exit_code)); diff --git a/lib/wasi/src/state/env.rs b/lib/wasi/src/state/env.rs index 869613051f5..2051040645e 100644 --- a/lib/wasi/src/state/env.rs +++ b/lib/wasi/src/state/env.rs @@ -1,10 +1,15 @@ -use std::{collections::HashMap, ops::Deref, path::PathBuf, sync::Arc, time::Duration}; +use std::{ + collections::HashMap, + ops::Deref, + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; use derivative::Derivative; use rand::Rng; -use semver::Version; use tracing::{trace, warn}; -use virtual_fs::{FsError, VirtualFile}; +use virtual_fs::{AsyncWriteExt, FileSystem, FsError, VirtualFile}; use virtual_net::DynVirtualNetworking; use wasmer::{ AsStoreMut, AsStoreRef, FunctionEnvMut, Global, Instance, Memory, MemoryType, MemoryView, @@ -16,19 +21,16 @@ use wasmer_wasix_types::{ }; use crate::{ - bin_factory::BinFactory, + bin_factory::{BinFactory, BinaryPackage}, capabilities::Capabilities, fs::{WasiFsRoot, WasiInodes}, import_object_for_all_wasi_versions, - os::{ - command::builtins::cmd_wasmer::CmdWasmer, - task::{ - control_plane::ControlPlaneError, - process::{WasiProcess, WasiProcessId}, - thread::{WasiMemoryLayout, WasiThread, WasiThreadHandle, WasiThreadId}, - }, + os::task::{ + control_plane::ControlPlaneError, + process::{WasiProcess, WasiProcessId}, + thread::{WasiMemoryLayout, WasiThread, WasiThreadHandle, WasiThreadId}, }, - runtime::SpawnMemoryType, + runtime::{resolver::PackageSpecifier, SpawnMemoryType}, syscalls::{__asyncify_light, platform_clock_time_get}, VirtualTaskManager, WasiControlPlane, WasiEnvBuilder, WasiError, WasiFunctionEnv, WasiRuntime, WasiRuntimeError, WasiStateCreationError, WasiVFork, @@ -208,7 +210,7 @@ impl WasiInstanceHandles { pub struct WasiEnvInit { pub(crate) state: WasiState, pub runtime: Arc, - pub webc_dependencies: Vec, + pub webc_dependencies: Vec, pub mapped_commands: HashMap, pub bin_factory: BinFactory, pub capabilities: Capabilities, @@ -422,7 +424,9 @@ impl WasiEnv { env.owned_handles.push(thread); // TODO: should not be here - should be callers responsibility! - env.uses(init.webc_dependencies)?; + for pkg in &init.webc_dependencies { + env.use_package(pkg)?; + } #[cfg(feature = "sys")] env.map_commands(init.mapped_commands.clone())?; @@ -833,114 +837,125 @@ impl WasiEnv { (memory, state, inodes) } - pub fn uses(&self, uses: I) -> Result<(), WasiStateCreationError> - where - I: IntoIterator, - { - // Load all the containers that we inherit from - use std::collections::VecDeque; - #[allow(unused_imports)] - use std::path::Path; + /// Make all the commands in a [`BinaryPackage`] available to the WASI + /// instance. + /// + /// The [`BinaryPackageCommand::atom()`][cmd-atom] will be saved to + /// `/bin/command`. + /// + /// This will also merge the command's filesystem + /// ([`BinaryPackage::webc_fs`][pkg-fs]) into the current filesystem. + /// + /// [cmd-atom]: crate::bin_factory::BinaryPackageCommand::atom() + /// [pkg-fs]: crate::bin_factory::BinaryPackage::webc_fs + pub fn use_package(&self, pkg: &BinaryPackage) -> Result<(), WasiStateCreationError> { + // PERF: We should avoid all these copies in the WasiFsRoot::Backing case. + + let root_fs = &self.state.fs.root_fs; + // We first need to copy any files in the package over to the + // temporary file system + match root_fs { + WasiFsRoot::Sandbox(root_fs) => { + root_fs.union(&pkg.webc_fs); + } + WasiFsRoot::Backing(_fs) => { + tracing::warn!("TODO: Manually copy each file across one-by-one"); + } + } - #[allow(unused_imports)] - use virtual_fs::FileSystem; + // Next, make sure all commands will be available - let mut already: HashMap = HashMap::new(); - - let mut use_packages = uses.into_iter().collect::>(); - - let cmd_wasmer = self - .bin_factory - .commands - .get("/bin/wasmer") - .and_then(|cmd| cmd.as_any().downcast_ref::()); - - let tasks = self.runtime.task_manager(); - - while let Some(use_package) = use_packages.pop_back() { - if let Some(package) = cmd_wasmer - .as_ref() - .and_then(|cmd| tasks.block_on(cmd.get_package(use_package.clone()))) - { - // If its already been added make sure the version is correct - let package_name = package.package_name.to_string(); - if let Some(version) = already.get(&package_name) { - if *version != package.version { - return Err(WasiStateCreationError::WasiInheritError(format!( - "webc package version conflict for {} - {} vs {}", - use_package, version, package.version - ))); - } - continue; - } - already.insert(package_name, package.version.clone()); + if !pkg.commands.is_empty() { + let _ = root_fs.create_dir(Path::new("/bin")); - // Add the additional dependencies - for dependency in package.uses.clone() { - use_packages.push_back(dependency); - } + for command in &pkg.commands { + let path = format!("/bin/{}", command.name()); + let path = Path::new(path.as_str()); - if let WasiFsRoot::Sandbox(root_fs) = &self.state.fs.root_fs { - // We first need to copy any files in the package over to the temporary file system - if let Some(fs) = package.webc_fs.as_ref() { - root_fs.union(fs); + match root_fs { + WasiFsRoot::Sandbox(root_fs) => { + // As a short-cut, when we are using a TmpFileSystem + // we can (unsafely) add the file to the filesystem + // without any copying. + + // FIXME(Michael-F-Bryan): This is pretty sketchy. + // We should be using some sort of reference-counted + // pointer to some bytes that are either on the heap + // or from a memory-mapped file. However, that's not + // possible here because things like memfs and + // WasiEnv are expecting a Cow<'static, [u8]>. It's + // too hard to refactor those at the moment, and we + // were pulling the same trick before by storing an + // "ownership" object in the BinaryPackageCommand, + // so as long as packages aren't removed from the + // module cache it should be fine. + // See https://github.com/wasmerio/wasmer/issues/3875 + let atom: &'static [u8] = unsafe { std::mem::transmute(command.atom()) }; + + if let Err(err) = root_fs + .new_open_options_ext() + .insert_ro_file(path, atom.into()) + { + tracing::debug!( + "failed to add package [{}] command [{}] - {}", + pkg.package_name, + command.name(), + err + ); + continue; + } } - - // Add all the commands as binaries in the bin folder - - let commands = package.commands.read().unwrap(); - if !commands.is_empty() { - let _ = root_fs.create_dir(Path::new("/bin")); - for command in commands.iter() { - let path = format!("/bin/{}", command.name()); - let path = Path::new(path.as_str()); - - // FIXME(Michael-F-Bryan): This is pretty sketchy. - // We should be using some sort of reference-counted - // pointer to some bytes that are either on the heap - // or from a memory-mapped file. However, that's not - // possible here because things like memfs and - // WasiEnv are expecting a Cow<'static, [u8]>. It's - // too hard to refactor those at the moment, and we - // were pulling the same trick before by storing an - // "ownership" object in the BinaryPackageCommand, - // so as long as packages aren't removed from the - // module cache it should be fine. - let atom: &'static [u8] = - unsafe { std::mem::transmute(command.atom()) }; - - if let Err(err) = root_fs - .new_open_options_ext() - .insert_ro_file(path, atom.into()) - { - tracing::debug!( - "failed to add package [{}] command [{}] - {}", - use_package, + WasiFsRoot::Backing(fs) => { + // Looks like we need to make the copy + let mut f = fs.new_open_options().create(true).write(true).open(path)?; + self.tasks() + .block_on(f.write_all(command.atom())) + .map_err(|e| { + WasiStateCreationError::WasiIncludePackageError(format!( + "Unable to save \"{}\" to \"{}\": {e}", command.name(), - err - ); - continue; - } - - // Add the binary package to the bin factory (zero copy the atom) - let mut package = package.clone(); - package.entry = Some(atom.into()); - self.bin_factory - .set_binary(path.as_os_str().to_string_lossy().as_ref(), package); - } + path.display() + )) + })?; } - } else { - return Err(WasiStateCreationError::WasiInheritError( - "failed to add package as the file system is not sandboxed".to_string(), - )); } - } else { - return Err(WasiStateCreationError::WasiInheritError(format!( - "failed to fetch webc package for {}", - use_package - ))); + + let mut package = pkg.clone(); + package.entrypoint_cmd = Some(command.name().to_string()); + self.bin_factory + .set_binary(path.as_os_str().to_string_lossy().as_ref(), package); + + tracing::debug!( + package=%pkg.package_name, + command_name=command.name(), + path=%path.display(), + "Injected a command into the filesystem", + ); } } + + Ok(()) + } + + /// Given a list of packages, load them from the registry and make them + /// available. + pub fn uses(&self, uses: I) -> Result<(), WasiStateCreationError> + where + I: IntoIterator, + { + let rt = self.runtime(); + + for package_name in uses { + let specifier = package_name + .parse::() + .map_err(|e| WasiStateCreationError::WasiIncludePackageError(e.to_string()))?; + let pkg = rt + .task_manager() + .block_on(BinaryPackage::from_registry(&specifier, rt)) + .map_err(|e| WasiStateCreationError::WasiIncludePackageError(e.to_string()))?; + self.use_package(&pkg)?; + } + Ok(()) } diff --git a/lib/wasi/src/wapm/mod.rs b/lib/wasi/src/wapm/mod.rs deleted file mode 100644 index 474b97c5db3..00000000000 --- a/lib/wasi/src/wapm/mod.rs +++ /dev/null @@ -1,630 +0,0 @@ -use anyhow::{bail, Context}; -use once_cell::sync::OnceCell; -use std::{ - collections::HashMap, - path::Path, - sync::{Arc, RwLock}, -}; -use url::Url; -use virtual_fs::{FileSystem, WebcVolumeFileSystem}; -use wasmer_wasix_types::wasi::Snapshot0Clockid; - -use webc::{ - metadata::{ - annotations::{EMSCRIPTEN_RUNNER_URI, WASI_RUNNER_URI, WCGI_RUNNER_URI}, - UrlOrManifest, - }, - Container, -}; - -use crate::{ - bin_factory::{BinaryPackage, BinaryPackageCommand}, - http::HttpClient, -}; - -mod pirita; - -use crate::http::{HttpRequest, HttpRequestOptions}; -use pirita::*; - -pub(crate) async fn fetch_webc( - cache_dir: &Path, - webc: &str, - client: &(dyn HttpClient + Send + Sync), - registry_endpoint: &Url, -) -> Result { - let name = webc.split_once(':').map(|a| a.0).unwrap_or_else(|| webc); - let (name, version) = match name.split_once('@') { - Some((name, version)) => (name, Some(version)), - None => (name, None), - }; - let query = match version { - Some(version) => WAPM_WEBC_QUERY_SPECIFIC - .replace(WAPM_WEBC_QUERY_TAG, name.replace('\"', "'").as_str()) - .replace(WAPM_WEBC_VERSION_TAG, version.replace('\"', "'").as_str()), - None => WAPM_WEBC_QUERY_LAST.replace(WAPM_WEBC_QUERY_TAG, name.replace('\"', "'").as_str()), - }; - tracing::debug!(query = query.as_str(), "Preparing GraphQL query"); - - let mut url = registry_endpoint.clone(); - url.query_pairs_mut().append_pair("query", &query); - - let response = client - .request(HttpRequest { - url: url.to_string(), - method: "GET".to_string(), - headers: vec![], - body: None, - options: HttpRequestOptions::default(), - }) - .await?; - - if response.status != 200 { - bail!(" http request failed with status {}", response.status); - } - let body = response.body.context("HTTP response with empty body")?; - let data: WapmWebQuery = - serde_json::from_slice(&body).context("Could not parse webc registry JSON data")?; - tracing::debug!("response: {:?}", data); - - let PiritaVersionedDownload { - url: download_url, - version, - } = wapm_extract_version(&data).context("No pirita download URL available")?; - let mut pkg = download_webc(cache_dir, name, download_url, client).await?; - pkg.version = version.parse()?; - Ok(pkg) -} - -struct PiritaVersionedDownload { - url: String, - version: String, -} - -fn wapm_extract_version(data: &WapmWebQuery) -> Option { - if let Some(package) = &data.data.get_package_version { - let url = package.distribution.pirita_download_url.clone()?; - Some(PiritaVersionedDownload { - url, - version: package.version.clone(), - }) - } else if let Some(package) = &data.data.get_package { - let url = package - .last_version - .distribution - .pirita_download_url - .clone()?; - Some(PiritaVersionedDownload { - url, - version: package.last_version.version.clone(), - }) - } else { - None - } -} - -pub fn parse_static_webc(data: Vec) -> Result { - let webc = Container::from_bytes(data)?; - parse_webc_v2(&webc).with_context(|| "Could not parse webc".to_string()) -} - -async fn download_webc( - cache_dir: &Path, - name: &str, - pirita_download_url: String, - client: &(dyn HttpClient + Send + Sync), -) -> Result { - let mut name_comps = pirita_download_url - .split('/') - .collect::>() - .into_iter() - .rev(); - let mut name = name_comps.next().unwrap_or(name); - let mut name_store; - for _ in 0..2 { - if let Some(prefix) = name_comps.next() { - name_store = format!("{}_{}", prefix, name); - name = name_store.as_str(); - } - } - let compute_path = |cache_dir: &Path, name: &str| { - let name = name.replace('/', "._."); - std::path::Path::new(cache_dir).join(&name) - }; - - // fast path - let path = compute_path(cache_dir, name); - - #[cfg(feature = "sys")] - if path.exists() { - tracing::debug!(path=%path.display(), "Parsing cached WEBC file"); - - match Container::from_disk(&path) { - Ok(webc) => { - return parse_webc_v2(&webc) - .with_context(|| format!("Could not parse webc at path '{}'", path.display())); - } - Err(err) => { - tracing::warn!( - error = &err as &dyn std::error::Error, - "failed to parse WEBC", - ); - } - } - } - if let Ok(data) = std::fs::read(&path) { - if let Ok(webc) = parse_static_webc(data) { - return Ok(webc); - } - } - - // slow path - let data = download_package(&pirita_download_url, client) - .await - .with_context(|| { - format!( - "Could not download webc package from '{}'", - pirita_download_url - ) - })?; - - #[cfg(feature = "sys")] - { - let path = compute_path(cache_dir, name); - std::fs::create_dir_all(path.parent().unwrap()).with_context(|| { - format!("Could not create cache directory '{}'", cache_dir.display()) - })?; - - let mut temp_path = path.clone(); - let rand_128: u128 = rand::random(); - temp_path = std::path::PathBuf::from(format!( - "{}.{}.temp", - temp_path.as_os_str().to_string_lossy(), - rand_128 - )); - - if let Err(err) = std::fs::write(temp_path.as_path(), &data[..]) { - tracing::debug!( - "failed to write webc cache file [{}] - {}", - temp_path.as_path().to_string_lossy(), - err - ); - } - if let Err(err) = std::fs::rename(temp_path.as_path(), path.as_path()) { - tracing::debug!( - "failed to rename webc cache file [{}] - {}", - temp_path.as_path().to_string_lossy(), - err - ); - } - - match Container::from_disk(&path) { - Ok(webc) => { - return parse_webc_v2(&webc) - .with_context(|| format!("Could not parse webc at path '{}'", path.display())) - } - Err(e) => { - tracing::warn!( - path=%temp_path.display(), - error=&e as &dyn std::error::Error, - "Unable to parse temporary WEBC from disk", - ) - } - } - } - - let webc = Container::from_bytes(data) - .with_context(|| format!("Failed to parse downloaded from '{pirita_download_url}'"))?; - let package = parse_webc_v2(&webc).context("Could not parse binary package")?; - - Ok(package) -} - -async fn download_package( - download_url: &str, - client: &(dyn HttpClient + Send + Sync), -) -> Result, anyhow::Error> { - let request = HttpRequest { - url: download_url.to_string(), - method: "GET".to_string(), - headers: vec![], - body: None, - options: HttpRequestOptions { - gzip: true, - cors_proxy: None, - }, - }; - let response = client.request(request).await?; - if response.status != 200 { - bail!("HTTP request failed with status {}", response.status); - } - response.body.context("HTTP response with empty body") -} - -fn parse_webc_v2(webc: &Container) -> Result { - let manifest = webc.manifest(); - - let wapm: webc::metadata::annotations::Wapm = manifest - .package_annotation("wapm")? - .context("The package must have 'wapm' annotations")?; - - let mut commands = HashMap::new(); - - for (name, cmd) in &manifest.commands { - if let Some(cmd) = load_binary_command(webc, name, cmd)? { - commands.insert(name.as_str(), cmd); - } - } - - let entry = manifest.entrypoint.as_deref().and_then(|entry| { - let cmd = commands.get(entry)?; - Some(cmd.atom.clone()) - }); - - let webc_fs = WebcVolumeFileSystem::mount_all(webc); - - // List all the dependencies - let uses: Vec<_> = manifest - .use_map - .values() - .filter_map(|uses| match uses { - UrlOrManifest::Url(url) => Some(url.path()), - UrlOrManifest::Manifest(manifest) => manifest.origin.as_deref(), - UrlOrManifest::RegistryDependentUrl(url) => Some(url), - }) - .map(String::from) - .collect(); - - let module_memory_footprint = entry.as_deref().map(|b| b.len() as u64).unwrap_or(0); - let file_system_memory_footprint = count_file_system(&webc_fs, Path::new("/")); - - let pkg = BinaryPackage { - package_name: wapm.name, - when_cached: Some( - crate::syscalls::platform_clock_time_get(Snapshot0Clockid::Monotonic, 1_000_000) - .unwrap() as u128, - ), - entry: entry.map(Into::into), - hash: OnceCell::new(), - webc_fs: Some(Arc::new(webc_fs)), - commands: Arc::new(RwLock::new(commands.into_values().collect())), - uses, - version: wapm.version.parse()?, - module_memory_footprint, - file_system_memory_footprint, - }; - - Ok(pkg) -} - -fn load_binary_command( - webc: &Container, - name: &str, - cmd: &webc::metadata::Command, -) -> Result, anyhow::Error> { - let atom_name = match atom_name_for_command(name, cmd)? { - Some(name) => name, - None => { - tracing::warn!( - cmd.name=name, - cmd.runner=%cmd.runner, - "Skipping unsupported command", - ); - return Ok(None); - } - }; - - let atom = webc.get_atom(&atom_name); - - if atom.is_none() && cmd.annotations.is_empty() { - return Ok(legacy_atom_hack(webc, name)); - } - - let atom = atom - .with_context(|| format!("The '{name}' command uses the '{atom_name}' atom, but it isn't present in the WEBC file"))?; - - let cmd = BinaryPackageCommand::new(name.to_string(), atom); - - Ok(Some(cmd)) -} - -fn atom_name_for_command( - command_name: &str, - cmd: &webc::metadata::Command, -) -> Result, anyhow::Error> { - use webc::metadata::annotations::{Emscripten, Wasi}; - - if let Some(Wasi { atom, .. }) = cmd - .annotation("wasi") - .context("Unable to deserialize 'wasi' annotations")? - { - return Ok(Some(atom)); - } - - if let Some(Emscripten { - atom: Some(atom), .. - }) = cmd - .annotation("emscripten") - .context("Unable to deserialize 'emscripten' annotations")? - { - return Ok(Some(atom)); - } - - if [WASI_RUNNER_URI, WCGI_RUNNER_URI, EMSCRIPTEN_RUNNER_URI] - .iter() - .any(|uri| cmd.runner.starts_with(uri)) - { - // Note: We use the command name as the atom name as a special case - // for known runner types because sometimes people will construct - // a manifest by hand instead of using wapm2pirita. - tracing::debug!( - command = command_name, - "No annotations specifying the atom name found. Falling back to the command name" - ); - return Ok(Some(command_name.to_string())); - } - - Ok(None) -} - -/// HACK: Some older packages like `sharrattj/bash` and `sharrattj/coreutils` -/// contain commands with no annotations. When this happens, you can just assume -/// it wants to use the first atom in the WEBC file. -/// -/// That works because most of these packages only have a single atom (e.g. in -/// `sharrattj/coreutils` there are commands for `ls`, `pwd`, and so on, but -/// under the hood they all use the `coreutils` atom). -/// -/// See -/// for more. -fn legacy_atom_hack(webc: &Container, command_name: &str) -> Option { - let (name, atom) = webc.atoms().into_iter().next()?; - - tracing::debug!( - command_name, - atom.name = name.as_str(), - atom.len = atom.len(), - "(hack) The command metadata is malformed. Falling back to the first atom in the WEBC file", - ); - - Some(BinaryPackageCommand::new(command_name.to_string(), atom)) -} - -fn count_file_system(fs: &dyn FileSystem, path: &Path) -> u64 { - let mut total = 0; - - let dir = match fs.read_dir(path) { - Ok(d) => d, - Err(_err) => { - // TODO: propagate error? - return 0; - } - }; - - for res in dir { - match res { - Ok(entry) => { - if let Ok(meta) = entry.metadata() { - total += meta.len(); - if meta.is_dir() { - total += count_file_system(fs, entry.path.as_path()); - } - } - } - Err(_err) => { - // TODO: propagate error? - } - }; - } - - total -} - -#[cfg(test)] -mod tests { - use std::collections::BTreeMap; - - use super::*; - - const PYTHON: &[u8] = include_bytes!("../../../c-api/examples/assets/python-0.1.0.wasmer"); - const COREUTILS: &[u8] = include_bytes!("../../../../tests/integration/cli/tests/webc/coreutils-1.0.16-e27dbb4f-2ef2-4b44-b46a-ddd86497c6d7.webc"); - const BASH: &[u8] = include_bytes!("../../../../tests/integration/cli/tests/webc/bash-1.0.16-f097441a-a80b-4e0d-87d7-684918ef4bb6.webc"); - const HELLO: &[u8] = include_bytes!("../../../../tests/integration/cli/tests/webc/hello-0.1.0-665d2ddc-80e6-4845-85d3-4587b1693bb7.webc"); - - #[test] - fn parse_the_python_webc_file() { - let python = webc::compat::Container::from_bytes(PYTHON).unwrap(); - - let pkg = parse_webc_v2(&python).unwrap(); - - assert_eq!(pkg.package_name, "python"); - assert_eq!(pkg.version.to_string(), "0.1.0"); - assert_eq!(pkg.uses, Vec::::new()); - assert_eq!(pkg.module_memory_footprint, 4694941); - assert_eq!(pkg.file_system_memory_footprint, 13387764); - let python_atom = python.get_atom("python").unwrap(); - assert_eq!(pkg.entry.as_deref(), Some(python_atom.as_slice())); - let commands = pkg.commands.read().unwrap(); - let commands: BTreeMap<&str, &[u8]> = commands - .iter() - .map(|cmd| (cmd.name(), cmd.atom())) - .collect(); - let command_names: Vec<_> = commands.keys().copied().collect(); - assert_eq!(command_names, &["python"]); - assert_eq!(commands["python"], python_atom); - - // Note: It's important that the entry we parse doesn't allocate, so - // make sure it lies within the original PYTHON buffer. - let bounds = PYTHON.as_ptr_range(); - - let entry_ptr = pkg.entry.as_deref().unwrap().as_ptr(); - assert!(bounds.start <= entry_ptr && entry_ptr < bounds.end); - - let python_cmd_ptr = commands["python"].as_ptr(); - assert!(bounds.start <= python_cmd_ptr && python_cmd_ptr < bounds.end); - } - - #[test] - fn parse_a_webc_with_multiple_commands() { - let coreutils = Container::from_bytes(COREUTILS).unwrap(); - - let pkg = parse_webc_v2(&coreutils).unwrap(); - - assert_eq!(pkg.package_name, "sharrattj/coreutils"); - assert_eq!(pkg.version.to_string(), "1.0.16"); - assert_eq!(pkg.uses, Vec::::new()); - assert_eq!(pkg.module_memory_footprint, 0); - assert_eq!(pkg.file_system_memory_footprint, 44); - assert_eq!(pkg.entry, None); - let commands = pkg.commands.read().unwrap(); - let commands: BTreeMap<&str, &[u8]> = commands - .iter() - .map(|cmd| (cmd.name(), cmd.atom())) - .collect(); - let command_names: Vec<_> = commands.keys().copied().collect(); - assert_eq!( - command_names, - &[ - "arch", - "base32", - "base64", - "baseenc", - "basename", - "cat", - "chcon", - "chgrp", - "chmod", - "chown", - "chroot", - "cksum", - "comm", - "cp", - "csplit", - "cut", - "date", - "dd", - "df", - "dircolors", - "dirname", - "du", - "echo", - "env", - "expand", - "expr", - "factor", - "false", - "fmt", - "fold", - "groups", - "hashsum", - "head", - "hostid", - "hostname", - "id", - "install", - "join", - "kill", - "link", - "ln", - "logname", - "ls", - "mkdir", - "mkfifo", - "mknod", - "mktemp", - "more", - "mv", - "nice", - "nl", - "nohup", - "nproc", - "numfmt", - "od", - "paste", - "pathchk", - "pinky", - "pr", - "printenv", - "printf", - "ptx", - "pwd", - "readlink", - "realpath", - "relpath", - "rm", - "rmdir", - "runcon", - "seq", - "sh", - "shred", - "shuf", - "sleep", - "sort", - "split", - "stat", - "stdbuf", - "sum", - "sync", - "tac", - "tail", - "tee", - "test", - "timeout", - "touch", - "tr", - "true", - "truncate", - "tsort", - "tty", - "uname", - "unexpand", - "uniq", - "unlink", - "uptime", - "users", - "wc", - "who", - "whoami", - "yes", - ] - ); - let coreutils_atom = coreutils.get_atom("coreutils").unwrap(); - for (cmd, atom) in commands { - assert_eq!(atom.len(), coreutils_atom.len(), "{cmd}"); - assert_eq!(atom, coreutils_atom, "{cmd}"); - } - } - - #[test] - fn parse_a_webc_with_dependencies() { - let bash = webc::compat::Container::from_bytes(BASH).unwrap(); - - let pkg = parse_webc_v2(&bash).unwrap(); - - assert_eq!(pkg.package_name, "sharrattj/bash"); - assert_eq!(pkg.version.to_string(), "1.0.16"); - assert_eq!(pkg.uses, &["sharrattj/coreutils@1.0.16"]); - assert_eq!(pkg.module_memory_footprint, 1847052); - assert_eq!(pkg.file_system_memory_footprint, 0); - let commands = pkg.commands.read().unwrap(); - let commands: BTreeMap<&str, &[u8]> = commands - .iter() - .map(|cmd| (cmd.name(), cmd.atom())) - .collect(); - let command_names: Vec<_> = commands.keys().copied().collect(); - assert_eq!(command_names, &["bash"]); - assert_eq!(commands["bash"], bash.get_atom("bash").unwrap()); - } - - #[test] - fn parse_a_webc_with_dependencies_and_no_commands() { - let pkg = parse_static_webc(HELLO.to_vec()).unwrap(); - - assert_eq!(pkg.package_name, "wasmer/hello"); - assert_eq!(pkg.version.to_string(), "0.1.0"); - let commands = pkg.commands.read().unwrap(); - assert!(commands.is_empty()); - assert!(pkg.entry.is_none()); - assert_eq!(pkg.uses, ["sharrattj/static-web-server@1"]); - } -} diff --git a/lib/wasi/src/wapm/pirita.rs b/lib/wasi/src/wapm/pirita.rs deleted file mode 100644 index fbe93e25041..00000000000 --- a/lib/wasi/src/wapm/pirita.rs +++ /dev/null @@ -1,75 +0,0 @@ -use serde::*; - -#[allow(dead_code)] -pub const WAPM_WEBC_QUERY_ALL: &str = r#" -{ - getPackage(name: "") { - versions { - version, - distribution { - downloadUrl, - piritaDownloadUrl - } - } - } -}"#; -pub const WAPM_WEBC_QUERY_LAST: &str = r#" -{ - getPackage(name: "") { - lastVersion { - version, - distribution { - downloadUrl, - piritaDownloadUrl - } - } - } -}"#; -pub const WAPM_WEBC_QUERY_SPECIFIC: &str = r#" -{ - getPackageVersion(name: "", version: "") { - version, - distribution { - downloadUrl, - piritaDownloadUrl - } - } -}"#; -pub const WAPM_WEBC_QUERY_TAG: &str = ""; -pub const WAPM_WEBC_VERSION_TAG: &str = ""; - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct WapmWebQueryGetPackageLastVersionDistribution { - #[serde(rename = "downloadUrl")] - pub download_url: Option, - #[serde(rename = "piritaDownloadUrl")] - pub pirita_download_url: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct WapmWebQueryGetPackageVersion { - #[serde(rename = "version")] - pub version: String, - #[serde(rename = "distribution")] - pub distribution: WapmWebQueryGetPackageLastVersionDistribution, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct WapmWebQueryGetPackage { - #[serde(rename = "lastVersion")] - pub last_version: WapmWebQueryGetPackageVersion, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct WapmWebQueryData { - #[serde(rename = "getPackage")] - pub get_package: Option, - #[serde(rename = "getPackageVersion")] - pub get_package_version: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct WapmWebQuery { - #[serde(rename = "data")] - pub data: WapmWebQueryData, -} diff --git a/lib/wasi/tests/runners.rs b/lib/wasi/tests/runners.rs index e55f9c483b2..9977d21e87d 100644 --- a/lib/wasi/tests/runners.rs +++ b/lib/wasi/tests/runners.rs @@ -1,74 +1,70 @@ #![cfg(feature = "webc_runner")] -use std::{path::Path, time::Duration}; +use std::{ + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; use once_cell::sync::Lazy; use reqwest::Client; -use wasmer_wasix::runners::Runner; +use tokio::runtime::Handle; +use wasmer::Engine; +use wasmer_wasix::{ + runners::Runner, + runtime::{ + module_cache::{FileSystemCache, ModuleCache, SharedCache}, + task_manager::tokio::TokioTaskManager, + }, + PluggableRuntime, WasiRuntime, +}; use webc::Container; #[cfg(feature = "webc_runner_rt_wasi")] mod wasi { - use tokio::runtime::Handle; - use wasmer::Store; - use wasmer_wasix::{ - runners::wasi::WasiRunner, runtime::task_manager::tokio::TokioTaskManager, WasiError, - }; + use wasmer_wasix::{bin_factory::BinaryPackage, runners::wasi::WasiRunner, WasiError}; use super::*; #[tokio::test] async fn can_run_wat2wasm() { let webc = download_cached("https://wapm.io/wasmer/wabt").await; - let store = Store::default(); let container = Container::from_bytes(webc).unwrap(); - let runner = WasiRunner::new(store); let command = &container.manifest().commands["wat2wasm"]; - assert!(runner.can_run_command("wat2wasm", command).unwrap()); + assert!(WasiRunner::can_run_command(command).unwrap()); } #[tokio::test] async fn wat2wasm() { let webc = download_cached("https://wapm.io/wasmer/wabt").await; - let store = Store::default(); - let tasks = TokioTaskManager::new(Handle::current()); let container = Container::from_bytes(webc).unwrap(); + let rt = runtime(); + let pkg = BinaryPackage::from_webc(&container, &rt).await.unwrap(); // Note: we don't have any way to intercept stdin or stdout, so blindly // assume that everything is fine if it runs successfully. let handle = std::thread::spawn(move || { - WasiRunner::new(store) - .with_task_manager(tasks) + WasiRunner::new() .with_args(["--version"]) - .run_cmd(&container, "wat2wasm") + .run_command("wat2wasm", &pkg, Arc::new(rt)) }); - let err = handle.join().unwrap().unwrap_err(); - dbg!(&err); + let result = handle.join().unwrap(); - let runtime_error = err - .chain() - .find_map(|e| e.downcast_ref::()) - .unwrap(); - let exit_code = match runtime_error { - WasiError::Exit(code) => *code, - other => unreachable!("Something else went wrong: {:?}", other), - }; - assert!(exit_code.is_success()); + assert!(result.is_ok()); } #[tokio::test] async fn python() { let webc = download_cached("https://wapm.io/python/python").await; - let store = Store::default(); - let tasks = TokioTaskManager::new(Handle::current()); + let rt = runtime(); let container = Container::from_bytes(webc).unwrap(); + let pkg = BinaryPackage::from_webc(&container, &rt).await.unwrap(); let handle = std::thread::spawn(move || { - WasiRunner::new(store) - .with_task_manager(tasks) + WasiRunner::new() .with_args(["-c", "import sys; sys.exit(42)"]) - .run_cmd(&container, "python") + .run_command("python", &pkg, Arc::new(rt)) }); let err = handle.join().unwrap().unwrap_err(); @@ -86,12 +82,12 @@ mod wasi { #[cfg(feature = "webc_runner_rt_wcgi")] mod wcgi { - use std::future::Future; + use std::{future::Future, sync::Arc}; use futures::{channel::mpsc::Sender, future::AbortHandle, SinkExt, StreamExt}; use rand::Rng; use tokio::runtime::Handle; - use wasmer_wasix::{runners::wcgi::WcgiRunner, runtime::task_manager::tokio::TokioTaskManager}; + use wasmer_wasix::{bin_factory::BinaryPackage, runners::wcgi::WcgiRunner}; use super::*; @@ -99,31 +95,28 @@ mod wcgi { async fn can_run_staticserver() { let webc = download_cached("https://wapm.io/Michael-F-Bryan/staticserver").await; let container = Container::from_bytes(webc).unwrap(); - let runner = WcgiRunner::new("staticserver"); let entrypoint = container.manifest().entrypoint.as_ref().unwrap(); - assert!(runner - .can_run_command(entrypoint, &container.manifest().commands[entrypoint]) - .unwrap()); + assert!(WcgiRunner::can_run_command(&container.manifest().commands[entrypoint]).unwrap()); } #[tokio::test] async fn staticserver() { let webc = download_cached("https://wapm.io/Michael-F-Bryan/staticserver").await; - let tasks = TokioTaskManager::new(Handle::current()); + let rt = runtime(); let container = Container::from_bytes(webc).unwrap(); - let mut runner = WcgiRunner::new("staticserver"); + let mut runner = WcgiRunner::new(); let port = rand::thread_rng().gen_range(10000_u16..65535_u16); let (cb, started) = callbacks(Handle::current()); runner .config() .addr(([127, 0, 0, 1], port).into()) - .task_manager(tasks) .callbacks(cb); + let pkg = BinaryPackage::from_webc(&container, &rt).await.unwrap(); // The server blocks so we need to start it on a background thread. let join_handle = std::thread::spawn(move || { - runner.run(&container).unwrap(); + runner.run_command("serve", &pkg, Arc::new(rt)).unwrap(); }); // wait for the server to have started @@ -186,7 +179,7 @@ async fn download_cached(url: &str) -> bytes::Bytes { let uri: http::Uri = url.parse().unwrap(); let file_name = Path::new(uri.path()).file_name().unwrap(); - let cache_dir = Path::new(env!("CARGO_TARGET_TMPDIR")).join(module_path!()); + let cache_dir = tmp_dir().join("downloads"); let cached_path = cache_dir.join(file_name); if cached_path.exists() { @@ -224,3 +217,22 @@ fn client() -> Client { }); CLIENT.clone() } + +fn runtime() -> impl WasiRuntime + Send + Sync { + let tasks = TokioTaskManager::new(Handle::current()); + let mut rt = PluggableRuntime::new(Arc::new(tasks)); + + let cache = + SharedCache::default().with_fallback(FileSystemCache::new(tmp_dir().join("compiled"))); + + rt.set_engine(Some(Engine::default())) + .set_module_cache(cache); + + rt +} + +fn tmp_dir() -> PathBuf { + Path::new(env!("CARGO_TARGET_TMPDIR")) + .join(env!("CARGO_PKG_NAME")) + .join(module_path!()) +} diff --git a/tests/integration/cli/Cargo.toml b/tests/integration/cli/Cargo.toml index b2506c8d547..2f4d6034205 100644 --- a/tests/integration/cli/Cargo.toml +++ b/tests/integration/cli/Cargo.toml @@ -20,6 +20,8 @@ reqwest = { version = "0.11.14", features = ["json", "blocking"] } tokio = { version = "1", features = [ "rt", "rt-multi-thread", "macros" ] } assert_cmd = "2.0.8" predicates = "2.1.5" +once_cell = "1.17.1" +futures = "0.3.28" [dependencies] anyhow = "1" diff --git a/tests/integration/cli/tests/create_exe.rs b/tests/integration/cli/tests/create_exe.rs index 2df10552e03..e368d422d6b 100644 --- a/tests/integration/cli/tests/create_exe.rs +++ b/tests/integration/cli/tests/create_exe.rs @@ -63,7 +63,7 @@ impl WasmerCreateExe { output.current_dir(&self.current_dir); output.arg("create-exe"); output.arg(&self.wasm_path.canonicalize()?); - output.arg(&self.compiler.to_flag()); + output.arg(self.compiler.to_flag()); output.args(self.extra_cli_flags.iter()); output.arg("-o"); output.arg(&self.native_executable_path); @@ -138,7 +138,7 @@ impl WasmerCreateObj { output.current_dir(&self.current_dir); output.arg("create-obj"); output.arg(&self.wasm_path.canonicalize()?); - output.arg(&self.compiler.to_flag()); + output.arg(self.compiler.to_flag()); output.args(self.extra_cli_flags.iter()); output.arg("-o"); output.arg(&self.output_object_path); @@ -429,9 +429,9 @@ fn create_exe_works_underscore_module_name() -> anyhow::Result<()> { let executable_path = operating_dir.join("multicommand.exe"); WasmerCreateExe { - current_dir: operating_dir.clone(), + current_dir: operating_dir, wasm_path, - native_executable_path: executable_path.clone(), + native_executable_path: executable_path, compiler: Compiler::Cranelift, extra_cli_flags: create_exe_flags, ..Default::default() @@ -638,7 +638,7 @@ fn create_exe_with_object_input(args: Vec) -> anyhow::Result<()> { #[cfg(windows)] let executable_path = operating_dir.join("wasm.exe"); - let mut create_exe_args = args.clone(); + let mut create_exe_args = args; create_exe_args.push("--precompiled-atom".to_string()); create_exe_args.push(format!("qjs:abc123:{}", object_path.display())); create_exe_args.push("--debug-dir".to_string()); diff --git a/tests/integration/cli/tests/run_unstable.rs b/tests/integration/cli/tests/run_unstable.rs index c11495f07a0..6e8d424b735 100644 --- a/tests/integration/cli/tests/run_unstable.rs +++ b/tests/integration/cli/tests/run_unstable.rs @@ -9,29 +9,41 @@ use std::{ }; use assert_cmd::{assert::Assert, prelude::OutputAssertExt}; +use once_cell::sync::Lazy; use predicates::str::contains; +use rand::Rng; use reqwest::{blocking::Client, IntoUrl}; use tempfile::TempDir; use wasmer_integration_tests_cli::get_wasmer_path; -const RUST_LOG: &str = "info,wasmer_wasi::runners=debug,virtual_fs::trace_fs=trace"; const HTTP_GET_TIMEOUT: Duration = Duration::from_secs(5); +static RUST_LOG: Lazy = Lazy::new(|| { + [ + "info", + "wasmer_wasix::resolve=debug", + "wasmer_wasix::runners=debug", + "wasmer_wasix=debug", + "virtual_fs::trace_fs=trace", + ] + .join(",") +}); + fn wasmer_run_unstable() -> std::process::Command { let mut cmd = std::process::Command::new("cargo"); cmd.arg("run") .arg("--quiet") .arg("--package=wasmer-cli") .arg("--features=singlepass,cranelift") + .arg("--color=never") .arg("--") .arg("run-unstable"); - cmd.env("RUST_LOG", RUST_LOG); + cmd.env("RUST_LOG", &*RUST_LOG); cmd } mod webc_on_disk { use super::*; - use rand::Rng; #[test] #[cfg_attr( @@ -88,6 +100,35 @@ mod webc_on_disk { assert.success().stdout(contains("Hello, World!")); } + #[test] + #[cfg_attr( + all(target_env = "musl", target_os = "linux"), + ignore = "wasmer run-unstable segfaults on musl" + )] + fn wasi_runner_with_dependencies() { + let mut cmd = wasmer_run_unstable(); + let port = random_port(); + cmd.arg(fixtures::hello()) + .arg(format!("--env=SERVER_PORT={port}")) + .arg("--net") + .arg("--") + .arg("--log-level=info"); + let mut child = JoinableChild::spawn(cmd); + child.wait_for_stderr("listening"); + + // Make sure we get the page we want + let html = reqwest::blocking::get(format!("http://localhost:{port}/")) + .unwrap() + .text() + .unwrap(); + assert!(html.contains("Hello World"), "{html}"); + + // and make sure our request was logged + child + .join() + .stderr(contains("incoming request: method=GET uri=/")); + } + #[test] #[cfg_attr( all(target_env = "musl", target_os = "linux"), @@ -96,7 +137,7 @@ mod webc_on_disk { fn webc_files_with_multiple_commands_require_an_entrypoint_flag() { let assert = wasmer_run_unstable().arg(fixtures::wabt()).assert(); - let msg = r#"Unable to determine the WEBC file's entrypoint. Please choose one of ["wat2wasm", "wast2json", "wasm2wat", "wasm-interp", "wasm-validate", "wasm-strip"]"#; + let msg = r#"Unable to determine the WEBC file's entrypoint. Please choose one of ["wasm-interp", "wasm-strip", "wasm-validate", "wasm2wat", "wast2json", "wat2wasm"]"#; assert.failure().stderr(contains(msg)); } @@ -125,7 +166,7 @@ mod webc_on_disk { )] fn wcgi_runner() { // Start the WCGI server in the background - let port = rand::thread_rng().gen_range(10_000_u16..u16::MAX); + let port = random_port(); let mut cmd = wasmer_run_unstable(); cmd.arg(format!("--addr=127.0.0.1:{port}")) .arg(fixtures::static_server()); @@ -161,7 +202,7 @@ mod webc_on_disk { let temp = TempDir::new().unwrap(); std::fs::write(temp.path().join("file.txt"), "Hello, World!").unwrap(); // Start the WCGI server in the background - let port = rand::thread_rng().gen_range(10_000_u16..u16::MAX); + let port = random_port(); let mut cmd = wasmer_run_unstable(); cmd.arg(format!("--addr=127.0.0.1:{port}")) .arg(format!("--mapdir=/path/to:{}", temp.path().display())) @@ -294,7 +335,7 @@ mod remote_webc { let assert = wasmer_run_unstable() .arg("saghul/quickjs") .arg("--entrypoint=quickjs") - .arg("--registry=https://wapm.io/") + .arg("--registry=wapm.io") .arg("--") .arg("--eval") .arg("console.log('Hello, World!')") @@ -319,6 +360,33 @@ mod remote_webc { assert.success().stdout(contains("Hello, World!")); } + + #[test] + #[cfg_attr( + all(target_env = "musl", target_os = "linux"), + ignore = "wasmer run-unstable segfaults on musl" + )] + #[cfg_attr( + windows, + ignore = "TODO(Michael-F-Bryan): Figure out why WasiFs::get_inode_at_path_inner() returns Errno::notcapable on Windows" + )] + fn bash_using_coreutils() { + let assert = wasmer_run_unstable() + .arg("sharrattj/bash") + .arg("--entrypoint=bash") + .arg("--use=sharrattj/coreutils") + .arg("--registry=wapm.io") + .arg("--") + .arg("-c") + .arg("ls /bin") + .assert(); + + let some_expected_binaries = [ + "arch", "base32", "base64", "baseenc", "basename", "bash", "cat", + ] + .join("\n"); + assert.success().stdout(contains(some_expected_binaries)); + } } mod fixtures { @@ -347,6 +415,13 @@ mod fixtures { Path::new(C_ASSET_PATH).join("qjs.wasm") } + pub fn hello() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("webc") + .join("hello-0.1.0-665d2ddc-80e6-4845-85d3-4587b1693bb7.webc") + } + /// The `wasmer.toml` file for QuickJS. pub fn qjs_wasmer_toml() -> PathBuf { Path::new(C_ASSET_PATH).join("qjs-wasmer.toml") @@ -381,23 +456,25 @@ impl JoinableChild { /// Keep reading lines from the child's stdout until a line containing the /// desired text is found. fn wait_for_stdout(&mut self, text: &str) -> String { - let stderr = self + let stdout = self .0 .as_mut() .and_then(|child| child.stdout.as_mut()) .unwrap(); - let mut all_output = String::new(); + wait_for(text, stdout) + } - loop { - let line = read_line(stderr).unwrap(); - let found = line.contains(text); - all_output.push_str(&line); + /// Keep reading lines from the child's stderr until a line containing the + /// desired text is found. + fn wait_for_stderr(&mut self, text: &str) -> String { + let stderr = self + .0 + .as_mut() + .and_then(|child| child.stderr.as_mut()) + .unwrap(); - if found { - return all_output; - } - } + wait_for(text, stderr) } /// Kill the underlying [`std::process::Child`] and get an [`Assert`] we @@ -409,6 +486,27 @@ impl JoinableChild { } } +fn wait_for(text: &str, reader: &mut dyn Read) -> String { + let mut all_output = String::new(); + + loop { + let line = read_line(reader).unwrap(); + + if line.is_empty() { + eprintln!("=== All Output === "); + eprintln!("{all_output}"); + panic!("EOF before \"{text}\" was found"); + } + + let found = line.contains(text); + all_output.push_str(&line); + + if found { + return all_output; + } + } +} + fn read_line(reader: &mut dyn Read) -> Result { let mut line = Vec::new(); @@ -477,3 +575,7 @@ fn http_get(url: impl IntoUrl) -> Result { panic!("Didn't receive a response from \"{url}\" within the allocated time"); } + +fn random_port() -> u16 { + rand::thread_rng().gen_range(10_000_u16..u16::MAX) +} diff --git a/tests/integration/cli/tests/snapshot.rs b/tests/integration/cli/tests/snapshot.rs index a0179384161..b9cabe45750 100644 --- a/tests/integration/cli/tests/snapshot.rs +++ b/tests/integration/cli/tests/snapshot.rs @@ -3,10 +3,12 @@ use std::{ path::{Path, PathBuf}, process::{Child, Stdio}, sync::Arc, + time::Duration, }; +use anyhow::Error; use derivative::Derivative; -#[cfg(test)] +use futures::TryFutureExt; use insta::assert_json_snapshot; use tempfile::NamedTempFile; @@ -50,7 +52,7 @@ pub struct TestSpec { } fn is_false(b: &bool) -> bool { - *b == false + !(*b) } static WEBC_BASH: &[u8] = @@ -61,10 +63,10 @@ static WEBC_COREUTILS_11: &[u8] = include_bytes!("./webc/coreutils-1.0.11-9d7746ca-694f-11ed-b932-dead3543c068.webc"); static WEBC_DASH: &[u8] = include_bytes!("./webc/dash-1.0.18-f0d13233-bcda-4cf1-9a23-3460bffaae2a.webc"); -static WEBC_PYTHON: &'static [u8] = include_bytes!("./webc/python-0.1.0.webc"); -static WEBC_WEB_SERVER: &'static [u8] = +static WEBC_PYTHON: &[u8] = include_bytes!("./webc/python-0.1.0.webc"); +static WEBC_WEB_SERVER: &[u8] = include_bytes!("./webc/static-web-server-1.0.96-e2b80276-c194-473d-bbd0-27c8a2c96a59.webc"); -static WEBC_WASMER_SH: &'static [u8] = +static WEBC_WASMER_SH: &[u8] = include_bytes!("./webc/wasmer-sh-1.0.63-dd3d67d1-de94-458c-a9ee-caea3b230ccf.webc"); impl std::fmt::Debug for TestSpec { @@ -119,7 +121,7 @@ pub struct TestBuilder { spec: TestSpec, } -type RunWith = Box Result + 'static>; +type RunWith = Box Result + 'static>; impl TestBuilder { pub fn new() -> Self { @@ -300,7 +302,7 @@ pub fn run_test_with(spec: TestSpec, code: &[u8], with: RunWith) -> TestResult { } for pkg in &spec.use_packages { - cmd.args(["--use", &pkg]); + cmd.args(["--use", pkg]); } for pkg in &spec.include_webcs { @@ -374,28 +376,24 @@ pub fn run_test_with(spec: TestSpec, code: &[u8], with: RunWith) -> TestResult { // we do some post processing to replace the temporary random name of the binary // with a fixed name as otherwise the results are not comparable. this occurs // because bash (and others) use the process name in the printf on stdout - let stdout = stdout - .replace( - wasm_path - .path() - .file_name() - .unwrap() - .to_string_lossy() - .as_ref(), - "test.wasm", - ) - .to_string(); - let stderr = stderr - .replace( - wasm_path - .path() - .file_name() - .unwrap() - .to_string_lossy() - .as_ref(), - "test.wasm", - ) - .to_string(); + let stdout = stdout.replace( + wasm_path + .path() + .file_name() + .unwrap() + .to_string_lossy() + .as_ref(), + "test.wasm", + ); + let stderr = stderr.replace( + wasm_path + .path() + .file_name() + .unwrap() + .to_string_lossy() + .as_ref(), + "test.wasm", + ); TestResult::Success(TestOutput { stdout, @@ -417,16 +415,16 @@ pub fn build_snapshot(mut spec: TestSpec, code: &[u8]) -> TestSnapshot { .map(|status| status.code().unwrap_or_default()) }), ); - let snapshot = TestSnapshot { spec, result }; - snapshot + + TestSnapshot { spec, result } } pub fn build_snapshot_with(mut spec: TestSpec, code: &[u8], with: RunWith) -> TestSnapshot { spec.wasm_hash = format!("{:x}", md5::compute(code)); let result = run_test_with(spec.clone(), code, with); - let snapshot = TestSnapshot { spec, result }; - snapshot + + TestSnapshot { spec, result } } pub fn snapshot_file(path: &Path, spec: TestSpec) -> TestSnapshot { @@ -464,7 +462,10 @@ fn test_snapshot_condvar() { assert_json_snapshot!(snapshot); } -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_condvar_async() { let snapshot = TestBuilder::new() @@ -496,7 +497,7 @@ fn test_snapshot_stdin_stdout_stderr() { let snapshot = TestBuilder::new() .with_name(function!()) .stdin_str("blah") - .args(&["tee", "/dev/stderr"]) + .args(["tee", "/dev/stderr"]) .run_wasm(include_bytes!("./wasm/coreutils.wasm")); assert_json_snapshot!(snapshot); } @@ -524,7 +525,10 @@ fn test_snapshot_epoll() { assert_json_snapshot!(snapshot); } -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_epoll_async() { let snapshot = TestBuilder::new() @@ -604,47 +608,49 @@ fn test_run_http_request( port: u16, what: &str, expected_size: Option, -) -> Result { +) -> Result { let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() .build()?; - let http_get = move |url, max_retries: i32| { + let http_get = move |url: String, max_retries: i32| { rt.block_on(async move { - for n in 0..(max_retries.max(1)) { - println!("http request: {}", &url); - tokio::select! { - resp = reqwest::get(&url) => { - let resp = match resp { - Ok(a) => a, - Err(_) if n < max_retries => { - tokio::time::sleep(std::time::Duration::from_secs(2)).await; - continue; - } - Err(err) => return Err(err.into()) - }; - if resp.status().is_success() == false { - return Err(anyhow::format_err!("incorrect status code: {}", resp.status())); - } - return Ok(resp.bytes().await?); - } - _ = tokio::time::sleep(std::time::Duration::from_secs(2)) => { - eprintln!("retrying request... ({} attempts)", (n+1)); + let mut n = 1; + + loop { + println!("http request (attempt #{n}): {url}"); + + let pending_request = reqwest::get(&url) + .and_then(|r| futures::future::ready(r.error_for_status())) + .and_then(|r| r.bytes()); + + match tokio::time::timeout(Duration::from_secs(2), pending_request) + .await + .map_err(Error::from) + .and_then(|result| result.map_err(Error::from)) + { + Ok(body) => return Ok(body), + Err(e) if n <= max_retries => { + eprintln!("non-fatal error: {e}... Retrying"); + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + + n += 1; continue; } + Err(e) => { + return Err(e); + } } } - Err(anyhow::format_err!("timeout while performing HTTP request")) }) }; let expected_size = match expected_size { None => { let url = format!("http://localhost:{}/{}.size", port, what); - let expected_size = usize::from_str_radix( - String::from_utf8_lossy(http_get(url, 50)?.as_ref()).trim(), - 10, - )?; + let expected_size = String::from_utf8_lossy(http_get(url, 50)?.as_ref()) + .trim() + .parse()?; if expected_size == 0 { return Err(anyhow::format_err!("There was no data returned")); } @@ -690,7 +696,10 @@ fn test_snapshot_tokio() { assert_json_snapshot!(snapshot); } -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_unix_pipe() { let snapshot = TestBuilder::new() @@ -699,8 +708,12 @@ fn test_snapshot_unix_pipe() { assert_json_snapshot!(snapshot); } -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] #[test] +// #[cfg_attr( +// any(target_env = "musl", target_os = "macos", target_os = "windows"), +// ignore +// )] +#[ignore = "TODO(Michael-F-Bryan): figure out why the request body doesn't get sent fully on Linux"] fn test_snapshot_web_server() { let name: &str = function!(); let port = 7777; @@ -714,7 +727,8 @@ fn test_snapshot_web_server() { let script = format!( r#" cat /public/main.js | wc -c > /public/main.js.size -rm -f /cfg/config.toml +rm -f /cfg/ +cd /public /bin/webserver --log-level warn --root /public --port {}"#, port ); @@ -776,7 +790,10 @@ fn test_snapshot_fork_and_exec() { // The ability to fork the current process and run a different image but retain // the existing open file handles (which is needed for stdin and stdout redirection) #[cfg(not(target_os = "windows"))] -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_fork_and_exec_async() { let snapshot = TestBuilder::new() @@ -811,7 +828,10 @@ fn test_snapshot_fork() { } // Simple fork example that is a crude multi-threading implementation - used by `dash` -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_fork_async() { let snapshot = TestBuilder::new() @@ -851,7 +871,10 @@ fn test_snapshot_longjump_fork() { // This test ensures that the stacks that have been recorded are preserved // after a fork. // The behavior is needed for `dash` -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_longjump_fork_async() { let snapshot = TestBuilder::new() @@ -920,7 +943,10 @@ fn test_snapshot_sleep() { // full multi-threading with shared memory and shared compiled modules #[cfg(target_os = "linux")] -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_sleep_async() { let snapshot = TestBuilder::new() @@ -943,7 +969,10 @@ fn test_snapshot_process_spawn() { // Uses `posix_spawn` to launch a sub-process and wait on it to exit #[cfg(not(target_os = "windows"))] -#[cfg(not(any(target_env = "musl", target_os = "macos", target_os = "windows")))] +#[cfg_attr( + any(target_env = "musl", target_os = "macos", target_os = "windows"), + ignore +)] #[test] fn test_snapshot_process_spawn_async() { let snapshot = TestBuilder::new()