diff --git a/lib/wasi/src/bin_factory/binary_package.rs b/lib/wasi/src/bin_factory/binary_package.rs index ece724168f7..904d17b7b4b 100644 --- a/lib/wasi/src/bin_factory/binary_package.rs +++ b/lib/wasi/src/bin_factory/binary_package.rs @@ -1,145 +1,75 @@ -use std::{ - any::Any, - borrow::Cow, - collections::HashMap, - sync::{Arc, Mutex, RwLock}, -}; +use std::sync::{Arc, Mutex, RwLock}; use derivative::*; -use virtual_fs::{FileSystem, TmpFileSystem}; -use wasmer_wasix_types::wasi::Snapshot0Clockid; +use once_cell::sync::OnceCell; +use virtual_fs::FileSystem; +use webc::compat::SharedBytes; use super::hash_of_binary; -use crate::syscalls::platform_clock_time_get; #[derive(Derivative, Clone)] #[derivative(Debug)] pub struct BinaryPackageCommand { - pub name: String, + name: String, #[derivative(Debug = "ignore")] - pub atom: Cow<'static, [u8]>, - hash: Option, - pub ownership: Option>, + pub(crate) atom: SharedBytes, + hash: OnceCell, } impl BinaryPackageCommand { - pub fn new(name: String, atom: Cow<'static, [u8]>) -> Self { + pub fn new(name: String, atom: SharedBytes) -> Self { Self { name, - ownership: None, - hash: None, atom, + hash: OnceCell::new(), } } - /// Hold on to some arbitrary data for the lifetime of this binary pacakge. - /// - /// # Safety + pub fn name(&self) -> &str { + &self.name + } + + /// Get a reference to this [`BinaryPackageCommand`]'s atom. /// - /// Must ensure that the atom data will be safe to use as long as the provided - /// ownership handle stays alive. - pub unsafe fn new_with_ownership<'a, T>( - name: String, - atom: Cow<'a, [u8]>, - ownership: Arc, - ) -> Self - where - T: 'static, - { - let ownership: Arc = ownership; - let mut ret = Self::new(name, std::mem::transmute(atom)); - ret.ownership = Some(std::mem::transmute(ownership)); - ret + /// The address of the returned slice is guaranteed to be stable and live as + /// long as the [`BinaryPackageCommand`]. 
+ pub fn atom(&self) -> &[u8] { + &self.atom } - pub fn hash(&mut self) -> &str { - if self.hash.is_none() { - self.hash = Some(hash_of_binary(self.atom.as_ref())); - } - let hash = self.hash.as_ref().unwrap(); - hash.as_str() + pub fn hash(&self) -> &str { + self.hash.get_or_init(|| hash_of_binary(self.atom())) } } +/// A WebAssembly package that has been loaded into memory. +/// +/// You can create a [`BinaryPackage`] using [`crate::bin_factory::ModuleCache`] +/// or [`crate::wapm::parse_static_webc()`]. #[derive(Derivative, Clone)] #[derivative(Debug)] pub struct BinaryPackage { - pub package_name: Cow<'static, str>, + pub package_name: String, pub when_cached: Option, - pub ownership: Option>, #[derivative(Debug = "ignore")] - pub entry: Option>, + pub entry: Option, pub hash: Arc>>, - pub wapm: Option, - pub base_dir: Option, - pub tmp_fs: TmpFileSystem, pub webc_fs: Option>, - pub webc_top_level_dirs: Vec, - pub mappings: Vec, - pub envs: HashMap, pub commands: Arc>>, pub uses: Vec, - pub version: Cow<'static, str>, + pub version: String, pub module_memory_footprint: u64, pub file_system_memory_footprint: u64, } impl BinaryPackage { - pub fn new(package_name: &str, entry: Option>) -> Self { - let now = platform_clock_time_get(Snapshot0Clockid::Monotonic, 1_000_000).unwrap() as u128; - let (package_name, version) = match package_name.split_once('@') { - Some((a, b)) => (a.to_string(), b.to_string()), - None => (package_name.to_string(), "1.0.0".to_string()), - }; - let module_memory_footprint = entry.as_ref().map(|a| a.len()).unwrap_or_default() as u64; - Self { - package_name: package_name.into(), - when_cached: Some(now), - ownership: None, - entry, - hash: Arc::new(Mutex::new(None)), - wapm: None, - base_dir: None, - tmp_fs: TmpFileSystem::new(), - webc_fs: None, - webc_top_level_dirs: Default::default(), - mappings: Vec::new(), - envs: HashMap::default(), - commands: Arc::new(RwLock::new(Vec::new())), - uses: Vec::new(), - version: version.into(), -
module_memory_footprint, - file_system_memory_footprint: 0, - } - } - - /// Hold on to some arbitrary data for the lifetime of this binary pacakge. - /// - /// # Safety - /// - /// Must ensure that the entry data will be safe to use as long as the provided - /// ownership handle stays alive. - pub unsafe fn new_with_ownership<'a, T>( - package_name: &str, - entry: Option>, - ownership: Arc, - ) -> Self - where - T: 'static, - { - let ownership: Arc = ownership; - let mut ret = Self::new(package_name, entry.map(|a| std::mem::transmute(a))); - ret.ownership = Some(std::mem::transmute(ownership)); - ret - } - pub fn hash(&self) -> String { let mut hash = self.hash.lock().unwrap(); if hash.is_none() { if let Some(entry) = self.entry.as_ref() { hash.replace(hash_of_binary(entry.as_ref())); } else { - hash.replace(hash_of_binary(self.package_name.as_ref())); + hash.replace(hash_of_binary(&self.package_name)); } } hash.as_ref().unwrap().clone() diff --git a/lib/wasi/src/bin_factory/mod.rs b/lib/wasi/src/bin_factory/mod.rs index 8f697fc10cd..41870e3884a 100644 --- a/lib/wasi/src/bin_factory/mod.rs +++ b/lib/wasi/src/bin_factory/mod.rs @@ -1,9 +1,11 @@ use std::{ collections::HashMap, ops::Deref, + path::Path, sync::{Arc, RwLock}, }; +use anyhow::Context; use virtual_fs::{AsyncReadExt, FileSystem}; mod binary_package; @@ -78,15 +80,17 @@ impl BinFactory { // Check the filesystem for the file if name.starts_with('/') { if let Some(fs) = fs { - if let Ok(mut file) = fs.new_open_options().read(true).open(name.clone()) { - // Read the file - let mut data = Vec::with_capacity(file.size() as usize); - // TODO: log error? 
- if file.read_to_end(&mut data).await.is_ok() { - let package_name = name.split('/').last().unwrap_or(name.as_str()); - let data = BinaryPackage::new(package_name, Some(data.into())); - cache.insert(name, Some(data.clone())); - return Some(data); + match load_package_from_filesystem(fs, name.as_ref()).await { + Ok(pkg) => { + cache.insert(name, Some(pkg.clone())); + return Some(pkg); + } + Err(e) => { + tracing::warn!( + path = name, + error = &*e, + "Unable to load the package from disk" + ); } } } @@ -98,6 +102,23 @@ impl BinFactory { } } +async fn load_package_from_filesystem( + fs: &dyn FileSystem, + path: &Path, +) -> Result { + let mut f = fs + .new_open_options() + .read(true) + .open(path) + .context("Unable to open the file")?; + + let mut data = Vec::with_capacity(f.size() as usize); + f.read_to_end(&mut data).await.context("Read failed")?; + let pkg = crate::wapm::parse_static_webc(data).context("Unable to parse the package")?; + + Ok(pkg) +} + pub fn hash_of_binary(data: impl AsRef<[u8]>) -> String { let mut hasher = Sha256::default(); hasher.update(data.as_ref()); diff --git a/lib/wasi/src/bin_factory/module_cache.rs b/lib/wasi/src/bin_factory/module_cache.rs index 6eff3d6eb73..676a83ebd3f 100644 --- a/lib/wasi/src/bin_factory/module_cache.rs +++ b/lib/wasi/src/bin_factory/module_cache.rs @@ -18,11 +18,11 @@ pub const DEFAULT_CACHE_TIME: std::time::Duration = std::time::Duration::from_se #[derive(Debug)] pub struct ModuleCache { - pub(crate) cache_compile_dir: String, + pub(crate) cache_compile_dir: PathBuf, pub(crate) cached_modules: Option>>, pub(crate) cache_webc: RwLock>, - pub(crate) cache_webc_dir: String, + pub(crate) cache_webc_dir: PathBuf, pub(crate) cache_time: std::time::Duration, } @@ -51,21 +51,18 @@ impl ModuleCache { /// /// use_shared_cache enables a shared cache of modules in addition to a thread-local cache. 
pub fn new( - cache_compile_dir: Option, - cache_webc_dir: Option, + cache_compile_dir: Option, + cache_webc_dir: Option, use_shared_cache: bool, ) -> ModuleCache { - let cache_compile_dir = shellexpand::tilde( - cache_compile_dir - .as_deref() - .unwrap_or(DEFAULT_COMPILED_PATH), - ) - .to_string(); - let _ = std::fs::create_dir_all(PathBuf::from(cache_compile_dir.clone())); - - let cache_webc_dir = - shellexpand::tilde(cache_webc_dir.as_deref().unwrap_or(DEFAULT_WEBC_PATH)).to_string(); - let _ = std::fs::create_dir_all(PathBuf::from(cache_webc_dir.clone())); + let cache_compile_dir = cache_compile_dir.unwrap_or_else(|| { + PathBuf::from(shellexpand::tilde(DEFAULT_COMPILED_PATH).into_owned()) + }); + let _ = std::fs::create_dir_all(&cache_compile_dir); + + let cache_webc_dir = cache_webc_dir + .unwrap_or_else(|| PathBuf::from(shellexpand::tilde(DEFAULT_WEBC_PATH).into_owned())); + let _ = std::fs::create_dir_all(&cache_webc_dir); let cached_modules = if use_shared_cache { Some(RwLock::new(HashMap::default())) @@ -145,8 +142,9 @@ impl ModuleCache { .split_once(':') .map(|a| a.0) .unwrap_or_else(|| name.as_str()); - let cache_webc_dir = self.cache_webc_dir.as_str(); - if let Ok(mut data) = crate::wapm::fetch_webc_task(cache_webc_dir, wapm_name, runtime) { + if let Ok(mut data) = + crate::wapm::fetch_webc_task(&self.cache_webc_dir, wapm_name, runtime) + { // If the binary has no entry but it inherits from another module // that does have an entry then we fall back to that inherited entry point // (this convention is recursive down the list of inheritance until it finds the first entry point) @@ -221,8 +219,7 @@ impl ModuleCache { } // slow path - let path = std::path::Path::new(self.cache_compile_dir.as_str()) - .join(format!("{}.bin", key).as_str()); + let path = self.cache_compile_dir.join(format!("{}.bin", key).as_str()); if let Ok(data) = std::fs::read(path.as_path()) { tracing::trace!("bin file found: {:?} [len={}]", path.as_path(), data.len()); let mut decoder 
= weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8); @@ -277,8 +274,7 @@ impl ModuleCache { // We should also attempt to store it in the cache directory let compiled_bytes = module.serialize().unwrap(); - let path = std::path::Path::new(self.cache_compile_dir.as_str()) - .join(format!("{}.bin", key).as_str()); + let path = self.cache_compile_dir.join(format!("{}.bin", key).as_str()); // TODO: forward error! let _ = std::fs::create_dir_all(path.parent().unwrap()); let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8); @@ -293,9 +289,7 @@ impl ModuleCache { mod tests { use std::{sync::Arc, time::Duration}; - use tracing_subscriber::{ - filter, prelude::__tracing_subscriber_SubscriberExt, util::SubscriberInitExt, Layer, - }; + use tracing_subscriber::filter::LevelFilter; use crate::{runtime::task_manager::tokio::TokioTaskManager, PluggableRuntime}; @@ -303,13 +297,11 @@ mod tests { #[test] fn test_module_cache() { - tracing_subscriber::registry() - .with( - tracing_subscriber::fmt::layer() - .pretty() - .with_filter(filter::LevelFilter::INFO), - ) - .init(); + let _ = tracing_subscriber::fmt() + .pretty() + .with_test_writer() + .with_max_level(LevelFilter::INFO) + .try_init(); let mut cache = ModuleCache::new(None, None, true); cache.cache_time = std::time::Duration::from_millis(500); diff --git a/lib/wasi/src/state/env.rs b/lib/wasi/src/state/env.rs index 908bf6a7189..c48f2b39e75 100644 --- a/lib/wasi/src/state/env.rs +++ b/lib/wasi/src/state/env.rs @@ -757,14 +757,14 @@ impl WasiEnv { I: IntoIterator, { // Load all the containers that we inherit from + use std::collections::VecDeque; #[allow(unused_imports)] use std::path::Path; - use std::{borrow::Cow, collections::VecDeque}; #[allow(unused_imports)] use virtual_fs::FileSystem; - let mut already: HashMap> = HashMap::new(); + let mut already: HashMap = HashMap::new(); let mut use_packages = uses.into_iter().collect::>(); @@ -782,7 +782,7 @@ impl WasiEnv { // If its already been added make sure 
the version is correct let package_name = package.package_name.to_string(); if let Some(version) = already.get(&package_name) { - if version.as_ref() != package.version.as_ref() { + if version.as_str() != package.version { return Err(WasiStateCreationError::WasiInheritError(format!( "webc package version conflict for {} - {} vs {}", use_package, version, package.version @@ -804,20 +804,36 @@ impl WasiEnv { } // Add all the commands as binaries in the bin folder + let commands = package.commands.read().unwrap(); if !commands.is_empty() { let _ = root_fs.create_dir(Path::new("/bin")); for command in commands.iter() { - let path = format!("/bin/{}", command.name); + let path = format!("/bin/{}", command.name()); let path = Path::new(path.as_str()); + + // FIXME(Michael-F-Bryan): This is pretty sketchy. + // We should be using some sort of reference-counted + // pointer to some bytes that are either on the heap + // or from a memory-mapped file. However, that's not + // possible here because things like memfs and + // WasiEnv are expecting a Cow<'static, [u8]>. It's + // too hard to refactor those at the moment, and we + // were pulling the same trick before by storing an + // "ownership" object in the BinaryPackageCommand, + // so as long as packages aren't removed from the + // module cache it should be fine. 
+ let atom: &'static [u8] = + unsafe { std::mem::transmute(command.atom()) }; + if let Err(err) = root_fs .new_open_options_ext() - .insert_ro_file(path, command.atom.clone()) + .insert_ro_file(path, atom.into()) { tracing::debug!( "failed to add package [{}] command [{}] - {}", use_package, - command.name, + command.name(), err ); continue; @@ -825,7 +841,7 @@ impl WasiEnv { // Add the binary package to the bin factory (zero copy the atom) let mut package = package.clone(); - package.entry = Some(command.atom.clone()); + package.entry = Some(atom.into()); self.bin_factory .set_binary(path.as_os_str().to_string_lossy().as_ref(), package); } diff --git a/lib/wasi/src/wapm/manifest.rs b/lib/wasi/src/wapm/manifest.rs deleted file mode 100644 index 076785f2fa9..00000000000 --- a/lib/wasi/src/wapm/manifest.rs +++ /dev/null @@ -1,186 +0,0 @@ -use std::{collections::HashMap, fmt, path::PathBuf}; - -use semver::Version; -use serde::*; - -/// The name of the manifest file. This is hard-coded for now. 
-pub static MANIFEST_FILE_NAME: &str = "wapm.toml"; -pub static PACKAGES_DIR_NAME: &str = "wapm_packages"; - -/// Primitive wasm type -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub enum WasmType { - I32, - I64, - F32, - F64, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub enum Import { - Func { - namespace: String, - name: String, - params: Vec, - result: Vec, - }, - Global { - namespace: String, - name: String, - var_type: WasmType, - }, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub enum Export { - Func { - name: String, - params: Vec, - result: Vec, - }, - Global { - name: String, - var_type: WasmType, - }, -} - -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -pub struct Interface { - /// The name the interface gave itself - pub name: Option, - /// Things that the module can import - pub imports: HashMap<(String, String), Import>, - /// Things that the module must export - pub exports: HashMap, -} - -/// The ABI is a hint to WebAssembly runtimes about what additional imports to insert. -/// It currently is only used for validation (in the validation subcommand). The default value is `None`. 
-#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq)] -pub enum Abi { - #[serde(rename = "emscripten")] - Emscripten, - #[serde(rename = "none")] - None, - #[serde(rename = "wasi")] - Wasi, -} - -impl Abi { - pub fn to_str(&self) -> &str { - match self { - Abi::Emscripten => "emscripten", - Abi::Wasi => "wasi", - Abi::None => "generic", - } - } - pub fn is_none(&self) -> bool { - return self == &Abi::None; - } - pub fn from_str(name: &str) -> Self { - match name.to_lowercase().as_ref() { - "emscripten" => Abi::Emscripten, - "wasi" => Abi::Wasi, - _ => Abi::None, - } - } -} - -impl fmt::Display for Abi { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.to_str()) - } -} - -impl Default for Abi { - fn default() -> Self { - Abi::None - } -} - -impl Abi { - pub fn get_interface(&self) -> Option { - match self { - Abi::Emscripten => None, - Abi::Wasi => None, - Abi::None => None, - } - } -} - -/// Describes a command for a wapm module -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Package { - pub name: String, - pub version: Version, - pub description: String, - pub license: Option, - /// The location of the license file, useful for non-standard licenses - #[serde(rename = "license-file")] - pub license_file: Option, - pub readme: Option, - pub repository: Option, - pub homepage: Option, - #[serde(rename = "wasmer-extra-flags")] - pub wasmer_extra_flags: Option, - #[serde( - rename = "disable-command-rename", - default, - skip_serializing_if = "std::ops::Not::not" - )] - pub disable_command_rename: bool, - /// Unlike, `disable-command-rename` which prevents `wapm run `, - /// this flag enables the command rename of `wapm run ` into - /// just `. This is useful for programs that need to inspect - /// their argv[0] names and when the command name matches their executable name. 
- #[serde( - rename = "rename-commands-to-raw-command-name", - default, - skip_serializing_if = "std::ops::Not::not" - )] - pub rename_commands_to_raw_command_name: bool, -} - -/// Describes a command for a wapm module -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Command { - pub name: String, - pub module: String, - pub main_args: Option, - pub package: Option, -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Module { - pub name: String, - pub source: PathBuf, - #[serde(default = "Abi::default", skip_serializing_if = "Abi::is_none")] - pub abi: Abi, - #[cfg(feature = "package")] - pub fs: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub interfaces: Option>, -} - -/// The manifest represents the file used to describe a Wasm package. -/// -/// The `module` field represents the wasm file to be published. -/// -/// The `source` is used to create bundles with the `fs` section. -/// -/// The `fs` section represents fs assets that will be made available to the -/// program relative to its starting current directory (there may be issues with WASI). -/// These are pairs of paths. 
-#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Manifest { - pub package: Package, - pub dependencies: Option>, - pub module: Option>, - pub command: Option>, - /// Of the form Guest -> Host path - pub fs: Option>, - /// private data - /// store the directory path of the manifest file for use later accessing relative path fields - #[serde(skip)] - pub base_directory_path: PathBuf, -} diff --git a/lib/wasi/src/wapm/mod.rs b/lib/wasi/src/wapm/mod.rs index 3be033b89f3..d77b6f2f329 100644 --- a/lib/wasi/src/wapm/mod.rs +++ b/lib/wasi/src/wapm/mod.rs @@ -1,17 +1,18 @@ use anyhow::{bail, Context}; use std::{ - ops::Deref, - path::{Path, PathBuf}, - sync::Arc, + collections::HashMap, + path::Path, + sync::{Arc, Mutex, RwLock}, }; -use virtual_fs::FileSystem; +use virtual_fs::{FileSystem, WebcVolumeFileSystem}; +use wasmer_wasix_types::wasi::Snapshot0Clockid; -use tracing::*; -#[allow(unused_imports)] -use tracing::{error, warn}; use webc::{ - metadata::{Annotation, UrlOrManifest}, - v1::WebC, + metadata::{ + annotations::{EMSCRIPTEN_RUNNER_URI, WASI_RUNNER_URI, WCGI_RUNNER_URI}, + UrlOrManifest, + }, + Container, }; use crate::{ @@ -19,15 +20,13 @@ use crate::{ WasiRuntime, }; -#[cfg(feature = "wapm-tar")] -mod manifest; mod pirita; use crate::http::{DynHttpClient, HttpRequest, HttpRequestOptions}; use pirita::*; pub(crate) fn fetch_webc_task( - cache_dir: &str, + cache_dir: &Path, webc: &str, runtime: &dyn WasiRuntime, ) -> Result { @@ -36,11 +35,7 @@ pub(crate) fn fetch_webc_task( .context("no http client available")? 
.clone(); - let f = { - let cache_dir = cache_dir.to_string(); - let webc = webc.to_string(); - async move { fetch_webc(&cache_dir, &webc, client).await } - }; + let f = async move { fetch_webc(cache_dir, webc, client).await }; let result = runtime .task_manager() @@ -50,7 +45,7 @@ pub(crate) fn fetch_webc_task( } async fn fetch_webc( - cache_dir: &str, + cache_dir: &Path, webc: &str, client: DynHttpClient, ) -> Result { @@ -65,7 +60,7 @@ async fn fetch_webc( .replace(WAPM_WEBC_VERSION_TAG, version.replace('\"', "'").as_str()), None => WAPM_WEBC_QUERY_LAST.replace(WAPM_WEBC_QUERY_TAG, name.replace('\"', "'").as_str()), }; - debug!("request: {}", url_query); + tracing::debug!("request: {}", url_query); let url = format!( "{}{}", @@ -89,14 +84,14 @@ async fn fetch_webc( let body = response.body.context("HTTP response with empty body")?; let data: WapmWebQuery = serde_json::from_slice(&body).context("Could not parse webc registry JSON data")?; - debug!("response: {:?}", data); + tracing::debug!("response: {:?}", data); let PiritaVersionedDownload { url: download_url, version, } = wapm_extract_version(&data).context("No pirita download URL available")?; let mut pkg = download_webc(cache_dir, name, download_url, client).await?; - pkg.version = version.into(); + pkg.version = version; Ok(pkg) } @@ -128,22 +123,12 @@ fn wapm_extract_version(data: &WapmWebQuery) -> Option } pub fn parse_static_webc(data: Vec) -> Result { - let options = webc::v1::ParseOptions::default(); - match webc::v1::WebCOwned::parse(data, &options) { - Ok(webc) => unsafe { - let webc = Arc::new(webc); - return parse_webc(webc.as_webc_ref(), webc.clone()) - .with_context(|| "Could not parse webc".to_string()); - }, - Err(err) => { - warn!("failed to parse WebC: {}", err); - Err(err.into()) - } - } + let webc = Container::from_bytes(data)?; + parse_webc_v2(&webc).with_context(|| "Could not parse webc".to_string()) } async fn download_webc( - cache_dir: &str, + cache_dir: &Path, name: &str, 
pirita_download_url: String, client: DynHttpClient, @@ -161,28 +146,28 @@ async fn download_webc( name = name_store.as_str(); } } - let compute_path = |cache_dir: &str, name: &str| { + let compute_path = |cache_dir: &Path, name: &str| { let name = name.replace('/', "._."); std::path::Path::new(cache_dir).join(&name) }; - // build the parse options - let options = webc::v1::ParseOptions::default(); - // fast path let path = compute_path(cache_dir, name); #[cfg(feature = "sys")] if path.exists() { - match webc::v1::WebCMmap::parse(path.clone(), &options) { - Ok(webc) => unsafe { - let webc = Arc::new(webc); - return parse_webc(webc.as_webc_ref(), webc.clone()).with_context(|| { - format!("could not parse webc file at path : '{}'", path.display()) - }); - }, + tracing::debug!(path=%path.display(), "Parsing cached WEBC file"); + + match Container::from_disk(&path) { + Ok(webc) => { + return parse_webc_v2(&webc) + .with_context(|| format!("Could not parse webc at path '{}'", path.display())); + } Err(err) => { - warn!("failed to parse WebC: {}", err); + tracing::warn!( + error = &err as &dyn std::error::Error, + "failed to parse WEBC", + ); } } } @@ -204,11 +189,10 @@ async fn download_webc( #[cfg(feature = "sys")] { - let cache_dir = cache_dir.to_string(); - let name = name.to_string(); - let path = compute_path(cache_dir.as_str(), name.as_str()); - std::fs::create_dir_all(path.parent().unwrap()) - .with_context(|| format!("Could not create cache directory '{}'", cache_dir))?; + let path = compute_path(cache_dir, name); + std::fs::create_dir_all(path.parent().unwrap()).with_context(|| { + format!("Could not create cache directory '{}'", cache_dir.display()) + })?; let mut temp_path = path.clone(); let rand_128: u128 = rand::random(); @@ -219,39 +203,38 @@ async fn download_webc( )); if let Err(err) = std::fs::write(temp_path.as_path(), &data[..]) { - debug!( + tracing::debug!( "failed to write webc cache file [{}] - {}", temp_path.as_path().to_string_lossy(), err ); } 
if let Err(err) = std::fs::rename(temp_path.as_path(), path.as_path()) { - debug!( + tracing::debug!( "failed to rename webc cache file [{}] - {}", temp_path.as_path().to_string_lossy(), err ); } - match webc::v1::WebCMmap::parse(path.clone(), &options) { - Ok(webc) => unsafe { - let webc = Arc::new(webc); - return parse_webc(webc.as_webc_ref(), webc.clone()) - .with_context(|| format!("Could not parse webc at path '{}'", path.display())); - }, - Err(err) => { - warn!("failed to parse WebC: {}", err); + match Container::from_disk(&path) { + Ok(webc) => { + return parse_webc_v2(&webc) + .with_context(|| format!("Could not parse webc at path '{}'", path.display())) + } + Err(e) => { + tracing::warn!( + path=%temp_path.display(), + error=&e as &dyn std::error::Error, + "Unable to parse temporary WEBC from disk", + ) } } } - let webc_raw = webc::v1::WebCOwned::parse(data, &options) + let webc = Container::from_bytes(data) .with_context(|| format!("Failed to parse downloaded from '{pirita_download_url}'"))?; - let webc = Arc::new(webc_raw); - // FIXME: add SAFETY comment - let package = unsafe { - parse_webc(webc.as_webc_ref(), webc.clone()).context("Could not parse binary package")? 
- }; + let package = parse_webc_v2(&webc).context("Could not parse binary package")?; Ok(package) } @@ -277,201 +260,153 @@ async fn download_package( response.body.context("HTTP response with empty body") } -// TODO: should return Result<_, anyhow::Error> -unsafe fn parse_webc<'a, T>(webc: webc::v1::WebC<'a>, ownership: Arc) -> Option -where - T: std::fmt::Debug + Send + Sync + 'static, - T: Deref>, -{ - let package_name = webc.get_package_name(); - - let mut pck = webc - .manifest - .entrypoint - .iter() - .filter_map(|entry| webc.manifest.commands.get(entry).map(|a| (a, entry))) - .filter_map(|(cmd, entry)| { - let api = if cmd.runner.starts_with("https://webc.org/runner/emscripten") { - "emscripten" - } else if cmd.runner.starts_with("https://webc.org/runner/wasi") { - "wasi" - } else { - warn!("unsupported runner - {}", cmd.runner); - return None; - }; - let atom = webc.get_atom_name_for_command(api, entry.as_str()); - match atom { - Ok(a) => Some(a), - Err(err) => { - warn!( - "failed to find atom name for entry command({}) - {} - falling back on the command name itself", - entry.as_str(), - err - ); - for (name, atom) in webc.manifest.atoms.iter() { - tracing::debug!("found atom (name={}, kind={})", name, atom.kind); - } - Some(entry.clone()) - } - } - }) - .filter_map(|atom| match webc.get_atom(&package_name, atom.as_str()) { - Ok(a) => Some(a), - Err(err) => { - warn!("failed to find atom for atom name({}) - {}", atom, err); - None - } - }) - .map(|atom| { - BinaryPackage::new_with_ownership( - package_name.as_str(), - Some(atom.into()), - ownership.clone(), - ) - }) - .next(); - - // Otherwise add a package without an entry point - if pck.is_none() { - pck = Some(BinaryPackage::new_with_ownership( - package_name.as_str(), - None, - ownership.clone(), - )) - } - let mut pck = pck.take().unwrap(); - - // Add all the dependencies - for uses in webc.manifest.use_map.values() { - let uses = match uses { - UrlOrManifest::Url(url) => 
Some(url.path().to_string()), - UrlOrManifest::Manifest(manifest) => manifest.origin.clone(), - UrlOrManifest::RegistryDependentUrl(url) => Some(url.clone()), - }; - if let Some(uses) = uses { - pck.uses.push(uses); +fn parse_webc_v2(webc: &Container) -> Result { + let manifest = webc.manifest(); + + let wapm: webc::metadata::annotations::Wapm = manifest + .package_annotation("wapm")? + .context("The package must have 'wapm' annotations")?; + + let mut commands = HashMap::new(); + + for (name, cmd) in &manifest.commands { + if let Some(cmd) = load_binary_command(webc, name, cmd)? { + commands.insert(name.as_str(), cmd); } } - // Set the version of this package - if let Some(Annotation::Map(wapm)) = webc.manifest.package.get("wapm") { - if let Some(Annotation::Text(version)) = wapm.get(&Annotation::Text("version".to_string())) - { - pck.version = version.clone().into(); + let entry = manifest.entrypoint.as_deref().and_then(|entry| { + let cmd = commands.get(entry)?; + Some(cmd.atom.clone()) + }); + + let webc_fs = WebcVolumeFileSystem::mount_all(webc); + + // List all the dependencies + let uses: Vec<_> = manifest + .use_map + .values() + .filter_map(|uses| match uses { + UrlOrManifest::Url(url) => Some(url.path()), + UrlOrManifest::Manifest(manifest) => manifest.origin.as_deref(), + UrlOrManifest::RegistryDependentUrl(url) => Some(url), + }) + .map(String::from) + .collect(); + + let module_memory_footprint = entry.as_deref().map(|b| b.len() as u64).unwrap_or(0); + let file_system_memory_footprint = count_file_system(&webc_fs, Path::new("/")); + + let pkg = BinaryPackage { + package_name: wapm.name, + when_cached: Some( + crate::syscalls::platform_clock_time_get(Snapshot0Clockid::Monotonic, 1_000_000) + .unwrap() as u128, + ), + entry: entry.map(Into::into), + hash: Arc::new(Mutex::new(None)), + webc_fs: Some(Arc::new(webc_fs)), + commands: Arc::new(RwLock::new(commands.into_values().collect())), + uses, + version: wapm.version, + module_memory_footprint, + 
file_system_memory_footprint, + }; + + Ok(pkg) +} + +fn load_binary_command( + webc: &Container, + name: &str, + cmd: &webc::metadata::Command, +) -> Result, anyhow::Error> { + let atom_name = match atom_name_for_command(name, cmd)? { + Some(name) => name, + None => { + tracing::warn!( + cmd.name=name, + cmd.runner=%cmd.runner, + "Skipping unsupported command", + ); + return Ok(None); } - } else if let Some(Annotation::Text(version)) = webc.manifest.package.get("version") { - pck.version = version.clone().into(); - } + }; - // Add the file system from the webc - let webc_fs = virtual_fs::webc_fs::WebcFileSystem::init_all(ownership.clone()); - let top_level_dirs = webc_fs.top_level_dirs().clone(); - pck.webc_fs = Some(Arc::new(webc_fs)); - pck.webc_top_level_dirs = top_level_dirs; - - // Add the memory footprint of the file system - if let Some(webc_fs) = pck.webc_fs.as_ref() { - let root_path = PathBuf::from("/"); - pck.file_system_memory_footprint += - count_file_system(webc_fs.as_ref(), root_path.as_path()); + let atom = webc.get_atom(&atom_name); + + if atom.is_none() && cmd.annotations.is_empty() { + return Ok(legacy_atom_hack(webc, name)); } - // Add all the commands - for (command, action) in webc.get_metadata().commands.iter() { - let api = if action - .runner - .starts_with("https://webc.org/runner/emscripten") - { - "emscripten" - } else if action.runner.starts_with("https://webc.org/runner/wasi") { - "wasi" - } else { - warn!("unsupported runner - {}", action.runner); - continue; - }; - let atom = webc.get_atom_name_for_command(api, command.as_str()); - let atom = match atom { - Ok(a) => Some(a), - Err(err) => { - debug!( - "failed to find atom name for entry command({}) - {} - falling back on the command name itself", - command.as_str(), - err - ); - Some(command.clone()) - } - }; + let atom = atom + .with_context(|| format!("The '{name}' command uses the '{atom_name}' atom, but it isn't present in the WEBC file"))?; - // Load the atom as a command - if 
let Some(atom_name) = atom { - match webc.get_atom(package_name.as_str(), atom_name.as_str()) { - Ok(atom) => { - trace!( - "added atom (name={}, size={}) for command [{}]", - atom_name, - atom.len(), - command - ); - if pck.entry.is_none() { - trace!("defaulting entry to command [{}]", command); - let entry: &'static [u8] = { - let atom: &'_ [u8] = atom; - std::mem::transmute(atom) - }; - pck.entry = Some(entry.into()); - } + let cmd = BinaryPackageCommand::new(name.to_string(), atom); - let mut commands = pck.commands.write().unwrap(); - commands.push(BinaryPackageCommand::new_with_ownership( - command.clone(), - atom.into(), - ownership.clone(), - )); - } - Err(err) => { - debug!( - "Failed to find atom [{}].[{}] - {} - falling back on the first atom", - package_name, atom_name, err - ); - - if let Ok(files) = webc.atoms.get_all_files_and_directories_with_bytes() { - if let Some(file) = files.iter().next() { - if let Some(atom) = file.get_bytes() { - trace!( - "added atom (name={}, size={}) for command [{}]", - atom_name, - atom.len(), - command - ); - let mut commands = pck.commands.write().unwrap(); - commands.push(BinaryPackageCommand::new_with_ownership( - command.clone(), - atom.into(), - ownership.clone(), - )); - continue; - } - } - } + Ok(Some(cmd)) +} - debug!( - "Failed to find atom [{}].[{}] - {} - command will be ignored", - package_name, package_name, err - ); - for (name, atom) in webc.manifest.atoms.iter() { - tracing::debug!("found atom (name={}, kind={})", name, atom.kind); - } - if let Ok(files) = webc.atoms.get_all_files_and_directories_with_bytes() { - for file in files.iter() { - tracing::debug!("found file ({})", file.get_path().to_string_lossy()); - } - } - } - } - } +fn atom_name_for_command( + command_name: &str, + cmd: &webc::metadata::Command, +) -> Result, anyhow::Error> { + use webc::metadata::annotations::{Emscripten, Wasi}; + + if let Some(Wasi { atom, .. 
}) = cmd + .annotation("wasi") + .context("Unable to deserialize 'wasi' annotations")? + { + return Ok(Some(atom)); + } + + if let Some(Emscripten { + atom: Some(atom), .. + }) = cmd + .annotation("emscripten") + .context("Unable to deserialize 'emscripten' annotations")? + { + return Ok(Some(atom)); } - Some(pck) + if [WASI_RUNNER_URI, WCGI_RUNNER_URI, EMSCRIPTEN_RUNNER_URI] + .iter() + .any(|uri| cmd.runner.starts_with(uri)) + { + // Note: We use the command name as the atom name as a special case + // for known runner types because sometimes people will construct + // a manifest by hand instead of using wapm2pirita. + tracing::debug!( + command = command_name, + "No annotations specifying the atom name found. Falling back to the command name" + ); + return Ok(Some(command_name.to_string())); + } + + Ok(None) +} + +/// HACK: Some older packages like `sharrattj/bash` and `sharrattj/coreutils` +/// contain commands with no annotations. When this happens, you can just assume +/// it wants to use the first atom in the WEBC file. +/// +/// That works because most of these packages only have a single atom (e.g. in +/// `sharrattj/coreutils` there are commands for `ls`, `pwd`, and so on, but +/// under the hood they all use the `coreutils` atom). +/// +/// See +/// for more. +fn legacy_atom_hack(webc: &Container, command_name: &str) -> Option { + let (name, atom) = webc.atoms().into_iter().next()?; + + tracing::debug!( + command_name, + atom.name = name.as_str(), + atom.len = atom.len(), + "(hack) The command metadata is malformed. 
Falling back to the first atom in the WEBC file", + ); + + Some(BinaryPackageCommand::new(command_name.to_string(), atom)) } fn count_file_system(fs: &dyn FileSystem, path: &Path) -> u64 { @@ -503,3 +438,213 @@ fn count_file_system(fs: &dyn FileSystem, path: &Path) -> u64 { total } + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + + use super::*; + + const PYTHON: &[u8] = include_bytes!("../../../c-api/examples/assets/python-0.1.0.wasmer"); + const COREUTILS: &[u8] = include_bytes!("../../../../tests/integration/cli/tests/webc/coreutils-1.0.14-076508e5-e704-463f-b467-f3d9658fc907.webc"); + const BASH: &[u8] = include_bytes!("../../../../tests/integration/cli/tests/webc/bash-1.0.12-0103d733-1afb-4a56-b0ef-0e124139e996.webc"); + const HELLO: &[u8] = include_bytes!("../../../../tests/integration/cli/tests/webc/hello-0.1.0-665d2ddc-80e6-4845-85d3-4587b1693bb7.webc"); + + #[test] + fn parse_the_python_webc_file() { + let python = webc::compat::Container::from_bytes(PYTHON).unwrap(); + + let pkg = parse_webc_v2(&python).unwrap(); + + assert_eq!(pkg.package_name, "python"); + assert_eq!(pkg.version, "0.1.0"); + assert_eq!(pkg.uses, Vec::::new()); + assert_eq!(pkg.module_memory_footprint, 4694941); + assert_eq!(pkg.file_system_memory_footprint, 13387764); + let python_atom = python.get_atom("python").unwrap(); + assert_eq!(pkg.entry.as_deref(), Some(python_atom.as_slice())); + let commands = pkg.commands.read().unwrap(); + let commands: BTreeMap<&str, &[u8]> = commands + .iter() + .map(|cmd| (cmd.name(), cmd.atom())) + .collect(); + let command_names: Vec<_> = commands.keys().copied().collect(); + assert_eq!(command_names, &["python"]); + assert_eq!(commands["python"], python_atom); + + // Note: It's important that the entry we parse doesn't allocate, so + // make sure it lies within the original PYTHON buffer. 
+ let bounds = PYTHON.as_ptr_range(); + + let entry_ptr = pkg.entry.as_deref().unwrap().as_ptr(); + assert!(bounds.start <= entry_ptr && entry_ptr < bounds.end); + + let python_cmd_ptr = commands["python"].as_ptr(); + assert!(bounds.start <= python_cmd_ptr && python_cmd_ptr < bounds.end); + } + + #[test] + fn parse_a_webc_with_multiple_commands() { + let coreutils = Container::from_bytes(COREUTILS).unwrap(); + + let pkg = parse_webc_v2(&coreutils).unwrap(); + + assert_eq!(pkg.package_name, "sharrattj/coreutils"); + assert_eq!(pkg.version, "1.0.14"); + assert_eq!(pkg.uses, Vec::::new()); + assert_eq!(pkg.module_memory_footprint, 0); + assert_eq!(pkg.file_system_memory_footprint, 44); + assert_eq!(pkg.entry, None); + let commands = pkg.commands.read().unwrap(); + let commands: BTreeMap<&str, &[u8]> = commands + .iter() + .map(|cmd| (cmd.name(), cmd.atom())) + .collect(); + let command_names: Vec<_> = commands.keys().copied().collect(); + assert_eq!( + command_names, + &[ + "arch", + "base32", + "base64", + "baseenc", + "basename", + "cat", + "chcon", + "chgrp", + "chmod", + "chown", + "chroot", + "cksum", + "comm", + "cp", + "csplit", + "cut", + "date", + "dd", + "df", + "dircolors", + "dirname", + "du", + "echo", + "env", + "expand", + "expr", + "factor", + "false", + "fmt", + "fold", + "groups", + "hashsum", + "head", + "hostid", + "hostname", + "id", + "install", + "join", + "kill", + "link", + "ln", + "logname", + "ls", + "mkdir", + "mkfifo", + "mknod", + "mktemp", + "more", + "mv", + "nice", + "nl", + "nohup", + "nproc", + "numfmt", + "od", + "paste", + "pathchk", + "pinky", + "pr", + "printenv", + "printf", + "ptx", + "pwd", + "readlink", + "realpath", + "relpath", + "rm", + "rmdir", + "runcon", + "seq", + "sh", + "shred", + "shuf", + "sleep", + "sort", + "split", + "stat", + "stdbuf", + "sum", + "sync", + "tac", + "tail", + "tee", + "test", + "timeout", + "touch", + "tr", + "true", + "truncate", + "tsort", + "tty", + "uname", + "unexpand", + "uniq", + 
"unlink", + "uptime", + "users", + "wc", + "who", + "whoami", + "yes", + ] + ); + let coreutils_atom = coreutils.get_atom("coreutils").unwrap(); + for (cmd, atom) in commands { + assert_eq!(atom.len(), coreutils_atom.len(), "{cmd}"); + assert_eq!(atom, coreutils_atom, "{cmd}"); + } + } + + #[test] + fn parse_a_webc_with_dependencies() { + let bash = webc::compat::Container::from_bytes(BASH).unwrap(); + + let pkg = parse_webc_v2(&bash).unwrap(); + + assert_eq!(pkg.package_name, "sharrattj/bash"); + assert_eq!(pkg.version, "1.0.12"); + assert_eq!(pkg.uses, &["sharrattj/coreutils@1.0.11"]); + assert_eq!(pkg.module_memory_footprint, 0); + assert_eq!(pkg.file_system_memory_footprint, 0); + let commands = pkg.commands.read().unwrap(); + let commands: BTreeMap<&str, &[u8]> = commands + .iter() + .map(|cmd| (cmd.name(), cmd.atom())) + .collect(); + let command_names: Vec<_> = commands.keys().copied().collect(); + assert_eq!(command_names, &["bash", "sh"]); + assert_eq!(commands["bash"], bash.get_atom("bash").unwrap()); + assert_eq!(commands["sh"], commands["bash"]); + } + + #[test] + fn parse_a_webc_with_dependencies_and_no_commands() { + let pkg = parse_static_webc(HELLO.to_vec()).unwrap(); + + assert_eq!(pkg.package_name, "wasmer/hello"); + assert_eq!(pkg.version, "0.1.0"); + let commands = pkg.commands.read().unwrap(); + assert!(commands.is_empty()); + assert!(pkg.entry.is_none()); + assert_eq!(pkg.uses, ["sharrattj/static-web-server@1"]); + } +} diff --git a/tests/integration/cli/tests/webc/hello-0.1.0-665d2ddc-80e6-4845-85d3-4587b1693bb7.webc b/tests/integration/cli/tests/webc/hello-0.1.0-665d2ddc-80e6-4845-85d3-4587b1693bb7.webc new file mode 100644 index 00000000000..b6122652733 Binary files /dev/null and b/tests/integration/cli/tests/webc/hello-0.1.0-665d2ddc-80e6-4845-85d3-4587b1693bb7.webc differ