From f15780e7367c1117750da71cb13bd6d74cd1681e Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Mon, 26 Jun 2023 09:53:19 +0800 Subject: [PATCH 1/2] Remove LZW compression from the module cache --- .../src/runtime/module_cache/filesystem.rs | 98 ++++++++++++++----- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/lib/wasix/src/runtime/module_cache/filesystem.rs b/lib/wasix/src/runtime/module_cache/filesystem.rs index 4fc685dd6e7..752fc0d05b8 100644 --- a/lib/wasix/src/runtime/module_cache/filesystem.rs +++ b/lib/wasix/src/runtime/module_cache/filesystem.rs @@ -1,4 +1,8 @@ -use std::path::{Path, PathBuf}; +use std::{ + fs::File, + io::{BufReader, BufWriter, Read, Seek, Write}, + path::{Path, PathBuf}, +}; use tempfile::NamedTempFile; use wasmer::{Engine, Module}; @@ -101,32 +105,23 @@ impl ModuleCache for FileSystemCache { // Note: We save to a temporary file and persist() it at the end so // concurrent readers won't see a partially written module. - let mut f = NamedTempFile::new_in(parent).map_err(CacheError::other)?; + let mut temp = NamedTempFile::new_in(parent).map_err(CacheError::other)?; let serialized = module.serialize()?; - if let Err(e) = save_compressed(&mut f, &serialized) { - return Err(CacheError::FileWrite { path, error: e }); + if let Err(error) = BufWriter::new(&mut temp).write_all(&serialized) { + return Err(CacheError::FileWrite { path, error }); } - f.persist(&path).map_err(CacheError::other)?; + temp.persist(&path).map_err(CacheError::other)?; Ok(()) } } -fn save_compressed(writer: impl std::io::Write, data: &[u8]) -> Result<(), std::io::Error> { - let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8); - encoder - .into_stream(writer) - .encode_all(std::io::Cursor::new(data)) - .status?; - - Ok(()) -} - +/// Read a file that may or may not be compressed. fn read_compressed(path: &Path) -> Result, CacheError> { - let compressed = match std::fs::read(path) { - Ok(bytes) => bytes, + let mut f = match File::open(path) { + Ok(f) => BufReader::new(f), Err(e) if e.kind() == std::io::ErrorKind::NotFound => { return Err(CacheError::NotFound); } @@ -138,11 +133,48 @@ fn read_compressed(path: &Path) -> Result, CacheError> { } }; + // We used to compress our compiled modules using LZW encoding in the past. + // This was removed because it has a negative impact on startup times for + // "wasmer run", so all new compiled modules should be saved directly to + // disk. + // + // For perspective, compiling php.wasm with cranelift took about 4.75 + // seconds on a M1 Mac. + // + // Without LZW compression: + // - ModuleCache::save(): 408ms, 142MB binary + // - ModuleCache::load(): 155ms + // With LZW compression: + // - ModuleCache::save(): 2.4s, 72MB binary + // - ModuleCache::load(): 822ms + + let mut leading_bytes = [0_u8; 128]; + f.read_exact(&mut leading_bytes) + .and_then(|_| f.rewind()) + .map_err(|error| CacheError::FileRead { + path: path.to_path_buf(), + error, + })?; + + if wasmer::Artifact::is_deserializable(&leading_bytes) { + // the compiled artifact was saved as-is. Return it. + let mut artifact = Vec::new(); + f.read_to_end(&mut artifact) + .map_err(|error| CacheError::FileRead { + path: path.to_path_buf(), + error, + })?; + + return Ok(artifact); + } + + // Fall back to LZW decoding + let mut uncompressed = Vec::new(); let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8); decoder - .into_vec(&mut uncompressed) - .decode_all(&compressed) + .into_stream(&mut uncompressed) + .decode_all(f) .status .map_err(CacheError::other)?; @@ -151,8 +183,6 @@ fn read_compressed(path: &Path) -> Result, CacheError> { #[cfg(test)] mod tests { - use std::fs::File; - use tempfile::TempDir; use super::*; @@ -218,7 +248,31 @@ mod tests { let expected_path = cache.path(key, engine.deterministic_id()); std::fs::create_dir_all(expected_path.parent().unwrap()).unwrap(); let serialized = module.serialize().unwrap(); - save_compressed(File::create(&expected_path).unwrap(), &serialized).unwrap(); + std::fs::write(&expected_path, &serialized).unwrap(); + + let module = cache.load(key, &engine).await.unwrap(); + + let exports: Vec<_> = module + .exports() + .map(|export| export.name().to_string()) + .collect(); + assert_eq!(exports, ["add"]); + } + + /// For backwards compatibility, make sure we can still work with LZW + /// compressed modules. + #[tokio::test] + async fn can_still_load_lzw_compressed_binaries() { + let temp = TempDir::new().unwrap(); + let engine = Engine::default(); + let module = Module::new(&engine, ADD_WAT).unwrap(); + let key = ModuleHash::from_bytes([0; 32]); + let cache = FileSystemCache::new(temp.path()); + let expected_path = cache.path(key, engine.deterministic_id()); + std::fs::create_dir_all(expected_path.parent().unwrap()).unwrap(); + let serialized = module.serialize().unwrap(); + let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8); + std::fs::write(&expected_path, encoder.encode(&serialized).unwrap()).unwrap(); let module = cache.load(key, &engine).await.unwrap(); From 422362569d6b7a5f48cddf8e15b1e2ac1656dc7a Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Mon, 26 Jun 2023 11:27:39 +0800 Subject: [PATCH 2/2] Re-work deserializing to not rely on Artifact::is_deserializable() --- .../src/runtime/module_cache/filesystem.rs | 76 +++++++------------ 1 file changed, 27 insertions(+), 49 deletions(-) diff --git a/lib/wasix/src/runtime/module_cache/filesystem.rs b/lib/wasix/src/runtime/module_cache/filesystem.rs index 752fc0d05b8..47f95f837a1 100644 --- a/lib/wasix/src/runtime/module_cache/filesystem.rs +++ b/lib/wasix/src/runtime/module_cache/filesystem.rs @@ -1,6 +1,5 @@ use std::{ - fs::File, - io::{BufReader, BufWriter, Read, Seek, Write}, + io::{BufWriter, Write}, path::{Path, PathBuf}, }; @@ -47,10 +46,9 @@ impl ModuleCache for FileSystemCache { // background. // https://github.com/wasmerio/wasmer/issues/3851 - let uncompressed = read_compressed(&path)?; + let bytes = read_file(&path)?; - let res = unsafe { Module::deserialize(&engine, uncompressed) }; - match res { + match deserialize(&bytes, engine) { Ok(m) => { tracing::debug!("Cache hit!"); Ok(m) @@ -72,7 +70,7 @@ impl ModuleCache for FileSystemCache { ); } - Err(CacheError::Deserialize(e)) + Err(e) } } } @@ -118,21 +116,18 @@ impl ModuleCache for FileSystemCache { } } -/// Read a file that may or may not be compressed. -fn read_compressed(path: &Path) -> Result, CacheError> { - let mut f = match File::open(path) { - Ok(f) => BufReader::new(f), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - return Err(CacheError::NotFound); - } - Err(error) => { - return Err(CacheError::FileRead { - path: path.to_path_buf(), - error, - }); - } - }; +fn read_file(path: &Path) -> Result, CacheError> { + match std::fs::read(path) { + Ok(bytes) => Ok(bytes), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Err(CacheError::NotFound), + Err(error) => Err(CacheError::FileRead { + path: path.to_path_buf(), + error, + }), + } +} +fn deserialize(bytes: &[u8], engine: &Engine) -> Result { // We used to compress our compiled modules using LZW encoding in the past. // This was removed because it has a negative impact on startup times for // "wasmer run", so all new compiled modules should be saved directly to @@ -148,37 +143,20 @@ fn read_compressed(path: &Path) -> Result, CacheError> { // - ModuleCache::save(): 2.4s, 72MB binary // - ModuleCache::load(): 822ms - let mut leading_bytes = [0_u8; 128]; - f.read_exact(&mut leading_bytes) - .and_then(|_| f.rewind()) - .map_err(|error| CacheError::FileRead { - path: path.to_path_buf(), - error, - })?; - - if wasmer::Artifact::is_deserializable(&leading_bytes) { - // the compiled artifact was saved as-is. Return it. - let mut artifact = Vec::new(); - f.read_to_end(&mut artifact) - .map_err(|error| CacheError::FileRead { - path: path.to_path_buf(), - error, - })?; - - return Ok(artifact); - } + match unsafe { Module::deserialize(engine, bytes) } { + // The happy case + Ok(m) => Ok(m), + Err(wasmer::DeserializeError::Incompatible(_)) => { + let bytes = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8) + .decode(bytes) + .map_err(CacheError::other)?; - // Fall back to LZW decoding + let m = unsafe { Module::deserialize(engine, bytes)? }; - let mut uncompressed = Vec::new(); - let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8); - decoder - .into_stream(&mut uncompressed) - .decode_all(f) - .status - .map_err(CacheError::other)?; - - Ok(uncompressed) + Ok(m) + } + Err(e) => Err(CacheError::Deserialize(e)), + } } #[cfg(test)]