Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up the module cache 6x by removing LZW compression #4031

Merged
merged 2 commits into from
Jun 26, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 74 additions & 42 deletions lib/wasix/src/runtime/module_cache/filesystem.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use std::path::{Path, PathBuf};
use std::{
io::{BufWriter, Write},
path::{Path, PathBuf},
};

use tempfile::NamedTempFile;
use wasmer::{Engine, Module};
Expand Down Expand Up @@ -43,10 +46,9 @@ impl ModuleCache for FileSystemCache {
// background.
// https://github.com/wasmerio/wasmer/issues/3851

let uncompressed = read_compressed(&path)?;
let bytes = read_file(&path)?;

let res = unsafe { Module::deserialize(&engine, uncompressed) };
match res {
match deserialize(&bytes, engine) {
Ok(m) => {
tracing::debug!("Cache hit!");
Ok(m)
Expand All @@ -68,7 +70,7 @@ impl ModuleCache for FileSystemCache {
);
}

Err(CacheError::Deserialize(e))
Err(e)
}
}
}
Expand Down Expand Up @@ -101,58 +103,64 @@ impl ModuleCache for FileSystemCache {

// Note: We save to a temporary file and persist() it at the end so
// concurrent readers won't see a partially written module.
let mut f = NamedTempFile::new_in(parent).map_err(CacheError::other)?;
let mut temp = NamedTempFile::new_in(parent).map_err(CacheError::other)?;
let serialized = module.serialize()?;

if let Err(e) = save_compressed(&mut f, &serialized) {
return Err(CacheError::FileWrite { path, error: e });
if let Err(error) = BufWriter::new(&mut temp).write_all(&serialized) {
return Err(CacheError::FileWrite { path, error });
}

f.persist(&path).map_err(CacheError::other)?;
temp.persist(&path).map_err(CacheError::other)?;

Ok(())
}
}

fn save_compressed(writer: impl std::io::Write, data: &[u8]) -> Result<(), std::io::Error> {
let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8);
encoder
.into_stream(writer)
.encode_all(std::io::Cursor::new(data))
.status?;

Ok(())
/// Read the entire contents of the file at `path` into memory.
///
/// # Errors
///
/// Returns [`CacheError::NotFound`] when the file does not exist, and
/// [`CacheError::FileRead`] (carrying the path and the underlying I/O error)
/// for every other failure.
fn read_file(path: &Path) -> Result<Vec<u8>, CacheError> {
    std::fs::read(path).map_err(|error| {
        // A missing file is an ordinary cache miss, not a read failure.
        if error.kind() == std::io::ErrorKind::NotFound {
            CacheError::NotFound
        } else {
            CacheError::FileRead {
                path: path.to_path_buf(),
                error,
            }
        }
    })
}

fn read_compressed(path: &Path) -> Result<Vec<u8>, CacheError> {
let compressed = match std::fs::read(path) {
Ok(bytes) => bytes,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
return Err(CacheError::NotFound);
fn deserialize(bytes: &[u8], engine: &Engine) -> Result<Module, CacheError> {
// We used to compress our compiled modules using LZW encoding.
// This was removed because it has a negative impact on startup times for
// "wasmer run", so all new compiled modules should be saved directly to
// disk.
//
// For perspective, compiling php.wasm with cranelift took about 4.75
// seconds on an M1 Mac.
//
// Without LZW compression:
// - ModuleCache::save(): 408ms, 142MB binary
// - ModuleCache::load(): 155ms
// With LZW compression:
// - ModuleCache::save(): 2.4s, 72MB binary
// - ModuleCache::load(): 822ms

match unsafe { Module::deserialize(engine, bytes) } {
// The happy case
Ok(m) => Ok(m),
Err(wasmer::DeserializeError::Incompatible(_)) => {
let bytes = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8)
.decode(bytes)
.map_err(CacheError::other)?;

let m = unsafe { Module::deserialize(engine, bytes)? };

Ok(m)
}
Err(error) => {
return Err(CacheError::FileRead {
path: path.to_path_buf(),
error,
});
}
};

let mut uncompressed = Vec::new();
let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8);
decoder
.into_vec(&mut uncompressed)
.decode_all(&compressed)
.status
.map_err(CacheError::other)?;

Ok(uncompressed)
Err(e) => Err(CacheError::Deserialize(e)),
}
}

#[cfg(test)]
mod tests {
use std::fs::File;

use tempfile::TempDir;

use super::*;
Expand Down Expand Up @@ -218,7 +226,31 @@ mod tests {
let expected_path = cache.path(key, engine.deterministic_id());
std::fs::create_dir_all(expected_path.parent().unwrap()).unwrap();
let serialized = module.serialize().unwrap();
save_compressed(File::create(&expected_path).unwrap(), &serialized).unwrap();
std::fs::write(&expected_path, &serialized).unwrap();

let module = cache.load(key, &engine).await.unwrap();

let exports: Vec<_> = module
.exports()
.map(|export| export.name().to_string())
.collect();
assert_eq!(exports, ["add"]);
}

/// For backwards compatibility, make sure we can still work with LZW
/// compressed modules.
#[tokio::test]
async fn can_still_load_lzw_compressed_binaries() {
let temp = TempDir::new().unwrap();
let engine = Engine::default();
let module = Module::new(&engine, ADD_WAT).unwrap();
let key = ModuleHash::from_bytes([0; 32]);
let cache = FileSystemCache::new(temp.path());
let expected_path = cache.path(key, engine.deterministic_id());
std::fs::create_dir_all(expected_path.parent().unwrap()).unwrap();
let serialized = module.serialize().unwrap();
let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8);
std::fs::write(&expected_path, encoder.encode(&serialized).unwrap()).unwrap();

let module = cache.load(key, &engine).await.unwrap();

Expand Down