From 863fd919c890608cea49e499e0fcfb81eee529ea Mon Sep 17 00:00:00 2001 From: Christoph Herzog Date: Mon, 22 May 2023 22:11:41 +0200 Subject: [PATCH] Rework Module deserialize methods Rework the deserialize/deserialize_checked methods for module deserialization. * Make deserialize() methods use artifact validation * Make checked methods unsafe again, because loading executable memory is inherently unsafe * Rename methods that skip artifact validation to deserialize_unchecked Closes #3727 --- lib/api/src/engine.rs | 45 ++++++++++++++++--- lib/api/src/js/module.rs | 10 ++--- lib/api/src/jsc/module.rs | 12 ++--- lib/api/src/module.rs | 32 ++++++------- lib/api/src/sys/module.rs | 16 +++---- lib/cache/src/filesystem.rs | 2 +- lib/cli/src/commands/run.rs | 2 +- lib/compiler/src/engine/artifact.rs | 26 +++++++---- lib/compiler/src/engine/inner.rs | 33 +++++++++----- lib/types/src/compilation/symbols.rs | 4 +- lib/types/src/serialize.rs | 8 +++- .../src/runtime/module_cache/filesystem.rs | 3 +- 12 files changed, 124 insertions(+), 69 deletions(-) diff --git a/lib/api/src/engine.rs b/lib/api/src/engine.rs index 308501b5bd6..67fbc45f238 100644 --- a/lib/api/src/engine.rs +++ b/lib/api/src/engine.rs @@ -135,24 +135,57 @@ impl Engine { )) } - #[deprecated(since = "3.2.0")] #[cfg(all(feature = "sys", not(target_arch = "wasm32")))] - /// Deserializes a WebAssembly module + /// Deserializes a WebAssembly module which was previously serialized with + /// `Module::serialize`. + /// + /// NOTE: you should almost always prefer [`Self::deserialize`]. /// /// # Safety + /// See [`Artifact::deserialize_unchecked`]. + pub unsafe fn deserialize_unchecked( + &self, + bytes: &[u8], + ) -> Result, DeserializeError> { + Ok(Arc::new(Artifact::deserialize_unchecked(&self.0, bytes)?)) + } + + #[cfg(all(feature = "sys", not(target_arch = "wasm32")))] + /// Deserializes a WebAssembly module which was previously serialized with + /// `Module::serialize`. /// - /// The serialized content must represent a serialized WebAssembly module. + /// # Safety + /// See [`Artifact::deserialize`]. pub unsafe fn deserialize(&self, bytes: &[u8]) -> Result, DeserializeError> { Ok(Arc::new(Artifact::deserialize(&self.0, bytes)?)) } - #[deprecated(since = "3.2.0")] #[cfg(all(feature = "sys", not(target_arch = "wasm32")))] - /// Deserializes a WebAssembly module from a path + /// Load a serialized WebAssembly module from a file and deserialize it. + /// + /// NOTE: you should almost always prefer [`Self::deserialize_from_file`]. /// /// # Safety + /// See [`Artifact::deserialize_unchecked`]. + pub unsafe fn deserialize_from_file_unchecked( + &self, + file_ref: &Path, + ) -> Result, DeserializeError> { + let mut file = std::fs::File::open(file_ref)?; + let mut buffer = Vec::new(); + // read the whole file + file.read_to_end(&mut buffer)?; + Ok(Arc::new(Artifact::deserialize_unchecked( + &self.0, + buffer.as_slice(), + )?)) + } + + #[cfg(all(feature = "sys", not(target_arch = "wasm32")))] + /// Load a serialized WebAssembly module from a file and deserialize it. /// - /// The file's content must represent a serialized WebAssembly module. + /// # Safety + /// See [`Artifact::deserialize`]. pub unsafe fn deserialize_from_file( &self, file_ref: &Path, diff --git a/lib/api/src/js/module.rs b/lib/api/src/js/module.rs index c38af93c77e..1a202def78f 100644 --- a/lib/api/src/js/module.rs +++ b/lib/api/src/js/module.rs @@ -227,7 +227,7 @@ impl Module { )); } - pub unsafe fn deserialize( + pub unsafe fn deserialize_unchecked( _engine: &impl AsEngineRef, _bytes: impl IntoBytes, ) -> Result { @@ -239,7 +239,7 @@ impl Module { return Err(DeserializeError::Generic("You need to enable the `js-serializable-module` feature flag to deserialize a `Module`".to_string())); } - pub fn deserialize_checked( + pub unsafe fn deserialize( _engine: &impl AsEngineRef, _bytes: impl IntoBytes, ) -> Result { @@ -251,7 +251,7 @@ impl Module { return Err(DeserializeError::Generic("You need to enable the `js-serializable-module` feature flag to deserialize a `Module`".to_string())); } - pub unsafe fn deserialize_from_file( + pub unsafe fn deserialize_from_file_unchecked( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { @@ -259,12 +259,12 @@ impl Module { Self::deserialize(engine, bytes) } - pub fn deserialize_from_file_checked( + pub unsafe fn deserialize_from_file( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { let bytes = std::fs::read(path.as_ref())?; - Self::deserialize_checked(engine, bytes) + Self::deserialize(engine, bytes) } pub fn set_name(&mut self, name: &str) -> bool { diff --git a/lib/api/src/jsc/module.rs b/lib/api/src/jsc/module.rs index 44f3b5fc9c0..4cdd56c74cb 100644 --- a/lib/api/src/jsc/module.rs +++ b/lib/api/src/jsc/module.rs @@ -183,7 +183,7 @@ impl Module { )); } - pub unsafe fn deserialize( + pub unsafe fn deserialize_unchecked( _engine: &impl AsEngineRef, _bytes: impl IntoBytes, ) -> Result { @@ -191,27 +191,27 @@ impl Module { .map_err(|e| DeserializeError::Compiler(e)); } - pub fn deserialize_checked( + pub unsafe fn deserialize( _engine: &impl AsEngineRef, _bytes: impl IntoBytes, ) -> Result { unimplemented!(); } - pub unsafe fn deserialize_from_file( + pub unsafe fn deserialize_from_file_unchecked( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { let bytes = std::fs::read(path.as_ref())?; - Self::deserialize(engine, bytes) + Self::deserialize_unchecked(engine, bytes) } - pub fn deserialize_from_file_checked( + pub unsafe fn deserialize_from_file( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { let bytes = std::fs::read(path.as_ref())?; - Self::deserialize_checked(engine, bytes) + Self::deserialize(engine, bytes) } pub fn set_name(&mut self, name: &str) -> bool { diff --git a/lib/api/src/module.rs b/lib/api/src/module.rs index ad03a2746fc..253f981e3de 100644 --- a/lib/api/src/module.rs +++ b/lib/api/src/module.rs @@ -219,9 +219,9 @@ impl Module { Ok(()) } - /// Deserializes a serialized Module binary into a `Module`. + /// Deserializes a serialized module binary into a `Module`. /// - /// Note: You should usually prefer the safe [`Module::deserialize_checked`]. + /// Note: You should usually prefer the safer [`Module::deserialize`]. /// /// # Important /// @@ -250,11 +250,13 @@ impl Module { /// # Ok(()) /// # } /// ``` - pub unsafe fn deserialize( + pub unsafe fn deserialize_unchecked( engine: &impl AsEngineRef, bytes: impl IntoBytes, ) -> Result { - Ok(Self(module_imp::Module::deserialize(engine, bytes)?)) + Ok(Self(module_imp::Module::deserialize_unchecked( + engine, bytes, + )?)) } /// Deserializes a serialized Module binary into a `Module`. @@ -272,17 +274,15 @@ impl Module { /// # use wasmer::*; /// # fn main() -> anyhow::Result<()> { /// # let mut store = Store::default(); - /// let module = Module::deserialize_checked(&store, serialized_data)?; + /// let module = Module::deserialize(&store, serialized_data)?; /// # Ok(()) /// # } /// ``` - pub fn deserialize_checked( + pub unsafe fn deserialize( engine: &impl AsEngineRef, bytes: impl IntoBytes, ) -> Result { - Ok(Self(module_imp::Module::deserialize_checked( - engine, bytes, - )?)) + Ok(Self(module_imp::Module::deserialize(engine, bytes)?)) } /// Deserializes a a serialized Module located in a `Path` into a `Module`. @@ -298,11 +298,11 @@ impl Module { /// # Ok(()) /// # } /// ``` - pub fn deserialize_from_file_checked( + pub unsafe fn deserialize_from_file( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { - Ok(Self(module_imp::Module::deserialize_from_file_checked( + Ok(Self(module_imp::Module::deserialize_from_file( engine, path, )?)) } @@ -310,9 +310,11 @@ impl Module { /// Deserializes a a serialized Module located in a `Path` into a `Module`. /// > Note: the module has to be serialized before with the `serialize` method. /// + /// You should usually prefer the safer [`Module::deserialize_from_file`]. + /// /// # Safety /// - /// Please check [`Module::deserialize`]. + /// Please check [`Module::deserialize_unchecked`]. /// /// # Usage /// @@ -320,15 +322,15 @@ impl Module { /// # use wasmer::*; /// # let mut store = Store::default(); /// # fn main() -> anyhow::Result<()> { - /// let module = Module::deserialize_from_file(&store, path)?; + /// let module = Module::deserialize_from_file_unchecked(&store, path)?; /// # Ok(()) /// # } /// ``` - pub unsafe fn deserialize_from_file( + pub unsafe fn deserialize_from_file_unchecked( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { - Ok(Self(module_imp::Module::deserialize_from_file( + Ok(Self(module_imp::Module::deserialize_from_file_unchecked( engine, path, )?)) } diff --git a/lib/api/src/sys/module.rs b/lib/api/src/sys/module.rs index 108dbe11e92..287b92ce9b3 100644 --- a/lib/api/src/sys/module.rs +++ b/lib/api/src/sys/module.rs @@ -69,7 +69,7 @@ impl Module { self.artifact.serialize().map(|bytes| bytes.into()) } - pub unsafe fn deserialize( + pub unsafe fn deserialize_unchecked( engine: &impl AsEngineRef, bytes: impl IntoBytes, ) -> Result { @@ -78,20 +78,16 @@ impl Module { Ok(Self::from_artifact(artifact)) } - pub fn deserialize_checked( + pub unsafe fn deserialize( engine: &impl AsEngineRef, bytes: impl IntoBytes, ) -> Result { let bytes = bytes.into_bytes(); - let artifact = engine - .as_engine_ref() - .engine() - .0 - .deserialize_checked(&bytes)?; + let artifact = engine.as_engine_ref().engine().0.deserialize(&bytes)?; Ok(Self::from_artifact(artifact)) } - pub unsafe fn deserialize_from_file( + pub unsafe fn deserialize_from_file_unchecked( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { @@ -103,7 +99,7 @@ impl Module { Ok(Self::from_artifact(artifact)) } - pub fn deserialize_from_file_checked( + pub unsafe fn deserialize_from_file( engine: &impl AsEngineRef, path: impl AsRef, ) -> Result { @@ -111,7 +107,7 @@ impl Module { .as_engine_ref() .engine() .0 - .deserialize_from_file_checked(path.as_ref())?; + .deserialize_from_file(path.as_ref())?; Ok(Self::from_artifact(artifact)) } diff --git a/lib/cache/src/filesystem.rs b/lib/cache/src/filesystem.rs index 2a0841b061c..42f7938caaf 100644 --- a/lib/cache/src/filesystem.rs +++ b/lib/cache/src/filesystem.rs @@ -103,7 +103,7 @@ impl Cache for FileSystemCache { key.to_string() }; let path = self.path.join(filename); - let ret = Module::deserialize_from_file_checked(engine, path.clone()); + let ret = Module::deserialize_from_file(engine, path.clone()); if ret.is_err() { // If an error occurs while deserializing then we can not trust it anymore // so delete the cache file diff --git a/lib/cli/src/commands/run.rs b/lib/cli/src/commands/run.rs index 6b0f047bb72..d88c19e8588 100644 --- a/lib/cli/src/commands/run.rs +++ b/lib/cli/src/commands/run.rs @@ -472,7 +472,7 @@ impl RunWithPathBuf { if wasmer_compiler::Artifact::is_deserializable(&contents) { let engine = wasmer_compiler::EngineBuilder::headless(); let store = Store::new(engine); - let module = Module::deserialize_from_file_checked(&store, &self.path)?; + let module = unsafe { Module::deserialize_from_file(&store, &self.path)? }; return Ok((store, module)); } let (store, compiler_type) = self.store.get_store()?; diff --git a/lib/compiler/src/engine/artifact.rs b/lib/compiler/src/engine/artifact.rs index 054e76c79c5..b5961fc5d29 100644 --- a/lib/compiler/src/engine/artifact.rs +++ b/lib/compiler/src/engine/artifact.rs @@ -143,12 +143,15 @@ impl Artifact { )) } - /// Deserialize a ArtifactBuild + /// Deserialize a serialized artifact. /// /// # Safety - /// This function is unsafe because rkyv reads directly without validating - /// the data. - pub fn deserialize_checked(engine: &Engine, bytes: &[u8]) -> Result { + /// This function loads executable code into memory. + /// You must trust the loaded bytes to be valid for the chosen engine and + /// for the host CPU architecture. + /// In contrast to [`Self::deserialize_unchecked`] the artifact layout is + /// validated, which increases safety. + pub unsafe fn deserialize(engine: &Engine, bytes: &[u8]) -> Result { if !ArtifactBuild::is_deserializable(bytes) { return Err(DeserializeError::Incompatible( "Magic header not found".to_string(), @@ -161,18 +164,25 @@ impl Artifact { let metadata_slice = Self::get_byte_slice(bytes, MetadataHeader::LEN, bytes.len())?; let metadata_slice = Self::get_byte_slice(metadata_slice, 0, metadata_len)?; - let serializable = SerializableModule::deserialize_checked(metadata_slice)?; + let serializable = SerializableModule::deserialize(metadata_slice)?; let artifact = ArtifactBuild::from_serializable(serializable); let mut inner_engine = engine.inner_mut(); Self::from_parts(&mut inner_engine, artifact, engine.target()) .map_err(DeserializeError::Compiler) } - /// Deserialize a ArtifactBuild + /// Deserialize a serialized artifact. + /// + /// NOTE: You should prefer [`Self::deserialize`]. /// /// # Safety - /// This function is unsafe because rkyv reads directly without validating the data. - pub unsafe fn deserialize(engine: &Engine, bytes: &[u8]) -> Result { + /// See [`Self::deserialize`]. + /// In contrast to the above, this function skips artifact layout validation, + /// which increases the risk of loading invalid artifacts. + pub unsafe fn deserialize_unchecked( + engine: &Engine, + bytes: &[u8], + ) -> Result { if !ArtifactBuild::is_deserializable(bytes) { let static_artifact = Self::deserialize_object(engine, bytes); match static_artifact { diff --git a/lib/compiler/src/engine/inner.rs b/lib/compiler/src/engine/inner.rs index 5424966fdd1..85277155684 100644 --- a/lib/compiler/src/engine/inner.rs +++ b/lib/compiler/src/engine/inner.rs @@ -191,44 +191,53 @@ impl Engine { } #[cfg(not(target_arch = "wasm32"))] - /// Deserializes a WebAssembly module + /// Deserializes a WebAssembly module which was previously serialized with + /// [`Module::serialize`]. /// /// # Safety /// - /// The serialized content must represent a serialized WebAssembly module. - pub unsafe fn deserialize(&self, bytes: &[u8]) -> Result, DeserializeError> { - Ok(Arc::new(Artifact::deserialize(self, bytes)?)) + /// See [`Artifact::deserialize_unchecked`]. + pub unsafe fn deserialize_unchecked( + &self, + bytes: &[u8], + ) -> Result, DeserializeError> { + Ok(Arc::new(Artifact::deserialize_unchecked(self, bytes)?)) } - /// Deserializes a WebAssembly module + /// Deserializes a WebAssembly module which was previously serialized with + /// [`Module::serialize`]. + /// + /// # Safety + /// + /// See [`Artifact::deserialize`]. #[cfg(not(target_arch = "wasm32"))] - pub fn deserialize_checked(&self, bytes: &[u8]) -> Result, DeserializeError> { - Ok(Arc::new(Artifact::deserialize_checked(self, bytes)?)) + pub unsafe fn deserialize(&self, bytes: &[u8]) -> Result, DeserializeError> { + Ok(Arc::new(Artifact::deserialize(self, bytes)?)) } /// Deserializes a WebAssembly module from a path #[cfg(not(target_arch = "wasm32"))] - pub fn deserialize_from_file_checked( + pub unsafe fn deserialize_from_file( &self, file_ref: &Path, ) -> Result, DeserializeError> { let contents = std::fs::read(file_ref)?; - self.deserialize_checked(&contents) + self.deserialize(&contents) } /// Deserialize from a file path. /// /// # Safety /// - /// See [`Artifact::deserialize`]. + /// See [`Artifact::deserialize_unchecked`]. #[cfg(not(target_arch = "wasm32"))] - pub unsafe fn deserialize_from_file( + pub unsafe fn deserialize_from_file_unchecked( &self, file_ref: &Path, ) -> Result, DeserializeError> { let file = std::fs::File::open(file_ref)?; let mmap = Mmap::map(&file)?; - self.deserialize(&mmap) + self.deserialize_unchecked(&mmap) } /// A unique identifier for this object. diff --git a/lib/types/src/compilation/symbols.rs b/lib/types/src/compilation/symbols.rs index 1e91e44d14b..4dc63119729 100644 --- a/lib/types/src/compilation/symbols.rs +++ b/lib/types/src/compilation/symbols.rs @@ -110,7 +110,7 @@ impl ModuleMetadata { /// Right now we are not doing any extra work for validation, but /// `rkyv` has an option to do bytecheck on the serialized data before /// serializing (via `rkyv::check_archived_value`). - pub unsafe fn deserialize(metadata_slice: &[u8]) -> Result { + pub unsafe fn deserialize_unchecked(metadata_slice: &[u8]) -> Result { let archived = Self::archive_from_slice(metadata_slice)?; Self::deserialize_from_archive(archived) } @@ -118,7 +118,7 @@ impl ModuleMetadata { /// Deserialize a Module from a slice. /// The slice must have the following format: /// RKYV serialization (any length) + POS (8 bytes) - pub fn deserialize_checked(metadata_slice: &[u8]) -> Result { + pub fn deserialize(metadata_slice: &[u8]) -> Result { let archived = Self::archive_from_slice_checked(metadata_slice)?; Self::deserialize_from_archive(archived) } diff --git a/lib/types/src/serialize.rs b/lib/types/src/serialize.rs index 4528b9b1c2a..9eb6c28b2f6 100644 --- a/lib/types/src/serialize.rs +++ b/lib/types/src/serialize.rs @@ -94,7 +94,7 @@ impl SerializableModule { /// Right now we are not doing any extra work for validation, but /// `rkyv` has an option to do bytecheck on the serialized data before /// serializing (via `rkyv::check_archived_value`). - pub unsafe fn deserialize(metadata_slice: &[u8]) -> Result { + pub unsafe fn deserialize_unchecked(metadata_slice: &[u8]) -> Result { let archived = Self::archive_from_slice(metadata_slice)?; Self::deserialize_from_archive(archived) } @@ -104,7 +104,11 @@ impl SerializableModule { /// RKYV serialization (any length) + POS (8 bytes) /// /// Unlike [`Self::deserialize`], this function will validate the data. - pub fn deserialize_checked(metadata_slice: &[u8]) -> Result { + /// + /// # Safety + /// Unsafe because it loads executable code into memory. + /// The loaded bytes must be trusted. + pub unsafe fn deserialize(metadata_slice: &[u8]) -> Result { let archived = Self::archive_from_slice_checked(metadata_slice)?; Self::deserialize_from_archive(archived) } diff --git a/lib/wasi/src/runtime/module_cache/filesystem.rs b/lib/wasi/src/runtime/module_cache/filesystem.rs index 81ae8f823f0..9cabf9bd2d6 100644 --- a/lib/wasi/src/runtime/module_cache/filesystem.rs +++ b/lib/wasi/src/runtime/module_cache/filesystem.rs @@ -44,7 +44,8 @@ impl ModuleCache for FileSystemCache { let uncompressed = read_compressed(&path)?; - match Module::deserialize_checked(&engine, &uncompressed) { + let res = unsafe { Module::deserialize(&engine, &uncompressed) }; + match res { Ok(m) => Ok(m), Err(e) => { tracing::debug!(