Skip to content

Commit

Permalink
feat: hamt: paranoid validation when reading nodes (#1864)
Browse files Browse the repository at this point in the history
This duplicates some logic we already have in the go hamt implementation but failed to replicate here. The lack of validation here won't cause any issues on the current network as we only read HAMTs we've written, never untrusted HAMTs. However, we should fix this anyways.
  • Loading branch information
Stebalien authored Sep 1, 2023
1 parent c56f541 commit 1845ab4
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 116 deletions.
2 changes: 0 additions & 2 deletions ipld/hamt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ fvm_ipld_blockstore = { version = "0.2", path = "../blockstore" }

[features]
identity = []
# This feature should just be used for testing (ignoring links that don't exist in store)
ignore-dead-links = []

[dev-dependencies]
hex = "0.4.3"
Expand Down
20 changes: 13 additions & 7 deletions ipld/hamt/src/bitfield.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ use byteorder::{BigEndian, ByteOrder};
use fvm_ipld_encoding::de::{Deserialize, Deserializer};
use fvm_ipld_encoding::ser::{Serialize, Serializer};
use fvm_ipld_encoding::strict_bytes;
use serde::de::Error;

const MAX_LEN: usize = 4 * 8;

#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub struct Bitfield([u64; 4]);
Expand All @@ -17,7 +20,7 @@ impl Serialize for Bitfield {
where
S: Serializer,
{
let mut v = [0u8; 4 * 8];
let mut v = [0u8; MAX_LEN];
// Big endian ordering, to match go
BigEndian::write_u64(&mut v[..8], self.0[3]);
BigEndian::write_u64(&mut v[8..16], self.0[2]);
Expand All @@ -40,13 +43,16 @@ impl<'de> Deserialize<'de> for Bitfield {
D: Deserializer<'de>,
{
let mut res = Bitfield::zero();
let bytes = strict_bytes::ByteBuf::deserialize(deserializer)?.into_vec();

let mut arr = [0u8; 4 * 8];
let len = bytes.len();
for (old, new) in bytes.iter().zip(arr[(32 - len)..].iter_mut()) {
*new = *old;
let strict_bytes::ByteBuf(bytes) = Deserialize::deserialize(deserializer)?;
if bytes.len() > MAX_LEN {
return Err(Error::invalid_length(
bytes.len(),
&"bitfield length exceeds maximum",
));
}

let mut arr = [0u8; MAX_LEN];
arr[MAX_LEN - bytes.len()..].copy_from_slice(&bytes);
res.0[3] = BigEndian::read_u64(&arr[..8]);
res.0[2] = BigEndian::read_u64(&arr[8..16]);
res.0[1] = BigEndian::read_u64(&arr[16..24]);
Expand Down
32 changes: 12 additions & 20 deletions ipld/hamt/src/hamt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,13 @@ where

/// Lazily instantiate a hamt from this root Cid with a specified parameters.
pub fn load_with_config(cid: &Cid, store: BS, conf: Config) -> Result<Self, Error> {
match store.get_cbor(cid)? {
Some(root) => Ok(Self {
root,
store,
conf,
hash: Default::default(),
flushed_cid: Some(*cid),
}),
None => Err(Error::CidNotFound(cid.to_string())),
}
Ok(Self {
root: Node::load(&conf, &store, cid, 0)?,
store,
conf,
hash: Default::default(),
flushed_cid: Some(*cid),
})
}
/// Lazily instantiate a hamt from this root Cid with a specified bit width.
pub fn load_with_bit_width(cid: &Cid, store: BS, bit_width: u32) -> Result<Self, Error> {
Expand All @@ -139,13 +136,8 @@ where

/// Sets the root based on the Cid of the root node using the Hamt store
pub fn set_root(&mut self, cid: &Cid) -> Result<(), Error> {
match self.store.get_cbor(cid)? {
Some(root) => {
self.root = root;
self.flushed_cid = Some(*cid);
}
None => return Err(Error::CidNotFound(cid.to_string())),
}
self.root = Node::load(&self.conf, &self.store, cid, 0)?;
self.flushed_cid = Some(*cid);

Ok(())
}
Expand Down Expand Up @@ -453,7 +445,7 @@ where

impl<BS, V, K, H, Ver> HamtImpl<BS, V, K, H, Ver>
where
K: DeserializeOwned,
K: DeserializeOwned + PartialOrd,
V: DeserializeOwned,
Ver: Version,
BS: Blockstore,
Expand All @@ -479,7 +471,7 @@ where
/// # anyhow::Ok(())
/// ```
pub fn iter(&self) -> IterImpl<BS, V, K, H, Ver> {
IterImpl::new(&self.store, &self.root)
IterImpl::new(&self.store, &self.root, &self.conf)
}

/// Iterate over the HAMT starting at the given key. This can be used to implement "ranged"
Expand Down Expand Up @@ -530,7 +522,7 @@ where

impl<'a, BS, V, K, H, Ver> IntoIterator for &'a HamtImpl<BS, V, K, H, Ver>
where
K: DeserializeOwned,
K: DeserializeOwned + PartialOrd,
V: DeserializeOwned,
Ver: Version,
BS: Blockstore,
Expand Down
56 changes: 17 additions & 39 deletions ipld/hamt/src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use std::iter::FusedIterator;
use forest_hash_utils::BytesKey;
use fvm_ipld_blockstore::Blockstore;
use fvm_ipld_encoding::de::DeserializeOwned;
use fvm_ipld_encoding::CborStore;

use crate::hash_bits::HashBits;
use crate::node::Node;
Expand All @@ -17,6 +16,7 @@ use crate::{Config, Error, Hash, HashAlgorithm, KeyValuePair, Sha256};
#[doc(hidden)]
pub struct IterImpl<'a, BS, V, K = BytesKey, H = Sha256, Ver = version::V3> {
store: &'a BS,
conf: &'a Config,
stack: Vec<std::slice::Iter<'a, Pointer<K, V, H, Ver>>>,
current: std::slice::Iter<'a, KeyValuePair<K, V>>,
}
Expand All @@ -34,8 +34,9 @@ where
Ver: Version,
BS: Blockstore,
{
pub(crate) fn new(store: &'a BS, root: &'a Node<K, V, H, Ver>) -> Self {
pub(crate) fn new(store: &'a BS, root: &'a Node<K, V, H, Ver>, conf: &'a Config) -> Self {
Self {
conf,
store,
stack: vec![root.pointers.iter()],
current: [].iter(),
Expand All @@ -46,11 +47,11 @@ where
store: &'a BS,
root: &'a Node<K, V, H, Ver>,
key: &Q,
conf: &Config,
conf: &'a Config,
) -> Result<Self, Error>
where
H: HashAlgorithm,
K: Borrow<Q>,
K: Borrow<Q> + PartialOrd,
Q: Hash + Eq,
{
let hashed_key = H::hash(key);
Expand All @@ -62,29 +63,14 @@ where
stack.push(node.pointers[node.index_for_bit_pos(idx)..].iter());
node = match stack.last_mut().unwrap().next() {
Some(p) => match p {
Pointer::Link { cid, cache } => {
if let Some(cached_node) = cache.get() {
cached_node
} else {
let node =
if let Some(node) = store.get_cbor::<Node<K, V, H, Ver>>(cid)? {
node
} else {
#[cfg(not(feature = "ignore-dead-links"))]
return Err(Error::CidNotFound(cid.to_string()));

#[cfg(feature = "ignore-dead-links")]
continue;
};

// Ignore error intentionally, the cache value will always be the same
cache.get_or_init(|| Box::new(node))
}
}
Pointer::Link { cid, cache } => cache.get_or_try_init(|| {
Node::load(conf, store, cid, stack.len() as u32).map(Box::new)
})?,
Pointer::Dirty(node) => node,
Pointer::Values(values) => {
return match values.iter().position(|kv| kv.key().borrow() == key) {
Some(offset) => Ok(Self {
conf,
store,
stack,
current: values[offset..].iter(),
Expand All @@ -103,7 +89,7 @@ impl<'a, K, V, BS, H, Ver> Iterator for IterImpl<'a, BS, V, K, H, Ver>
where
BS: Blockstore,
Ver: Version,
K: DeserializeOwned,
K: DeserializeOwned + PartialOrd,
V: DeserializeOwned,
{
type Item = Result<(&'a K, &'a V), Error>;
Expand All @@ -119,20 +105,12 @@ where
};
match next {
Pointer::Link { cid, cache } => {
let node = if let Some(cached_node) = cache.get() {
cached_node
} else {
let node = match self.store.get_cbor::<Node<K, V, H, Ver>>(cid) {
Ok(Some(node)) => node,
#[cfg(not(feature = "ignore-dead-links"))]
Ok(None) => return Some(Err(Error::CidNotFound(cid.to_string()))),
#[cfg(feature = "ignore-dead-links")]
Ok(None) => continue,
Err(err) => return Some(Err(err.into())),
};

// Ignore error intentionally, the cache value will always be the same
cache.get_or_init(|| Box::new(node))
let node = match cache.get_or_try_init(|| {
Node::load(self.conf, &self.store, cid, self.stack.len() as u32)
.map(Box::new)
}) {
Ok(node) => node,
Err(e) => return Some(Err(e)),
};
self.stack.push(node.pointers.iter())
}
Expand All @@ -150,7 +128,7 @@ where

impl<'a, K, V, BS, H, Ver> FusedIterator for IterImpl<'a, BS, V, K, H, Ver>
where
K: DeserializeOwned,
K: DeserializeOwned + PartialOrd,
V: DeserializeOwned,
Ver: Version,
BS: Blockstore,
Expand Down
Loading

0 comments on commit 1845ab4

Please sign in to comment.