Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/tool/subcommands/benchmark_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use crate::utils::db::car_stream::{CarBlock, CarStream};
use crate::utils::encoding::extract_cids;
use crate::utils::stream::par_buffer;
use anyhow::Context as _;
use cid::Cid;
use clap::Subcommand;
use futures::{StreamExt, TryStreamExt};
use fvm_ipld_encoding::DAG_CBOR;
Expand Down Expand Up @@ -139,7 +138,7 @@ async fn benchmark_car_streaming_inspect(input: Vec<PathBuf>) -> anyhow::Result<
while let Some(block) = s.try_next().await? {
let block: CarBlock = block;
if block.cid.codec() == DAG_CBOR {
let cid_vec: Vec<Cid> = extract_cids(&block.data)?;
let cid_vec = extract_cids(&block.data)?;
let _ = cid_vec.iter().unique().count();
}
sink.write_all(&block.data).await?
Expand Down
29 changes: 23 additions & 6 deletions src/utils/encoding/cid_de_cbor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,25 @@ use cid::Cid;
use cid::serde::BytesToCidVisitor;
use serde::Deserializer;
use serde::de::{self, DeserializeSeed, SeqAccess, Visitor};
use smallvec::SmallVec;
use std::fmt;

pub type SmallCidVec = SmallVec<[Cid; 8]>;

/// Find and extract all the [`Cid`] from a `DAG_CBOR`-encoded blob without employing any
/// intermediate recursive structures, eliminating unnecessary allocations.
pub fn extract_cids(cbor_blob: &[u8]) -> anyhow::Result<Vec<Cid>> {
pub fn extract_cids(cbor_blob: &[u8]) -> anyhow::Result<SmallCidVec> {
let CidVec(v) = from_slice_with_fallback(cbor_blob)?;
Ok(v)
}

/// [`CidVec`] allows for efficient zero-copy de-serialization of `DAG_CBOR`-encoded nodes into a
/// vector of [`Cid`].
struct CidVec(Vec<Cid>);
struct CidVec(SmallCidVec);

/// [`FilterCids`] traverses an [`ipld_core::ipld::Ipld`] tree, appending [`Cid`]s (and only CIDs) to a single vector.
/// This is much faster than constructing an [`ipld_core::ipld::Ipld`] tree and then performing the filtering.
struct FilterCids<'a>(&'a mut Vec<Cid>);
struct FilterCids<'a>(&'a mut SmallCidVec);

impl<'de> DeserializeSeed<'de> for FilterCids<'_> {
type Value = ();
Expand All @@ -30,7 +33,21 @@ impl<'de> DeserializeSeed<'de> for FilterCids<'_> {
where
D: Deserializer<'de>,
{
struct FilterCidsVisitor<'a>(&'a mut Vec<Cid>);
/// Stateless seed that consumes exactly one value from the deserializer and
/// throws it away, producing `()`.
struct IgnoredSeed;

impl<'de> DeserializeSeed<'de> for IgnoredSeed {
    type Value = ();

    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        // Drive the deserializer with serde's `IgnoredAny` visitor and drop
        // whatever it yields; errors are propagated unchanged.
        deserializer
            .deserialize_ignored_any(de::IgnoredAny)
            .map(|_ignored| ())
    }
}

struct FilterCidsVisitor<'a>(&'a mut SmallCidVec);

impl<'de> Visitor<'de> for FilterCidsVisitor<'_> {
type Value = ();
Expand All @@ -50,7 +67,7 @@ impl<'de> DeserializeSeed<'de> for FilterCids<'_> {
// This is where recursion happens, we unravel each [`Ipld`] till we reach all
// the nodes.
while visitor
.next_entry_seed(FilterCids(&mut Vec::new()), FilterCids(self.0))?
.next_entry_seed(IgnoredSeed, FilterCids(self.0))?
.is_some()
{
// Nothing to do; inner map values have already been appended to `vec`.
Expand Down Expand Up @@ -171,7 +188,7 @@ impl<'de> de::Deserialize<'de> for CidVec {
where
D: de::Deserializer<'de>,
{
let mut vec = CidVec(Vec::new());
let mut vec = CidVec(SmallCidVec::new());
FilterCids(&mut vec.0).deserialize(deserializer)?;
Ok(vec)
}
Expand Down
Loading