diff --git a/.config/forest.dic b/.config/forest.dic index 28cd3d77edab..04d06fb1666a 100644 --- a/.config/forest.dic +++ b/.config/forest.dic @@ -129,6 +129,9 @@ unrepresentable untrusted URL UUID +v0 +v1 +v2 validator/S varint verifier diff --git a/.github/workflows/forest.yml b/.github/workflows/forest.yml index b4f1e1c54be8..5c5ed1013e88 100644 --- a/.github/workflows/forest.yml +++ b/.github/workflows/forest.yml @@ -281,8 +281,30 @@ jobs: - name: Set permissions run: | chmod +x ~/.cargo/bin/forest* - - name: Snapshot export check - run: ./scripts/tests/calibnet_export_check.sh + - name: Snapshot export check v1 + run: ./scripts/tests/calibnet_export_check.sh v1 + timeout-minutes: ${{ fromJSON(env.SCRIPT_TIMEOUT_MINUTES) }} + calibnet-export-check-v2: + needs: + - build-ubuntu + name: Snapshot export checks v2 + runs-on: ubuntu-24.04 + steps: + - run: lscpu + - uses: actions/cache@v4 + with: + path: "${{ env.FIL_PROOFS_PARAMETER_CACHE }}" + key: proof-params-keys + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: "forest-${{ runner.os }}" + path: ~/.cargo/bin + - name: Set permissions + run: | + chmod +x ~/.cargo/bin/forest* + - name: Snapshot export check v2 + run: ./scripts/tests/calibnet_export_check.sh v2 timeout-minutes: ${{ fromJSON(env.SCRIPT_TIMEOUT_MINUTES) }} calibnet-no-discovery-checks: needs: diff --git a/CHANGELOG.md b/CHANGELOG.md index bc79910429f4..277cfbe61615 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,10 @@ ### Added +- [#5835](https://github.com/ChainSafe/forest/issues/5835) Add `--format` flag to the `forest-cli snapshot export` subcommand. This allows exporting a Filecoin snapshot in v2 format(FRC-0108). + +- [#5835](https://github.com/ChainSafe/forest/issues/5835) Add `forest-tool archive metadata` subcommand for inspecting snapshot metadata of a Filecoin snapshot in v2 format(FRC-0108). 
+ - [#5859](https://github.com/ChainSafe/forest/pull/5859) Added size metrics for zstd frame cache and made max size configurable via `FOREST_ZSTD_FRAME_CACHE_DEFAULT_MAX_SIZE` environment variable. - [#4976](https://github.com/ChainSafe/forest/issues/4976) Add support for the `Filecoin.EthSubscribe` and `Filecoin.EthUnsubscribe` API methods to enable subscriptions to Ethereum event types: `heads` and `logs`. @@ -53,7 +57,7 @@ This is a non-mandatory release recommended for all node operators. It includes - [#5739](https://github.com/ChainSafe/forest/issues/5739) Add `--export-mode` flag to the `forest-tool archive sync-bucket` subcommand. This allows exporting and uploading only the required files. -- [#5778](https://github.com/ChainSafe/forest/pull/5778) Feat generate a detailed test report in `api compare` command through `--report-dir` and `--report-mode` +- [#5778](https://github.com/ChainSafe/forest/pull/5778) Feat generate a detailed test report in `api compare` command through `--report-dir` and `--report-mode`. 
### Changed diff --git a/scripts/tests/calibnet_export_check.sh b/scripts/tests/calibnet_export_check.sh index 3bbf402b08c8..44163f757bcb 100755 --- a/scripts/tests/calibnet_export_check.sh +++ b/scripts/tests/calibnet_export_check.sh @@ -5,6 +5,8 @@ set -eu +format="${1:-v1}" + source "$(dirname "$0")/harness.sh" forest_init "$@" @@ -12,8 +14,20 @@ forest_init "$@" echo "Cleaning up the initial snapshot" rm --force --verbose ./*.{car,car.zst,sha256sum} -echo "Exporting zstd compressed snapshot" -$FOREST_CLI_PATH snapshot export +echo "Exporting zstd compressed snapshot at genesis" +$FOREST_CLI_PATH snapshot export --tipset 0 --format "$format" + +echo "Exporting zstd compressed snapshot in $format format" +$FOREST_CLI_PATH snapshot export --format "$format" + +$FOREST_CLI_PATH shutdown --force + +for f in *.car.zst; do + echo "Inspecting archive info $f" + $FOREST_TOOL_PATH archive info "$f" + echo "Inspecting archive metadata $f" + $FOREST_TOOL_PATH archive metadata "$f" +done echo "Testing snapshot validity" zstd --test ./*.car.zst @@ -21,15 +35,7 @@ zstd --test ./*.car.zst echo "Verifying snapshot checksum" sha256sum --check ./*.sha256sum -echo "Validating CAR files" -zstd --decompress ./*.car.zst -for f in *.car; do +for f in *.car.zst; do echo "Validating CAR file $f" $FOREST_TOOL_PATH snapshot validate "$f" done - -echo "Exporting zstd compressed snapshot at genesis" -$FOREST_CLI_PATH snapshot export --tipset 0 - -echo "Testing genesis snapshot validity" -zstd --test forest_snapshot_calibnet_2022-11-01_height_0.forest.car.zst diff --git a/scripts/tests/harness.sh b/scripts/tests/harness.sh index 2625ee7124fd..dc153f27a54f 100644 --- a/scripts/tests/harness.sh +++ b/scripts/tests/harness.sh @@ -141,7 +141,7 @@ function forest_print_logs_and_metrics { function forest_cleanup { if pkill -0 forest 2>/dev/null; then forest_print_logs_and_metrics - $FOREST_CLI_PATH shutdown --force + $FOREST_CLI_PATH shutdown --force || true timeout 10s sh -c "while pkill -0 
forest 2>/dev/null; do sleep 1; done" fi } diff --git a/scripts/tests/snapshot_parity/docker-compose.yml b/scripts/tests/snapshot_parity/docker-compose.yml index a7dac263ab52..10872bdf0af9 100644 --- a/scripts/tests/snapshot_parity/docker-compose.yml +++ b/scripts/tests/snapshot_parity/docker-compose.yml @@ -98,10 +98,10 @@ services: set -euxo pipefail pushd /data/exported # Skip the CAR format line and the "Index size" line (only present in Forest snapshots) - forest-tool archive info forest.car.zst | tail -n +2 | grep -v "Index size" > forest.txt + forest-tool archive info forest.car.zst | grep -v "CAR format" | grep -v "Index size" > forest.txt cat forest.txt # Skip the CAR format line - forest-tool archive info lotus.car | tail -n +2 > lotus.txt + forest-tool archive info lotus.car | grep -v "CAR format" > lotus.txt cat lotus.txt diff forest.txt lotus.txt # Do byte-to-byte comparison diff --git a/src/blocks/chain4u.rs b/src/blocks/chain4u.rs index a955c477f46a..5e7c79b24978 100644 --- a/src/blocks/chain4u.rs +++ b/src/blocks/chain4u.rs @@ -16,7 +16,7 @@ use crate::{ sector::PoStProof, }, }; -use chain4u::header::GENESIS_BLOCK_PARENTS; +use chain4u::header::{FILECOIN_GENESIS_BLOCK, FILECOIN_GENESIS_CID, GENESIS_BLOCK_PARENTS}; use cid::Cid; use fvm_ipld_blockstore::Blockstore; use fvm_ipld_encoding::CborStore; @@ -161,7 +161,13 @@ impl Chain4U { } impl Chain4U { - pub fn with_blockstore(blockstore: T) -> Self { + pub fn with_blockstore(blockstore: T) -> Self + where + T: Blockstore, + { + blockstore + .put_keyed(&FILECOIN_GENESIS_CID, &FILECOIN_GENESIS_BLOCK) + .unwrap(); Self { blockstore, inner: Default::default(), diff --git a/src/blocks/header.rs b/src/blocks/header.rs index 79e3d1f7174b..639633bc9a64 100644 --- a/src/blocks/header.rs +++ b/src/blocks/header.rs @@ -24,18 +24,10 @@ use num::BigInt; use serde::{Deserialize, Serialize}; use serde_tuple::{Deserialize_tuple, Serialize_tuple}; -// See -// and #[cfg(test)] -static FILECOIN_GENESIS_CID: 
std::sync::LazyLock = std::sync::LazyLock::new(|| { - "bafyreiaqpwbbyjo4a42saasj36kkrpv4tsherf2e7bvezkert2a7dhonoi" - .parse() - .expect("Infallible") -}); - +mod test; #[cfg(test)] -pub static GENESIS_BLOCK_PARENTS: std::sync::LazyLock = - std::sync::LazyLock::new(|| nunny::vec![*FILECOIN_GENESIS_CID].into()); +pub use test::*; #[derive(Deserialize_tuple, Serialize_tuple, Clone, Hash, Eq, PartialEq, Debug)] pub struct RawBlockHeader { @@ -72,30 +64,6 @@ pub struct RawBlockHeader { pub parent_base_fee: TokenAmount, } -#[cfg(test)] -impl Default for RawBlockHeader { - fn default() -> Self { - Self { - parents: GENESIS_BLOCK_PARENTS.clone(), - miner_address: Default::default(), - ticket: Default::default(), - election_proof: Default::default(), - beacon_entries: Default::default(), - winning_post_proof: Default::default(), - weight: Default::default(), - epoch: Default::default(), - state_root: Default::default(), - message_receipts: Default::default(), - messages: Default::default(), - bls_aggregate: Default::default(), - timestamp: Default::default(), - signature: Default::default(), - fork_signal: Default::default(), - parent_base_fee: Default::default(), - } - } -} - impl RawBlockHeader { pub fn cid(&self) -> Cid { self.car_block().expect("CBOR serialization failed").0 @@ -345,15 +313,15 @@ impl<'de> Deserialize<'de> for CachingBlockHeader { #[cfg(test)] mod tests { + use super::*; use crate::beacon::{BeaconEntry, BeaconPoint, BeaconSchedule, mock_beacon::MockBeacon}; + use crate::blocks::{CachingBlockHeader, Error}; use crate::shim::clock::ChainEpoch; use crate::shim::{address::Address, version::NetworkVersion}; use crate::utils::encoding::from_slice_with_fallback; - use fvm_ipld_encoding::to_vec; - - use crate::blocks::{CachingBlockHeader, Error}; - - use super::RawBlockHeader; + use crate::utils::multihash::MultihashCode; + use cid::Cid; + use fvm_ipld_encoding::{DAG_CBOR, to_vec}; impl quickcheck::Arbitrary for CachingBlockHeader { fn arbitrary(g: &mut 
quickcheck::Gen) -> Self { @@ -415,4 +383,15 @@ mod tests { } } } + + #[test] + fn test_genesis_parent() { + assert_eq!( + Cid::new_v1( + DAG_CBOR, + MultihashCode::Sha2_256.digest(&FILECOIN_GENESIS_BLOCK) + ), + *FILECOIN_GENESIS_CID + ); + } } diff --git a/src/blocks/header/test.rs b/src/blocks/header/test.rs new file mode 100644 index 000000000000..0b57832b0387 --- /dev/null +++ b/src/blocks/header/test.rs @@ -0,0 +1,45 @@ +// Copyright 2019-2025 ChainSafe Systems +// SPDX-License-Identifier: Apache-2.0, MIT + +use super::*; +use cid::Cid; +use std::sync::LazyLock; + +// See +// and +pub static FILECOIN_GENESIS_CID: LazyLock = LazyLock::new(|| { + "bafyreiaqpwbbyjo4a42saasj36kkrpv4tsherf2e7bvezkert2a7dhonoi" + .parse() + .expect("Infallible") +}); + +pub static FILECOIN_GENESIS_BLOCK: LazyLock> = LazyLock::new(|| { + hex::decode("a5684461746574696d6573323031372d30352d30352030313a32373a3531674e6574776f726b6846696c65636f696e65546f6b656e6846696c65636f696e6c546f6b656e416d6f756e7473a36b546f74616c537570706c796d322c3030302c3030302c303030664d696e6572736d312c3430302c3030302c3030306c50726f746f636f6c4c616273a36b446576656c6f706d656e746b3330302c3030302c3030306b46756e6472616973696e676b3230302c3030302c3030306a466f756e646174696f6e6b3130302c3030302c303030674d657373616765784854686973206973207468652047656e6573697320426c6f636b206f66207468652046696c65636f696e20446563656e7472616c697a65642053746f72616765204e6574776f726b2e") + .expect("Infallible") +}); + +pub static GENESIS_BLOCK_PARENTS: LazyLock = + LazyLock::new(|| nunny::vec![*FILECOIN_GENESIS_CID].into()); + +impl Default for RawBlockHeader { + fn default() -> Self { + Self { + parents: GENESIS_BLOCK_PARENTS.clone(), + miner_address: Default::default(), + ticket: Default::default(), + election_proof: Default::default(), + beacon_entries: Default::default(), + winning_post_proof: Default::default(), + weight: Default::default(), + epoch: Default::default(), + state_root: Default::default(), + message_receipts: Default::default(), 
+ messages: Default::default(), + bls_aggregate: Default::default(), + timestamp: Default::default(), + signature: Default::default(), + fork_signal: Default::default(), + parent_base_fee: Default::default(), + } + } +} diff --git a/src/blocks/tipset.rs b/src/blocks/tipset.rs index 1d449079f0b7..a361f5996ef8 100644 --- a/src/blocks/tipset.rs +++ b/src/blocks/tipset.rs @@ -170,6 +170,15 @@ impl From for Tipset { } } +impl From> for Tipset { + fn from(headers: NonEmpty) -> Self { + Self { + headers, + key: OnceLock::new(), + } + } +} + impl PartialEq for Tipset { fn eq(&self, other: &Self) -> bool { self.headers.eq(&other.headers) diff --git a/src/chain/mod.rs b/src/chain/mod.rs index 6658ec45d764..2cb48288f197 100644 --- a/src/chain/mod.rs +++ b/src/chain/mod.rs @@ -1,29 +1,43 @@ // Copyright 2019-2025 ChainSafe Systems // SPDX-License-Identifier: Apache-2.0, MIT + +mod snapshot_format; pub mod store; +#[cfg(test)] +mod tests; mod weight; + +pub use self::{snapshot_format::*, store::*, weight::*}; + use crate::blocks::{Tipset, TipsetKey}; use crate::cid_collections::CidHashSet; -use crate::db::car::forest; +use crate::db::car::forest::{self, ForestCarFrame, finalize_frame}; use crate::db::{SettingsStore, SettingsStoreExt}; use crate::ipld::stream_chain; +use crate::utils::db::car_stream::{CarBlock, CarBlockWrite}; use crate::utils::io::{AsyncWriterWithChecksum, Checksum}; +use crate::utils::multihash::MultihashCode; use crate::utils::stream::par_buffer; use anyhow::Context as _; +use cid::Cid; use digest::Digest; +use futures::StreamExt as _; use fvm_ipld_blockstore::Blockstore; +use fvm_ipld_encoding::{DAG_CBOR, IPLD_RAW}; +use multihash_derive::MultihashDigest as _; +use nunny::Vec as NonEmpty; +use std::fs::File; +use std::io::{Seek as _, SeekFrom}; use std::sync::Arc; use tokio::io::{AsyncWrite, AsyncWriteExt, BufWriter}; -pub use self::{store::*, weight::*}; - pub async fn export_from_head( db: &Arc, lookup_depth: ChainEpochDelta, writer: impl AsyncWrite + 
Unpin, seen: CidHashSet, skip_checksum: bool, -) -> anyhow::Result<(Tipset, Option>), Error> { +) -> anyhow::Result<(Tipset, Option>)> { let head_key = SettingsStoreExt::read_obj::(db, crate::db::setting_keys::HEAD_KEY)? .context("chain head key not found")?; let head_ts = Tipset::load_required(&db, &head_key)?; @@ -31,6 +45,8 @@ pub async fn export_from_head( Ok((head_ts, digest)) } +/// Exports a Filecoin snapshot in v1 format +/// See pub async fn export( db: &Arc, tipset: &Tipset, @@ -38,9 +54,103 @@ pub async fn export( writer: impl AsyncWrite + Unpin, seen: CidHashSet, skip_checksum: bool, -) -> anyhow::Result>, Error> { - let stateroot_lookup_limit = tipset.epoch() - lookup_depth; +) -> anyhow::Result>> { let roots = tipset.key().to_cids(); + export_to_forest_car::( + roots, + None, + db, + tipset, + lookup_depth, + writer, + seen, + skip_checksum, + ) + .await +} + +/// Exports a Filecoin snapshot in v2 format +/// See +pub async fn export_v2( + db: &Arc, + mut f3: Option<(Cid, File)>, + tipset: &Tipset, + lookup_depth: ChainEpochDelta, + writer: impl AsyncWrite + Unpin, + seen: CidHashSet, + skip_checksum: bool, +) -> anyhow::Result>> { + // validate f3 data + if let Some((f3_cid, f3_data)) = &mut f3 { + f3_data.seek(SeekFrom::Start(0))?; + let expected_cid = Cid::new_v1( + IPLD_RAW, + MultihashCode::Blake2b256.digest_byte_stream(f3_data)?, + ); + anyhow::ensure!( + f3_cid == &expected_cid, + "f3 snapshot integrity check failed, actual cid: {f3_cid}, expected cid: {expected_cid}" + ); + } + + let head = tipset.key().to_cids(); + let f3_cid = f3.as_ref().map(|(cid, _)| *cid); + let snap_meta = FilecoinSnapshotMetadata::new_v2(head, f3_cid); + let snap_meta_cbor_encoded = fvm_ipld_encoding::to_vec(&snap_meta)?; + let snap_meta_block = CarBlock { + cid: Cid::new_v1( + DAG_CBOR, + MultihashCode::Blake2b256.digest(&snap_meta_cbor_encoded), + ), + data: snap_meta_cbor_encoded, + }; + let roots = nunny::vec![snap_meta_block.cid]; + let mut prefix_data_frames = 
vec![{ + let mut encoder = forest::new_encoder(forest::DEFAULT_FOREST_CAR_COMPRESSION_LEVEL)?; + snap_meta_block.write(&mut encoder)?; + anyhow::Ok(( + vec![snap_meta_block.cid], + finalize_frame(forest::DEFAULT_FOREST_CAR_COMPRESSION_LEVEL, &mut encoder)?, + )) + }]; + + if let Some((f3_cid, mut f3_data)) = f3 { + f3_data.seek(SeekFrom::Start(0))?; + prefix_data_frames.push({ + let mut encoder = forest::new_encoder(forest::DEFAULT_FOREST_CAR_COMPRESSION_LEVEL)?; + encoder.write_car_block(f3_cid, f3_data.metadata()?.len() as _, &mut f3_data)?; + anyhow::Ok(( + vec![f3_cid], + finalize_frame(forest::DEFAULT_FOREST_CAR_COMPRESSION_LEVEL, &mut encoder)?, + )) + }); + } + + export_to_forest_car::( + roots, + Some(prefix_data_frames), + db, + tipset, + lookup_depth, + writer, + seen, + skip_checksum, + ) + .await +} + +#[allow(clippy::too_many_arguments)] +async fn export_to_forest_car( + roots: NonEmpty, + prefix_data_frames: Option>>, + db: &Arc, + tipset: &Tipset, + lookup_depth: ChainEpochDelta, + writer: impl AsyncWrite + Unpin, + seen: CidHashSet, + skip_checksum: bool, +) -> anyhow::Result>> { + let stateroot_lookup_limit = tipset.epoch() - lookup_depth; // Wrap writer in optional checksum calculator let mut writer = AsyncWriterWithChecksum::::new(BufWriter::new(writer), !skip_checksum); @@ -61,7 +171,8 @@ pub async fn export( ); // Encode Ipld key-value pairs in zstd frames - let frames = forest::Encoder::compress_stream_default(blocks); + let block_frames = forest::Encoder::compress_stream_default(blocks); + let frames = futures::stream::iter(prefix_data_frames.unwrap_or_default()).chain(block_frames); // Write zstd frames and include a skippable index forest::Encoder::write(&mut writer, roots, frames).await?; diff --git a/src/chain/snapshot_format.rs b/src/chain/snapshot_format.rs new file mode 100644 index 000000000000..e70fd3d9e4d3 --- /dev/null +++ b/src/chain/snapshot_format.rs @@ -0,0 +1,93 @@ +// Copyright 2019-2025 ChainSafe Systems +// 
SPDX-License-Identifier: Apache-2.0, MIT + +use crate::lotus_json::lotus_json_with_self; +use cid::Cid; +use itertools::Itertools as _; +use num::FromPrimitive as _; +use num_derive::FromPrimitive; +use nunny::Vec as NonEmpty; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Copy, clap::ValueEnum, FromPrimitive, Clone, PartialEq, Eq, JsonSchema)] +#[repr(u64)] +pub enum FilecoinSnapshotVersion { + V1 = 1, + V2 = 2, +} +lotus_json_with_self!(FilecoinSnapshotVersion); + +impl Serialize for FilecoinSnapshotVersion { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_u64(*self as u64) + } +} + +impl<'de> Deserialize<'de> for FilecoinSnapshotVersion { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let i = u64::deserialize(deserializer)?; + match FilecoinSnapshotVersion::from_u64(i) { + Some(v) => Ok(v), + None => Err(serde::de::Error::custom(format!( + "invalid snapshot version {i}" + ))), + } + } +} + +/// Defined in +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "PascalCase")] +pub struct FilecoinSnapshotMetadata { + /// Snapshot version + pub version: FilecoinSnapshotVersion, + /// Chain head tipset key + pub head_tipset_key: NonEmpty, + /// F3 snapshot `CID` + pub f3_data: Option, +} + +impl FilecoinSnapshotMetadata { + pub fn new( + version: FilecoinSnapshotVersion, + head_tipset_key: NonEmpty, + f3_data: Option, + ) -> Self { + Self { + version, + head_tipset_key, + f3_data, + } + } + + pub fn new_v2(head_tipset_key: NonEmpty, f3_data: Option) -> Self { + Self::new(FilecoinSnapshotVersion::V2, head_tipset_key, f3_data) + } +} + +impl std::fmt::Display for FilecoinSnapshotMetadata { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "Snapshot version: {}", self.version as u64)?; + let head_tipset_key_string = self + .head_tipset_key + .iter() + .map(Cid::to_string) + 
.join("\n "); + writeln!(f, "Head Tipset: {head_tipset_key_string}")?; + write!( + f, + "F3 data: {}", + self.f3_data + .map(|c| c.to_string()) + .unwrap_or_else(|| "not found".into()) + )?; + Ok(()) + } +} diff --git a/src/chain/tests.rs b/src/chain/tests.rs new file mode 100644 index 000000000000..ad680853230f --- /dev/null +++ b/src/chain/tests.rs @@ -0,0 +1,107 @@ +// Copyright 2019-2025 ChainSafe Systems +// SPDX-License-Identifier: Apache-2.0, MIT + +use super::*; +use crate::{ + blocks::{CachingBlockHeader, Chain4U, Tipset, TipsetKey, chain4u}, + db::{MemoryDB, car::ForestCar}, + utils::db::CborStoreExt, +}; +use sha2::{Digest as _, Sha256}; + +#[test] +fn test_snapshot_version_cbor_serde() { + assert_eq!( + fvm_ipld_encoding::to_vec(&FilecoinSnapshotVersion::V2), + fvm_ipld_encoding::to_vec(&2_u64) + ); + assert_eq!( + fvm_ipld_encoding::from_slice::( + &fvm_ipld_encoding::to_vec(&2_u64).unwrap() + ) + .unwrap(), + FilecoinSnapshotVersion::V2 + ); +} + +#[tokio::test] +async fn test_export_v1() { + test_export_inner(FilecoinSnapshotVersion::V1) + .await + .unwrap() +} + +#[tokio::test] +async fn test_export_v2() { + test_export_inner(FilecoinSnapshotVersion::V2) + .await + .unwrap() +} + +async fn test_export_inner(version: FilecoinSnapshotVersion) -> anyhow::Result<()> { + let db = Arc::new(MemoryDB::default()); + let c4u = Chain4U::with_blockstore(db.clone()); + chain4u! { + in c4u; // select the context + [genesis] + -> [b_1] + -> [b_2_0, b_2_1] + -> [b_3] + -> [b_4] + -> [b_5_0, b_5_1] + }; + + let head_key_cids = nunny::vec![b_5_0.cid(), b_5_1.cid()]; + let head_key = TipsetKey::from(head_key_cids.clone()); + let head = Tipset::load_required(&db, &head_key)?; + + let mut car_bytes = vec![]; + + let checksum = match version { + FilecoinSnapshotVersion::V1 => { + export::(&db, &head, 0, &mut car_bytes, Default::default(), false).await? 
+ } + FilecoinSnapshotVersion::V2 => { + export_v2::( + &db, + None, + &head, + 0, + &mut car_bytes, + Default::default(), + false, + ) + .await? + } + }; + + assert_eq!(Sha256::digest(&car_bytes), checksum.unwrap()); + + let car = ForestCar::new(car_bytes)?; + + assert_eq!(car.heaviest_tipset()?, head); + + match version { + FilecoinSnapshotVersion::V1 => { + assert_eq!(car.metadata(), &None); + } + FilecoinSnapshotVersion::V2 => { + assert_eq!( + car.metadata(), + &Some(FilecoinSnapshotMetadata { + version, + head_tipset_key: head_key_cids, + f3_data: None, + }) + ); + } + } + + for b in [&genesis, &b_1, &b_2_0, &b_2_1, &b_3, &b_4, &b_5_0, &b_5_1] { + let b_from_car: CachingBlockHeader = car.get_cbor_required(&b.cid())?; + let b_from_db: CachingBlockHeader = db.get_cbor_required(&b.cid())?; + assert_eq!(b_from_car, b_from_db); + } + + Ok(()) +} diff --git a/src/cli/subcommands/snapshot_cmd.rs b/src/cli/subcommands/snapshot_cmd.rs index 6b262c9859c7..4fe9c5b59059 100644 --- a/src/cli/subcommands/snapshot_cmd.rs +++ b/src/cli/subcommands/snapshot_cmd.rs @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0, MIT use super::*; +use crate::chain::FilecoinSnapshotVersion; use crate::chain_sync::SyncConfig; use crate::cli_shared::snapshot::{self, TrustedVendor}; use crate::db::car::forest::new_forest_car_temp_path_in; use crate::networks::calibnet; -use crate::rpc::types::ApiTipsetKey; -use crate::rpc::{self, chain::ChainExportParams, prelude::*}; +use crate::rpc::{self, chain::ForestChainExportParams, prelude::*, types::ApiTipsetKey}; use anyhow::Context as _; use chrono::DateTime; use clap::Subcommand; @@ -38,6 +38,9 @@ pub enum SnapshotCommands { /// How many state-roots to include. Lower limit is 900 for `calibnet` and `mainnet`. #[arg(short, long)] depth: Option, + /// Export snapshot in the experimental v2 format(FRC-0108). 
+ #[arg(long, value_enum, default_value_t = FilecoinSnapshotVersion::V1)] + format: FilecoinSnapshotVersion, }, } @@ -50,6 +53,7 @@ impl SnapshotCommands { dry_run, tipset, depth, + format, } => { let chain_head = ChainHead::call(&client, ()).await?; @@ -85,7 +89,8 @@ impl SnapshotCommands { let output_dir = output_path.parent().context("invalid output path")?; let temp_path = new_forest_car_temp_path_in(output_dir)?; - let params = ChainExportParams { + let params = ForestChainExportParams { + version: format, epoch, recent_roots: depth.unwrap_or(SyncConfig::default().recent_state_roots), output_path: temp_path.to_path_buf(), @@ -131,7 +136,7 @@ impl SnapshotCommands { // Manually construct RpcRequest because snapshot export could // take a few hours on mainnet let hash_result = client - .call(ChainExport::request((params,))?.with_timeout(Duration::MAX)) + .call(ForestChainExport::request((params,))?.with_timeout(Duration::MAX)) .await?; handle.abort(); diff --git a/src/db/car/any.rs b/src/db/car/any.rs index 39dc6063965c..fc271c6fe1d4 100644 --- a/src/db/car/any.rs +++ b/src/db/car/any.rs @@ -10,6 +10,7 @@ use super::{CacheKey, RandomAccessFileReader, ZstdFrameCache}; use crate::blocks::{Tipset, TipsetKey}; +use crate::chain::FilecoinSnapshotMetadata; use crate::utils::io::EitherMmapOrRandomAccessFile; use cid::Cid; use fvm_ipld_blockstore::Blockstore; @@ -50,6 +51,14 @@ impl AnyCar { )) } + pub fn metadata(&self) -> &Option { + match self { + AnyCar::Forest(forest) => forest.metadata(), + AnyCar::Plain(plain) => plain.metadata(), + AnyCar::Memory(mem) => mem.metadata(), + } + } + pub fn heaviest_tipset_key(&self) -> TipsetKey { match self { AnyCar::Forest(forest) => forest.heaviest_tipset_key(), diff --git a/src/db/car/forest.rs b/src/db/car/forest.rs index 3d9db5e12ac7..8f62f08c7ac7 100644 --- a/src/db/car/forest.rs +++ b/src/db/car/forest.rs @@ -48,6 +48,7 @@ use super::{CacheKey, ZstdFrameCache}; use crate::blocks::{Tipset, TipsetKey}; +use 
crate::chain::FilecoinSnapshotMetadata; use crate::db::car::RandomAccessFileReader; use crate::db::car::plain::write_skip_frame_header_async; use crate::utils::db::car_stream::{CarBlock, CarV1Header}; @@ -57,14 +58,14 @@ use ahash::{HashMap, HashMapExt}; use byteorder::LittleEndian; use bytes::{BufMut as _, Bytes, BytesMut, buf::Writer}; use cid::Cid; -use futures::{Stream, TryStream, TryStreamExt as _}; +use futures::{Stream, TryStreamExt as _}; use fvm_ipld_blockstore::Blockstore; -use fvm_ipld_encoding::to_vec; +use fvm_ipld_encoding::CborStore as _; use nunny::Vec as NonEmpty; use positioned_io::{Cursor, ReadAt, ReadBytesAtExt, SizeCursor}; use std::io::{Seek, SeekFrom}; use std::path::Path; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use std::task::Poll; use std::{ io, @@ -87,6 +88,9 @@ pub const DEFAULT_FOREST_CAR_FRAME_SIZE: usize = 8000_usize.next_power_of_two(); pub const DEFAULT_FOREST_CAR_COMPRESSION_LEVEL: u16 = zstd::DEFAULT_COMPRESSION_LEVEL as _; const ZSTD_SKIP_FRAME_LEN: u64 = 8; +/// `zstd` frame of Forest CAR +pub type ForestCarFrame = (Vec, Bytes); + pub struct ForestCar { // Multiple `ForestCar` structures may share the same cache. The cache key is used to identify // the origin of a cached z-frame. 
@@ -94,7 +98,8 @@ pub struct ForestCar { indexed: index::Reader>, index_size_bytes: u32, frame_cache: Arc, - roots: NonEmpty, + header: CarV1Header, + metadata: OnceLock>, } impl ForestCar { @@ -113,7 +118,22 @@ impl ForestCar { indexed, index_size_bytes, frame_cache: Arc::new(ZstdFrameCache::default()), - roots: header.roots, + header, + metadata: OnceLock::new(), + }) + } + + pub fn metadata(&self) -> &Option { + self.metadata.get_or_init(|| { + if self.header.roots.len() == super::V2_SNAPSHOT_ROOT_COUNT { + let maybe_metadata_cid = self.header.roots.first(); + if let Ok(Some(metadata)) = + self.get_cbor::(maybe_metadata_cid) + { + return Some(metadata); + } + } + None }) } @@ -145,8 +165,14 @@ impl ForestCar { Ok((header, footer)) } - pub fn roots(&self) -> &NonEmpty { - &self.roots + pub fn head_tipset_key(&self) -> &NonEmpty { + // head tipset key is stored in v2 snapshot metadata + // See + if let Some(metadata) = self.metadata() { + &metadata.head_tipset_key + } else { + &self.header.roots + } } pub fn index_size_bytes(&self) -> u32 { @@ -154,7 +180,7 @@ impl ForestCar { } pub fn heaviest_tipset_key(&self) -> TipsetKey { - TipsetKey::from(self.roots().clone()) + TipsetKey::from(self.head_tipset_key().clone()) } pub fn heaviest_tipset(&self) -> anyhow::Result { @@ -174,7 +200,8 @@ impl ForestCar { }), index_size_bytes: self.index_size_bytes, frame_cache: self.frame_cache, - roots: self.roots, + header: self.header, + metadata: self.metadata, } } @@ -254,7 +281,7 @@ impl Encoder { pub async fn write( mut sink: impl AsyncWrite + Unpin, roots: NonEmpty, - mut stream: impl TryStream, Bytes), Error = anyhow::Error> + Unpin, + mut stream: impl Stream> + Unpin, ) -> anyhow::Result<()> { let mut offset = 0; @@ -263,7 +290,10 @@ impl Encoder { let header = CarV1Header { roots, version: 1 }; let mut header_uvi_frame = BytesMut::new(); - UviBytes::default().encode(Bytes::from(to_vec(&header)?), &mut header_uvi_frame)?; + UviBytes::default().encode( + 
Bytes::from(fvm_ipld_encoding::to_vec(&header)?), + &mut header_uvi_frame, + )?; header_encoder.write_all(&header_uvi_frame)?; let header_bytes = header_encoder.finish()?.into_inner().freeze(); @@ -295,8 +325,8 @@ impl Encoder { /// `compress_stream` with [`DEFAULT_FOREST_CAR_FRAME_SIZE`] as default frame size and [`DEFAULT_FOREST_CAR_COMPRESSION_LEVEL`] as default compression level. pub fn compress_stream_default( - stream: impl TryStream, - ) -> impl TryStream, Bytes), Error = anyhow::Error> { + stream: impl Stream>, + ) -> impl Stream> { Self::compress_stream( DEFAULT_FOREST_CAR_FRAME_SIZE, DEFAULT_FOREST_CAR_COMPRESSION_LEVEL, @@ -309,8 +339,8 @@ impl Encoder { pub fn compress_stream( zstd_frame_size_tripwire: usize, zstd_compression_level: u16, - stream: impl TryStream, - ) -> impl TryStream, Bytes), Error = anyhow::Error> { + stream: impl Stream>, + ) -> impl Stream> { let mut encoder_store = new_encoder(zstd_compression_level); let mut frame_cids = vec![]; @@ -370,7 +400,7 @@ fn compressed_len(encoder: &zstd::Encoder<'static, Writer>) -> usize { encoder.get_ref().get_ref().len() } -fn finalize_frame( +pub fn finalize_frame( zstd_compression_level: u16, encoder: &mut zstd::Encoder<'static, Writer>, ) -> io::Result { @@ -378,7 +408,7 @@ fn finalize_frame( Ok(prev_encoder.finish()?.into_inner().freeze()) } -fn new_encoder( +pub fn new_encoder( zstd_compression_level: u16, ) -> io::Result>> { zstd::Encoder::new(BytesMut::new().writer(), i32::from(zstd_compression_level)) @@ -456,7 +486,7 @@ mod tests { let roots = nonempty!(blocks.first().cid); let forest_car = ForestCar::new(mk_encoded_car(1024 * 4, 3, roots.clone(), blocks.clone())).unwrap(); - assert_eq!(forest_car.roots(), &roots); + assert_eq!(forest_car.head_tipset_key(), &roots); for block in blocks { assert_eq!(forest_car.get(&block.cid).unwrap(), Some(block.data)); } @@ -478,7 +508,7 @@ mod tests { blocks.clone(), )) .unwrap(); - assert_eq!(forest_car.roots(), &roots); + 
assert_eq!(forest_car.head_tipset_key(), &roots); for block in blocks { assert_eq!(forest_car.get(&block.cid).unwrap(), Some(block.data)); } diff --git a/src/db/car/mod.rs b/src/db/car/mod.rs index f68528d46823..c7fd4e69df64 100644 --- a/src/db/car/mod.rs +++ b/src/db/car/mod.rs @@ -33,6 +33,9 @@ pub type CacheKey = u64; type FrameOffset = u64; +/// According to FRC-0108, v2 snapshots have exactly one root pointing to metadata +const V2_SNAPSHOT_ROOT_COUNT: usize = 1; + pub static ZSTD_FRAME_CACHE_DEFAULT_MAX_SIZE: LazyLock = LazyLock::new(|| { const ENV_KEY: &str = "FOREST_ZSTD_FRAME_CACHE_DEFAULT_MAX_SIZE"; if let Ok(value) = std::env::var(ENV_KEY) { diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index fc508f3a3d9e..9e2defcfab7c 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -60,6 +60,7 @@ //! - CARv2 support //! - A wrapper that abstracts over car formats for reading. +use crate::chain::FilecoinSnapshotMetadata; use crate::cid_collections::CidHashMap; use crate::db::PersistentStore; use crate::utils::db::car_stream::{CarV1Header, CarV2Header}; @@ -69,6 +70,7 @@ use crate::{ }; use cid::Cid; use fvm_ipld_blockstore::Blockstore; +use fvm_ipld_encoding::CborStore as _; use integer_encoding::{FixedIntReader, VarIntReader}; use nunny::Vec as NonEmpty; use parking_lot::RwLock; @@ -80,6 +82,7 @@ use std::{ Read, Seek, SeekFrom, }, iter, + sync::OnceLock, }; use tokio::io::{AsyncWrite, AsyncWriteExt}; use tracing::{debug, trace}; @@ -111,6 +114,7 @@ pub struct PlainCar { version: u64, header_v1: CarV1Header, header_v2: Option, + metadata: OnceLock>, } impl PlainCar { @@ -162,13 +166,34 @@ impl PlainCar { version, header_v1, header_v2, + metadata: OnceLock::new(), }) } } } - pub fn roots(&self) -> &NonEmpty { - &self.header_v1.roots + pub fn metadata(&self) -> &Option { + self.metadata.get_or_init(|| { + if self.header_v1.roots.len() == super::V2_SNAPSHOT_ROOT_COUNT { + let maybe_metadata_cid = self.header_v1.roots.first(); + if let 
Ok(Some(metadata)) = + self.get_cbor::(maybe_metadata_cid) + { + return Some(metadata); + } + } + None + }) + } + + pub fn head_tipset_key(&self) -> &NonEmpty { + // head tipset key is stored in v2 snapshot metadata + // See + if let Some(metadata) = self.metadata() { + &metadata.head_tipset_key + } else { + &self.header_v1.roots + } } pub fn version(&self) -> u64 { @@ -176,7 +201,7 @@ impl PlainCar { } pub fn heaviest_tipset_key(&self) -> TipsetKey { - TipsetKey::from(self.roots().clone()) + TipsetKey::from(self.head_tipset_key().clone()) } pub fn heaviest_tipset(&self) -> anyhow::Result { @@ -196,6 +221,7 @@ impl PlainCar { version: self.version, header_v1: self.header_v1, header_v2: self.header_v2, + metadata: self.metadata, } } } @@ -447,7 +473,7 @@ mod tests { let car_backed = PlainCar::new(car).unwrap(); assert_eq!(car_backed.version(), 1); - assert_eq!(car_backed.roots().len(), 1); + assert_eq!(car_backed.head_tipset_key().len(), 1); assert_eq!(car_backed.cids().len(), 1222); let reference_car = reference(Cursor::new(car)); @@ -470,7 +496,7 @@ mod tests { let car_backed = PlainCar::new(car).unwrap(); assert_eq!(car_backed.version(), 2); - assert_eq!(car_backed.roots().len(), 1); + assert_eq!(car_backed.head_tipset_key().len(), 1); assert_eq!(car_backed.cids().len(), 7153); let reference_car = reference(Cursor::new(car)); diff --git a/src/db/memory.rs b/src/db/memory.rs index 44aadb62f5cb..d091ebc9ba8f 100644 --- a/src/db/memory.rs +++ b/src/db/memory.rs @@ -215,7 +215,7 @@ mod tests { db.export_forest_car(&mut car_db_bytes).await.unwrap(); let car = ForestCar::new(car_db_bytes).unwrap(); - assert_eq!(car.roots(), &nonempty![key1]); + assert_eq!(car.head_tipset_key(), &nonempty![key1]); assert!(car.has(&key1).unwrap()); assert!(car.has(&key2).unwrap()); } diff --git a/src/eth/mod.rs b/src/eth/mod.rs index 4c4b9cdf1f3a..1b119e904ecd 100644 --- a/src/eth/mod.rs +++ b/src/eth/mod.rs @@ -37,5 +37,5 @@ pub const ETH_LEGACY_HOMESTEAD_TX_CHAIN_ID: u64 = 0; /// > 
Research into Filecoin chain behavior suggests that probabilistic finality /// > generally approaches the intended stability guarantee at, or near, 30 epochs. /// > Although a strictly "finalized" safe recommendation remains 900 epochs. -/// > See +/// > See pub const SAFE_EPOCH_DELAY: i64 = 30; diff --git a/src/rpc/methods/chain.rs b/src/rpc/methods/chain.rs index 15cfee927ce3..1d7a7ae53cc8 100644 --- a/src/rpc/methods/chain.rs +++ b/src/rpc/methods/chain.rs @@ -9,7 +9,7 @@ use types::*; use crate::blocks::RawBlockHeader; use crate::blocks::{Block, CachingBlockHeader, Tipset, TipsetKey}; use crate::chain::index::ResolveNullTipset; -use crate::chain::{ChainStore, HeadChange}; +use crate::chain::{ChainStore, FilecoinSnapshotVersion, HeadChange}; use crate::cid_collections::CidHashSet; use crate::ipld::DfsIter; use crate::lotus_json::{HasLotusJson, LotusJson, lotus_json_with_self}; @@ -299,21 +299,22 @@ impl RpcMethod<1> for ChainPruneSnapshot { } } -pub enum ChainExport {} -impl RpcMethod<1> for ChainExport { - const NAME: &'static str = "Filecoin.ChainExport"; +pub enum ForestChainExport {} +impl RpcMethod<1> for ForestChainExport { + const NAME: &'static str = "Forest.ChainExport"; const PARAM_NAMES: [&'static str; 1] = ["params"]; const API_PATHS: BitFlags = ApiPaths::all(); const PERMISSION: Permission = Permission::Read; - type Params = (ChainExportParams,); + type Params = (ForestChainExportParams,); type Ok = Option; async fn handle( ctx: Ctx, (params,): Self::Params, ) -> Result { - let ChainExportParams { + let ForestChainExportParams { + version, epoch, recent_roots, output_path, @@ -342,27 +343,35 @@ impl RpcMethod<1> for ChainExport { ctx.chain_index() .tipset_by_height(epoch, head, ResolveNullTipset::TakeOlder)?; - match if dry_run { - crate::chain::export::( - &ctx.store_owned(), - &start_ts, - recent_roots, - VoidAsyncWriter, - CidHashSet::default(), - skip_checksum, - ) - .await + let writer = if dry_run { + 
tokio_util::either::Either::Left(VoidAsyncWriter) } else { - let file = tokio::fs::File::create(&output_path).await?; - crate::chain::export::( - &ctx.store_owned(), - &start_ts, - recent_roots, - file, - CidHashSet::default(), - skip_checksum, - ) - .await + tokio_util::either::Either::Right(tokio::fs::File::create(&output_path).await?) + }; + match match version { + FilecoinSnapshotVersion::V1 => { + crate::chain::export::( + &ctx.store_owned(), + &start_ts, + recent_roots, + writer, + CidHashSet::default(), + skip_checksum, + ) + .await + } + FilecoinSnapshotVersion::V2 => { + crate::chain::export_v2::( + &ctx.store_owned(), + None, + &start_ts, + recent_roots, + writer, + CidHashSet::default(), + skip_checksum, + ) + .await + } } { Ok(checksum_opt) => Ok(checksum_opt.map(|hash| hash.encode_hex())), Err(e) => Err(anyhow::anyhow!(e).into()), @@ -370,6 +379,43 @@ impl RpcMethod<1> for ChainExport { } } +pub enum ChainExport {} +impl RpcMethod<1> for ChainExport { + const NAME: &'static str = "Filecoin.ChainExport"; + const PARAM_NAMES: [&'static str; 1] = ["params"]; + const API_PATHS: BitFlags = ApiPaths::all(); + const PERMISSION: Permission = Permission::Read; + + type Params = (ChainExportParams,); + type Ok = Option; + + async fn handle( + ctx: Ctx, + (ChainExportParams { + epoch, + recent_roots, + output_path, + tipset_keys, + skip_checksum, + dry_run, + },): Self::Params, + ) -> Result { + ForestChainExport::handle( + ctx, + (ForestChainExportParams { + version: FilecoinSnapshotVersion::V1, + epoch, + recent_roots, + output_path, + tipset_keys, + skip_checksum, + dry_run, + },), + ) + .await + } +} + pub enum ChainReadObj {} impl RpcMethod<1> for ChainReadObj { const NAME: &'static str = "Filecoin.ChainReadObj"; @@ -917,6 +963,20 @@ pub struct ApiMessage { lotus_json_with_self!(ApiMessage); +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct ForestChainExportParams { + pub version: FilecoinSnapshotVersion, + pub epoch: ChainEpoch, + pub 
recent_roots: i64, + pub output_path: PathBuf, + #[schemars(with = "LotusJson")] + #[serde(with = "crate::lotus_json")] + pub tipset_keys: ApiTipsetKey, + pub skip_checksum: bool, + pub dry_run: bool, +} +lotus_json_with_self!(ForestChainExportParams); + #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct ChainExportParams { pub epoch: ChainEpoch, diff --git a/src/rpc/mod.rs b/src/rpc/mod.rs index c14634d11850..9236da6474e2 100644 --- a/src/rpc/mod.rs +++ b/src/rpc/mod.rs @@ -80,6 +80,7 @@ macro_rules! for_each_rpc_method { $callback!($crate::rpc::chain::ChainSetHead); $callback!($crate::rpc::chain::ChainStatObj); $callback!($crate::rpc::chain::ChainTipSetWeight); + $callback!($crate::rpc::chain::ForestChainExport); // common vertical $callback!($crate::rpc::common::Session); diff --git a/src/tool/subcommands/api_cmd/test_snapshots_ignored.txt b/src/tool/subcommands/api_cmd/test_snapshots_ignored.txt index a577cfa29abe..95130b2800da 100644 --- a/src/tool/subcommands/api_cmd/test_snapshots_ignored.txt +++ b/src/tool/subcommands/api_cmd/test_snapshots_ignored.txt @@ -80,6 +80,7 @@ Filecoin.WalletSignMessage Filecoin.WalletValidateAddress Filecoin.WalletVerify Filecoin.Web3ClientVersion +Forest.ChainExport Forest.ChainGetMinBaseFee Forest.NetInfo Forest.SnapshotGC diff --git a/src/tool/subcommands/archive_cmd.rs b/src/tool/subcommands/archive_cmd.rs index a94ab544786a..98e27bd5f9fd 100644 --- a/src/tool/subcommands/archive_cmd.rs +++ b/src/tool/subcommands/archive_cmd.rs @@ -27,6 +27,7 @@ //! Additional reading: [`crate::db::car::plain`] use crate::blocks::Tipset; +use crate::chain::FilecoinSnapshotVersion; use crate::chain::{ ChainEpochDelta, index::{ChainIndex, ResolveNullTipset}, @@ -82,7 +83,12 @@ impl ExportMode { pub enum ArchiveCommands { /// Show basic information about an archive. Info { - /// Path to an uncompressed archive (CAR) + /// Path to an archive (`.car` or `.car.zst`). 
+ snapshot: PathBuf, + }, + /// Show FRC-0108 metadata of a Filecoin snapshot archive. + Metadata { + /// Path to an archive (`.car` or `.car.zst`). snapshot: PathBuf, }, /// Trim a snapshot of the chain and write it to `` @@ -171,12 +177,34 @@ impl ArchiveCommands { let variant = store.variant().to_string(); let heaviest = store.heaviest_tipset()?; let index_size_bytes = store.index_size_bytes(); + let snapshot_version = if let Some(metadata) = store.metadata() { + metadata.version + } else { + FilecoinSnapshotVersion::V1 + }; println!( "{}", - ArchiveInfo::from_store(&store, variant, heaviest, index_size_bytes)? + ArchiveInfo::from_store( + &store, + variant, + heaviest, + snapshot_version, + index_size_bytes + )? ); Ok(()) } + Self::Metadata { snapshot } => { + let store = AnyCar::try_from(snapshot.as_path())?; + if let Some(metadata) = store.metadata() { + println!("{metadata}"); + } else { + println!( + "No metadata found (required by v2 snapshot) - this appears to be a v1 snapshot" + ); + } + Ok(()) + } Self::Export { snapshot_files, output_path, @@ -230,29 +258,31 @@ pub struct ArchiveInfo { epoch: ChainEpoch, tipsets: ChainEpoch, messages: ChainEpoch, - root: Tipset, + head: Tipset, + snapshot_version: FilecoinSnapshotVersion, index_size_bytes: Option, } impl std::fmt::Display for ArchiveInfo { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - writeln!(f, "CAR format: {}", self.variant)?; - writeln!(f, "Network: {}", self.network)?; - writeln!(f, "Epoch: {}", self.epoch)?; - writeln!(f, "State-roots: {}", self.epoch - self.tipsets + 1)?; - writeln!(f, "Messages sets: {}", self.epoch - self.messages + 1)?; - let root_cids_string = self - .root + writeln!(f, "CAR format: {}", self.variant)?; + writeln!(f, "Snapshot version: {}", self.snapshot_version as u64)?; + writeln!(f, "Network: {}", self.network)?; + writeln!(f, "Epoch: {}", self.epoch)?; + writeln!(f, "State-roots: {}", self.epoch - self.tipsets + 1)?; + writeln!(f, "Messages sets: 
{}", self.epoch - self.messages + 1)?; + let head_tipset_key_string = self + .head .cids() .iter() .map(Cid::to_string) - .join("\n "); - write!(f, "Root CIDs: {root_cids_string}")?; + .join("\n "); + write!(f, "Head Tipset: {head_tipset_key_string}")?; if let Some(index_size_bytes) = self.index_size_bytes { writeln!(f)?; write!( f, - "Index size: {}", + "Index size: {}", human_bytes::human_bytes(index_size_bytes) )?; } @@ -267,9 +297,17 @@ impl ArchiveInfo { store: &impl Blockstore, variant: String, heaviest_tipset: Tipset, + snapshot_version: FilecoinSnapshotVersion, index_size_bytes: Option, ) -> anyhow::Result { - Self::from_store_with(store, variant, heaviest_tipset, index_size_bytes, true) + Self::from_store_with( + store, + variant, + heaviest_tipset, + snapshot_version, + index_size_bytes, + true, + ) } // Scan a CAR archive to identify which network it belongs to and how many @@ -279,15 +317,16 @@ impl ArchiveInfo { store: &impl Blockstore, variant: String, heaviest_tipset: Tipset, + snapshot_version: FilecoinSnapshotVersion, index_size_bytes: Option, progress: bool, ) -> anyhow::Result { - let root = heaviest_tipset; - let root_epoch = root.epoch(); + let head = heaviest_tipset; + let root_epoch = head.epoch(); - let tipsets = root.clone().chain(store); + let tipsets = head.clone().chain(store); - let windowed = (std::iter::once(root.clone()).chain(tipsets)).tuple_windows(); + let windowed = std::iter::once(head.clone()).chain(tipsets).tuple_windows(); let mut network: String = "unknown".into(); let mut lowest_stateroot_epoch = root_epoch; @@ -353,7 +392,8 @@ impl ArchiveInfo { epoch: root_epoch, tipsets: lowest_stateroot_epoch, messages: lowest_message_epoch, - root, + head, + snapshot_version, index_size_bytes, }) } @@ -862,8 +902,13 @@ async fn sync_bucket( let store = Arc::new(ManyCar::try_from(snapshot_files)?); let heaviest_tipset = store.heaviest_tipset()?; - let info = - ArchiveInfo::from_store(&store, "ManyCAR".to_string(), 
heaviest_tipset.clone(), None)?; + let info = ArchiveInfo::from_store( + &store, + "ManyCAR".to_string(), + heaviest_tipset.clone(), + FilecoinSnapshotVersion::V1, + None, + )?; let genesis_timestamp = heaviest_tipset.genesis(&store)?.timestamp; @@ -998,7 +1043,14 @@ mod tests { let variant = store.variant().to_string(); let ts = store.heaviest_tipset().unwrap(); let index_size_bytes = store.index_size_bytes(); - let info = ArchiveInfo::from_store(&store, variant, ts, index_size_bytes).unwrap(); + let info = ArchiveInfo::from_store( + &store, + variant, + ts, + FilecoinSnapshotVersion::V1, + index_size_bytes, + ) + .unwrap(); assert_eq!(info.network, "calibnet"); assert_eq!(info.epoch, 0); } @@ -1009,7 +1061,14 @@ mod tests { let variant = store.variant().to_string(); let ts = store.heaviest_tipset().unwrap(); let index_size_bytes = store.index_size_bytes(); - let info = ArchiveInfo::from_store(&store, variant, ts, index_size_bytes).unwrap(); + let info = ArchiveInfo::from_store( + &store, + variant, + ts, + FilecoinSnapshotVersion::V1, + index_size_bytes, + ) + .unwrap(); assert_eq!(info.network, "mainnet"); assert_eq!(info.epoch, 0); } diff --git a/src/utils/db/car_stream.rs b/src/utils/db/car_stream.rs index 7578e95e59a9..99a5c296b448 100644 --- a/src/utils/db/car_stream.rs +++ b/src/utils/db/car_stream.rs @@ -13,7 +13,7 @@ use integer_encoding::VarInt; use nunny::Vec as NonEmpty; use pin_project_lite::pin_project; use serde::{Deserialize, Serialize}; -use std::io::{self, SeekFrom}; +use std::io::{self, Cursor, Read, SeekFrom, Write}; use std::pin::Pin; use std::task::{Context, Poll}; use tokio::io::{ @@ -53,15 +53,8 @@ pub struct CarBlock { impl CarBlock { // Write a varint frame containing the cid and the data - pub fn write(&self, mut writer: &mut impl io::Write) -> io::Result<()> { - let frame_length = self.cid.encoded_len() + self.data.len(); - writer.write_all(&frame_length.encode_var_vec())?; - #[allow(clippy::needless_borrows_for_generic_args)] - 
self.cid - .write_bytes(&mut writer) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - writer.write_all(&self.data)?; - Ok(()) + pub fn write(&self, writer: &mut impl Write) -> io::Result<()> { + writer.write_car_block(self.cid, self.data.len(), &mut Cursor::new(&self.data)) } pub fn from_bytes(bytes: impl Into) -> io::Result { @@ -94,6 +87,31 @@ impl CarBlock { } } +pub trait CarBlockWrite { + fn write_car_block( + &mut self, + cid: Cid, + data_len: usize, + data: &mut impl Read, + ) -> io::Result<()>; +} + +impl CarBlockWrite for T { + fn write_car_block( + &mut self, + cid: Cid, + data_len: usize, + data: &mut impl Read, + ) -> io::Result<()> { + let frame_length = cid.encoded_len() + data_len; + self.write_all(&frame_length.encode_var_vec())?; + cid.write_bytes(&mut *self) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + std::io::copy(data, self)?; + Ok(()) + } +} + pin_project! { /// Stream of CAR blocks. If the input data is compressed with zstd, it will /// automatically be decompressed.