From 5b429faa5b16fd3a5bf5095ff988ba3ca3d6a38c Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 6 Oct 2023 16:59:32 +0000 Subject: [PATCH 01/44] move tx_lookup hash calculation to db crate --- crates/stages/src/stages/tx_lookup.rs | 53 +---------------- crates/storage/db/src/tables/extensions.rs | 68 ++++++++++++++++++++++ crates/storage/db/src/tables/mod.rs | 1 + 3 files changed, 71 insertions(+), 51 deletions(-) create mode 100644 crates/storage/db/src/tables/extensions.rs diff --git a/crates/stages/src/stages/tx_lookup.rs b/crates/stages/src/stages/tx_lookup.rs index f35f28de450..d3927072712 100644 --- a/crates/stages/src/stages/tx_lookup.rs +++ b/crates/stages/src/stages/tx_lookup.rs @@ -1,23 +1,19 @@ use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; -use itertools::Itertools; use rayon::prelude::*; use reth_db::{ cursor::{DbCursorRO, DbCursorRW}, database::Database, tables, transaction::{DbTx, DbTxMut}, - DatabaseError, }; use reth_interfaces::provider::ProviderError; use reth_primitives::{ - keccak256, stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - PruneCheckpoint, PruneModes, PruneSegment, TransactionSignedNoHash, TxNumber, B256, + PruneCheckpoint, PruneModes, PruneSegment, }; use reth_provider::{ BlockReader, DatabaseProviderRW, PruneCheckpointReader, PruneCheckpointWriter, }; -use tokio::sync::mpsc; use tracing::*; /// The transaction lookup stage. @@ -90,45 +86,11 @@ impl Stage for TransactionLookupStage { let (tx_range, block_range, is_final_range) = input.next_block_range_with_transaction_threshold(provider, self.commit_threshold)?; let end_block = *block_range.end(); - let tx_range_size = tx_range.clone().count(); debug!(target: "sync::stages::transaction_lookup", ?tx_range, "Updating transaction lookup"); let tx = provider.tx_ref(); - let mut tx_cursor = tx.cursor_read::()?; - let tx_walker = tx_cursor.walk_range(tx_range)?; - - let chunk_size = (tx_range_size / rayon::current_num_threads()).max(1); - let mut channels = Vec::with_capacity(chunk_size); - let mut transaction_count = 0; - - for chunk in &tx_walker.chunks(chunk_size) { - let (tx, rx) = mpsc::unbounded_channel(); - channels.push(rx); - - // Note: Unfortunate side-effect of how chunk is designed in itertools (it is not Send) - let chunk: Vec<_> = chunk.collect(); - transaction_count += chunk.len(); - - // Spawn the task onto the global rayon pool - // This task will send the results through the channel after it has calculated the hash. - rayon::spawn(move || { - let mut rlp_buf = Vec::with_capacity(128); - for entry in chunk { - rlp_buf.clear(); - let _ = tx.send(calculate_hash(entry, &mut rlp_buf)); - } - }); - } - let mut tx_list = Vec::with_capacity(transaction_count); - - // Iterate over channels and append the tx hashes to be sorted out later - for mut channel in channels { - while let Some(tx) = channel.recv().await { - let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?; - tx_list.push((tx_hash, tx_id)); - } - } + let mut tx_list = tables::Transactions::recover_hashes(tx, tx_range)?; // Sort before inserting the reverse lookup for hash -> tx_id. tx_list.par_sort_unstable_by(|txa, txb| txa.0.cmp(&txb.0)); @@ -198,17 +160,6 @@ impl Stage for TransactionLookupStage { } } -/// Calculates the hash of the given transaction -#[inline] -fn calculate_hash( - entry: Result<(TxNumber, TransactionSignedNoHash), DatabaseError>, - rlp_buf: &mut Vec, -) -> Result<(B256, TxNumber), Box> { - let (tx_id, tx) = entry.map_err(|e| Box::new(e.into()))?; - tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false); - Ok((keccak256(rlp_buf), tx_id)) -} - fn stage_checkpoint( provider: &DatabaseProviderRW<'_, &DB>, ) -> Result { diff --git a/crates/storage/db/src/tables/extensions.rs b/crates/storage/db/src/tables/extensions.rs new file mode 100644 index 00000000000..49076023e37 --- /dev/null +++ b/crates/storage/db/src/tables/extensions.rs @@ -0,0 +1,68 @@ +use itertools::Itertools; +use std::{ops::Range, sync::mpsc}; + +use reth_interfaces::{db::DatabaseError, RethError, RethResult}; +use reth_primitives::{keccak256, TransactionSignedNoHash, TxHash, TxNumber, B256}; + +use crate::{abstraction::cursor::DbCursorRO, transaction::DbTx, Transactions}; + +impl Transactions { + /// Recovers transaction hashes by walking through [`tables::Transactions`] table and + /// calculating them in a parallel manner. Returned unsorted. + pub fn recover_hashes<'a, 'b, TX: DbTx<'a>>( + tx: &'b TX, + tx_range: Range, + ) -> RethResult> + where + 'a: 'b, + { + let mut tx_cursor = tx.cursor_read::()?; + let tx_range_size = tx_range.clone().count(); + let tx_walker = tx_cursor.walk_range(tx_range)?; + + let chunk_size = (tx_range_size / rayon::current_num_threads()).max(1); + let mut channels = Vec::with_capacity(chunk_size); + let mut transaction_count = 0; + + for chunk in &tx_walker.chunks(chunk_size) { + let (tx, rx) = mpsc::channel(); + channels.push(rx); + + // Note: Unfortunate side-effect of how chunk is designed in itertools (it is not Send) + let chunk: Vec<_> = chunk.collect(); + transaction_count += chunk.len(); + + // Spawn the task onto the global rayon pool + // This task will send the results through the channel after it has calculated the hash. + rayon::spawn(move || { + let mut rlp_buf = Vec::with_capacity(128); + for entry in chunk { + rlp_buf.clear(); + let _ = tx.send(calculate_hash(entry, &mut rlp_buf)); + } + }); + } + let mut tx_list = Vec::with_capacity(transaction_count); + + // Iterate over channels and append the tx hashes to be sorted out later + for channel in channels { + while let Ok(tx) = channel.recv() { + let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?; + tx_list.push((tx_hash, tx_id)); + } + } + + Ok(tx_list) + } +} + +/// Calculates the hash of the given transaction +#[inline] +fn calculate_hash( + entry: Result<(TxNumber, TransactionSignedNoHash), DatabaseError>, + rlp_buf: &mut Vec, +) -> Result<(B256, TxNumber), Box> { + let (tx_id, tx) = entry.map_err(|e| Box::new(e.into()))?; + tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false); + Ok((keccak256(rlp_buf), tx_id)) +} diff --git a/crates/storage/db/src/tables/mod.rs b/crates/storage/db/src/tables/mod.rs index 7171f137c6b..efd00143c46 100644 --- a/crates/storage/db/src/tables/mod.rs +++ b/crates/storage/db/src/tables/mod.rs @@ -13,6 +13,7 @@ //! TODO(onbjerg): Find appropriate format for this... pub mod codecs; +mod extensions; pub mod models; mod raw; pub(crate) mod utils; From 80904310f267883041bb54b620b28a64c0c5b87e Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 6 Oct 2023 17:01:37 +0000 Subject: [PATCH 02/44] update doc on tx hash calc --- crates/storage/db/src/tables/extensions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/db/src/tables/extensions.rs b/crates/storage/db/src/tables/extensions.rs index 49076023e37..bbf962d7922 100644 --- a/crates/storage/db/src/tables/extensions.rs +++ b/crates/storage/db/src/tables/extensions.rs @@ -44,7 +44,7 @@ impl Transactions { } let mut tx_list = Vec::with_capacity(transaction_count); - // Iterate over channels and append the tx hashes to be sorted out later + // Iterate over channels and append the tx hashes unsorted for channel in channels { while let Ok(tx) = channel.recv() { let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?; From 8eaab06af645f9f385ca3b6398676ebec44d30f1 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 20:15:19 +0000 Subject: [PATCH 03/44] use databaseprovider instead --- bin/reth/src/db/snapshots/headers.rs | 11 ++++--- bin/reth/src/db/snapshots/mod.rs | 40 +++++++------------------ crates/snapshot/src/segments/headers.rs | 35 ++++++++++++---------- crates/snapshot/src/segments/mod.rs | 14 +++++---- crates/stages/src/stages/tx_lookup.rs | 2 +- 5 files changed, 45 insertions(+), 57 deletions(-) diff --git a/bin/reth/src/db/snapshots/headers.rs b/bin/reth/src/db/snapshots/headers.rs index 4fc60f3cf6f..d65c383930a 100644 --- a/bin/reth/src/db/snapshots/headers.rs +++ b/bin/reth/src/db/snapshots/headers.rs @@ -2,23 +2,22 @@ use super::{ bench::{bench, BenchKind}, Command, }; -use crate::utils::DbTool; use rand::{seq::SliceRandom, Rng}; -use reth_db::{database::Database, open_db_read_only, table::Decompress, DatabaseEnvRO}; +use reth_db::{database::Database, open_db_read_only, table::Decompress}; use reth_interfaces::db::LogLevel; use reth_nippy_jar::NippyJar; use reth_primitives::{ snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, ChainSpec, Header, SnapshotSegment, }; -use reth_provider::{HeaderProvider, ProviderError, ProviderFactory}; +use reth_provider::{DatabaseProviderRO, HeaderProvider, ProviderError, ProviderFactory}; use reth_snapshot::segments::{get_snapshot_segment_file_name, Headers, Segment}; use std::{path::Path, sync::Arc}; impl Command { - pub(crate) fn generate_headers_snapshot( + pub(crate) fn generate_headers_snapshot( &self, - tool: &DbTool<'_, DatabaseEnvRO>, + provider: &DatabaseProviderRO<'_, DB>, compression: Compression, inclusion_filter: InclusionFilter, phf: PerfectHashingFunction, @@ -31,7 +30,7 @@ impl Command { Filters::WithoutFilters }, ); - segment.snapshot(&tool.db.tx()?, self.from..=(self.from + self.block_interval - 1))?; + segment.snapshot::(&provider, self.from..=(self.from + self.block_interval - 1))?; Ok(()) } diff --git a/bin/reth/src/db/snapshots/mod.rs b/bin/reth/src/db/snapshots/mod.rs index afc2b0ce852..b2a4f094f6e 100644 --- a/bin/reth/src/db/snapshots/mod.rs +++ b/bin/reth/src/db/snapshots/mod.rs @@ -1,7 +1,6 @@ -use crate::{db::genesis_value_parser, utils::DbTool}; use clap::Parser; use itertools::Itertools; -use reth_db::open_db_read_only; +use reth_db::{open_db_read_only, DatabaseEnvRO}; use reth_interfaces::db::LogLevel; use reth_nippy_jar::{ compression::{DecoderDictionary, Decompressor}, @@ -11,7 +10,7 @@ use reth_primitives::{ snapshot::{Compression, InclusionFilter, PerfectHashingFunction}, BlockNumber, ChainSpec, SnapshotSegment, }; -use reth_provider::providers::SnapshotProvider; +use reth_provider::{providers::SnapshotProvider, ProviderFactory}; use std::{path::Path, sync::Arc}; mod bench; @@ -20,25 +19,6 @@ mod headers; #[derive(Parser, Debug)] /// Arguments for the `reth db snapshot` command. pub struct Command { - /// The chain this node is running. - /// - /// Possible values are either a built-in chain or the path to a chain specification file. - /// - /// Built-in chains: - /// - mainnet - /// - goerli - /// - sepolia - /// - holesky - #[arg( - long, - value_name = "CHAIN_OR_PATH", - verbatim_doc_comment, - default_value = "mainnet", - value_parser = genesis_value_parser, - global = true, - )] - chain: Arc, - /// Snapshot segments to generate. segments: Vec, @@ -87,17 +67,19 @@ impl Command { { let db = open_db_read_only(db_path, None)?; - let tool = DbTool::new(&db, chain.clone())?; + let factory = ProviderFactory::new(db, chain.clone()); + let provider = factory.provider()?; if !self.only_bench { for ((mode, compression), phf) in all_combinations.clone() { match mode { - SnapshotSegment::Headers => self.generate_headers_snapshot( - &tool, - *compression, - InclusionFilter::Cuckoo, - *phf, - )?, + SnapshotSegment::Headers => self + .generate_headers_snapshot::( + &provider, + *compression, + InclusionFilter::Cuckoo, + *phf, + )?, SnapshotSegment::Transactions => todo!(), SnapshotSegment::Receipts => todo!(), } diff --git a/crates/snapshot/src/segments/headers.rs b/crates/snapshot/src/segments/headers.rs index 7388682ed05..a6d31798675 100644 --- a/crates/snapshot/src/segments/headers.rs +++ b/crates/snapshot/src/segments/headers.rs @@ -1,13 +1,14 @@ use crate::segments::{prepare_jar, Segment}; use reth_db::{ - cursor::DbCursorRO, snapshot::create_snapshot_T1_T2_T3, table::Table, tables, - transaction::DbTx, RawKey, RawTable, + cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1_T2_T3, table::Table, + tables, transaction::DbTx, RawKey, RawTable, }; use reth_interfaces::RethResult; use reth_primitives::{ snapshot::{Compression, Filters}, BlockNumber, SnapshotSegment, }; +use reth_provider::DatabaseProviderRO; use std::ops::RangeInclusive; /// Snapshot segment responsible for [SnapshotSegment::Headers] part of data. @@ -24,13 +25,13 @@ impl Headers { } // Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). - fn dataset_for_compression<'tx, T: Table>( + fn dataset_for_compression>( &self, - tx: &impl DbTx<'tx>, + provider: &DatabaseProviderRO<'_, DB>, range: &RangeInclusive, range_len: usize, ) -> RethResult>> { - let mut cursor = tx.cursor_read::>()?; + let mut cursor = provider.tx_ref().cursor_read::>()?; Ok(cursor .walk_back(Some(RawKey::from(*range.end())))? .take(range_len.min(1000)) @@ -40,14 +41,14 @@ impl Headers { } impl Segment for Headers { - fn snapshot<'tx>( + fn snapshot( &self, - tx: &impl DbTx<'tx>, + provider: &DatabaseProviderRO<'_, DB>, range: RangeInclusive, ) -> RethResult<()> { let range_len = range.clone().count(); - let mut jar = prepare_jar::<3, tables::Headers>( - tx, + let mut jar = prepare_jar::( + provider, SnapshotSegment::Headers, self.filters, self.compression, @@ -55,17 +56,21 @@ impl Segment for Headers { range_len, || { Ok([ - self.dataset_for_compression::(tx, &range, range_len)?, - self.dataset_for_compression::(tx, &range, range_len)?, - self.dataset_for_compression::( - tx, &range, range_len, + self.dataset_for_compression::( + provider, &range, range_len, + )?, + self.dataset_for_compression::( + provider, &range, range_len, + )?, + self.dataset_for_compression::( + provider, &range, range_len, )?, ]) }, )?; // Generate list of hashes for filters & PHF - let mut cursor = tx.cursor_read::>()?; + let mut cursor = provider.tx_ref().cursor_read::>()?; let mut hashes = None; if self.filters.has_filters() { hashes = Some( @@ -82,7 +87,7 @@ impl Segment for Headers { tables::CanonicalHeaders, BlockNumber, >( - tx, + provider.tx_ref(), range, None, // We already prepared the dictionary beforehand diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index 94ab254fe9c..394ae02952c 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -4,13 +4,14 @@ mod headers; pub use headers::Headers; -use reth_db::{table::Table, transaction::DbTx}; +use reth_db::{database::Database, table::Table, transaction::DbTx}; use reth_interfaces::RethResult; use reth_nippy_jar::NippyJar; use reth_primitives::{ snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, BlockNumber, SnapshotSegment, }; +use reth_provider::{BlockReader, DatabaseProviderRO, ProviderError}; use std::{ops::RangeInclusive, path::PathBuf}; pub(crate) type Rows = [Vec>; COLUMNS]; @@ -18,16 +19,16 @@ pub(crate) type Rows = [Vec>; COLUMNS]; /// A segment represents a snapshotting of some portion of the data. pub trait Segment { /// Snapshot data using the provided range. - fn snapshot<'tx>( + fn snapshot( &self, - tx: &impl DbTx<'tx>, + provider: &DatabaseProviderRO<'_, DB>, range: RangeInclusive, ) -> RethResult<()>; } /// Returns a [`NippyJar`] according to the desired configuration. -pub(crate) fn prepare_jar<'tx, const COLUMNS: usize, T: Table>( - tx: &impl DbTx<'tx>, +pub(crate) fn prepare_jar( + provider: &DatabaseProviderRO<'_, DB>, segment: SnapshotSegment, filters: Filters, compression: Compression, @@ -54,7 +55,8 @@ pub(crate) fn prepare_jar<'tx, const COLUMNS: usize, T: Table>( }; if let Filters::WithFilters(inclusion_filter, phf) = filters { - let total_rows = (tx.entries::()? - *range.start() as usize).min(range_len); + let total_rows = + (provider.tx_ref().entries::()? - *range.start() as usize).min(range_len); nippy_jar = match inclusion_filter { InclusionFilter::Cuckoo => nippy_jar.with_cuckoo_filter(total_rows), }; diff --git a/crates/stages/src/stages/tx_lookup.rs b/crates/stages/src/stages/tx_lookup.rs index a8cd3307901..bad1ff9ca13 100644 --- a/crates/stages/src/stages/tx_lookup.rs +++ b/crates/stages/src/stages/tx_lookup.rs @@ -9,7 +9,7 @@ use reth_db::{ use reth_interfaces::provider::ProviderError; use reth_primitives::{ stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - PruneCheckpoint, PruneModes, PruneSegment, + PruneCheckpoint, PruneMode, PruneSegment, }; use reth_provider::{ BlockReader, DatabaseProviderRW, PruneCheckpointReader, PruneCheckpointWriter, From 95e05741e9fc8e600bf3784b3133ba245a661008 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 20:22:13 +0000 Subject: [PATCH 04/44] add find_transaction_range --- crates/snapshot/src/segments/mod.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index 394ae02952c..3230fddf31a 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -24,6 +24,25 @@ pub trait Segment { provider: &DatabaseProviderRO<'_, DB>, range: RangeInclusive, ) -> RethResult<()>; + + /// Finds the transaction range for the given block range. + fn find_transaction_range( + &mut self, + provider: &DatabaseProviderRO<'_, DB>, + block_range: RangeInclusive, + ) -> RethResult> { + let from = provider + .block_body_indices(*block_range.start())? + .ok_or(ProviderError::BlockBodyIndicesNotFound(*block_range.start()))? + .first_tx_num(); + + let to = provider + .block_body_indices(*block_range.end())? + .ok_or(ProviderError::BlockBodyIndicesNotFound(*block_range.end()))? + .last_tx_num(); + + Ok(from..=to) + } } /// Returns a [`NippyJar`] according to the desired configuration. From e03a3c0089c180bd46617847b697a66d49fa5737 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 20:22:38 +0000 Subject: [PATCH 05/44] cargo deps --- Cargo.lock | 2 ++ crates/storage/db/Cargo.toml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e777d246246..70c3a94ddb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5630,6 +5630,7 @@ dependencies = [ "futures", "heapless", "iai", + "itertools 0.11.0", "metrics", "modular-bitfield", "page_size", @@ -5641,6 +5642,7 @@ dependencies = [ "proptest", "proptest-derive", "rand 0.8.5", + "rayon", "reth-codecs", "reth-interfaces", "reth-libmdbx", diff --git a/crates/storage/db/Cargo.toml b/crates/storage/db/Cargo.toml index de96cc5b80f..5d3bf597f38 100644 --- a/crates/storage/db/Cargo.toml +++ b/crates/storage/db/Cargo.toml @@ -45,6 +45,8 @@ parking_lot.workspace = true derive_more = "0.99" eyre = "0.6.8" paste = "1.0" +rayon.workspace = true +itertools.workspace = true # arbitrary utils arbitrary = { workspace = true, features = ["derive"], optional = true } From 16e4457a4852b3b87dec1fb7fa11351fabfb9137 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 23:17:45 +0000 Subject: [PATCH 06/44] add TransactionsProviderExt --- .../src/providers/database/provider.rs | 4 ++- crates/storage/provider/src/traits/mod.rs | 2 +- .../provider/src/traits/transactions.rs | 28 +++++++++++++++++-- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index 7beae9b5d15..31179e38c57 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -6,7 +6,7 @@ use crate::{ AccountReader, BlockExecutionWriter, BlockHashReader, BlockNumReader, BlockReader, BlockWriter, Chain, EvmEnvProvider, HashingWriter, HeaderProvider, HistoryWriter, OriginalValuesKnown, ProviderError, PruneCheckpointReader, PruneCheckpointWriter, StageCheckpointReader, - StorageReader, TransactionsProvider, WithdrawalsProvider, + StorageReader, TransactionsProvider, TransactionsProviderExt, WithdrawalsProvider, }; use itertools::{izip, Itertools}; use reth_db::{ @@ -1145,6 +1145,8 @@ impl<'this, TX: DbTx<'this>> BlockReader for DatabaseProvider<'this, TX> { } } +impl<'this, TX: DbTx<'this>> TransactionsProviderExt for DatabaseProvider<'this, TX> {} + impl<'this, TX: DbTx<'this>> TransactionsProvider for DatabaseProvider<'this, TX> { fn transaction_id(&self, tx_hash: TxHash) -> RethResult> { Ok(self.tx.get::(tx_hash)?) diff --git a/crates/storage/provider/src/traits/mod.rs b/crates/storage/provider/src/traits/mod.rs index 47454957718..2f5e5b00373 100644 --- a/crates/storage/provider/src/traits/mod.rs +++ b/crates/storage/provider/src/traits/mod.rs @@ -34,7 +34,7 @@ pub use state::{ }; mod transactions; -pub use transactions::TransactionsProvider; +pub use transactions::{TransactionsProvider, TransactionsProviderExt}; mod withdrawals; pub use withdrawals::WithdrawalsProvider; diff --git a/crates/storage/provider/src/traits/transactions.rs b/crates/storage/provider/src/traits/transactions.rs index 711b46c4f23..754ae5582f5 100644 --- a/crates/storage/provider/src/traits/transactions.rs +++ b/crates/storage/provider/src/traits/transactions.rs @@ -1,10 +1,10 @@ -use crate::BlockNumReader; -use reth_interfaces::RethResult; +use crate::{BlockNumReader, BlockReader}; +use reth_interfaces::{provider::ProviderError, RethResult}; use reth_primitives::{ Address, BlockHashOrNumber, BlockNumber, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, }; -use std::ops::RangeBounds; +use std::ops::{RangeBounds, RangeInclusive}; /// Client trait for fetching [TransactionSigned] related data. #[auto_impl::auto_impl(&, Arc)] @@ -63,3 +63,25 @@ pub trait TransactionsProvider: BlockNumReader + Send + Sync { /// Returns None if the transaction is not found. fn transaction_sender(&self, id: TxNumber) -> RethResult>; } + +/// Client trait for fetching additional [TransactionSigned] related data. +#[auto_impl::auto_impl(&, Arc)] +pub trait TransactionsProviderExt: BlockReader + Send + Sync { + /// Get transactions range by block range. + fn transaction_range_by_block_range( + &self, + block_range: RangeInclusive, + ) -> RethResult> { + let from = self + .block_body_indices(*block_range.start())? + .ok_or(ProviderError::BlockBodyIndicesNotFound(*block_range.start()))? + .first_tx_num(); + + let to = self + .block_body_indices(*block_range.end())? + .ok_or(ProviderError::BlockBodyIndicesNotFound(*block_range.end()))? + .last_tx_num(); + + Ok(from..=to) + } +} From a6edbc4481e5984efc0bf8b6ca6f125e7798ebc8 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 23:19:30 +0000 Subject: [PATCH 07/44] add transactions snapshot segment --- bin/reth/src/db/snapshots/mod.rs | 28 ++- bin/reth/src/db/snapshots/transactions.rs | 179 +++++++++++++++++++ crates/snapshot/src/segments/mod.rs | 25 +-- crates/snapshot/src/segments/transactions.rs | 91 ++++++++++ crates/storage/provider/src/lib.rs | 3 +- 5 files changed, 301 insertions(+), 25 deletions(-) create mode 100644 bin/reth/src/db/snapshots/transactions.rs create mode 100644 crates/snapshot/src/segments/transactions.rs diff --git a/bin/reth/src/db/snapshots/mod.rs b/bin/reth/src/db/snapshots/mod.rs index b2a4f094f6e..494113ad901 100644 --- a/bin/reth/src/db/snapshots/mod.rs +++ b/bin/reth/src/db/snapshots/mod.rs @@ -15,6 +15,7 @@ use std::{path::Path, sync::Arc}; mod bench; mod headers; +mod transactions; #[derive(Parser, Debug)] /// Arguments for the `reth db snapshot` command. @@ -80,7 +81,13 @@ impl Command { InclusionFilter::Cuckoo, *phf, )?, - SnapshotSegment::Transactions => todo!(), + SnapshotSegment::Transactions => self + .generate_transactions_snapshot::( + &provider, + *compression, + InclusionFilter::Cuckoo, + *phf, + )?, SnapshotSegment::Receipts => todo!(), } } @@ -98,7 +105,14 @@ impl Command { InclusionFilter::Cuckoo, *phf, )?, - SnapshotSegment::Transactions => todo!(), + SnapshotSegment::Transactions => self.bench_transactions_snapshot( + db_path, + log_level, + chain.clone(), + *compression, + InclusionFilter::Cuckoo, + *phf, + )?, SnapshotSegment::Receipts => todo!(), } } @@ -113,6 +127,7 @@ impl Command { &self, jar: &'a mut NippyJar, dictionaries: &'a mut Option>>, + tx_start: u64, ) -> eyre::Result<(SnapshotProvider<'a>, Vec>)> { let mut decompressors: Vec> = vec![]; if let Some(reth_nippy_jar::compression::Compressors::Zstd(zstd)) = jar.compressor_mut() { @@ -122,6 +137,13 @@ impl Command { } } - Ok((SnapshotProvider { jar: &*jar, jar_start_block: self.from }, decompressors)) + Ok(( + SnapshotProvider { + jar: &*jar, + jar_start_block: self.from, + jar_start_transaction: tx_start, + }, + decompressors, + )) } } diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs new file mode 100644 index 00000000000..f6212339a02 --- /dev/null +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -0,0 +1,179 @@ +use super::{ + bench::{bench, BenchKind}, + Command, Compression, PerfectHashingFunction, +}; +use rand::{seq::SliceRandom, Rng}; +use reth_db::{database::Database, open_db_read_only, table::Decompress}; +use reth_interfaces::db::LogLevel; +use reth_nippy_jar::NippyJar; +use reth_primitives::{ + snapshot::{Filters, InclusionFilter}, + ChainSpec, SnapshotSegment, TransactionSignedNoHash, +}; +use reth_provider::{ + DatabaseProviderRO, ProviderError, ProviderFactory, TransactionsProvider, + TransactionsProviderExt, +}; +use reth_snapshot::{ + segments, + segments::{get_snapshot_segment_file_name, Segment}, +}; +use std::{path::Path, sync::Arc}; + +impl Command { + pub(crate) fn generate_transactions_snapshot( + &self, + provider: &DatabaseProviderRO<'_, DB>, + compression: Compression, + inclusion_filter: InclusionFilter, + phf: PerfectHashingFunction, + ) -> eyre::Result<()> { + let segment = segments::Transactions::new( + compression, + if self.with_filters { + Filters::WithFilters(inclusion_filter, phf) + } else { + Filters::WithoutFilters + }, + ); + segment.snapshot::(provider, self.from..=(self.from + self.block_interval - 1))?; + + Ok(()) + } + + pub(crate) fn bench_transactions_snapshot( + &self, + db_path: &Path, + log_level: Option, + chain: Arc, + compression: Compression, + inclusion_filter: InclusionFilter, + phf: PerfectHashingFunction, + ) -> eyre::Result<()> { + let filters = if self.with_filters { + Filters::WithFilters(inclusion_filter, phf) + } else { + Filters::WithoutFilters + }; + + let block_range = self.from..=(self.from + self.block_interval - 1); + + let mut rng = rand::thread_rng(); + let mut dictionaries = None; + let mut jar = NippyJar::load_without_header(&get_snapshot_segment_file_name( + SnapshotSegment::Headers, + filters, + compression, + &block_range, + ))?; + + let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) + .provider()? + .transaction_range_by_block_range(block_range)?; + + let mut row_indexes = tx_range.clone().collect::>(); + + let (provider, decompressors) = + self.prepare_jar_provider(&mut jar, &mut dictionaries, *tx_range.start())?; + let mut cursor = if !decompressors.is_empty() { + provider.cursor_with_decompressors(decompressors) + } else { + provider.cursor() + }; + + for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { + bench( + bench_kind, + (open_db_read_only(db_path, log_level)?, chain.clone()), + SnapshotSegment::Transactions, + filters, + compression, + || { + for num in row_indexes.iter() { + TransactionSignedNoHash::decompress( + cursor + .row_by_number_with_cols::<0b1, 1>( + (num - tx_range.start()) as usize, + )? + .ok_or(ProviderError::TransactionNotFound((*num).into()))?[0], + )?; + // TODO: replace with below when eventually SnapshotProvider re-uses cursor + // provider.transaction_by_id(num as + // u64)?.ok_or(ProviderError::TransactionNotFound((*num).into()))?; + } + Ok(()) + }, + |provider| { + for num in row_indexes.iter() { + provider + .transaction_by_id(*num)? + .ok_or(ProviderError::TransactionNotFound((*num).into()))?; + } + Ok(()) + }, + )?; + + // For random walk + row_indexes.shuffle(&mut rng); + } + + // BENCHMARK QUERYING A RANDOM HEADER BY NUMBER + { + let num = row_indexes[rng.gen_range(0..row_indexes.len())]; + bench( + BenchKind::RandomOne, + (open_db_read_only(db_path, log_level)?, chain.clone()), + SnapshotSegment::Transactions, + filters, + compression, + || { + Ok(TransactionSignedNoHash::decompress( + cursor + .row_by_number_with_cols::<0b1, 1>((num - tx_range.start()) as usize)? + .ok_or(ProviderError::TransactionNotFound((num as u64).into()))?[0], + )? + .with_hash()) + }, + |provider| { + Ok(provider + .transaction_by_id(num as u64)? + .ok_or(ProviderError::TransactionNotFound((num as u64).into()))?) + }, + )?; + } + + // BENCHMARK QUERYING A RANDOM HEADER BY HASH + { + let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; + let transaction_hash = + ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) + .transaction_by_id(num)? + .ok_or(ProviderError::TransactionNotFound(num.into()))? + .hash(); + + bench( + BenchKind::RandomHash, + (open_db_read_only(db_path, log_level)?, chain.clone()), + SnapshotSegment::Transactions, + filters, + compression, + || { + let transaction = TransactionSignedNoHash::decompress( + cursor + .row_by_key_with_cols::<0b1, 1>(transaction_hash.as_slice())? + .ok_or(ProviderError::TransactionNotFound(transaction_hash.into()))?[0], + )?; + + // Might be a false positive, so in the real world we have to validate it + Ok(transaction.with_hash()) + }, + |provider| { + Ok(provider + .transaction_by_hash(transaction_hash)? + .ok_or(ProviderError::TransactionNotFound(transaction_hash.into()))?) + }, + )?; + } + Ok(()) + } +} diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index 3230fddf31a..e0d8c0f22e5 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -1,7 +1,9 @@ //! Snapshot segment implementations and utilities. -mod headers; +mod transactions; +pub use transactions::Transactions; +mod headers; pub use headers::Headers; use reth_db::{database::Database, table::Table, transaction::DbTx}; @@ -11,7 +13,7 @@ use reth_primitives::{ snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, BlockNumber, SnapshotSegment, }; -use reth_provider::{BlockReader, DatabaseProviderRO, ProviderError}; +use reth_provider::DatabaseProviderRO; use std::{ops::RangeInclusive, path::PathBuf}; pub(crate) type Rows = [Vec>; COLUMNS]; @@ -24,25 +26,6 @@ pub trait Segment { provider: &DatabaseProviderRO<'_, DB>, range: RangeInclusive, ) -> RethResult<()>; - - /// Finds the transaction range for the given block range. - fn find_transaction_range( - &mut self, - provider: &DatabaseProviderRO<'_, DB>, - block_range: RangeInclusive, - ) -> RethResult> { - let from = provider - .block_body_indices(*block_range.start())? - .ok_or(ProviderError::BlockBodyIndicesNotFound(*block_range.start()))? - .first_tx_num(); - - let to = provider - .block_body_indices(*block_range.end())? - .ok_or(ProviderError::BlockBodyIndicesNotFound(*block_range.end()))? - .last_tx_num(); - - Ok(from..=to) - } } /// Returns a [`NippyJar`] according to the desired configuration. diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs new file mode 100644 index 00000000000..ecabcc0e117 --- /dev/null +++ b/crates/snapshot/src/segments/transactions.rs @@ -0,0 +1,91 @@ +use crate::segments::{prepare_jar, Segment}; +use reth_db::{ + cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1, table::Table, tables, + transaction::DbTx, RawKey, RawTable, +}; +use reth_interfaces::RethResult; +use reth_primitives::{ + snapshot::{Compression, Filters}, + BlockNumber, SnapshotSegment, TxNumber, +}; +use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; +use std::ops::RangeInclusive; + +/// Snapshot segment responsible for [SnapshotSegment::Transactions] part of data. +#[derive(Debug)] +pub struct Transactions { + compression: Compression, + filters: Filters, +} + +impl Transactions { + /// Creates new instance of [Transactions] snapshot segment. + pub fn new(compression: Compression, filters: Filters) -> Self { + Self { compression, filters } + } + + // Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). + fn dataset_for_compression<'tx, T: Table>( + &self, + tx: &impl DbTx<'tx>, + range: &RangeInclusive, + range_len: usize, + ) -> RethResult>> { + let mut cursor = tx.cursor_read::>()?; + Ok(cursor + .walk_back(Some(RawKey::from(*range.end())))? + .take(range_len.min(1000)) + .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) + .collect::>()) + } +} + +impl Segment for Transactions { + fn snapshot( + &self, + provider: &DatabaseProviderRO<'_, DB>, + block_range: RangeInclusive, + ) -> RethResult<()> { + let range = provider.transaction_range_by_block_range(block_range)?; + let range_len = range.clone().count(); + + let mut jar = prepare_jar::( + provider, + SnapshotSegment::Transactions, + self.filters, + self.compression, + range.clone(), + range_len, + || { + Ok([self.dataset_for_compression::( + provider.tx_ref(), + &range, + range_len, + )?]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut hashes = None; + if self.filters.has_filters() { + hashes = Some( + tables::Transactions::recover_hashes(provider.tx_ref(), 0..10)? + .into_iter() + .map(|(tx, _)| Ok(tx)), + ); + } + + create_snapshot_T1::( + provider.tx_ref(), + range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + range_len, + &mut jar, + )?; + + Ok(()) + } +} diff --git a/crates/storage/provider/src/lib.rs b/crates/storage/provider/src/lib.rs index 02f0936641f..4585c9cd29b 100644 --- a/crates/storage/provider/src/lib.rs +++ b/crates/storage/provider/src/lib.rs @@ -24,7 +24,8 @@ pub use traits::{ HashingWriter, HeaderProvider, HistoryWriter, PrunableBlockExecutor, PruneCheckpointReader, PruneCheckpointWriter, ReceiptProvider, ReceiptProviderIdExt, StageCheckpointReader, StageCheckpointWriter, StateProvider, StateProviderBox, StateProviderFactory, - StateRootProvider, StorageReader, TransactionsProvider, WithdrawalsProvider, + StateRootProvider, StorageReader, TransactionsProvider, TransactionsProviderExt, + WithdrawalsProvider, }; /// Provider trait implementations. From 765ea2ef2c9a002055131c99b46679abeb2811f5 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 23:19:53 +0000 Subject: [PATCH 08/44] snapshot benches compare results --- bin/reth/src/db/snapshots/bench.rs | 26 ++++++++++++++++---------- bin/reth/src/db/snapshots/headers.rs | 22 ++++++++++------------ 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/bin/reth/src/db/snapshots/bench.rs b/bin/reth/src/db/snapshots/bench.rs index edcfe6fa503..47c5ec2fa07 100644 --- a/bin/reth/src/db/snapshots/bench.rs +++ b/bin/reth/src/db/snapshots/bench.rs @@ -4,7 +4,7 @@ use reth_primitives::{ ChainSpec, SnapshotSegment, }; use reth_provider::{DatabaseProviderRO, ProviderFactory}; -use std::{sync::Arc, time::Instant}; +use std::{fmt::Debug, sync::Arc, time::Instant}; #[derive(Debug)] pub(crate) enum BenchKind { @@ -14,7 +14,7 @@ pub(crate) enum BenchKind { RandomHash, } -pub(crate) fn bench( +pub(crate) fn bench( bench_kind: BenchKind, db: (DatabaseEnvRO, Arc), segment: SnapshotSegment, @@ -24,28 +24,34 @@ pub(crate) fn bench( database_method: F2, ) -> eyre::Result<()> where - F1: FnMut() -> eyre::Result<()>, - F2: Fn(DatabaseProviderRO<'_, DatabaseEnvRO>) -> eyre::Result<()>, + F1: FnMut() -> eyre::Result, + F2: Fn(DatabaseProviderRO<'_, DatabaseEnvRO>) -> eyre::Result, + R: Debug + PartialEq, { let (db, chain) = db; println!(); println!("############"); println!("## [{segment:?}] [{compression:?}] [{filters:?}] [{bench_kind:?}]"); - { + let snap_result = { let start = Instant::now(); - snapshot_method()?; + let result = snapshot_method()?; let end = start.elapsed().as_micros(); println!("# snapshot {bench_kind:?} | {end} μs"); - } - { + result + }; + + let db_result = { let factory = ProviderFactory::new(db, chain); let provider = factory.provider()?; let start = Instant::now(); - database_method(provider)?; + let result = database_method(provider)?; let end = start.elapsed().as_micros(); println!("# database {bench_kind:?} | {end} μs"); - } + result + }; + + assert_eq!(snap_result, db_result); Ok(()) } diff --git a/bin/reth/src/db/snapshots/headers.rs b/bin/reth/src/db/snapshots/headers.rs index d65c383930a..422697905c2 100644 --- a/bin/reth/src/db/snapshots/headers.rs +++ b/bin/reth/src/db/snapshots/headers.rs @@ -30,7 +30,7 @@ impl Command { Filters::WithoutFilters }, ); - segment.snapshot::(&provider, self.from..=(self.from + self.block_interval - 1))?; + segment.snapshot::(provider, self.from..=(self.from + self.block_interval - 1))?; Ok(()) } @@ -62,7 +62,8 @@ impl Command { &range, ))?; - let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?; + let (provider, decompressors) = + self.prepare_jar_provider(&mut jar, &mut dictionaries, 0)?; let mut cursor = if !decompressors.is_empty() { provider.cursor_with_decompressors(decompressors) } else { @@ -113,18 +114,16 @@ impl Command { filters, compression, || { - Header::decompress( + Ok(Header::decompress( cursor .row_by_number_with_cols::<0b01, 2>((num - self.from) as usize)? .ok_or(ProviderError::HeaderNotFound((num as u64).into()))?[0], - )?; - Ok(()) + )?) }, |provider| { - provider + Ok(provider .header_by_number(num as u64)? - .ok_or(ProviderError::HeaderNotFound((num as u64).into()))?; - Ok(()) + .ok_or(ProviderError::HeaderNotFound((num as u64).into()))?) }, )?; } @@ -153,13 +152,12 @@ impl Command { // Might be a false positive, so in the real world we have to validate it assert_eq!(header.hash_slow(), header_hash); - Ok(()) + Ok(header) }, |provider| { - provider + Ok(provider .header(&header_hash)? - .ok_or(ProviderError::HeaderNotFound(header_hash.into()))?; - Ok(()) + .ok_or(ProviderError::HeaderNotFound(header_hash.into()))?) }, )?; } From 1b41008c1e5536043707fe19442c0029c44d3bd2 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 23:20:13 +0000 Subject: [PATCH 09/44] add TransactionNotFound error --- crates/interfaces/src/provider.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/interfaces/src/provider.rs b/crates/interfaces/src/provider.rs index 68439c0cbb4..4849473be59 100644 --- a/crates/interfaces/src/provider.rs +++ b/crates/interfaces/src/provider.rs @@ -40,6 +40,9 @@ pub enum ProviderError { /// Thrown we were unable to find a specific block #[error("Block does not exist {0:?}")] BlockNotFound(BlockHashOrNumber), + /// Thrown we were unable to find a specific transaction + #[error("Transaction does not exist {0:?}")] + TransactionNotFound(BlockHashOrNumber), /// Thrown we were unable to find the best block #[error("Best block does not exist")] BestBlockNotFound, From b6789e45717561005b8a023de9e1abd1869dfca3 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 12 Oct 2023 23:20:51 +0000 Subject: [PATCH 10/44] add additional empty provider impls to SnapshotProvider --- .../provider/src/providers/snapshot.rs | 114 +++++++++++++++++- 1 file changed, 110 insertions(+), 4 deletions(-) diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index ec2c36e6b93..4a550e6840c 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -1,11 +1,14 @@ -use crate::HeaderProvider; +use crate::{BlockHashReader, BlockNumReader, HeaderProvider, TransactionsProvider}; use reth_db::{ table::{Decompress, Table}, HeaderTD, }; use reth_interfaces::{provider::ProviderError, RethResult}; use reth_nippy_jar::{compression::Decompressor, NippyJar, NippyJarCursor}; -use reth_primitives::{BlockHash, BlockNumber, Header, SealedHeader, U256}; +use reth_primitives::{ + Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, SealedHeader, + TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256, +}; use std::ops::RangeBounds; /// SnapshotProvider @@ -17,8 +20,10 @@ use std::ops::RangeBounds; pub struct SnapshotProvider<'a> { /// NippyJar pub jar: &'a NippyJar, - /// Starting snapshot block + /// Starting snapshot block if applied. pub jar_start_block: u64, + /// Starting snapshot transaction if applied. + pub jar_start_transaction: u64, } impl<'a> SnapshotProvider<'a> { @@ -101,6 +106,106 @@ impl<'a> HeaderProvider for SnapshotProvider<'a> { } } +impl<'a> BlockHashReader for SnapshotProvider<'a> { + fn block_hash(&self, _number: u64) -> RethResult> { + todo!() + } + + fn canonical_hashes_range( + &self, + _start: BlockNumber, + _end: BlockNumber, + ) -> RethResult> { + todo!() + } +} + +impl<'a> BlockNumReader for SnapshotProvider<'a> { + fn chain_info(&self) -> RethResult { + todo!() + } + + fn best_block_number(&self) -> RethResult { + todo!() + } + + fn last_block_number(&self) -> RethResult { + todo!() + } + + fn block_number(&self, _hash: B256) -> RethResult> { + todo!() + } +} + +impl<'a> TransactionsProvider for SnapshotProvider<'a> { + fn transaction_id(&self, _tx_hash: TxHash) -> RethResult> { + todo!() + } + + fn transaction_by_id(&self, num: TxNumber) -> RethResult> { + TransactionSignedNoHash::decompress( + self.cursor() + .row_by_number_with_cols::<0b1, 1>((num - self.jar_start_transaction) as usize)? + .ok_or(ProviderError::TransactionNotFound(num.into()))?[0], + ) + .map(Into::into) + .map(Some) + .map_err(Into::into) + } + + fn transaction_by_id_no_hash( + &self, + _id: TxNumber, + ) -> RethResult> { + todo!() + } + + fn transaction_by_hash(&self, _hash: TxHash) -> RethResult> { + todo!() + } + + fn transaction_by_hash_with_meta( + &self, + _hash: TxHash, + ) -> RethResult> { + todo!() + } + + fn transaction_block(&self, _id: TxNumber) -> RethResult> { + todo!() + } + + fn transactions_by_block( + &self, + _block_id: BlockHashOrNumber, + ) -> RethResult>> { + todo!() + } + + fn transactions_by_block_range( + &self, + _range: impl RangeBounds, + ) -> RethResult>> { + todo!() + } + + fn senders_by_tx_range(&self, _range: impl RangeBounds) -> RethResult> { + todo!() + } + + fn transactions_by_tx_range( + &self, + _range: impl RangeBounds, + ) -> RethResult> { + todo!() + } + + fn transaction_sender(&self, _id: TxNumber) -> RethResult> { + todo!() + } +} + #[cfg(test)] mod test { use super::*; @@ -197,7 +302,8 @@ mod test { let jar = NippyJar::load_without_header(snap_file.path()).unwrap(); let db_provider = factory.provider().unwrap(); - let snap_provider = SnapshotProvider { jar: &jar, jar_start_block: 0 }; + let snap_provider = + SnapshotProvider { jar: &jar, jar_start_block: 0, jar_start_transaction: 0 }; assert!(!headers.is_empty()); From a90086010da6265a2aa4edefcb11b2ad5afdc530 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 13 Oct 2023 00:08:51 +0000 Subject: [PATCH 11/44] add SegmentHeader to snapshot jars --- bin/reth/src/db/snapshots/headers.rs | 5 ++- bin/reth/src/db/snapshots/mod.rs | 14 ++------- bin/reth/src/db/snapshots/transactions.rs | 5 ++- crates/primitives/src/snapshot/mod.rs | 2 +- crates/primitives/src/snapshot/segment.rs | 29 +++++++++++++++++ crates/snapshot/src/segments/headers.rs | 5 +-- crates/snapshot/src/segments/mod.rs | 22 ++++++------- crates/snapshot/src/segments/transactions.rs | 22 ++++++------- crates/storage/db/src/snapshot.rs | 6 ++-- .../provider/src/providers/snapshot.rs | 31 +++++++++---------- 10 files changed, 81 insertions(+), 60 deletions(-) diff --git a/bin/reth/src/db/snapshots/headers.rs b/bin/reth/src/db/snapshots/headers.rs index 422697905c2..7a9e813566c 100644 --- a/bin/reth/src/db/snapshots/headers.rs +++ b/bin/reth/src/db/snapshots/headers.rs @@ -55,15 +55,14 @@ impl Command { let mut row_indexes = range.clone().collect::>(); let mut rng = rand::thread_rng(); let mut dictionaries = None; - let mut jar = NippyJar::load_without_header(&get_snapshot_segment_file_name( + let mut jar = NippyJar::load(&get_snapshot_segment_file_name( SnapshotSegment::Headers, filters, compression, &range, ))?; - let (provider, decompressors) = - self.prepare_jar_provider(&mut jar, &mut dictionaries, 0)?; + let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?; let mut cursor = if !decompressors.is_empty() { provider.cursor_with_decompressors(decompressors) } else { diff --git a/bin/reth/src/db/snapshots/mod.rs b/bin/reth/src/db/snapshots/mod.rs index 494113ad901..c335430c50d 100644 --- a/bin/reth/src/db/snapshots/mod.rs +++ b/bin/reth/src/db/snapshots/mod.rs @@ -7,7 +7,7 @@ use reth_nippy_jar::{ NippyJar, }; use reth_primitives::{ - snapshot::{Compression, InclusionFilter, PerfectHashingFunction}, + snapshot::{Compression, InclusionFilter, PerfectHashingFunction, SegmentHeader}, BlockNumber, ChainSpec, SnapshotSegment, }; use reth_provider::{providers::SnapshotProvider, ProviderFactory}; @@ -125,9 +125,8 @@ impl Command { /// [`DecoderDictionary`] and [`Decompressor`] if necessary. fn prepare_jar_provider<'a>( &self, - jar: &'a mut NippyJar, + jar: &'a mut NippyJar, dictionaries: &'a mut Option>>, - tx_start: u64, ) -> eyre::Result<(SnapshotProvider<'a>, Vec>)> { let mut decompressors: Vec> = vec![]; if let Some(reth_nippy_jar::compression::Compressors::Zstd(zstd)) = jar.compressor_mut() { @@ -137,13 +136,6 @@ impl Command { } } - Ok(( - SnapshotProvider { - jar: &*jar, - jar_start_block: self.from, - jar_start_transaction: tx_start, - }, - decompressors, - )) + Ok((SnapshotProvider { jar: &*jar }, decompressors)) } } diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index f6212339a02..bd3de4f0aca 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -60,7 +60,7 @@ impl Command { let mut rng = rand::thread_rng(); let mut dictionaries = None; - let mut jar = NippyJar::load_without_header(&get_snapshot_segment_file_name( + let mut jar = NippyJar::load(&get_snapshot_segment_file_name( SnapshotSegment::Headers, filters, compression, @@ -73,8 +73,7 @@ impl Command { let mut row_indexes = tx_range.clone().collect::>(); - let (provider, decompressors) = - self.prepare_jar_provider(&mut jar, &mut dictionaries, *tx_range.start())?; + let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?; let mut cursor = if !decompressors.is_empty() { provider.cursor_with_decompressors(decompressors) } else { diff --git a/crates/primitives/src/snapshot/mod.rs b/crates/primitives/src/snapshot/mod.rs index 6355ff0efe5..a61dedfb3cf 100644 --- a/crates/primitives/src/snapshot/mod.rs +++ b/crates/primitives/src/snapshot/mod.rs @@ -6,4 +6,4 @@ mod segment; pub use compression::Compression; pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; -pub use segment::SnapshotSegment; +pub use segment::{SegmentHeader, SnapshotSegment}; diff --git a/crates/primitives/src/snapshot/segment.rs b/crates/primitives/src/snapshot/segment.rs index 8902e500537..d8ae9f2677a 100644 --- a/crates/primitives/src/snapshot/segment.rs +++ b/crates/primitives/src/snapshot/segment.rs @@ -1,4 +1,6 @@ +use crate::{BlockNumber, TxNumber}; use serde::{Deserialize, Serialize}; +use std::ops::RangeInclusive; #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Deserialize, Serialize)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] @@ -11,3 +13,30 @@ pub enum SnapshotSegment { /// Snapshot segment responsible for the `Receipts` table. Receipts, } + +/// A [`NippyJar`] segment header that contains information common to all segments. +#[derive(Debug, Serialize, Deserialize)] +pub struct SegmentHeader { + block_range: RangeInclusive, + tx_range: RangeInclusive, +} + +impl SegmentHeader { + /// Returns [`SegmentHeader`]. + pub fn new( + block_range: RangeInclusive, + tx_range: RangeInclusive, + ) -> Self { + Self { block_range, tx_range } + } + + /// Returns the first block number of the segment. + pub fn block_start(&self) -> BlockNumber { + *self.block_range.start() + } + + /// Returns the first transaction number of the segment. + pub fn tx_start(&self) -> TxNumber { + *self.tx_range.start() + } +} diff --git a/crates/snapshot/src/segments/headers.rs b/crates/snapshot/src/segments/headers.rs index a6d31798675..ff5b275b62f 100644 --- a/crates/snapshot/src/segments/headers.rs +++ b/crates/snapshot/src/segments/headers.rs @@ -1,4 +1,4 @@ -use crate::segments::{prepare_jar, Segment}; +use crate::segments::{prepare_jar, Segment, SegmentHeader}; use reth_db::{ cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1_T2_T3, table::Table, tables, transaction::DbTx, RawKey, RawTable, @@ -47,7 +47,7 @@ impl Segment for Headers { range: RangeInclusive, ) -> RethResult<()> { let range_len = range.clone().count(); - let mut jar = prepare_jar::( + let mut jar = prepare_jar::( provider, SnapshotSegment::Headers, self.filters, @@ -86,6 +86,7 @@ impl Segment for Headers { tables::HeaderTD, tables::CanonicalHeaders, BlockNumber, + SegmentHeader, >( provider.tx_ref(), range, diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index e0d8c0f22e5..677ad15b790 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -6,14 +6,14 @@ pub use transactions::Transactions; mod headers; pub use headers::Headers; -use reth_db::{database::Database, table::Table, transaction::DbTx}; +use reth_db::database::Database; use reth_interfaces::RethResult; use reth_nippy_jar::NippyJar; use reth_primitives::{ - snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, + snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentHeader}, BlockNumber, SnapshotSegment, }; -use reth_provider::DatabaseProviderRO; +use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; use std::{ops::RangeInclusive, path::PathBuf}; pub(crate) type Rows = [Vec>; COLUMNS]; @@ -29,18 +29,20 @@ pub trait Segment { } /// Returns a [`NippyJar`] according to the desired configuration. -pub(crate) fn prepare_jar( +pub(crate) fn prepare_jar( provider: &DatabaseProviderRO<'_, DB>, segment: SnapshotSegment, filters: Filters, compression: Compression, - range: RangeInclusive, - range_len: usize, + block_range: RangeInclusive, + total_rows: usize, prepare_compression: impl Fn() -> RethResult>, -) -> RethResult { - let mut nippy_jar = NippyJar::new_without_header( +) -> RethResult> { + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let mut nippy_jar = NippyJar::new( COLUMNS, - &get_snapshot_segment_file_name(segment, filters, compression, &range), + &get_snapshot_segment_file_name(segment, filters, compression, &block_range), + SegmentHeader::new(block_range, tx_range), ); nippy_jar = match compression { @@ -57,8 +59,6 @@ pub(crate) fn prepare_jar( }; if let Filters::WithFilters(inclusion_filter, phf) = filters { - let total_rows = - (provider.tx_ref().entries::()? - *range.start() as usize).min(range_len); nippy_jar = match inclusion_filter { InclusionFilter::Cuckoo => nippy_jar.with_cuckoo_filter(total_rows), }; diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs index ecabcc0e117..e8cdafcffff 100644 --- a/crates/snapshot/src/segments/transactions.rs +++ b/crates/snapshot/src/segments/transactions.rs @@ -5,7 +5,7 @@ use reth_db::{ }; use reth_interfaces::RethResult; use reth_primitives::{ - snapshot::{Compression, Filters}, + snapshot::{Compression, Filters, SegmentHeader}, BlockNumber, SnapshotSegment, TxNumber, }; use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; @@ -46,21 +46,21 @@ impl Segment for Transactions { provider: &DatabaseProviderRO<'_, DB>, block_range: RangeInclusive, ) -> RethResult<()> { - let range = provider.transaction_range_by_block_range(block_range)?; - let range_len = range.clone().count(); + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range_len = tx_range.clone().count(); - let mut jar = prepare_jar::( + let mut jar = prepare_jar::( provider, SnapshotSegment::Transactions, self.filters, self.compression, - range.clone(), - range_len, + block_range, + tx_range_len, || { Ok([self.dataset_for_compression::( provider.tx_ref(), - &range, - range_len, + &tx_range, + tx_range_len, )?]) }, )?; @@ -75,14 +75,14 @@ impl Segment for Transactions { ); } - create_snapshot_T1::( + create_snapshot_T1::( provider.tx_ref(), - range, + tx_range, None, // We already prepared the dictionary beforehand None::>>>, hashes, - range_len, + tx_range_len, &mut jar, )?; diff --git a/crates/storage/db/src/snapshot.rs b/crates/storage/db/src/snapshot.rs index 9736c3e1ff8..a77ef47dc9d 100644 --- a/crates/storage/db/src/snapshot.rs +++ b/crates/storage/db/src/snapshot.rs @@ -9,6 +9,7 @@ use crate::{ use reth_interfaces::RethResult; use reth_nippy_jar::{ColumnResult, NippyJar, PHFKey}; use reth_tracing::tracing::*; +use serde::{Deserialize, Serialize}; use std::{error::Error as StdError, ops::RangeInclusive}; /// Macro that generates snapshot creation functions that take an arbitratry number of [`Table`] and @@ -34,7 +35,8 @@ macro_rules! generate_snapshot_func { #[allow(non_snake_case)] pub fn []<'tx, $($tbl: Table,)+ - K + K, + H: for<'a> Deserialize<'a> + Send + Serialize + Sync + std::fmt::Debug > ( tx: &impl DbTx<'tx>, @@ -43,7 +45,7 @@ macro_rules! generate_snapshot_func { dict_compression_set: Option>>>, keys: Option>>, row_count: usize, - nippy_jar: &mut NippyJar + nippy_jar: &mut NippyJar ) -> RethResult<()> where K: Key + Copy { diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index 4a550e6840c..f97ebe83c46 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -6,11 +6,11 @@ use reth_db::{ use reth_interfaces::{provider::ProviderError, RethResult}; use reth_nippy_jar::{compression::Decompressor, NippyJar, NippyJarCursor}; use reth_primitives::{ - Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, SealedHeader, - TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256, + snapshot::SegmentHeader, Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, + SealedHeader, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, + B256, U256, }; use std::ops::RangeBounds; - /// SnapshotProvider /// /// WIP Rudimentary impl just for tests @@ -19,16 +19,12 @@ use std::ops::RangeBounds; #[derive(Debug)] pub struct SnapshotProvider<'a> { /// NippyJar - pub jar: &'a NippyJar, - /// Starting snapshot block if applied. - pub jar_start_block: u64, - /// Starting snapshot transaction if applied. - pub jar_start_transaction: u64, + pub jar: &'a NippyJar, } impl<'a> SnapshotProvider<'a> { /// Creates cursor - pub fn cursor(&self) -> NippyJarCursor<'a> { + pub fn cursor(&self) -> NippyJarCursor<'a, SegmentHeader> { NippyJarCursor::new(self.jar, None).unwrap() } @@ -36,7 +32,7 @@ impl<'a> SnapshotProvider<'a> { pub fn cursor_with_decompressors( &self, decompressors: Vec>, - ) -> NippyJarCursor<'a> { + ) -> NippyJarCursor<'a, SegmentHeader> { NippyJarCursor::new(self.jar, Some(decompressors)).unwrap() } } @@ -62,7 +58,9 @@ impl<'a> HeaderProvider for SnapshotProvider<'a> { fn header_by_number(&self, num: BlockNumber) -> RethResult> { Header::decompress( self.cursor() - .row_by_number_with_cols::<0b01, 2>((num - self.jar_start_block) as usize)? + .row_by_number_with_cols::<0b01, 2>( + (num - self.jar.user_header().block_start()) as usize, + )? .ok_or(ProviderError::HeaderNotFound(num.into()))?[0], ) .map(Some) @@ -146,7 +144,9 @@ impl<'a> TransactionsProvider for SnapshotProvider<'a> { fn transaction_by_id(&self, num: TxNumber) -> RethResult> { TransactionSignedNoHash::decompress( self.cursor() - .row_by_number_with_cols::<0b1, 1>((num - self.jar_start_transaction) as usize)? + .row_by_number_with_cols::<0b1, 1>( + (num - self.jar.user_header().tx_start()) as usize, + )? .ok_or(ProviderError::TransactionNotFound(num.into()))?[0], ) .map(Into::into) @@ -285,7 +285,7 @@ mod test { .unwrap() .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())); - create_snapshot_T1_T2::( + create_snapshot_T1_T2::( &tx, range, None, @@ -299,11 +299,10 @@ mod test { // Use providers to query Header data and compare if it matches { - let jar = NippyJar::load_without_header(snap_file.path()).unwrap(); + let jar = NippyJar::load(snap_file.path()).unwrap(); let db_provider = factory.provider().unwrap(); - let snap_provider = - SnapshotProvider { jar: &jar, jar_start_block: 0, jar_start_transaction: 0 }; + let snap_provider = SnapshotProvider { jar: &jar }; assert!(!headers.is_empty()); From 9ae59390f4b66c51efa9a6313ed39abd2690eb73 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 13 Oct 2023 00:18:03 +0000 Subject: [PATCH 12/44] add wip transaction_by_hash --- .../storage/provider/src/providers/snapshot.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index f97ebe83c46..6fe4a2de67a 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -161,8 +161,21 @@ impl<'a> TransactionsProvider for SnapshotProvider<'a> { todo!() } - fn transaction_by_hash(&self, _hash: TxHash) -> RethResult> { - todo!() + fn transaction_by_hash(&self, hash: TxHash) -> RethResult> { + // WIP + let mut cursor = self.cursor(); + + let tx = TransactionSignedNoHash::decompress( + cursor.row_by_key_with_cols::<0b1, 1>(&hash.0).unwrap().unwrap()[0], + ) + .unwrap().with_hash(); + + if tx.hash() == hash { + return Ok(Some(tx)) + } else { + // check next snapshot + } + Ok(None) } fn transaction_by_hash_with_meta( From 96f77fc14c83e7794f7a090ec54b5d85fcada643 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 13 Oct 2023 00:21:08 +0000 Subject: [PATCH 13/44] fix doc test --- crates/storage/db/src/tables/extensions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/db/src/tables/extensions.rs b/crates/storage/db/src/tables/extensions.rs index bbf962d7922..04f3dc04a25 100644 --- a/crates/storage/db/src/tables/extensions.rs +++ b/crates/storage/db/src/tables/extensions.rs @@ -7,7 +7,7 @@ use reth_primitives::{keccak256, TransactionSignedNoHash, TxHash, TxNumber, B256 use crate::{abstraction::cursor::DbCursorRO, transaction::DbTx, Transactions}; impl Transactions { - /// Recovers transaction hashes by walking through [`tables::Transactions`] table and + /// Recovers transaction hashes by walking through [`crate::tables::Transactions`] table and /// calculating them in a parallel manner. Returned unsorted. pub fn recover_hashes<'a, 'b, TX: DbTx<'a>>( tx: &'b TX, From 16c33a7f50de5a609f6b030700f8a60b9c002f35 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 13 Oct 2023 00:21:49 +0000 Subject: [PATCH 14/44] fmt --- crates/storage/provider/src/providers/snapshot.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index 6fe4a2de67a..bce8c434b04 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -168,7 +168,8 @@ impl<'a> TransactionsProvider for SnapshotProvider<'a> { let tx = TransactionSignedNoHash::decompress( cursor.row_by_key_with_cols::<0b1, 1>(&hash.0).unwrap().unwrap()[0], ) - .unwrap().with_hash(); + .unwrap() + .with_hash(); if tx.hash() == hash { return Ok(Some(tx)) From d14a48346ebbcd501b3e5014b9cdfefe66e4e3a9 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 13 Oct 2023 00:25:19 +0000 Subject: [PATCH 15/44] add with_hash to walk bench as well --- bin/reth/src/db/snapshots/transactions.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index bd3de4f0aca..bf410585248 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -95,7 +95,8 @@ impl Command { (num - tx_range.start()) as usize, )? .ok_or(ProviderError::TransactionNotFound((*num).into()))?[0], - )?; + )? + .with_hash(); // TODO: replace with below when eventually SnapshotProvider re-uses cursor // provider.transaction_by_id(num as // u64)?.ok_or(ProviderError::TransactionNotFound((*num).into()))?; From 055a818470bf47c6df1b22e8e9a05700ea946601 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 13 Oct 2023 00:29:09 +0000 Subject: [PATCH 16/44] fix recover_hashes range --- crates/snapshot/src/segments/transactions.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs index e8cdafcffff..d5826298d18 100644 --- a/crates/snapshot/src/segments/transactions.rs +++ b/crates/snapshot/src/segments/transactions.rs @@ -69,9 +69,12 @@ impl Segment for Transactions { let mut hashes = None; if self.filters.has_filters() { hashes = Some( - tables::Transactions::recover_hashes(provider.tx_ref(), 0..10)? - .into_iter() - .map(|(tx, _)| Ok(tx)), + tables::Transactions::recover_hashes( + provider.tx_ref(), + *tx_range.start()..(*tx_range.end() + 1), + )? + .into_iter() + .map(|(tx, _)| Ok(tx)), ); } From a66b6034c12f85d4f3e804f53fe3c32c18aff494 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Mon, 16 Oct 2023 12:42:56 +0000 Subject: [PATCH 17/44] fix doc test --- crates/primitives/src/snapshot/segment.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/primitives/src/snapshot/segment.rs b/crates/primitives/src/snapshot/segment.rs index d8ae9f2677a..cbd9ad432a6 100644 --- a/crates/primitives/src/snapshot/segment.rs +++ b/crates/primitives/src/snapshot/segment.rs @@ -14,7 +14,7 @@ pub enum SnapshotSegment { Receipts, } -/// A [`NippyJar`] segment header that contains information common to all segments. +/// A segment header that contains information common to all segments. Used for storage. #[derive(Debug, Serialize, Deserialize)] pub struct SegmentHeader { block_range: RangeInclusive, From f327d9169be28fe8de1ad66d66c3b61c93326275 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Mon, 16 Oct 2023 13:17:04 +0000 Subject: [PATCH 18/44] fix test_snap --- crates/storage/provider/src/providers/snapshot.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index f54b22fb503..23d94ac42f7 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -243,6 +243,7 @@ mod test { // Ranges let row_count = 100u64; let range = 0..=(row_count - 1); + let segment_header = SegmentHeader::new(range.clone(), range.clone()); // Data sources let db = create_test_rw_db(); @@ -277,7 +278,7 @@ mod test { let with_compression = true; let with_filter = true; - let mut nippy_jar = NippyJar::new_without_header(2, snap_file.path()); + let mut nippy_jar = NippyJar::new(2, snap_file.path(), segment_header); if with_compression { nippy_jar = nippy_jar.with_zstd(false, 0); @@ -300,7 +301,7 @@ mod test { .unwrap() .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())); - create_snapshot_T1_T2::( + create_snapshot_T1_T2::( &tx, range, None, From 5582db37af86fb9cda9fae46d1b2a65828e869fb Mon Sep 17 00:00:00 2001 From: joshieDo Date: Mon, 16 Oct 2023 13:20:14 +0000 Subject: [PATCH 19/44] share dataset_for_compression implementation --- crates/snapshot/src/segments/headers.rs | 19 ++-------------- crates/snapshot/src/segments/mod.rs | 19 +++++++++++++++- crates/snapshot/src/segments/transactions.rs | 24 +++----------------- 3 files changed, 23 insertions(+), 39 deletions(-) diff --git a/crates/snapshot/src/segments/headers.rs b/crates/snapshot/src/segments/headers.rs index ff5b275b62f..4cc3ced2047 100644 --- a/crates/snapshot/src/segments/headers.rs +++ b/crates/snapshot/src/segments/headers.rs @@ -1,7 +1,7 @@ use crate::segments::{prepare_jar, Segment, SegmentHeader}; use reth_db::{ - cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1_T2_T3, table::Table, - tables, transaction::DbTx, RawKey, RawTable, + cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1_T2_T3, tables, + transaction::DbTx, RawKey, RawTable, }; use reth_interfaces::RethResult; use reth_primitives::{ @@ -23,21 +23,6 @@ impl Headers { pub fn new(compression: Compression, filters: Filters) -> Self { Self { compression, filters } } - - // Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). - fn dataset_for_compression>( - &self, - provider: &DatabaseProviderRO<'_, DB>, - range: &RangeInclusive, - range_len: usize, - ) -> RethResult>> { - let mut cursor = provider.tx_ref().cursor_read::>()?; - Ok(cursor - .walk_back(Some(RawKey::from(*range.end())))? - .take(range_len.min(1000)) - .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) - .collect::>()) - } } impl Segment for Headers { diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index 677ad15b790..4ab3ad74855 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -6,7 +6,9 @@ pub use transactions::Transactions; mod headers; pub use headers::Headers; -use reth_db::database::Database; +use reth_db::{ + cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, RawKey, RawTable, +}; use reth_interfaces::RethResult; use reth_nippy_jar::NippyJar; use reth_primitives::{ @@ -26,6 +28,21 @@ pub trait Segment { provider: &DatabaseProviderRO<'_, DB>, range: RangeInclusive, ) -> RethResult<()>; + + /// Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). + fn dataset_for_compression>( + &self, + provider: &DatabaseProviderRO<'_, DB>, + range: &RangeInclusive, + range_len: usize, + ) -> RethResult>> { + let mut cursor = provider.tx_ref().cursor_read::>()?; + Ok(cursor + .walk_back(Some(RawKey::from(*range.end())))? + .take(range_len.min(1000)) + .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) + .collect::>()) + } } /// Returns a [`NippyJar`] according to the desired configuration. diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs index d5826298d18..889e8ece8eb 100644 --- a/crates/snapshot/src/segments/transactions.rs +++ b/crates/snapshot/src/segments/transactions.rs @@ -1,8 +1,5 @@ use crate::segments::{prepare_jar, Segment}; -use reth_db::{ - cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1, table::Table, tables, - transaction::DbTx, RawKey, RawTable, -}; +use reth_db::{database::Database, snapshot::create_snapshot_T1, tables}; use reth_interfaces::RethResult; use reth_primitives::{ snapshot::{Compression, Filters, SegmentHeader}, @@ -23,21 +20,6 @@ impl Transactions { pub fn new(compression: Compression, filters: Filters) -> Self { Self { compression, filters } } - - // Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). - fn dataset_for_compression<'tx, T: Table>( - &self, - tx: &impl DbTx<'tx>, - range: &RangeInclusive, - range_len: usize, - ) -> RethResult>> { - let mut cursor = tx.cursor_read::>()?; - Ok(cursor - .walk_back(Some(RawKey::from(*range.end())))? - .take(range_len.min(1000)) - .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) - .collect::>()) - } } impl Segment for Transactions { @@ -57,8 +39,8 @@ impl Segment for Transactions { block_range, tx_range_len, || { - Ok([self.dataset_for_compression::( - provider.tx_ref(), + Ok([self.dataset_for_compression::( + provider, &tx_range, tx_range_len, )?]) From 13136edfdbf5a59fb631d7e57c3ad143d8232972 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Mon, 16 Oct 2023 13:20:27 +0000 Subject: [PATCH 20/44] add snapshot segment --- crates/snapshot/src/segments/mod.rs | 3 + crates/snapshot/src/segments/receipts.rs | 76 ++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 crates/snapshot/src/segments/receipts.rs diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index 4ab3ad74855..8d649115d09 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -6,6 +6,9 @@ pub use transactions::Transactions; mod headers; pub use headers::Headers; +mod receipts; +pub use receipts::Receipts; + use reth_db::{ cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, RawKey, RawTable, }; diff --git a/crates/snapshot/src/segments/receipts.rs b/crates/snapshot/src/segments/receipts.rs new file mode 100644 index 00000000000..45e9edfaac9 --- /dev/null +++ b/crates/snapshot/src/segments/receipts.rs @@ -0,0 +1,76 @@ +use crate::segments::{prepare_jar, Segment}; +use reth_db::{database::Database, snapshot::create_snapshot_T1, tables}; +use reth_interfaces::RethResult; +use reth_primitives::{ + snapshot::{Compression, Filters, SegmentHeader}, + BlockNumber, SnapshotSegment, TxNumber, +}; +use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; +use std::ops::RangeInclusive; + +/// Snapshot segment responsible for [SnapshotSegment::Receipts] part of data. +#[derive(Debug)] +pub struct Receipts { + compression: Compression, + filters: Filters, +} + +impl Receipts { + /// Creates new instance of [Receipts] snapshot segment. + pub fn new(compression: Compression, filters: Filters) -> Self { + Self { compression, filters } + } +} + +impl Segment for Receipts { + fn snapshot( + &self, + provider: &DatabaseProviderRO<'_, DB>, + block_range: RangeInclusive, + ) -> RethResult<()> { + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range_len = tx_range.clone().count(); + + let mut jar = prepare_jar::( + provider, + SnapshotSegment::Receipts, + self.filters, + self.compression, + block_range, + tx_range_len, + || { + Ok([self.dataset_for_compression::( + provider, + &tx_range, + tx_range_len, + )?]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut hashes = None; + if self.filters.has_filters() { + hashes = Some( + tables::Transactions::recover_hashes( + provider.tx_ref(), + *tx_range.start()..(*tx_range.end() + 1), + )? + .into_iter() + .map(|(tx, _)| Ok(tx)), + ); + } + + create_snapshot_T1::( + provider.tx_ref(), + tx_range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + tx_range_len, + &mut jar, + )?; + + Ok(()) + } +} From d4c8b33919c0bce6bda6f96762d85da1cf373f65 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Mon, 16 Oct 2023 13:38:50 +0000 Subject: [PATCH 21/44] add receipts command --- bin/reth/src/db/snapshots/mod.rs | 18 ++- bin/reth/src/db/snapshots/receipts.rs | 176 ++++++++++++++++++++++++++ crates/interfaces/src/provider.rs | 3 + 3 files changed, 195 insertions(+), 2 deletions(-) create mode 100644 bin/reth/src/db/snapshots/receipts.rs diff --git a/bin/reth/src/db/snapshots/mod.rs b/bin/reth/src/db/snapshots/mod.rs index c335430c50d..1e663321e19 100644 --- a/bin/reth/src/db/snapshots/mod.rs +++ b/bin/reth/src/db/snapshots/mod.rs @@ -15,6 +15,7 @@ use std::{path::Path, sync::Arc}; mod bench; mod headers; +mod receipts; mod transactions; #[derive(Parser, Debug)] @@ -88,7 +89,13 @@ impl Command { InclusionFilter::Cuckoo, *phf, )?, - SnapshotSegment::Receipts => todo!(), + SnapshotSegment::Receipts => self + .generate_receipts_snapshot::( + &provider, + *compression, + InclusionFilter::Cuckoo, + *phf, + )?, } } } @@ -113,7 +120,14 @@ impl Command { InclusionFilter::Cuckoo, *phf, )?, - SnapshotSegment::Receipts => todo!(), + SnapshotSegment::Receipts => self.bench_receipts_snapshot( + db_path, + log_level, + chain.clone(), + *compression, + InclusionFilter::Cuckoo, + *phf, + )?, } } } diff --git a/bin/reth/src/db/snapshots/receipts.rs b/bin/reth/src/db/snapshots/receipts.rs new file mode 100644 index 00000000000..4d55438406b --- /dev/null +++ b/bin/reth/src/db/snapshots/receipts.rs @@ -0,0 +1,176 @@ +use super::{ + bench::{bench, BenchKind}, + Command, Compression, PerfectHashingFunction, +}; +use rand::{seq::SliceRandom, Rng}; +use reth_db::{database::Database, open_db_read_only, table::Decompress}; +use reth_interfaces::db::LogLevel; +use reth_nippy_jar::NippyJar; +use reth_primitives::{ + snapshot::{Filters, InclusionFilter}, + ChainSpec, Receipt, SnapshotSegment, +}; +use reth_provider::{ + DatabaseProviderRO, ProviderError, ProviderFactory, ReceiptProvider, TransactionsProvider, + TransactionsProviderExt, +}; +use reth_snapshot::{ + segments, + segments::{get_snapshot_segment_file_name, Segment}, +}; +use std::{path::Path, sync::Arc}; + +impl Command { + pub(crate) fn generate_receipts_snapshot( + &self, + provider: &DatabaseProviderRO<'_, DB>, + compression: Compression, + inclusion_filter: InclusionFilter, + phf: PerfectHashingFunction, + ) -> eyre::Result<()> { + let segment = segments::Receipts::new( + compression, + if self.with_filters { + Filters::WithFilters(inclusion_filter, phf) + } else { + Filters::WithoutFilters + }, + ); + segment.snapshot::(provider, self.from..=(self.from + self.block_interval - 1))?; + + Ok(()) + } + + pub(crate) fn bench_receipts_snapshot( + &self, + db_path: &Path, + log_level: Option, + chain: Arc, + compression: Compression, + inclusion_filter: InclusionFilter, + phf: PerfectHashingFunction, + ) -> eyre::Result<()> { + let filters = if self.with_filters { + Filters::WithFilters(inclusion_filter, phf) + } else { + Filters::WithoutFilters + }; + + let block_range = self.from..=(self.from + self.block_interval - 1); + + let mut rng = rand::thread_rng(); + let mut dictionaries = None; + let mut jar = NippyJar::load(&get_snapshot_segment_file_name( + SnapshotSegment::Headers, + filters, + compression, + &block_range, + ))?; + + let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) + .provider()? + .transaction_range_by_block_range(block_range)?; + + let mut row_indexes = tx_range.clone().collect::>(); + + let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?; + let mut cursor = if !decompressors.is_empty() { + provider.cursor_with_decompressors(decompressors) + } else { + provider.cursor() + }; + + for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { + bench( + bench_kind, + (open_db_read_only(db_path, log_level)?, chain.clone()), + SnapshotSegment::Receipts, + filters, + compression, + || { + for num in row_indexes.iter() { + Receipt::decompress( + cursor + .row_by_number_with_cols::<0b1, 1>( + (num - tx_range.start()) as usize, + )? + .ok_or(ProviderError::ReceiptNotFound((*num).into()))?[0], + )?; + // TODO: replace with below when eventually SnapshotProvider re-uses cursor + // provider.receipt(num as + // u64)?.ok_or(ProviderError::ReceiptNotFound((*num).into()))?; + } + Ok(()) + }, + |provider| { + for num in row_indexes.iter() { + provider + .receipt(*num)? + .ok_or(ProviderError::ReceiptNotFound((*num).into()))?; + } + Ok(()) + }, + )?; + + // For random walk + row_indexes.shuffle(&mut rng); + } + + // BENCHMARK QUERYING A RANDOM HEADER BY NUMBER + { + let num = row_indexes[rng.gen_range(0..row_indexes.len())]; + bench( + BenchKind::RandomOne, + (open_db_read_only(db_path, log_level)?, chain.clone()), + SnapshotSegment::Receipts, + filters, + compression, + || { + Ok(Receipt::decompress( + cursor + .row_by_number_with_cols::<0b1, 1>((num - tx_range.start()) as usize)? + .ok_or(ProviderError::ReceiptNotFound((num as u64).into()))?[0], + )?) + }, + |provider| { + Ok(provider + .receipt(num as u64)? + .ok_or(ProviderError::ReceiptNotFound((num as u64).into()))?) + }, + )?; + } + + // BENCHMARK QUERYING A RANDOM HEADER BY HASH + { + let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; + let tx_hash = + ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) + .transaction_by_id(num)? + .ok_or(ProviderError::ReceiptNotFound(num.into()))? + .hash(); + + bench( + BenchKind::RandomHash, + (open_db_read_only(db_path, log_level)?, chain.clone()), + SnapshotSegment::Receipts, + filters, + compression, + || { + let receipt = Receipt::decompress( + cursor + .row_by_key_with_cols::<0b1, 1>(tx_hash.as_slice())? + .ok_or(ProviderError::ReceiptNotFound(tx_hash.into()))?[0], + )?; + + Ok(receipt) + }, + |provider| { + Ok(provider + .receipt_by_hash(tx_hash)? + .ok_or(ProviderError::ReceiptNotFound(tx_hash.into()))?) + }, + )?; + } + Ok(()) + } +} diff --git a/crates/interfaces/src/provider.rs b/crates/interfaces/src/provider.rs index 4849473be59..59ed59d8c37 100644 --- a/crates/interfaces/src/provider.rs +++ b/crates/interfaces/src/provider.rs @@ -43,6 +43,9 @@ pub enum ProviderError { /// Thrown we were unable to find a specific transaction #[error("Transaction does not exist {0:?}")] TransactionNotFound(BlockHashOrNumber), + /// Thrown we were unable to find a specific receipt + #[error("Receipt does not exist {0:?}")] + ReceiptNotFound(BlockHashOrNumber), /// Thrown we were unable to find the best block #[error("Best block does not exist")] BestBlockNotFound, From 648d55ef4d05c3078fcb0c36b8798543b7e656ea Mon Sep 17 00:00:00 2001 From: joshieDo Date: Sat, 21 Oct 2023 21:23:00 +0000 Subject: [PATCH 22/44] move dictionaries inside NippyJar --- bin/reth/src/db/snapshots/headers.rs | 5 +- bin/reth/src/db/snapshots/mod.rs | 15 +- bin/reth/src/db/snapshots/receipts.rs | 5 +- bin/reth/src/db/snapshots/transactions.rs | 5 +- .../storage/nippy-jar/src/compression/zstd.rs | 178 ++++++++++++++---- crates/storage/nippy-jar/src/lib.rs | 60 +++--- 6 files changed, 178 insertions(+), 90 deletions(-) diff --git a/bin/reth/src/db/snapshots/headers.rs b/bin/reth/src/db/snapshots/headers.rs index 7a9e813566c..9aa9232ccb2 100644 --- a/bin/reth/src/db/snapshots/headers.rs +++ b/bin/reth/src/db/snapshots/headers.rs @@ -54,15 +54,14 @@ impl Command { let mut row_indexes = range.clone().collect::>(); let mut rng = rand::thread_rng(); - let mut dictionaries = None; - let mut jar = NippyJar::load(&get_snapshot_segment_file_name( + let jar = NippyJar::load(&get_snapshot_segment_file_name( SnapshotSegment::Headers, filters, compression, &range, ))?; - let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?; + let (provider, decompressors) = self.prepare_jar_provider(&jar)?; let mut cursor = if !decompressors.is_empty() { provider.cursor_with_decompressors(decompressors) } else { diff --git a/bin/reth/src/db/snapshots/mod.rs b/bin/reth/src/db/snapshots/mod.rs index 1e663321e19..2bd87032e2d 100644 --- a/bin/reth/src/db/snapshots/mod.rs +++ b/bin/reth/src/db/snapshots/mod.rs @@ -2,10 +2,7 @@ use clap::Parser; use itertools::Itertools; use reth_db::{open_db_read_only, DatabaseEnvRO}; use reth_interfaces::db::LogLevel; -use reth_nippy_jar::{ - compression::{DecoderDictionary, Decompressor}, - NippyJar, -}; +use reth_nippy_jar::{compression::Decompressor, NippyJar}; use reth_primitives::{ snapshot::{Compression, InclusionFilter, PerfectHashingFunction, SegmentHeader}, BlockNumber, ChainSpec, SnapshotSegment, @@ -139,17 +136,15 @@ impl Command { /// [`DecoderDictionary`] and [`Decompressor`] if necessary. fn prepare_jar_provider<'a>( &self, - jar: &'a mut NippyJar, - dictionaries: &'a mut Option>>, + jar: &'a NippyJar, ) -> eyre::Result<(SnapshotProvider<'a>, Vec>)> { let mut decompressors: Vec> = vec![]; - if let Some(reth_nippy_jar::compression::Compressors::Zstd(zstd)) = jar.compressor_mut() { + if let Some(reth_nippy_jar::compression::Compressors::Zstd(zstd)) = jar.compressor() { if zstd.use_dict { - *dictionaries = zstd.generate_decompress_dictionaries(); - decompressors = zstd.generate_decompressors(dictionaries.as_ref().expect("qed"))?; + decompressors = zstd.decompressors()?; } } - Ok((SnapshotProvider { jar: &*jar }, decompressors)) + Ok((SnapshotProvider { jar }, decompressors)) } } diff --git a/bin/reth/src/db/snapshots/receipts.rs b/bin/reth/src/db/snapshots/receipts.rs index 4d55438406b..4e20bae35ce 100644 --- a/bin/reth/src/db/snapshots/receipts.rs +++ b/bin/reth/src/db/snapshots/receipts.rs @@ -59,8 +59,7 @@ impl Command { let block_range = self.from..=(self.from + self.block_interval - 1); let mut rng = rand::thread_rng(); - let mut dictionaries = None; - let mut jar = NippyJar::load(&get_snapshot_segment_file_name( + let jar = NippyJar::load(&get_snapshot_segment_file_name( SnapshotSegment::Headers, filters, compression, @@ -73,7 +72,7 @@ impl Command { let mut row_indexes = tx_range.clone().collect::>(); - let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?; + let (provider, decompressors) = self.prepare_jar_provider(&jar)?; let mut cursor = if !decompressors.is_empty() { provider.cursor_with_decompressors(decompressors) } else { diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index bf410585248..6e2f935cd91 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -59,8 +59,7 @@ impl Command { let block_range = self.from..=(self.from + self.block_interval - 1); let mut rng = rand::thread_rng(); - let mut dictionaries = None; - let mut jar = NippyJar::load(&get_snapshot_segment_file_name( + let jar = NippyJar::load(&get_snapshot_segment_file_name( SnapshotSegment::Headers, filters, compression, @@ -73,7 +72,7 @@ impl Command { let mut row_indexes = tx_range.clone().collect::>(); - let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?; + let (provider, decompressors) = self.prepare_jar_provider(&jar)?; let mut cursor = if !decompressors.is_empty() { provider.cursor_with_decompressors(decompressors) } else { diff --git a/crates/storage/nippy-jar/src/compression/zstd.rs b/crates/storage/nippy-jar/src/compression/zstd.rs index df1182f2834..4560f2a402c 100644 --- a/crates/storage/nippy-jar/src/compression/zstd.rs +++ b/crates/storage/nippy-jar/src/compression/zstd.rs @@ -1,8 +1,9 @@ use crate::{compression::Compression, NippyJarError}; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::{ fs::File, io::{Read, Write}, + sync::Arc, }; use tracing::*; use zstd::bulk::Compressor; @@ -17,7 +18,8 @@ pub enum ZstdState { Ready, } -#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug, Serialize, Deserialize)] /// Zstd compression structure. Supports a compression dictionary per column. pub struct Zstd { /// State. Should be ready before compressing. @@ -29,7 +31,11 @@ pub struct Zstd { /// Max size of a dictionary pub(crate) max_dict_size: usize, /// List of column dictionaries. - pub(crate) raw_dictionaries: Option>, + #[serde( + serialize_with = "serialize_dictionaries", + deserialize_with = "deserialize_dictionaries" + )] + pub(crate) dictionaries: Option>>, /// Number of columns to compress. columns: usize, } @@ -42,7 +48,7 @@ impl Zstd { level: 0, use_dict, max_dict_size, - raw_dictionaries: None, + dictionaries: None, columns, } } @@ -52,31 +58,18 @@ impl Zstd { self } - /// If using dictionaries, creates a list of [`DecoderDictionary`]. - /// - /// Consumes `self.raw_dictionaries` in the process. - pub fn generate_decompress_dictionaries<'a>(&mut self) -> Option>> { - self.raw_dictionaries.take().map(|dicts| { - // TODO Can we use ::new instead, and avoid consuming? - dicts.iter().map(|dict| DecoderDictionary::copy(dict)).collect() - }) - } - - /// Creates a list of [`Decompressor`] using the given dictionaries. - pub fn generate_decompressors<'a>( - &self, - dictionaries: &'a [DecoderDictionary<'a>], - ) -> Result>, NippyJarError> { - debug_assert!(dictionaries.len() == self.columns); + /// Creates a list of [`Decompressor`] if using dictionaries. + pub fn decompressors(&self) -> Result>, NippyJarError> { + if let Some(dictionaries) = &self.dictionaries { + debug_assert!(dictionaries.len() == self.columns); + return dictionaries.decompressors() + } - Ok(dictionaries - .iter() - .map(Decompressor::with_prepared_dictionary) - .collect::, _>>()?) + Ok(vec![]) } /// If using dictionaries, creates a list of [`Compressor`]. - pub fn generate_compressors<'a>(&self) -> Result>>, NippyJarError> { + pub fn compressors(&self) -> Result>>, NippyJarError> { match self.state { ZstdState::PendingDictionary => Err(NippyJarError::CompressorNotReady), ZstdState::Ready => { @@ -84,18 +77,11 @@ impl Zstd { return Ok(None) } - let mut compressors = None; - if let Some(dictionaries) = &self.raw_dictionaries { + if let Some(dictionaries) = &self.dictionaries { debug!(target: "nippy-jar", count=?dictionaries.len(), "Generating ZSTD compressor dictionaries."); - - let mut cmp = Vec::with_capacity(dictionaries.len()); - - for dict in dictionaries { - cmp.push(Compressor::with_dictionary(0, dict)?); - } - compressors = Some(cmp) + return Ok(Some(dictionaries.compressors()?)) } - Ok(compressors) + Ok(None) } } } @@ -243,9 +229,129 @@ impl Compression for Zstd { debug_assert_eq!(dictionaries.len(), self.columns); - self.raw_dictionaries = Some(dictionaries); + self.dictionaries = Some(Arc::new(ZstdDictionaries::new(dictionaries))); self.state = ZstdState::Ready; Ok(()) } } + +fn serialize_dictionaries( + dictionaries: &Option>>, + serializer: S, +) -> Result +where + S: Serializer, +{ + match dictionaries { + Some(dicts) => serializer.serialize_some(dicts.as_ref()), + None => serializer.serialize_none(), + } +} + +fn deserialize_dictionaries<'de, D>( + deserializer: D, +) -> Result>>, D::Error> +where + D: Deserializer<'de>, +{ + let dictionaries: Option> = Option::deserialize(deserializer)?; + Ok(dictionaries.map(|dicts| Arc::new(ZstdDictionaries::load(dicts)))) +} + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Serialize, Deserialize)] +pub struct ZstdDictionaries<'a>(Vec>); + +impl<'a> std::fmt::Debug for ZstdDictionaries<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ZstdDictionaries").field("num", &self.len()).finish_non_exhaustive() + } +} + +impl<'a> ZstdDictionaries<'a> { + pub fn new(raw: Vec) -> Self { + Self(raw.into_iter().map(ZstdDictionary::Raw).collect()) + } + + pub fn load(raw: Vec) -> Self { + Self( + raw.into_iter() + .map(|dict| ZstdDictionary::Loaded(DecoderDictionary::copy(&dict))) + .collect(), + ) + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn decompressors(&self) -> Result>, NippyJarError> { + Ok(self + .0 + .iter() + .map(|a| Decompressor::with_prepared_dictionary(a.loaded())) + .collect::, _>>()?) + } + + pub fn compressors(&self) -> Result>, NippyJarError> { + Ok(self + .0 + .iter() + .map(|a| Compressor::with_dictionary(0, a.raw())) + .collect::, _>>()?) + } +} + +pub enum ZstdDictionary<'a> { + Raw(RawDictionary), + Loaded(DecoderDictionary<'a>), +} + +impl<'a> ZstdDictionary<'a> { + fn raw(&self) -> &RawDictionary { + match self { + ZstdDictionary::Raw(dict) => dict, + ZstdDictionary::Loaded(_) => unreachable!(), + } + } + + fn loaded(&self) -> &DecoderDictionary<'_> { + match self { + ZstdDictionary::Raw(_) => unreachable!(), + ZstdDictionary::Loaded(dict) => dict, + } + } +} + +impl<'de, 'a> Deserialize<'de> for ZstdDictionary<'a> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let dict = RawDictionary::deserialize(deserializer)?; + Ok(Self::Loaded(DecoderDictionary::copy(&dict))) + } +} + +impl<'a> Serialize for ZstdDictionary<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self { + ZstdDictionary::Raw(r) => r.serialize(serializer), + ZstdDictionary::Loaded(_) => unreachable!(), + } + } +} + +#[cfg(test)] +impl<'a> PartialEq for ZstdDictionary<'a> { + fn eq(&self, other: &Self) -> bool { + if let (Self::Raw(a), Self::Raw(b)) = (self, &other) { + return a == b + } + unimplemented!("`DecoderDictionary` can't be compared. So comparison should be done after decompressing a value."); + } +} diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index 7544e3c29e5..428a7fc8547 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -323,7 +323,7 @@ where // implementation let mut maybe_zstd_compressors = None; if let Some(Compressors::Zstd(zstd)) = &self.compressor { - maybe_zstd_compressors = zstd.generate_compressors()?; + maybe_zstd_compressors = zstd.compressors()?; } // Temporary buffer to avoid multiple reallocations if compressing to a buffer (eg. zstd w/ @@ -394,6 +394,9 @@ where column_iterators = iterators.into_iter(); } + // drops immutable borrow + drop(maybe_zstd_compressors); + // Write offsets and offset index to file self.freeze_offsets(offsets)?; @@ -622,7 +625,7 @@ mod tests { assert!(nippy.compressor().is_some()); if let Some(Compressors::Zstd(zstd)) = &mut nippy.compressor_mut() { - assert!(matches!(zstd.generate_compressors(), Err(NippyJarError::CompressorNotReady))); + assert!(matches!(zstd.compressors(), Err(NippyJarError::CompressorNotReady))); // Make sure the number of column iterators match the initial set up ones. assert!(matches!( @@ -642,27 +645,26 @@ mod tests { if let Some(Compressors::Zstd(zstd)) = &nippy.compressor() { assert!(matches!( - (&zstd.state, zstd.raw_dictionaries.as_ref().map(|dict| dict.len())), + (&zstd.state, zstd.dictionaries.as_ref().map(|dict| dict.len())), (compression::ZstdState::Ready, Some(columns)) if columns == num_columns )); } nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); - let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); - assert_eq!(nippy, loaded_nippy); - - let mut dicts = vec![]; - if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor_mut() { - dicts = zstd.generate_decompress_dictionaries().unwrap() - } + let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); + assert_eq!(nippy.version, loaded_nippy.version); + assert_eq!(nippy.columns, loaded_nippy.columns); + assert_eq!(nippy.filter, loaded_nippy.filter); + assert_eq!(nippy.phf, loaded_nippy.phf); + assert_eq!(nippy.offsets_index, loaded_nippy.offsets_index); + assert_eq!(nippy.offsets, loaded_nippy.offsets); + assert_eq!(nippy.max_row_size, loaded_nippy.max_row_size); + assert_eq!(nippy.path, loaded_nippy.path); if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new( - &loaded_nippy, - Some(zstd.generate_decompressors(&dicts).unwrap()), - ) - .unwrap(); + let mut cursor = + NippyJarCursor::new(&loaded_nippy, Some(zstd.decompressors().unwrap())).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -782,23 +784,17 @@ mod tests { // Read file { - let mut loaded_nippy = NippyJar::::load(file_path.path()).unwrap(); + let loaded_nippy = NippyJar::::load(file_path.path()).unwrap(); assert!(loaded_nippy.compressor().is_some()); assert!(loaded_nippy.filter.is_some()); assert!(loaded_nippy.phf.is_some()); assert_eq!(loaded_nippy.user_header().block_start, block_start); - let mut dicts = vec![]; - if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor_mut() { - dicts = zstd.generate_decompress_dictionaries().unwrap() - } if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new( - &loaded_nippy, - Some(zstd.generate_decompressors(&dicts).unwrap()), - ) - .unwrap(); + let mut cursor = + NippyJarCursor::new(&loaded_nippy, Some(zstd.decompressors().unwrap())) + .unwrap(); // Iterate over compressed values and compare let mut row_num = 0usize; @@ -860,18 +856,12 @@ mod tests { // Read file { - let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); + let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); - let mut dicts = vec![]; - if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor_mut() { - dicts = zstd.generate_decompress_dictionaries().unwrap() - } if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new( - &loaded_nippy, - Some(zstd.generate_decompressors(&dicts).unwrap()), - ) - .unwrap(); + let mut cursor = + NippyJarCursor::new(&loaded_nippy, Some(zstd.decompressors().unwrap())) + .unwrap(); // Shuffled for chaos. let mut data = col1.iter().zip(col2.iter()).enumerate().collect::>(); From bfde9556c3cba1fdcca147c8ca5eddae2194b513 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Mon, 23 Oct 2023 13:28:42 +0000 Subject: [PATCH 23/44] introduce snapshotjarprovider and dashmap --- Cargo.lock | 1 + bin/reth/src/db/snapshots/headers.rs | 26 +- bin/reth/src/db/snapshots/mod.rs | 21 +- bin/reth/src/db/snapshots/receipts.rs | 34 +-- bin/reth/src/db/snapshots/transactions.rs | 33 +-- crates/primitives/src/snapshot/mod.rs | 3 + crates/primitives/src/snapshot/segment.rs | 70 +++++- crates/snapshot/src/segments/mod.rs | 44 +--- .../storage/nippy-jar/src/compression/zstd.rs | 6 +- crates/storage/nippy-jar/src/cursor.rs | 62 ++--- crates/storage/nippy-jar/src/lib.rs | 22 +- crates/storage/provider/Cargo.toml | 1 + crates/storage/provider/src/providers/mod.rs | 2 +- .../provider/src/providers/snapshot.rs | 234 +++++++++++++++--- 14 files changed, 356 insertions(+), 203 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6fb720a3a56..fd43bc76fc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6098,6 +6098,7 @@ dependencies = [ "alloy-rlp", "assert_matches", "auto_impl", + "dashmap", "itertools 0.11.0", "parking_lot 0.12.1", "pin-project", diff --git a/bin/reth/src/db/snapshots/headers.rs b/bin/reth/src/db/snapshots/headers.rs index 9aa9232ccb2..48661b218a8 100644 --- a/bin/reth/src/db/snapshots/headers.rs +++ b/bin/reth/src/db/snapshots/headers.rs @@ -5,13 +5,14 @@ use super::{ use rand::{seq::SliceRandom, Rng}; use reth_db::{database::Database, open_db_read_only, table::Decompress}; use reth_interfaces::db::LogLevel; -use reth_nippy_jar::NippyJar; use reth_primitives::{ snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, ChainSpec, Header, SnapshotSegment, }; -use reth_provider::{DatabaseProviderRO, HeaderProvider, ProviderError, ProviderFactory}; -use reth_snapshot::segments::{get_snapshot_segment_file_name, Headers, Segment}; +use reth_provider::{ + providers::SnapshotProvider, DatabaseProviderRO, HeaderProvider, ProviderError, ProviderFactory, +}; +use reth_snapshot::segments::{Headers, Segment}; use std::{path::Path, sync::Arc}; impl Command { @@ -54,19 +55,12 @@ impl Command { let mut row_indexes = range.clone().collect::>(); let mut rng = rand::thread_rng(); - let jar = NippyJar::load(&get_snapshot_segment_file_name( - SnapshotSegment::Headers, - filters, - compression, - &range, - ))?; - - let (provider, decompressors) = self.prepare_jar_provider(&jar)?; - let mut cursor = if !decompressors.is_empty() { - provider.cursor_with_decompressors(decompressors) - } else { - provider.cursor() - }; + let path = + SnapshotSegment::Headers.filename_with_configuration(filters, compression, &range); + let provider = SnapshotProvider::default(); + let jar_provider = + provider.get_segment_provider(SnapshotSegment::Headers, self.from, Some(path))?; + let mut cursor = jar_provider.cursor()?; for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( diff --git a/bin/reth/src/db/snapshots/mod.rs b/bin/reth/src/db/snapshots/mod.rs index 2bd87032e2d..efce4878393 100644 --- a/bin/reth/src/db/snapshots/mod.rs +++ b/bin/reth/src/db/snapshots/mod.rs @@ -2,12 +2,11 @@ use clap::Parser; use itertools::Itertools; use reth_db::{open_db_read_only, DatabaseEnvRO}; use reth_interfaces::db::LogLevel; -use reth_nippy_jar::{compression::Decompressor, NippyJar}; use reth_primitives::{ - snapshot::{Compression, InclusionFilter, PerfectHashingFunction, SegmentHeader}, + snapshot::{Compression, InclusionFilter, PerfectHashingFunction}, BlockNumber, ChainSpec, SnapshotSegment, }; -use reth_provider::{providers::SnapshotProvider, ProviderFactory}; +use reth_provider::ProviderFactory; use std::{path::Path, sync::Arc}; mod bench; @@ -131,20 +130,4 @@ impl Command { Ok(()) } - - /// Returns a [`SnapshotProvider`] of the provided [`NippyJar`], alongside a list of - /// [`DecoderDictionary`] and [`Decompressor`] if necessary. - fn prepare_jar_provider<'a>( - &self, - jar: &'a NippyJar, - ) -> eyre::Result<(SnapshotProvider<'a>, Vec>)> { - let mut decompressors: Vec> = vec![]; - if let Some(reth_nippy_jar::compression::Compressors::Zstd(zstd)) = jar.compressor() { - if zstd.use_dict { - decompressors = zstd.decompressors()?; - } - } - - Ok((SnapshotProvider { jar }, decompressors)) - } } diff --git a/bin/reth/src/db/snapshots/receipts.rs b/bin/reth/src/db/snapshots/receipts.rs index 4e20bae35ce..a13cdc607d1 100644 --- a/bin/reth/src/db/snapshots/receipts.rs +++ b/bin/reth/src/db/snapshots/receipts.rs @@ -5,19 +5,15 @@ use super::{ use rand::{seq::SliceRandom, Rng}; use reth_db::{database::Database, open_db_read_only, table::Decompress}; use reth_interfaces::db::LogLevel; -use reth_nippy_jar::NippyJar; use reth_primitives::{ snapshot::{Filters, InclusionFilter}, ChainSpec, Receipt, SnapshotSegment, }; use reth_provider::{ - DatabaseProviderRO, ProviderError, ProviderFactory, ReceiptProvider, TransactionsProvider, - TransactionsProviderExt, -}; -use reth_snapshot::{ - segments, - segments::{get_snapshot_segment_file_name, Segment}, + providers::SnapshotProvider, DatabaseProviderRO, ProviderError, ProviderFactory, + ReceiptProvider, TransactionsProvider, TransactionsProviderExt, }; +use reth_snapshot::{segments, segments::Segment}; use std::{path::Path, sync::Arc}; impl Command { @@ -59,25 +55,21 @@ impl Command { let block_range = self.from..=(self.from + self.block_interval - 1); let mut rng = rand::thread_rng(); - let jar = NippyJar::load(&get_snapshot_segment_file_name( - SnapshotSegment::Headers, - filters, - compression, - &block_range, - ))?; - let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) .provider()? - .transaction_range_by_block_range(block_range)?; + .transaction_range_by_block_range(block_range.clone())?; let mut row_indexes = tx_range.clone().collect::>(); - let (provider, decompressors) = self.prepare_jar_provider(&jar)?; - let mut cursor = if !decompressors.is_empty() { - provider.cursor_with_decompressors(decompressors) - } else { - provider.cursor() - }; + let path = SnapshotSegment::Receipts.filename_with_configuration( + filters, + compression, + &block_range, + ); + let provider = SnapshotProvider::default(); + let jar_provider = + provider.get_segment_provider(SnapshotSegment::Receipts, self.from, Some(path))?; + let mut cursor = jar_provider.cursor()?; for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index 6e2f935cd91..7c8d88c381d 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -5,19 +5,15 @@ use super::{ use rand::{seq::SliceRandom, Rng}; use reth_db::{database::Database, open_db_read_only, table::Decompress}; use reth_interfaces::db::LogLevel; -use reth_nippy_jar::NippyJar; use reth_primitives::{ snapshot::{Filters, InclusionFilter}, ChainSpec, SnapshotSegment, TransactionSignedNoHash, }; use reth_provider::{ - DatabaseProviderRO, ProviderError, ProviderFactory, TransactionsProvider, - TransactionsProviderExt, -}; -use reth_snapshot::{ - segments, - segments::{get_snapshot_segment_file_name, Segment}, + providers::SnapshotProvider, DatabaseProviderRO, ProviderError, ProviderFactory, + TransactionsProvider, TransactionsProviderExt, }; +use reth_snapshot::{segments, segments::Segment}; use std::{path::Path, sync::Arc}; impl Command { @@ -59,25 +55,22 @@ impl Command { let block_range = self.from..=(self.from + self.block_interval - 1); let mut rng = rand::thread_rng(); - let jar = NippyJar::load(&get_snapshot_segment_file_name( - SnapshotSegment::Headers, - filters, - compression, - &block_range, - ))?; let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) .provider()? - .transaction_range_by_block_range(block_range)?; + .transaction_range_by_block_range(block_range.clone())?; let mut row_indexes = tx_range.clone().collect::>(); - let (provider, decompressors) = self.prepare_jar_provider(&jar)?; - let mut cursor = if !decompressors.is_empty() { - provider.cursor_with_decompressors(decompressors) - } else { - provider.cursor() - }; + let path = SnapshotSegment::Transactions.filename_with_configuration( + filters, + compression, + &block_range, + ); + let provider = SnapshotProvider::default(); + let jar_provider = + provider.get_segment_provider(SnapshotSegment::Transactions, self.from, Some(path))?; + let mut cursor = jar_provider.cursor()?; for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( diff --git a/crates/primitives/src/snapshot/mod.rs b/crates/primitives/src/snapshot/mod.rs index a61dedfb3cf..95c3f1f53fb 100644 --- a/crates/primitives/src/snapshot/mod.rs +++ b/crates/primitives/src/snapshot/mod.rs @@ -4,6 +4,9 @@ mod compression; mod filters; mod segment; +/// Default snapshot block count. +pub const SNAPSHOT_BLOCK_NUMBER_CHUNKS: u64 = 500_000; + pub use compression::Compression; pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; pub use segment::{SegmentHeader, SnapshotSegment}; diff --git a/crates/primitives/src/snapshot/segment.rs b/crates/primitives/src/snapshot/segment.rs index cbd9ad432a6..79d98fe0e8b 100644 --- a/crates/primitives/src/snapshot/segment.rs +++ b/crates/primitives/src/snapshot/segment.rs @@ -1,6 +1,8 @@ -use crate::{BlockNumber, TxNumber}; +use crate::{snapshot::PerfectHashingFunction, BlockNumber, TxNumber}; use serde::{Deserialize, Serialize}; -use std::ops::RangeInclusive; +use std::{ops::RangeInclusive, path::PathBuf}; + +use super::{Compression, Filters, InclusionFilter}; #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Deserialize, Serialize)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] @@ -14,8 +16,70 @@ pub enum SnapshotSegment { Receipts, } +impl SnapshotSegment { + /// Returns the default configuration of the segment. + const fn config(&self) -> (Filters, Compression) { + let default_config = ( + Filters::WithFilters(InclusionFilter::Cuckoo, super::PerfectHashingFunction::Fmph), + Compression::Lz4, + ); + + match self { + SnapshotSegment::Headers => default_config, + SnapshotSegment::Transactions => default_config, + SnapshotSegment::Receipts => default_config, + } + } + + /// Returns the default file name for the provided segment and range. + pub fn filename(&self, range: &RangeInclusive) -> PathBuf { + let (filters, compression) = self.config(); + self.filename_with_configuration(filters, compression, range) + } + + /// Returns file name for the provided segment, filters, compression and range. + pub fn filename_with_configuration( + &self, + filters: Filters, + compression: Compression, + range: &RangeInclusive, + ) -> PathBuf { + let segment_name = match self { + SnapshotSegment::Headers => "headers", + SnapshotSegment::Transactions => "transactions", + SnapshotSegment::Receipts => "receipts", + }; + let filters_name = match filters { + Filters::WithFilters(inclusion_filter, phf) => { + let inclusion_filter = match inclusion_filter { + InclusionFilter::Cuckoo => "cuckoo", + }; + let phf = match phf { + PerfectHashingFunction::Fmph => "fmph", + PerfectHashingFunction::GoFmph => "gofmph", + }; + format!("{inclusion_filter}-{phf}") + } + Filters::WithoutFilters => "none".to_string(), + }; + let compression_name = match compression { + Compression::Lz4 => "lz4", + Compression::Zstd => "zstd", + Compression::ZstdWithDictionary => "zstd-dict", + Compression::Uncompressed => "uncompressed", + }; + + format!( + "snapshot_{segment_name}_{}_{}_{filters_name}_{compression_name}", + range.start(), + range.end(), + ) + .into() + } +} + /// A segment header that contains information common to all segments. Used for storage. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)] pub struct SegmentHeader { block_range: RangeInclusive, tx_range: RangeInclusive, diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index 8d649115d09..991677a781c 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -19,7 +19,7 @@ use reth_primitives::{ BlockNumber, SnapshotSegment, }; use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; -use std::{ops::RangeInclusive, path::PathBuf}; +use std::ops::RangeInclusive; pub(crate) type Rows = [Vec>; COLUMNS]; @@ -61,7 +61,7 @@ pub(crate) fn prepare_jar( let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; let mut nippy_jar = NippyJar::new( COLUMNS, - &get_snapshot_segment_file_name(segment, filters, compression, &block_range), + &segment.filename_with_configuration(filters, compression, &block_range), SegmentHeader::new(block_range, tx_range), ); @@ -90,43 +90,3 @@ pub(crate) fn prepare_jar( Ok(nippy_jar) } - -/// Returns file name for the provided segment, filters, compression and range. -pub fn get_snapshot_segment_file_name( - segment: SnapshotSegment, - filters: Filters, - compression: Compression, - range: &RangeInclusive, -) -> PathBuf { - let segment_name = match segment { - SnapshotSegment::Headers => "headers", - SnapshotSegment::Transactions => "transactions", - SnapshotSegment::Receipts => "receipts", - }; - let filters_name = match filters { - Filters::WithFilters(inclusion_filter, phf) => { - let inclusion_filter = match inclusion_filter { - InclusionFilter::Cuckoo => "cuckoo", - }; - let phf = match phf { - PerfectHashingFunction::Fmph => "fmph", - PerfectHashingFunction::GoFmph => "gofmph", - }; - format!("{inclusion_filter}-{phf}") - } - Filters::WithoutFilters => "none".to_string(), - }; - let compression_name = match compression { - Compression::Lz4 => "lz4", - Compression::Zstd => "zstd", - Compression::ZstdWithDictionary => "zstd-dict", - Compression::Uncompressed => "uncompressed", - }; - - format!( - "snapshot_{segment_name}_{}_{}_{filters_name}_{compression_name}", - range.start(), - range.end(), - ) - .into() -} diff --git a/crates/storage/nippy-jar/src/compression/zstd.rs b/crates/storage/nippy-jar/src/compression/zstd.rs index 4560f2a402c..4d8a1212631 100644 --- a/crates/storage/nippy-jar/src/compression/zstd.rs +++ b/crates/storage/nippy-jar/src/compression/zstd.rs @@ -261,7 +261,7 @@ where #[cfg_attr(test, derive(PartialEq))] #[derive(Serialize, Deserialize)] -pub struct ZstdDictionaries<'a>(Vec>); +pub struct ZstdDictionaries<'a>(pub Vec>); impl<'a> std::fmt::Debug for ZstdDictionaries<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -309,14 +309,14 @@ pub enum ZstdDictionary<'a> { } impl<'a> ZstdDictionary<'a> { - fn raw(&self) -> &RawDictionary { + pub fn raw(&self) -> &RawDictionary { match self { ZstdDictionary::Raw(dict) => dict, ZstdDictionary::Loaded(_) => unreachable!(), } } - fn loaded(&self) -> &DecoderDictionary<'_> { + pub fn loaded(&self) -> &DecoderDictionary<'_> { match self { ZstdDictionary::Raw(_) => unreachable!(), ZstdDictionary::Loaded(dict) => dict, diff --git a/crates/storage/nippy-jar/src/cursor.rs b/crates/storage/nippy-jar/src/cursor.rs index 19e39fa0cd2..05217d6ffe2 100644 --- a/crates/storage/nippy-jar/src/cursor.rs +++ b/crates/storage/nippy-jar/src/cursor.rs @@ -1,24 +1,23 @@ use crate::{ - compression::{Compression, Zstd}, + compression::{Compression, Compressors, Zstd}, InclusionFilter, NippyJar, NippyJarError, PerfectHashingFunction, RefRow, }; use memmap2::Mmap; use serde::{de::Deserialize, ser::Serialize}; -use std::{fs::File, ops::Range}; +use std::{fs::File, ops::Range, sync::Arc}; use sucds::int_vectors::Access; use zstd::bulk::Decompressor; /// Simple cursor implementation to retrieve data from [`NippyJar`]. +#[derive(Clone)] pub struct NippyJarCursor<'a, H = ()> { /// [`NippyJar`] which holds most of the required configuration to read from the file. jar: &'a NippyJar, - /// Optional dictionary decompressors. - zstd_decompressors: Option>>, /// Data file. #[allow(unused)] - file_handle: File, + file_handle: Arc, /// Data file. - mmap_handle: Mmap, + mmap_handle: Arc, /// Internal buffer to unload data to without reallocating memory on each retrieval. internal_buffer: Vec, /// Cursor row position. @@ -36,24 +35,21 @@ where impl<'a, H> NippyJarCursor<'a, H> where - H: Send + Sync + Serialize + for<'b> Deserialize<'b> + std::fmt::Debug, + H: Send + Sync + Serialize + for<'b> Deserialize<'b> + std::fmt::Debug + 'static, { - pub fn new( - jar: &'a NippyJar, - zstd_decompressors: Option>>, - ) -> Result { + pub fn new(jar: &'a NippyJar) -> Result { let file = File::open(jar.data_path())?; // SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle. let mmap = unsafe { Mmap::map(&file)? }; + let max_row_size = jar.max_row_size; Ok(NippyJarCursor { jar, - zstd_decompressors, - file_handle: file, - mmap_handle: mmap, + file_handle: Arc::new(file), + mmap_handle: Arc::new(mmap), // Makes sure that we have enough buffer capacity to decompress any row of data. - internal_buffer: Vec::with_capacity(jar.max_row_size), + internal_buffer: Vec::with_capacity(max_row_size), row: 0, }) } @@ -218,23 +214,27 @@ where value_offset..next_value_offset }; - if let Some(zstd_dict_decompressors) = self.zstd_decompressors.as_mut() { - let from: usize = self.internal_buffer.len(); - if let Some(decompressor) = zstd_dict_decompressors.get_mut(column) { - Zstd::decompress_with_dictionary( - &self.mmap_handle[column_offset_range], - &mut self.internal_buffer, - decompressor, - )?; - } - let to = self.internal_buffer.len(); - - row.push(ValueRange::Internal(from..to)); - } else if let Some(compression) = self.jar.compressor() { - // Uses the chosen default decompressor + if let Some(compression) = self.jar.compressor() { let from = self.internal_buffer.len(); - compression - .decompress_to(&self.mmap_handle[column_offset_range], &mut self.internal_buffer)?; + match compression { + Compressors::Zstd(z) if z.use_dict => { + let mut decompressor = Decompressor::with_prepared_dictionary( + z.dictionaries.as_ref().expect("qed").0[column].loaded(), + )?; + Zstd::decompress_with_dictionary( + &self.mmap_handle[column_offset_range], + &mut self.internal_buffer, + &mut decompressor, + )?; + } + _ => { + // Uses the chosen default decompressor + compression.decompress_to( + &self.mmap_handle[column_offset_range], + &mut self.internal_buffer, + )?; + } + } let to = self.internal_buffer.len(); row.push(ValueRange::Internal(from..to)); diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index 428a7fc8547..972bc9f0e52 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -663,8 +663,8 @@ mod tests { assert_eq!(nippy.path, loaded_nippy.path); if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { - let mut cursor = - NippyJarCursor::new(&loaded_nippy, Some(zstd.decompressors().unwrap())).unwrap(); + assert!(zstd.use_dict); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -675,6 +675,8 @@ mod tests { ); row_index += 1; } + } else { + panic!("Expected Zstd compressor") } } @@ -697,7 +699,7 @@ mod tests { assert_eq!(nippy, loaded_nippy); if let Some(Compressors::Lz4(_)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -735,7 +737,7 @@ mod tests { if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { assert!(!zstd.use_dict); - let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -791,10 +793,8 @@ mod tests { assert!(loaded_nippy.phf.is_some()); assert_eq!(loaded_nippy.user_header().block_start, block_start); - if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { - let mut cursor = - NippyJarCursor::new(&loaded_nippy, Some(zstd.decompressors().unwrap())) - .unwrap(); + if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() { + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_num = 0usize; @@ -858,10 +858,8 @@ mod tests { { let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); - if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { - let mut cursor = - NippyJarCursor::new(&loaded_nippy, Some(zstd.decompressors().unwrap())) - .unwrap(); + if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() { + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Shuffled for chaos. let mut data = col1.iter().zip(col2.iter()).enumerate().collect::>(); diff --git a/crates/storage/provider/Cargo.toml b/crates/storage/provider/Cargo.toml index 2be95fce1c8..d0df9e7a868 100644 --- a/crates/storage/provider/Cargo.toml +++ b/crates/storage/provider/Cargo.toml @@ -31,6 +31,7 @@ auto_impl = "1.0" itertools.workspace = true pin-project.workspace = true parking_lot.workspace = true +dashmap = { version = "5.5", features = ["inline"] } # test-utils alloy-rlp = { workspace = true, optional = true } diff --git a/crates/storage/provider/src/providers/mod.rs b/crates/storage/provider/src/providers/mod.rs index a865f5a636d..b017605bea3 100644 --- a/crates/storage/provider/src/providers/mod.rs +++ b/crates/storage/provider/src/providers/mod.rs @@ -37,7 +37,7 @@ mod bundle_state_provider; mod chain_info; mod database; mod snapshot; -pub use snapshot::SnapshotProvider; +pub use snapshot::{SnapshotJarProvider, SnapshotProvider}; mod state; use crate::{providers::chain_info::ChainInfoTracker, traits::BlockSource}; pub use bundle_state_provider::BundleStateProvider; diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index 6c81965d75a..9229b4f0e8b 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -1,47 +1,210 @@ use crate::{BlockHashReader, BlockNumReader, HeaderProvider, TransactionsProvider}; +use dashmap::DashMap; use reth_db::{ table::{Decompress, Table}, HeaderTD, }; use reth_interfaces::{provider::ProviderError, RethResult}; -use reth_nippy_jar::{compression::Decompressor, NippyJar, NippyJarCursor}; +use reth_nippy_jar::{NippyJar, NippyJarCursor}; use reth_primitives::{ - snapshot::SegmentHeader, Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, - SealedHeader, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, + snapshot::{SegmentHeader, SNAPSHOT_BLOCK_NUMBER_CHUNKS}, + Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, SealedHeader, + SnapshotSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256, }; -use std::ops::RangeBounds; +use std::{ops::RangeBounds, path::PathBuf}; + +/// Alias type for each specific `NippyJar`. +type NippyJarRef<'a> = + dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), NippyJar>; /// SnapshotProvider -/// -/// WIP Rudimentary impl just for tests -/// TODO: should be able to walk through snapshot files/block_ranges -/// TODO: Arc over NippyJars and/or NippyJarCursors (LRU) -#[derive(Debug)] -pub struct SnapshotProvider<'a> { - /// NippyJar - pub jar: &'a NippyJar, +#[derive(Debug, Default)] +pub struct SnapshotProvider { + /// Maintains a map which allows for concurrent access to different `NippyJars`, over different + /// segments and ranges. + pub map: DashMap<(BlockNumber, SnapshotSegment), NippyJar>, +} + +impl SnapshotProvider { + /// Gets the provider of the requested segment and range. + pub fn get_segment_provider( + &self, + segment: SnapshotSegment, + block: BlockNumber, + mut path: Option, + ) -> RethResult> { + // TODO this invalidates custom length snapshots. + let snapshot = block / SNAPSHOT_BLOCK_NUMBER_CHUNKS; + let key = (snapshot, segment); + + if let Some(jar) = self.map.get(&key) { + return Ok(SnapshotJarProvider { jar }) + } + + if let Some(path) = &path { + let jar = NippyJar::load(path)?; + self.map.insert(key, jar); + } else { + path = Some(segment.filename( + &((snapshot * SNAPSHOT_BLOCK_NUMBER_CHUNKS)..= + ((snapshot + 1) * SNAPSHOT_BLOCK_NUMBER_CHUNKS - 1)), + )); + } + + self.get_segment_provider(segment, block, path) + } } -impl<'a> SnapshotProvider<'a> { - /// Creates cursor - pub fn cursor(&self) -> NippyJarCursor<'a, SegmentHeader> { - NippyJarCursor::new(self.jar, None).unwrap() +impl HeaderProvider for SnapshotProvider { + fn header(&self, _block_hash: &BlockHash) -> RethResult> { + todo!() + } + + fn header_by_number(&self, num: BlockNumber) -> RethResult> { + self.get_segment_provider(SnapshotSegment::Headers, num, None)?.header_by_number(num) + } + + fn header_td(&self, _block_hash: &BlockHash) -> RethResult> { + todo!() + } + + fn header_td_by_number(&self, _number: BlockNumber) -> RethResult> { + todo!(); + } + + fn headers_range(&self, _range: impl RangeBounds) -> RethResult> { + todo!(); } - /// Creates cursor with zstd decompressors - pub fn cursor_with_decompressors( + fn sealed_headers_range( &self, - decompressors: Vec>, - ) -> NippyJarCursor<'a, SegmentHeader> { - NippyJarCursor::new(self.jar, Some(decompressors)).unwrap() + _range: impl RangeBounds, + ) -> RethResult> { + todo!(); + } + + fn sealed_header(&self, _number: BlockNumber) -> RethResult> { + todo!(); } } -impl<'a> HeaderProvider for SnapshotProvider<'a> { +impl BlockHashReader for SnapshotProvider { + fn block_hash(&self, _number: u64) -> RethResult> { + todo!() + } + + fn canonical_hashes_range( + &self, + _start: BlockNumber, + _end: BlockNumber, + ) -> RethResult> { + todo!() + } +} + +impl BlockNumReader for SnapshotProvider { + fn chain_info(&self) -> RethResult { + todo!() + } + + fn best_block_number(&self) -> RethResult { + todo!() + } + + fn last_block_number(&self) -> RethResult { + todo!() + } + + fn block_number(&self, _hash: B256) -> RethResult> { + todo!() + } +} + +impl TransactionsProvider for SnapshotProvider { + fn transaction_id(&self, _tx_hash: TxHash) -> RethResult> { + todo!() + } + + fn transaction_by_id(&self, num: TxNumber) -> RethResult> { + // TODO `num` is provided after checking the index + let block_num = num; + self.get_segment_provider(SnapshotSegment::Transactions, block_num, None)?.transaction_by_id(num) + } + + fn transaction_by_id_no_hash( + &self, + _id: TxNumber, + ) -> RethResult> { + todo!() + } + + fn transaction_by_hash(&self, _hash: TxHash) -> RethResult> { + todo!() + } + + fn transaction_by_hash_with_meta( + &self, + _hash: TxHash, + ) -> RethResult> { + todo!() + } + + fn transaction_block(&self, _id: TxNumber) -> RethResult> { + todo!() + } + + fn transactions_by_block( + &self, + _block_id: BlockHashOrNumber, + ) -> RethResult>> { + todo!() + } + + fn transactions_by_block_range( + &self, + _range: impl RangeBounds, + ) -> RethResult>> { + todo!() + } + + fn senders_by_tx_range(&self, _range: impl RangeBounds) -> RethResult> { + todo!() + } + + fn transactions_by_tx_range( + &self, + _range: impl RangeBounds, + ) -> RethResult> { + todo!() + } + + fn transaction_sender(&self, _id: TxNumber) -> RethResult> { + todo!() + } +} + +/// Provider over a specific `NippyJar` and range. +#[derive(Debug)] +pub struct SnapshotJarProvider<'a> { + /// Reference to a value on [`SnapshotProvider`] + pub jar: NippyJarRef<'a>, +} + +impl<'a> SnapshotJarProvider<'a> { + /// Provides a cursor for more granular data access. + pub fn cursor<'b>(&'b self) -> RethResult> + where + 'b: 'a, + { + Ok(NippyJarCursor::new(self.jar.value())?) + } +} + +impl<'a> HeaderProvider for SnapshotJarProvider<'a> { fn header(&self, block_hash: &BlockHash) -> RethResult> { // WIP - let mut cursor = self.cursor(); + let mut cursor = NippyJarCursor::new(self.jar.value())?; let header = Header::decompress( cursor.row_by_key_with_cols::<0b01, 2>(&block_hash.0).unwrap().unwrap()[0], @@ -58,7 +221,7 @@ impl<'a> HeaderProvider for SnapshotProvider<'a> { fn header_by_number(&self, num: BlockNumber) -> RethResult> { Header::decompress( - self.cursor() + NippyJarCursor::new(self.jar.value())? .row_by_number_with_cols::<0b01, 2>( (num - self.jar.user_header().block_start()) as usize, )? @@ -70,7 +233,7 @@ impl<'a> HeaderProvider for SnapshotProvider<'a> { fn header_td(&self, block_hash: &BlockHash) -> RethResult> { // WIP - let mut cursor = self.cursor(); + let mut cursor = NippyJarCursor::new(self.jar.value())?; let row = cursor.row_by_key_with_cols::<0b11, 2>(&block_hash.0).unwrap().unwrap(); @@ -105,7 +268,7 @@ impl<'a> HeaderProvider for SnapshotProvider<'a> { } } -impl<'a> BlockHashReader for SnapshotProvider<'a> { +impl<'a> BlockHashReader for SnapshotJarProvider<'a> { fn block_hash(&self, _number: u64) -> RethResult> { todo!() } @@ -119,7 +282,7 @@ impl<'a> BlockHashReader for SnapshotProvider<'a> { } } -impl<'a> BlockNumReader for SnapshotProvider<'a> { +impl<'a> BlockNumReader for SnapshotJarProvider<'a> { fn chain_info(&self) -> RethResult { todo!() } @@ -137,14 +300,14 @@ impl<'a> BlockNumReader for SnapshotProvider<'a> { } } -impl<'a> TransactionsProvider for SnapshotProvider<'a> { +impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { fn transaction_id(&self, _tx_hash: TxHash) -> RethResult> { todo!() } fn transaction_by_id(&self, num: TxNumber) -> RethResult> { TransactionSignedNoHash::decompress( - self.cursor() + NippyJarCursor::new(self.jar.value())? .row_by_number_with_cols::<0b1, 1>( (num - self.jar.user_header().tx_start()) as usize, )? @@ -164,7 +327,7 @@ impl<'a> TransactionsProvider for SnapshotProvider<'a> { fn transaction_by_hash(&self, hash: TxHash) -> RethResult> { // WIP - let mut cursor = self.cursor(); + let mut cursor = NippyJarCursor::new(self.jar.value())?; let tx = TransactionSignedNoHash::decompress( cursor.row_by_key_with_cols::<0b1, 1>(&hash.0).unwrap().unwrap()[0], @@ -315,10 +478,11 @@ mod test { // Use providers to query Header data and compare if it matches { - let jar = NippyJar::load(snap_file.path()).unwrap(); - let db_provider = factory.provider().unwrap(); - let snap_provider = SnapshotProvider { jar: &jar }; + let manager = SnapshotProvider::default(); + let jar_provider = manager + .get_segment_provider(SnapshotSegment::Headers, 0, Some(snap_file.path().into())) + .unwrap(); assert!(!headers.is_empty()); @@ -331,12 +495,12 @@ mod test { // Compare Header assert_eq!(header, db_provider.header(&header_hash).unwrap().unwrap()); - assert_eq!(header, snap_provider.header(&header_hash).unwrap().unwrap()); + assert_eq!(header, jar_provider.header(&header_hash).unwrap().unwrap()); // Compare HeaderTD assert_eq!( db_provider.header_td(&header_hash).unwrap().unwrap(), - snap_provider.header_td(&header_hash).unwrap().unwrap() + jar_provider.header_td(&header_hash).unwrap().unwrap() ); } } From 0beb01ac34417d952165988f1240d2f702e5a165 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Mon, 23 Oct 2023 13:43:08 +0000 Subject: [PATCH 24/44] docs --- crates/storage/nippy-jar/src/compression/zstd.rs | 10 ++++++++++ crates/storage/provider/src/providers/snapshot.rs | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/storage/nippy-jar/src/compression/zstd.rs b/crates/storage/nippy-jar/src/compression/zstd.rs index 4d8a1212631..c862358bb11 100644 --- a/crates/storage/nippy-jar/src/compression/zstd.rs +++ b/crates/storage/nippy-jar/src/compression/zstd.rs @@ -259,6 +259,7 @@ where Ok(dictionaries.map(|dicts| Arc::new(ZstdDictionaries::load(dicts)))) } +/// List of [`ZstdDictionary`] #[cfg_attr(test, derive(PartialEq))] #[derive(Serialize, Deserialize)] pub struct ZstdDictionaries<'a>(pub Vec>); @@ -270,10 +271,12 @@ impl<'a> std::fmt::Debug for ZstdDictionaries<'a> { } impl<'a> ZstdDictionaries<'a> { + /// Creates [`ZstdDictionaries`]. pub fn new(raw: Vec) -> Self { Self(raw.into_iter().map(ZstdDictionary::Raw).collect()) } + /// Loads a list [`RawDictionary`] into a list of [`ZstdDictionary::Loaded`]. pub fn load(raw: Vec) -> Self { Self( raw.into_iter() @@ -282,10 +285,12 @@ impl<'a> ZstdDictionaries<'a> { ) } + /// Dictionary count. pub fn len(&self) -> usize { self.0.len() } + /// Creates a list of decompressors from a list of [`ZstdDictionary::Loaded`]. pub fn decompressors(&self) -> Result>, NippyJarError> { Ok(self .0 @@ -294,6 +299,7 @@ impl<'a> ZstdDictionaries<'a> { .collect::, _>>()?) } + /// Creates a list of compressors from a list of [`ZstdDictionary::Raw`]. pub fn compressors(&self) -> Result>, NippyJarError> { Ok(self .0 @@ -303,12 +309,15 @@ impl<'a> ZstdDictionaries<'a> { } } +/// A Zstd dictionary. It's created and serialized with [`ZstdDictionary::Raw`], and deserialized as +/// [`ZstdDictionary::Loaded`]. pub enum ZstdDictionary<'a> { Raw(RawDictionary), Loaded(DecoderDictionary<'a>), } impl<'a> ZstdDictionary<'a> { + /// Returns a reference to the expected `RawDictionary` pub fn raw(&self) -> &RawDictionary { match self { ZstdDictionary::Raw(dict) => dict, @@ -316,6 +325,7 @@ impl<'a> ZstdDictionary<'a> { } } + /// Returns a reference to the expected `DecoderDictionary` pub fn loaded(&self) -> &DecoderDictionary<'_> { match self { ZstdDictionary::Raw(_) => unreachable!(), diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index 9229b4f0e8b..53439f426bd 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -129,7 +129,8 @@ impl TransactionsProvider for SnapshotProvider { fn transaction_by_id(&self, num: TxNumber) -> RethResult> { // TODO `num` is provided after checking the index let block_num = num; - self.get_segment_provider(SnapshotSegment::Transactions, block_num, None)?.transaction_by_id(num) + self.get_segment_provider(SnapshotSegment::Transactions, block_num, None)? + .transaction_by_id(num) } fn transaction_by_id_no_hash( From b4d1d1e5c50f13dc0e227ea2180777660a99262a Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:10:59 +0100 Subject: [PATCH 25/44] Update bin/reth/src/db/snapshots/receipts.rs Co-authored-by: Alexey Shekhirin --- bin/reth/src/db/snapshots/receipts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/reth/src/db/snapshots/receipts.rs b/bin/reth/src/db/snapshots/receipts.rs index 4d55438406b..ded05c63c9d 100644 --- a/bin/reth/src/db/snapshots/receipts.rs +++ b/bin/reth/src/db/snapshots/receipts.rs @@ -116,7 +116,7 @@ impl Command { row_indexes.shuffle(&mut rng); } - // BENCHMARK QUERYING A RANDOM HEADER BY NUMBER + // BENCHMARK QUERYING A RANDOM RECEIPT BY NUMBER { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( From 79c72db0ee802a11eb860eefcb1dfc11972fac15 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:11:08 +0100 Subject: [PATCH 26/44] Update bin/reth/src/db/snapshots/transactions.rs Co-authored-by: Alexey Shekhirin --- bin/reth/src/db/snapshots/transactions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index bf410585248..97b20c8a7e6 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -117,7 +117,7 @@ impl Command { row_indexes.shuffle(&mut rng); } - // BENCHMARK QUERYING A RANDOM HEADER BY NUMBER + // BENCHMARK QUERYING A RANDOM TRANSACTION BY NUMBER { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( From 8880d6f718a95f4cc4934a514a46980711e4512e Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:11:19 +0100 Subject: [PATCH 27/44] Update bin/reth/src/db/snapshots/transactions.rs Co-authored-by: Alexey Shekhirin --- bin/reth/src/db/snapshots/transactions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index 97b20c8a7e6..d5286076e5f 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -142,7 +142,7 @@ impl Command { )?; } - // BENCHMARK QUERYING A RANDOM HEADER BY HASH + // BENCHMARK QUERYING A RANDOM TRANSACTION BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; let transaction_hash = From 6224354a9827352d74a1c93ead5d889315046cf1 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:11:27 +0100 Subject: [PATCH 28/44] Update bin/reth/src/db/snapshots/receipts.rs Co-authored-by: Alexey Shekhirin --- bin/reth/src/db/snapshots/receipts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/reth/src/db/snapshots/receipts.rs b/bin/reth/src/db/snapshots/receipts.rs index ded05c63c9d..e5aa2e9158e 100644 --- a/bin/reth/src/db/snapshots/receipts.rs +++ b/bin/reth/src/db/snapshots/receipts.rs @@ -140,7 +140,7 @@ impl Command { )?; } - // BENCHMARK QUERYING A RANDOM HEADER BY HASH + // BENCHMARK QUERYING A RANDOM RECEIPT BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; let tx_hash = From 8e56d1d270720d1788455c4e5f4d6a5c76920770 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:13:30 +0100 Subject: [PATCH 29/44] Update crates/interfaces/src/provider.rs Co-authored-by: Alexey Shekhirin --- crates/interfaces/src/provider.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/interfaces/src/provider.rs b/crates/interfaces/src/provider.rs index 59ed59d8c37..45230ae81b6 100644 --- a/crates/interfaces/src/provider.rs +++ b/crates/interfaces/src/provider.rs @@ -37,15 +37,15 @@ pub enum ProviderError { /// Thrown when required header related data was not found but was required. #[error("No header found for {0:?}")] HeaderNotFound(BlockHashOrNumber), - /// Thrown we were unable to find a specific block + /// The specific block is missing. #[error("Block does not exist {0:?}")] BlockNotFound(BlockHashOrNumber), - /// Thrown we were unable to find a specific transaction + /// The specific transaction is missing. #[error("Transaction does not exist {0:?}")] - TransactionNotFound(BlockHashOrNumber), - /// Thrown we were unable to find a specific receipt + TransactionNotFound(TxNumber), + /// The specific receipt is missing #[error("Receipt does not exist {0:?}")] - ReceiptNotFound(BlockHashOrNumber), + ReceiptNotFound(TxNumber), /// Thrown we were unable to find the best block #[error("Best block does not exist")] BestBlockNotFound, From 4fb187152b4913de6f51ff2b1d6854ab8543c60d Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 10:48:16 +0000 Subject: [PATCH 30/44] add transaction_hashes_by_range to provider --- crates/snapshot/src/segments/receipts.rs | 10 ++- crates/snapshot/src/segments/transactions.rs | 10 ++- crates/stages/src/stages/tx_lookup.rs | 5 +- crates/storage/db/src/tables/extensions.rs | 65 ------------------- crates/storage/db/src/tables/mod.rs | 1 - .../src/providers/database/provider.rs | 63 +++++++++++++++++- .../provider/src/traits/transactions.rs | 8 ++- 7 files changed, 78 insertions(+), 84 deletions(-) delete mode 100644 crates/storage/db/src/tables/extensions.rs diff --git a/crates/snapshot/src/segments/receipts.rs b/crates/snapshot/src/segments/receipts.rs index 45e9edfaac9..4fb2e399d11 100644 --- a/crates/snapshot/src/segments/receipts.rs +++ b/crates/snapshot/src/segments/receipts.rs @@ -51,12 +51,10 @@ impl Segment for Receipts { let mut hashes = None; if self.filters.has_filters() { hashes = Some( - tables::Transactions::recover_hashes( - provider.tx_ref(), - *tx_range.start()..(*tx_range.end() + 1), - )? - .into_iter() - .map(|(tx, _)| Ok(tx)), + provider + .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? + .into_iter() + .map(|(tx, _)| Ok(tx)), ); } diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs index 889e8ece8eb..09d120c09db 100644 --- a/crates/snapshot/src/segments/transactions.rs +++ b/crates/snapshot/src/segments/transactions.rs @@ -51,12 +51,10 @@ impl Segment for Transactions { let mut hashes = None; if self.filters.has_filters() { hashes = Some( - tables::Transactions::recover_hashes( - provider.tx_ref(), - *tx_range.start()..(*tx_range.end() + 1), - )? - .into_iter() - .map(|(tx, _)| Ok(tx)), + provider + .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? + .into_iter() + .map(|(tx, _)| Ok(tx)), ); } diff --git a/crates/stages/src/stages/tx_lookup.rs b/crates/stages/src/stages/tx_lookup.rs index bad1ff9ca13..758fa403320 100644 --- a/crates/stages/src/stages/tx_lookup.rs +++ b/crates/stages/src/stages/tx_lookup.rs @@ -13,6 +13,7 @@ use reth_primitives::{ }; use reth_provider::{ BlockReader, DatabaseProviderRW, PruneCheckpointReader, PruneCheckpointWriter, + TransactionsProviderExt, }; use tracing::*; @@ -92,12 +93,12 @@ impl Stage for TransactionLookupStage { debug!(target: "sync::stages::transaction_lookup", ?tx_range, "Updating transaction lookup"); - let tx = provider.tx_ref(); - let mut tx_list = tables::Transactions::recover_hashes(tx, tx_range)?; + let mut tx_list = provider.transaction_hashes_by_range(tx_range)?; // Sort before inserting the reverse lookup for hash -> tx_id. tx_list.par_sort_unstable_by(|txa, txb| txa.0.cmp(&txb.0)); + let tx = provider.tx_ref(); let mut txhash_cursor = tx.cursor_write::()?; // If the last inserted element in the database is equal or bigger than the first diff --git a/crates/storage/db/src/tables/extensions.rs b/crates/storage/db/src/tables/extensions.rs deleted file mode 100644 index 4b5ee4118a8..00000000000 --- a/crates/storage/db/src/tables/extensions.rs +++ /dev/null @@ -1,65 +0,0 @@ -use itertools::Itertools; -use std::{ops::Range, sync::mpsc}; - -use reth_interfaces::{db::DatabaseError, RethError, RethResult}; -use reth_primitives::{keccak256, TransactionSignedNoHash, TxHash, TxNumber, B256}; - -use crate::{abstraction::cursor::DbCursorRO, transaction::DbTx, Transactions}; - -impl Transactions { - /// Recovers transaction hashes by walking through [`crate::tables::Transactions`] table and - /// calculating them in a parallel manner. Returned unsorted. - pub fn recover_hashes( - tx: &TX, - tx_range: Range, - ) -> RethResult> { - let mut tx_cursor = tx.cursor_read::()?; - let tx_range_size = tx_range.clone().count(); - let tx_walker = tx_cursor.walk_range(tx_range)?; - - let chunk_size = (tx_range_size / rayon::current_num_threads()).max(1); - let mut channels = Vec::with_capacity(chunk_size); - let mut transaction_count = 0; - - for chunk in &tx_walker.chunks(chunk_size) { - let (tx, rx) = mpsc::channel(); - channels.push(rx); - - // Note: Unfortunate side-effect of how chunk is designed in itertools (it is not Send) - let chunk: Vec<_> = chunk.collect(); - transaction_count += chunk.len(); - - // Spawn the task onto the global rayon pool - // This task will send the results through the channel after it has calculated the hash. - rayon::spawn(move || { - let mut rlp_buf = Vec::with_capacity(128); - for entry in chunk { - rlp_buf.clear(); - let _ = tx.send(calculate_hash(entry, &mut rlp_buf)); - } - }); - } - let mut tx_list = Vec::with_capacity(transaction_count); - - // Iterate over channels and append the tx hashes unsorted - for channel in channels { - while let Ok(tx) = channel.recv() { - let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?; - tx_list.push((tx_hash, tx_id)); - } - } - - Ok(tx_list) - } -} - -/// Calculates the hash of the given transaction -#[inline] -fn calculate_hash( - entry: Result<(TxNumber, TransactionSignedNoHash), DatabaseError>, - rlp_buf: &mut Vec, -) -> Result<(B256, TxNumber), Box> { - let (tx_id, tx) = entry.map_err(|e| Box::new(e.into()))?; - tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false); - Ok((keccak256(rlp_buf), tx_id)) -} diff --git a/crates/storage/db/src/tables/mod.rs b/crates/storage/db/src/tables/mod.rs index efd00143c46..7171f137c6b 100644 --- a/crates/storage/db/src/tables/mod.rs +++ b/crates/storage/db/src/tables/mod.rs @@ -13,7 +13,6 @@ //! TODO(onbjerg): Find appropriate format for this... pub mod codecs; -mod extensions; pub mod models; mod raw; pub(crate) mod utils; diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index c90b7b78bb7..9c281c0b02b 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -24,7 +24,7 @@ use reth_db::{ }; use reth_interfaces::{ executor::{BlockExecutionError, BlockValidationError}, - RethResult, + RethError, RethResult, }; use reth_primitives::{ keccak256, @@ -46,7 +46,7 @@ use std::{ collections::{hash_map, BTreeMap, BTreeSet, HashMap, HashSet}, fmt::Debug, ops::{Deref, DerefMut, Range, RangeBounds, RangeInclusive}, - sync::Arc, + sync::{mpsc, Arc}, }; /// A [`DatabaseProvider`] that holds a read-only database transaction. @@ -1139,7 +1139,64 @@ impl BlockReader for DatabaseProvider { } } -impl TransactionsProviderExt for DatabaseProvider {} +impl TransactionsProviderExt for DatabaseProvider { + /// Recovers transaction hashes by walking through [`crate::tables::Transactions`] table and + /// calculating them in a parallel manner. Returned unsorted. + fn transaction_hashes_by_range( + &self, + tx_range: Range, + ) -> RethResult> { + let mut tx_cursor = self.tx.cursor_read::()?; + let tx_range_size = tx_range.clone().count(); + let tx_walker = tx_cursor.walk_range(tx_range)?; + + let chunk_size = (tx_range_size / rayon::current_num_threads()).max(1); + let mut channels = Vec::with_capacity(chunk_size); + let mut transaction_count = 0; + + #[inline] + fn calculate_hash( + entry: Result<(TxNumber, TransactionSignedNoHash), DatabaseError>, + rlp_buf: &mut Vec, + ) -> Result<(B256, TxNumber), Box> { + let (tx_id, tx) = entry.map_err(|e| Box::new(e.into()))?; + tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false); + Ok((keccak256(rlp_buf), tx_id)) + } + + for chunk in &tx_walker.chunks(chunk_size) { + let (tx, rx) = mpsc::channel(); + channels.push(rx); + + // Note: Unfortunate side-effect of how chunk is designed in itertools (it is not Send) + let chunk: Vec<_> = chunk.collect(); + transaction_count += chunk.len(); + + // Spawn the task onto the global rayon pool + // This task will send the results through the channel after it has calculated the hash. + rayon::spawn(move || { + let mut rlp_buf = Vec::with_capacity(128); + for entry in chunk { + rlp_buf.clear(); + let _ = tx.send(calculate_hash(entry, &mut rlp_buf)); + } + }); + } + let mut tx_list = Vec::with_capacity(transaction_count); + + // Iterate over channels and append the tx hashes unsorted + for channel in channels { + while let Ok(tx) = channel.recv() { + let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?; + tx_list.push((tx_hash, tx_id)); + } + } + + Ok(tx_list) + } +} + +/// Calculates the hash of the given transaction impl TransactionsProvider for DatabaseProvider { fn transaction_id(&self, tx_hash: TxHash) -> RethResult> { diff --git a/crates/storage/provider/src/traits/transactions.rs b/crates/storage/provider/src/traits/transactions.rs index 754ae5582f5..2f9c72ed191 100644 --- a/crates/storage/provider/src/traits/transactions.rs +++ b/crates/storage/provider/src/traits/transactions.rs @@ -4,7 +4,7 @@ use reth_primitives::{ Address, BlockHashOrNumber, BlockNumber, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, }; -use std::ops::{RangeBounds, RangeInclusive}; +use std::ops::{Range, RangeBounds, RangeInclusive}; /// Client trait for fetching [TransactionSigned] related data. #[auto_impl::auto_impl(&, Arc)] @@ -84,4 +84,10 @@ pub trait TransactionsProviderExt: BlockReader + Send + Sync { Ok(from..=to) } + + /// Get transaction hashes from a transaction range. + fn transaction_hashes_by_range( + &self, + tx_range: Range, + ) -> RethResult>; } From 9ab6333ea53bbeac45ea9667149d64d341318d11 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 10:48:39 +0000 Subject: [PATCH 31/44] add TxHashOrNumber type --- crates/interfaces/src/provider.rs | 8 +++++--- crates/primitives/src/lib.rs | 2 +- crates/primitives/src/transaction/mod.rs | 5 ++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/interfaces/src/provider.rs b/crates/interfaces/src/provider.rs index 45230ae81b6..441a8c10185 100644 --- a/crates/interfaces/src/provider.rs +++ b/crates/interfaces/src/provider.rs @@ -1,4 +1,6 @@ -use reth_primitives::{Address, BlockHash, BlockHashOrNumber, BlockNumber, TxNumber, B256}; +use reth_primitives::{ + Address, BlockHash, BlockHashOrNumber, BlockNumber, TxHashOrNumber, TxNumber, B256, +}; /// Bundled errors variants thrown by various providers. #[allow(missing_docs)] @@ -42,10 +44,10 @@ pub enum ProviderError { BlockNotFound(BlockHashOrNumber), /// The specific transaction is missing. #[error("Transaction does not exist {0:?}")] - TransactionNotFound(TxNumber), + TransactionNotFound(TxHashOrNumber), /// The specific receipt is missing #[error("Receipt does not exist {0:?}")] - ReceiptNotFound(TxNumber), + ReceiptNotFound(TxHashOrNumber), /// Thrown we were unable to find the best block #[error("Best block does not exist")] BestBlockNotFound, diff --git a/crates/primitives/src/lib.rs b/crates/primitives/src/lib.rs index c82cf4c3fa4..e573c750956 100644 --- a/crates/primitives/src/lib.rs +++ b/crates/primitives/src/lib.rs @@ -91,7 +91,7 @@ pub use transaction::{ IntoRecoveredTransaction, InvalidTransactionError, PooledTransactionsElement, PooledTransactionsElementEcRecovered, Signature, Transaction, TransactionKind, TransactionMeta, TransactionSigned, TransactionSignedEcRecovered, TransactionSignedNoHash, TxEip1559, TxEip2930, - TxEip4844, TxLegacy, TxType, TxValue, EIP1559_TX_TYPE_ID, EIP2930_TX_TYPE_ID, + TxEip4844, TxHashOrNumber, TxLegacy, TxType, TxValue, EIP1559_TX_TYPE_ID, EIP2930_TX_TYPE_ID, EIP4844_TX_TYPE_ID, LEGACY_TX_TYPE_ID, }; pub use withdrawal::Withdrawal; diff --git a/crates/primitives/src/transaction/mod.rs b/crates/primitives/src/transaction/mod.rs index 5c9cccaebd6..018f1b32be2 100644 --- a/crates/primitives/src/transaction/mod.rs +++ b/crates/primitives/src/transaction/mod.rs @@ -1,6 +1,6 @@ use crate::{ compression::{TRANSACTION_COMPRESSOR, TRANSACTION_DECOMPRESSOR}, - keccak256, Address, Bytes, TxHash, B256, + keccak256, Address, BlockHashOrNumber, Bytes, TxHash, B256, }; use alloy_rlp::{ Decodable, Encodable, Error as RlpError, Header, EMPTY_LIST_CODE, EMPTY_STRING_CODE, @@ -1308,6 +1308,9 @@ impl IntoRecoveredTransaction for TransactionSignedEcRecovered { } } +/// Either a transaction hash or number. +pub type TxHashOrNumber = BlockHashOrNumber; + #[cfg(test)] mod tests { use crate::{ From c562e0c77e3b7a2c569fb94b85d0805497147760 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:49:28 +0100 Subject: [PATCH 32/44] Update bin/reth/src/db/snapshots/receipts.rs Co-authored-by: Alexey Shekhirin --- bin/reth/src/db/snapshots/receipts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/reth/src/db/snapshots/receipts.rs b/bin/reth/src/db/snapshots/receipts.rs index e5aa2e9158e..b6b472170d2 100644 --- a/bin/reth/src/db/snapshots/receipts.rs +++ b/bin/reth/src/db/snapshots/receipts.rs @@ -61,7 +61,7 @@ impl Command { let mut rng = rand::thread_rng(); let mut dictionaries = None; let mut jar = NippyJar::load(&get_snapshot_segment_file_name( - SnapshotSegment::Headers, + SnapshotSegment::Receipts, filters, compression, &block_range, From 27d6ab287c3227dd3a81b1fba1f5b4d4fba6ba52 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:49:35 +0100 Subject: [PATCH 33/44] Update bin/reth/src/db/snapshots/transactions.rs Co-authored-by: Alexey Shekhirin --- bin/reth/src/db/snapshots/transactions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index d5286076e5f..8c4544386b6 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -61,7 +61,7 @@ impl Command { let mut rng = rand::thread_rng(); let mut dictionaries = None; let mut jar = NippyJar::load(&get_snapshot_segment_file_name( - SnapshotSegment::Headers, + SnapshotSegment::Transactions, filters, compression, &block_range, From 2f8d3035169b24a422dd34cbd27312c2a1efd2ed Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 11:11:31 +0000 Subject: [PATCH 34/44] clippy --- crates/storage/codecs/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/storage/codecs/src/lib.rs b/crates/storage/codecs/src/lib.rs index 2866e79fb38..fee674a23ab 100644 --- a/crates/storage/codecs/src/lib.rs +++ b/crates/storage/codecs/src/lib.rs @@ -14,8 +14,6 @@ use revm_primitives::{ Address, Bytes, B256, U256, }; -pub use codecs_derive::*; - /// Trait that implements the `Compact` codec. /// /// When deriving the trait for custom structs, be aware of certain limitations/recommendations: From f2768f306e0e090ee94bfc9f15f1e3c07da309b0 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 11:37:41 +0000 Subject: [PATCH 35/44] small fixes --- Cargo.lock | 1 + crates/primitives/src/snapshot/mod.rs | 6 ++-- crates/storage/nippy-jar/Cargo.toml | 1 + .../storage/nippy-jar/src/compression/zstd.rs | 36 ++++++++++--------- crates/storage/nippy-jar/src/cursor.rs | 11 ++++-- crates/storage/nippy-jar/src/error.rs | 4 +++ .../provider/src/providers/snapshot.rs | 2 +- 7 files changed, 37 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3852d25d464..38c2281d437 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6195,6 +6195,7 @@ dependencies = [ "bincode", "bytes", "cuckoofilter", + "derive_more", "hex", "lz4_flex", "memmap2 0.7.1", diff --git a/crates/primitives/src/snapshot/mod.rs b/crates/primitives/src/snapshot/mod.rs index 95c3f1f53fb..76b56e7d0b6 100644 --- a/crates/primitives/src/snapshot/mod.rs +++ b/crates/primitives/src/snapshot/mod.rs @@ -4,9 +4,9 @@ mod compression; mod filters; mod segment; -/// Default snapshot block count. -pub const SNAPSHOT_BLOCK_NUMBER_CHUNKS: u64 = 500_000; - pub use compression::Compression; pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; pub use segment::{SegmentHeader, SnapshotSegment}; + +/// Default snapshot block count. +pub const SNAPSHOT_BLOCK_NUMBER_CHUNKS: u64 = 500_000; diff --git a/crates/storage/nippy-jar/Cargo.toml b/crates/storage/nippy-jar/Cargo.toml index 9be19824a6b..4a2fc967bdc 100644 --- a/crates/storage/nippy-jar/Cargo.toml +++ b/crates/storage/nippy-jar/Cargo.toml @@ -34,6 +34,7 @@ tracing-appender = "0.2" anyhow = "1.0" thiserror.workspace = true hex = "*" +derive_more = "0.99" [dev-dependencies] rand = { version = "0.8", features = ["small_rng"] } diff --git a/crates/storage/nippy-jar/src/compression/zstd.rs b/crates/storage/nippy-jar/src/compression/zstd.rs index c862358bb11..64b102e0374 100644 --- a/crates/storage/nippy-jar/src/compression/zstd.rs +++ b/crates/storage/nippy-jar/src/compression/zstd.rs @@ -1,4 +1,5 @@ use crate::{compression::Compression, NippyJarError}; +use derive_more::Deref; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::{ fs::File, @@ -261,8 +262,8 @@ where /// List of [`ZstdDictionary`] #[cfg_attr(test, derive(PartialEq))] -#[derive(Serialize, Deserialize)] -pub struct ZstdDictionaries<'a>(pub Vec>); +#[derive(Serialize, Deserialize, Deref)] +pub struct ZstdDictionaries<'a>(Vec>); impl<'a> std::fmt::Debug for ZstdDictionaries<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -285,26 +286,27 @@ impl<'a> ZstdDictionaries<'a> { ) } - /// Dictionary count. - pub fn len(&self) -> usize { - self.0.len() - } - /// Creates a list of decompressors from a list of [`ZstdDictionary::Loaded`]. pub fn decompressors(&self) -> Result>, NippyJarError> { Ok(self - .0 .iter() - .map(|a| Decompressor::with_prepared_dictionary(a.loaded())) + .flat_map(|dict| { + dict.loaded() + .ok_or(NippyJarError::DictionaryNotLoaded) + .map(Decompressor::with_prepared_dictionary) + }) .collect::, _>>()?) } /// Creates a list of compressors from a list of [`ZstdDictionary::Raw`]. pub fn compressors(&self) -> Result>, NippyJarError> { Ok(self - .0 .iter() - .map(|a| Compressor::with_dictionary(0, a.raw())) + .flat_map(|dict| { + dict.raw() + .ok_or(NippyJarError::CompressorNotAllowed) + .map(|dict| Compressor::with_dictionary(0, dict)) + }) .collect::, _>>()?) } } @@ -318,18 +320,18 @@ pub enum ZstdDictionary<'a> { impl<'a> ZstdDictionary<'a> { /// Returns a reference to the expected `RawDictionary` - pub fn raw(&self) -> &RawDictionary { + pub fn raw(&self) -> Option<&RawDictionary> { match self { - ZstdDictionary::Raw(dict) => dict, - ZstdDictionary::Loaded(_) => unreachable!(), + ZstdDictionary::Raw(dict) => Some(dict), + ZstdDictionary::Loaded(_) => None, } } /// Returns a reference to the expected `DecoderDictionary` - pub fn loaded(&self) -> &DecoderDictionary<'_> { + pub fn loaded(&self) -> Option<&DecoderDictionary<'_>> { match self { - ZstdDictionary::Raw(_) => unreachable!(), - ZstdDictionary::Loaded(dict) => dict, + ZstdDictionary::Raw(_) => None, + ZstdDictionary::Loaded(dict) => Some(dict), } } } diff --git a/crates/storage/nippy-jar/src/cursor.rs b/crates/storage/nippy-jar/src/cursor.rs index 05217d6ffe2..ba876ccaeee 100644 --- a/crates/storage/nippy-jar/src/cursor.rs +++ b/crates/storage/nippy-jar/src/cursor.rs @@ -218,9 +218,14 @@ where let from = self.internal_buffer.len(); match compression { Compressors::Zstd(z) if z.use_dict => { - let mut decompressor = Decompressor::with_prepared_dictionary( - z.dictionaries.as_ref().expect("qed").0[column].loaded(), - )?; + // If we are here, then for sure we have the necessary dictionaries and they're + // loaded (happens during deserialization). Otherwise, there's an issue + // somewhere else and we can't recover here anyway. + let dictionaries = z.dictionaries.as_ref().expect("dictionaries to exist") + [column] + .loaded() + .expect("dictionary to be loaded"); + let mut decompressor = Decompressor::with_prepared_dictionary(dictionaries)?; Zstd::decompress_with_dictionary( &self.mmap_handle[column_offset_range], &mut self.internal_buffer, diff --git a/crates/storage/nippy-jar/src/error.rs b/crates/storage/nippy-jar/src/error.rs index b17d3d2163a..b466ce1be71 100644 --- a/crates/storage/nippy-jar/src/error.rs +++ b/crates/storage/nippy-jar/src/error.rs @@ -37,4 +37,8 @@ pub enum NippyJarError { UnsupportedFilterQuery, #[error("Compression or decompression requires a bigger destination output.")] OutputTooSmall, + #[error("Dictionary is not loaded.")] + DictionaryNotLoaded, + #[error("It's not possible to generate a compressor after loading a dictionary.")] + CompressorNotAllowed, } diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index 53439f426bd..76e34c1efa1 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -23,7 +23,7 @@ type NippyJarRef<'a> = pub struct SnapshotProvider { /// Maintains a map which allows for concurrent access to different `NippyJars`, over different /// segments and ranges. - pub map: DashMap<(BlockNumber, SnapshotSegment), NippyJar>, + map: DashMap<(BlockNumber, SnapshotSegment), NippyJar>, } impl SnapshotProvider { From 47e0036d556ae7b7510af4430fe1c0617831a2c6 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 11:50:57 +0000 Subject: [PATCH 36/44] add serde dictionaries --- .../storage/nippy-jar/src/compression/zstd.rs | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/crates/storage/nippy-jar/src/compression/zstd.rs b/crates/storage/nippy-jar/src/compression/zstd.rs index 64b102e0374..a70d566ca43 100644 --- a/crates/storage/nippy-jar/src/compression/zstd.rs +++ b/crates/storage/nippy-jar/src/compression/zstd.rs @@ -32,10 +32,7 @@ pub struct Zstd { /// Max size of a dictionary pub(crate) max_dict_size: usize, /// List of column dictionaries. - #[serde( - serialize_with = "serialize_dictionaries", - deserialize_with = "deserialize_dictionaries" - )] + #[serde(with = "dictionaries_serde")] pub(crate) dictionaries: Option>>, /// Number of columns to compress. columns: usize, @@ -237,27 +234,31 @@ impl Compression for Zstd { } } -fn serialize_dictionaries( - dictionaries: &Option>>, - serializer: S, -) -> Result -where - S: Serializer, -{ - match dictionaries { - Some(dicts) => serializer.serialize_some(dicts.as_ref()), - None => serializer.serialize_none(), +mod dictionaries_serde { + use super::*; + + pub fn serialize( + dictionaries: &Option>>, + serializer: S, + ) -> Result + where + S: Serializer, + { + match dictionaries { + Some(dicts) => serializer.serialize_some(dicts.as_ref()), + None => serializer.serialize_none(), + } } -} -fn deserialize_dictionaries<'de, D>( - deserializer: D, -) -> Result>>, D::Error> -where - D: Deserializer<'de>, -{ - let dictionaries: Option> = Option::deserialize(deserializer)?; - Ok(dictionaries.map(|dicts| Arc::new(ZstdDictionaries::load(dicts)))) + pub fn deserialize<'de, D>( + deserializer: D, + ) -> Result>>, D::Error> + where + D: Deserializer<'de>, + { + let dictionaries: Option> = Option::deserialize(deserializer)?; + Ok(dictionaries.map(|dicts| Arc::new(ZstdDictionaries::load(dicts)))) + } } /// List of [`ZstdDictionary`] From e5e71be6c05fa538a7611e3e539dec5fff219b3c Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 12:01:22 +0000 Subject: [PATCH 37/44] yes clippy --- crates/net/network/tests/it/clique/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/net/network/tests/it/clique/mod.rs b/crates/net/network/tests/it/clique/mod.rs index a8b2b8894db..d8fb1070c66 100644 --- a/crates/net/network/tests/it/clique/mod.rs +++ b/crates/net/network/tests/it/clique/mod.rs @@ -1,5 +1,5 @@ pub mod clique_middleware; mod geth; -pub use clique_middleware::{CliqueError, CliqueMiddleware, CliqueMiddlewareError}; +pub use clique_middleware::CliqueMiddleware; pub use geth::CliqueGethInstance; From 9cd888d8e9dd53a23b63add4f322cb6398a3dbb3 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 14:30:27 +0000 Subject: [PATCH 38/44] share mmap handle between snap providers --- crates/storage/nippy-jar/src/cursor.rs | 26 +++++----- crates/storage/nippy-jar/src/lib.rs | 45 ++++++++++++++-- .../provider/src/providers/snapshot.rs | 52 ++++++++++++++----- 3 files changed, 91 insertions(+), 32 deletions(-) diff --git a/crates/storage/nippy-jar/src/cursor.rs b/crates/storage/nippy-jar/src/cursor.rs index ba876ccaeee..96a4d73f83d 100644 --- a/crates/storage/nippy-jar/src/cursor.rs +++ b/crates/storage/nippy-jar/src/cursor.rs @@ -1,10 +1,9 @@ use crate::{ compression::{Compression, Compressors, Zstd}, - InclusionFilter, NippyJar, NippyJarError, PerfectHashingFunction, RefRow, + InclusionFilter, MmapHandle, NippyJar, NippyJarError, PerfectHashingFunction, RefRow, }; -use memmap2::Mmap; use serde::{de::Deserialize, ser::Serialize}; -use std::{fs::File, ops::Range, sync::Arc}; +use std::ops::Range; use sucds::int_vectors::Access; use zstd::bulk::Decompressor; @@ -14,10 +13,7 @@ pub struct NippyJarCursor<'a, H = ()> { /// [`NippyJar`] which holds most of the required configuration to read from the file. jar: &'a NippyJar, /// Data file. - #[allow(unused)] - file_handle: Arc, - /// Data file. - mmap_handle: Arc, + mmap_handle: MmapHandle, /// Internal buffer to unload data to without reallocating memory on each retrieval. internal_buffer: Vec, /// Cursor row position. @@ -37,17 +33,19 @@ impl<'a, H> NippyJarCursor<'a, H> where H: Send + Sync + Serialize + for<'b> Deserialize<'b> + std::fmt::Debug + 'static, { - pub fn new(jar: &'a NippyJar) -> Result { - let file = File::open(jar.data_path())?; - - // SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle. - let mmap = unsafe { Mmap::map(&file)? }; + pub fn new( + jar: &'a NippyJar, + mmap_handle: Option, + ) -> Result { let max_row_size = jar.max_row_size; + let mmap_handle = match mmap_handle { + Some(h) => h, + None => jar.open_data()?, + }; Ok(NippyJarCursor { jar, - file_handle: Arc::new(file), - mmap_handle: Arc::new(mmap), + mmap_handle, // Makes sure that we have enough buffer capacity to decompress any row of data. internal_buffer: Vec::with_capacity(max_row_size), row: 0, diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index 972bc9f0e52..ab8c4946476 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -10,6 +10,7 @@ #![deny(unused_must_use, rust_2018_idioms)] #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +use memmap2::Mmap; use serde::{Deserialize, Serialize}; use std::{ clone::Clone, @@ -17,7 +18,9 @@ use std::{ fs::File, io::{Seek, Write}, marker::Sync, + ops::Deref, path::{Path, PathBuf}, + sync::Arc, }; use sucds::{ int_vectors::PrefixSummedEliasFano, @@ -247,6 +250,11 @@ where .join(format!("{}.idx", data_path.file_name().expect("exists").to_string_lossy())) } + /// Returns a [`MmapHandle`] of the data file + pub fn open_data(&self) -> Result { + MmapHandle::new(self.data_path()) + } + /// If required, prepares any compression algorithm to an early pass of the data. pub fn prepare_compression( &mut self, @@ -487,6 +495,33 @@ where } } +#[derive(Debug, Clone)] +pub struct MmapHandle { + /// File descriptor. Needs to be kept alive as long as the mmap handle. + #[allow(unused)] + file: Arc, + /// Mmap handle. + mmap: Arc, +} + +impl MmapHandle { + pub fn new(path: PathBuf) -> Result { + let file = File::open(path)?; + + // SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle. + let mmap = unsafe { Mmap::map(&file)? }; + + Ok(Self { file: Arc::new(file), mmap: Arc::new(mmap) }) + } +} + +impl Deref for MmapHandle { + type Target = Mmap; + fn deref(&self) -> &Self::Target { + &self.mmap + } +} + #[cfg(test)] mod tests { use super::*; @@ -664,7 +699,7 @@ mod tests { if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { assert!(zstd.use_dict); - let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -699,7 +734,7 @@ mod tests { assert_eq!(nippy, loaded_nippy); if let Some(Compressors::Lz4(_)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -737,7 +772,7 @@ mod tests { if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { assert!(!zstd.use_dict); - let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -794,7 +829,7 @@ mod tests { assert_eq!(loaded_nippy.user_header().block_start, block_start); if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); // Iterate over compressed values and compare let mut row_num = 0usize; @@ -859,7 +894,7 @@ mod tests { let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); // Shuffled for chaos. let mut data = col1.iter().zip(col2.iter()).enumerate().collect::>(); diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index 76e34c1efa1..2154fe7272e 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -12,18 +12,45 @@ use reth_primitives::{ SnapshotSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256, }; -use std::{ops::RangeBounds, path::PathBuf}; +use std::{ + ops::{Deref, RangeBounds}, + path::PathBuf, +}; /// Alias type for each specific `NippyJar`. -type NippyJarRef<'a> = - dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), NippyJar>; +type NippyJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), LoadedJar>; + +/// Helper type to reuse an associated snapshot mmap handle on created cursors. +#[derive(Debug)] +struct LoadedJar { + jar: NippyJar, + data_file: reth_nippy_jar::MmapHandle, +} + +impl LoadedJar { + fn new(jar: NippyJar) -> RethResult { + let data_file = jar.open_data()?; + Ok(Self { jar, data_file }) + } + + fn data_file(&self) -> reth_nippy_jar::MmapHandle { + self.data_file.clone() + } +} + +impl Deref for LoadedJar { + type Target = NippyJar; + fn deref(&self) -> &Self::Target { + &self.jar + } +} /// SnapshotProvider #[derive(Debug, Default)] pub struct SnapshotProvider { /// Maintains a map which allows for concurrent access to different `NippyJars`, over different /// segments and ranges. - map: DashMap<(BlockNumber, SnapshotSegment), NippyJar>, + map: DashMap<(BlockNumber, SnapshotSegment), LoadedJar>, } impl SnapshotProvider { @@ -43,8 +70,7 @@ impl SnapshotProvider { } if let Some(path) = &path { - let jar = NippyJar::load(path)?; - self.map.insert(key, jar); + self.map.insert(key, LoadedJar::new(NippyJar::load(path)?)?); } else { path = Some(segment.filename( &((snapshot * SNAPSHOT_BLOCK_NUMBER_CHUNKS)..= @@ -189,7 +215,7 @@ impl TransactionsProvider for SnapshotProvider { #[derive(Debug)] pub struct SnapshotJarProvider<'a> { /// Reference to a value on [`SnapshotProvider`] - pub jar: NippyJarRef<'a>, + jar: NippyJarRef<'a>, } impl<'a> SnapshotJarProvider<'a> { @@ -198,14 +224,14 @@ impl<'a> SnapshotJarProvider<'a> { where 'b: 'a, { - Ok(NippyJarCursor::new(self.jar.value())?) + Ok(NippyJarCursor::new(self.jar.value(), Some(self.jar.data_file()))?) } } impl<'a> HeaderProvider for SnapshotJarProvider<'a> { fn header(&self, block_hash: &BlockHash) -> RethResult> { // WIP - let mut cursor = NippyJarCursor::new(self.jar.value())?; + let mut cursor = self.cursor()?; let header = Header::decompress( cursor.row_by_key_with_cols::<0b01, 2>(&block_hash.0).unwrap().unwrap()[0], @@ -222,7 +248,7 @@ impl<'a> HeaderProvider for SnapshotJarProvider<'a> { fn header_by_number(&self, num: BlockNumber) -> RethResult> { Header::decompress( - NippyJarCursor::new(self.jar.value())? + self.cursor()? .row_by_number_with_cols::<0b01, 2>( (num - self.jar.user_header().block_start()) as usize, )? @@ -234,7 +260,7 @@ impl<'a> HeaderProvider for SnapshotJarProvider<'a> { fn header_td(&self, block_hash: &BlockHash) -> RethResult> { // WIP - let mut cursor = NippyJarCursor::new(self.jar.value())?; + let mut cursor = NippyJarCursor::new(self.jar.value(), Some(self.jar.data_file()))?; let row = cursor.row_by_key_with_cols::<0b11, 2>(&block_hash.0).unwrap().unwrap(); @@ -308,7 +334,7 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { fn transaction_by_id(&self, num: TxNumber) -> RethResult> { TransactionSignedNoHash::decompress( - NippyJarCursor::new(self.jar.value())? + self.cursor()? .row_by_number_with_cols::<0b1, 1>( (num - self.jar.user_header().tx_start()) as usize, )? @@ -328,7 +354,7 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { fn transaction_by_hash(&self, hash: TxHash) -> RethResult> { // WIP - let mut cursor = NippyJarCursor::new(self.jar.value())?; + let mut cursor = self.cursor()?; let tx = TransactionSignedNoHash::decompress( cursor.row_by_key_with_cols::<0b1, 1>(&hash.0).unwrap().unwrap()[0], From ab58542595bdc46c11d8f9a723325a82319e0ae4 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Tue, 24 Oct 2023 15:00:16 +0000 Subject: [PATCH 39/44] split up snapshot provider mod --- crates/storage/nippy-jar/src/lib.rs | 1 + .../provider/src/providers/snapshot.rs | 535 ------------------ .../provider/src/providers/snapshot/jar.rs | 222 ++++++++ .../src/providers/snapshot/manager.rs | 177 ++++++ .../provider/src/providers/snapshot/mod.rs | 162 ++++++ 5 files changed, 562 insertions(+), 535 deletions(-) delete mode 100644 crates/storage/provider/src/providers/snapshot.rs create mode 100644 crates/storage/provider/src/providers/snapshot/jar.rs create mode 100644 crates/storage/provider/src/providers/snapshot/manager.rs create mode 100644 crates/storage/provider/src/providers/snapshot/mod.rs diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index ab8c4946476..47e1b066d64 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -495,6 +495,7 @@ where } } +/// Holds an `Arc` over a file and its associated mmap handle. #[derive(Debug, Clone)] pub struct MmapHandle { /// File descriptor. Needs to be kept alive as long as the mmap handle. diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs deleted file mode 100644 index 2154fe7272e..00000000000 --- a/crates/storage/provider/src/providers/snapshot.rs +++ /dev/null @@ -1,535 +0,0 @@ -use crate::{BlockHashReader, BlockNumReader, HeaderProvider, TransactionsProvider}; -use dashmap::DashMap; -use reth_db::{ - table::{Decompress, Table}, - HeaderTD, -}; -use reth_interfaces::{provider::ProviderError, RethResult}; -use reth_nippy_jar::{NippyJar, NippyJarCursor}; -use reth_primitives::{ - snapshot::{SegmentHeader, SNAPSHOT_BLOCK_NUMBER_CHUNKS}, - Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, SealedHeader, - SnapshotSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, - B256, U256, -}; -use std::{ - ops::{Deref, RangeBounds}, - path::PathBuf, -}; - -/// Alias type for each specific `NippyJar`. -type NippyJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), LoadedJar>; - -/// Helper type to reuse an associated snapshot mmap handle on created cursors. -#[derive(Debug)] -struct LoadedJar { - jar: NippyJar, - data_file: reth_nippy_jar::MmapHandle, -} - -impl LoadedJar { - fn new(jar: NippyJar) -> RethResult { - let data_file = jar.open_data()?; - Ok(Self { jar, data_file }) - } - - fn data_file(&self) -> reth_nippy_jar::MmapHandle { - self.data_file.clone() - } -} - -impl Deref for LoadedJar { - type Target = NippyJar; - fn deref(&self) -> &Self::Target { - &self.jar - } -} - -/// SnapshotProvider -#[derive(Debug, Default)] -pub struct SnapshotProvider { - /// Maintains a map which allows for concurrent access to different `NippyJars`, over different - /// segments and ranges. - map: DashMap<(BlockNumber, SnapshotSegment), LoadedJar>, -} - -impl SnapshotProvider { - /// Gets the provider of the requested segment and range. - pub fn get_segment_provider( - &self, - segment: SnapshotSegment, - block: BlockNumber, - mut path: Option, - ) -> RethResult> { - // TODO this invalidates custom length snapshots. - let snapshot = block / SNAPSHOT_BLOCK_NUMBER_CHUNKS; - let key = (snapshot, segment); - - if let Some(jar) = self.map.get(&key) { - return Ok(SnapshotJarProvider { jar }) - } - - if let Some(path) = &path { - self.map.insert(key, LoadedJar::new(NippyJar::load(path)?)?); - } else { - path = Some(segment.filename( - &((snapshot * SNAPSHOT_BLOCK_NUMBER_CHUNKS)..= - ((snapshot + 1) * SNAPSHOT_BLOCK_NUMBER_CHUNKS - 1)), - )); - } - - self.get_segment_provider(segment, block, path) - } -} - -impl HeaderProvider for SnapshotProvider { - fn header(&self, _block_hash: &BlockHash) -> RethResult> { - todo!() - } - - fn header_by_number(&self, num: BlockNumber) -> RethResult> { - self.get_segment_provider(SnapshotSegment::Headers, num, None)?.header_by_number(num) - } - - fn header_td(&self, _block_hash: &BlockHash) -> RethResult> { - todo!() - } - - fn header_td_by_number(&self, _number: BlockNumber) -> RethResult> { - todo!(); - } - - fn headers_range(&self, _range: impl RangeBounds) -> RethResult> { - todo!(); - } - - fn sealed_headers_range( - &self, - _range: impl RangeBounds, - ) -> RethResult> { - todo!(); - } - - fn sealed_header(&self, _number: BlockNumber) -> RethResult> { - todo!(); - } -} - -impl BlockHashReader for SnapshotProvider { - fn block_hash(&self, _number: u64) -> RethResult> { - todo!() - } - - fn canonical_hashes_range( - &self, - _start: BlockNumber, - _end: BlockNumber, - ) -> RethResult> { - todo!() - } -} - -impl BlockNumReader for SnapshotProvider { - fn chain_info(&self) -> RethResult { - todo!() - } - - fn best_block_number(&self) -> RethResult { - todo!() - } - - fn last_block_number(&self) -> RethResult { - todo!() - } - - fn block_number(&self, _hash: B256) -> RethResult> { - todo!() - } -} - -impl TransactionsProvider for SnapshotProvider { - fn transaction_id(&self, _tx_hash: TxHash) -> RethResult> { - todo!() - } - - fn transaction_by_id(&self, num: TxNumber) -> RethResult> { - // TODO `num` is provided after checking the index - let block_num = num; - self.get_segment_provider(SnapshotSegment::Transactions, block_num, None)? - .transaction_by_id(num) - } - - fn transaction_by_id_no_hash( - &self, - _id: TxNumber, - ) -> RethResult> { - todo!() - } - - fn transaction_by_hash(&self, _hash: TxHash) -> RethResult> { - todo!() - } - - fn transaction_by_hash_with_meta( - &self, - _hash: TxHash, - ) -> RethResult> { - todo!() - } - - fn transaction_block(&self, _id: TxNumber) -> RethResult> { - todo!() - } - - fn transactions_by_block( - &self, - _block_id: BlockHashOrNumber, - ) -> RethResult>> { - todo!() - } - - fn transactions_by_block_range( - &self, - _range: impl RangeBounds, - ) -> RethResult>> { - todo!() - } - - fn senders_by_tx_range(&self, _range: impl RangeBounds) -> RethResult> { - todo!() - } - - fn transactions_by_tx_range( - &self, - _range: impl RangeBounds, - ) -> RethResult> { - todo!() - } - - fn transaction_sender(&self, _id: TxNumber) -> RethResult> { - todo!() - } -} - -/// Provider over a specific `NippyJar` and range. -#[derive(Debug)] -pub struct SnapshotJarProvider<'a> { - /// Reference to a value on [`SnapshotProvider`] - jar: NippyJarRef<'a>, -} - -impl<'a> SnapshotJarProvider<'a> { - /// Provides a cursor for more granular data access. - pub fn cursor<'b>(&'b self) -> RethResult> - where - 'b: 'a, - { - Ok(NippyJarCursor::new(self.jar.value(), Some(self.jar.data_file()))?) - } -} - -impl<'a> HeaderProvider for SnapshotJarProvider<'a> { - fn header(&self, block_hash: &BlockHash) -> RethResult> { - // WIP - let mut cursor = self.cursor()?; - - let header = Header::decompress( - cursor.row_by_key_with_cols::<0b01, 2>(&block_hash.0).unwrap().unwrap()[0], - ) - .unwrap(); - - if &header.hash_slow() == block_hash { - return Ok(Some(header)) - } else { - // check next snapshot - } - Ok(None) - } - - fn header_by_number(&self, num: BlockNumber) -> RethResult> { - Header::decompress( - self.cursor()? - .row_by_number_with_cols::<0b01, 2>( - (num - self.jar.user_header().block_start()) as usize, - )? - .ok_or(ProviderError::HeaderNotFound(num.into()))?[0], - ) - .map(Some) - .map_err(Into::into) - } - - fn header_td(&self, block_hash: &BlockHash) -> RethResult> { - // WIP - let mut cursor = NippyJarCursor::new(self.jar.value(), Some(self.jar.data_file()))?; - - let row = cursor.row_by_key_with_cols::<0b11, 2>(&block_hash.0).unwrap().unwrap(); - - let header = Header::decompress(row[0]).unwrap(); - let td = ::Value::decompress(row[1]).unwrap(); - - if &header.hash_slow() == block_hash { - return Ok(Some(td.0)) - } else { - // check next snapshot - } - Ok(None) - } - - fn header_td_by_number(&self, _number: BlockNumber) -> RethResult> { - unimplemented!(); - } - - fn headers_range(&self, _range: impl RangeBounds) -> RethResult> { - unimplemented!(); - } - - fn sealed_headers_range( - &self, - _range: impl RangeBounds, - ) -> RethResult> { - unimplemented!(); - } - - fn sealed_header(&self, _number: BlockNumber) -> RethResult> { - unimplemented!(); - } -} - -impl<'a> BlockHashReader for SnapshotJarProvider<'a> { - fn block_hash(&self, _number: u64) -> RethResult> { - todo!() - } - - fn canonical_hashes_range( - &self, - _start: BlockNumber, - _end: BlockNumber, - ) -> RethResult> { - todo!() - } -} - -impl<'a> BlockNumReader for SnapshotJarProvider<'a> { - fn chain_info(&self) -> RethResult { - todo!() - } - - fn best_block_number(&self) -> RethResult { - todo!() - } - - fn last_block_number(&self) -> RethResult { - todo!() - } - - fn block_number(&self, _hash: B256) -> RethResult> { - todo!() - } -} - -impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { - fn transaction_id(&self, _tx_hash: TxHash) -> RethResult> { - todo!() - } - - fn transaction_by_id(&self, num: TxNumber) -> RethResult> { - TransactionSignedNoHash::decompress( - self.cursor()? - .row_by_number_with_cols::<0b1, 1>( - (num - self.jar.user_header().tx_start()) as usize, - )? - .ok_or(ProviderError::TransactionNotFound(num.into()))?[0], - ) - .map(Into::into) - .map(Some) - .map_err(Into::into) - } - - fn transaction_by_id_no_hash( - &self, - _id: TxNumber, - ) -> RethResult> { - todo!() - } - - fn transaction_by_hash(&self, hash: TxHash) -> RethResult> { - // WIP - let mut cursor = self.cursor()?; - - let tx = TransactionSignedNoHash::decompress( - cursor.row_by_key_with_cols::<0b1, 1>(&hash.0).unwrap().unwrap()[0], - ) - .unwrap() - .with_hash(); - - if tx.hash() == hash { - return Ok(Some(tx)) - } else { - // check next snapshot - } - Ok(None) - } - - fn transaction_by_hash_with_meta( - &self, - _hash: TxHash, - ) -> RethResult> { - todo!() - } - - fn transaction_block(&self, _id: TxNumber) -> RethResult> { - todo!() - } - - fn transactions_by_block( - &self, - _block_id: BlockHashOrNumber, - ) -> RethResult>> { - todo!() - } - - fn transactions_by_block_range( - &self, - _range: impl RangeBounds, - ) -> RethResult>> { - todo!() - } - - fn senders_by_tx_range(&self, _range: impl RangeBounds) -> RethResult> { - todo!() - } - - fn transactions_by_tx_range( - &self, - _range: impl RangeBounds, - ) -> RethResult> { - todo!() - } - - fn transaction_sender(&self, _id: TxNumber) -> RethResult> { - todo!() - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::ProviderFactory; - use rand::{self, seq::SliceRandom}; - use reth_db::{ - cursor::DbCursorRO, - database::Database, - snapshot::create_snapshot_T1_T2, - test_utils::create_test_rw_db, - transaction::{DbTx, DbTxMut}, - CanonicalHeaders, DatabaseError, HeaderNumbers, HeaderTD, Headers, RawTable, - }; - use reth_interfaces::test_utils::generators::{self, random_header_range}; - use reth_nippy_jar::NippyJar; - use reth_primitives::{B256, MAINNET}; - - #[test] - fn test_snap() { - // Ranges - let row_count = 100u64; - let range = 0..=(row_count - 1); - let segment_header = SegmentHeader::new(range.clone(), range.clone()); - - // Data sources - let db = create_test_rw_db(); - let factory = ProviderFactory::new(&db, MAINNET.clone()); - let snap_file = tempfile::NamedTempFile::new().unwrap(); - - // Setup data - let mut headers = random_header_range( - &mut generators::rng(), - *range.start()..(*range.end() + 1), - B256::random(), - ); - - db.update(|tx| -> Result<(), DatabaseError> { - let mut td = U256::ZERO; - for header in headers.clone() { - td += header.header.difficulty; - let hash = header.hash(); - - tx.put::(header.number, hash)?; - tx.put::(header.number, header.clone().unseal())?; - tx.put::(header.number, td.into())?; - tx.put::(hash, header.number)?; - } - Ok(()) - }) - .unwrap() - .unwrap(); - - // Create Snapshot - { - let with_compression = true; - let with_filter = true; - - let mut nippy_jar = NippyJar::new(2, snap_file.path(), segment_header); - - if with_compression { - nippy_jar = nippy_jar.with_zstd(false, 0); - } - - if with_filter { - nippy_jar = nippy_jar.with_cuckoo_filter(row_count as usize + 10).with_fmph(); - } - - let tx = db.tx().unwrap(); - - // Hacky type inference. TODO fix - let mut none_vec = Some(vec![vec![vec![0u8]].into_iter()]); - let _ = none_vec.take(); - - // Generate list of hashes for filters & PHF - let mut cursor = tx.cursor_read::>().unwrap(); - let hashes = cursor - .walk(None) - .unwrap() - .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())); - - create_snapshot_T1_T2::( - &tx, - range, - None, - none_vec, - Some(hashes), - row_count as usize, - &mut nippy_jar, - ) - .unwrap(); - } - - // Use providers to query Header data and compare if it matches - { - let db_provider = factory.provider().unwrap(); - let manager = SnapshotProvider::default(); - let jar_provider = manager - .get_segment_provider(SnapshotSegment::Headers, 0, Some(snap_file.path().into())) - .unwrap(); - - assert!(!headers.is_empty()); - - // Shuffled for chaos. - headers.shuffle(&mut generators::rng()); - - for header in headers { - let header_hash = header.hash(); - let header = header.unseal(); - - // Compare Header - assert_eq!(header, db_provider.header(&header_hash).unwrap().unwrap()); - assert_eq!(header, jar_provider.header(&header_hash).unwrap().unwrap()); - - // Compare HeaderTD - assert_eq!( - db_provider.header_td(&header_hash).unwrap().unwrap(), - jar_provider.header_td(&header_hash).unwrap().unwrap() - ); - } - } - } -} diff --git a/crates/storage/provider/src/providers/snapshot/jar.rs b/crates/storage/provider/src/providers/snapshot/jar.rs new file mode 100644 index 00000000000..a0e8883ce3a --- /dev/null +++ b/crates/storage/provider/src/providers/snapshot/jar.rs @@ -0,0 +1,222 @@ +use super::LoadedJarRef; +use crate::{BlockHashReader, BlockNumReader, HeaderProvider, TransactionsProvider}; +use reth_db::{ + table::{Decompress, Table}, + HeaderTD, +}; +use reth_interfaces::{provider::ProviderError, RethResult}; +use reth_nippy_jar::NippyJarCursor; +use reth_primitives::{ + snapshot::SegmentHeader, Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, + SealedHeader, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, + B256, U256, +}; +use std::ops::{Deref, RangeBounds}; + +/// Provider over a specific `NippyJar` and range. +#[derive(Debug)] +pub struct SnapshotJarProvider<'a>(LoadedJarRef<'a>); + +impl<'a> Deref for SnapshotJarProvider<'a> { + type Target = LoadedJarRef<'a>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> From> for SnapshotJarProvider<'a> { + fn from(value: LoadedJarRef<'a>) -> Self { + SnapshotJarProvider(value) + } +} + +impl<'a> SnapshotJarProvider<'a> { + /// Provides a cursor for more granular data access. + pub fn cursor<'b>(&'b self) -> RethResult> + where + 'b: 'a, + { + Ok(NippyJarCursor::new(self.value(), Some(self.mmap_handle()))?) + } +} + +impl<'a> HeaderProvider for SnapshotJarProvider<'a> { + fn header(&self, block_hash: &BlockHash) -> RethResult> { + // WIP + let mut cursor = self.cursor()?; + + let header = Header::decompress( + cursor.row_by_key_with_cols::<0b01, 2>(&block_hash.0).unwrap().unwrap()[0], + ) + .unwrap(); + + if &header.hash_slow() == block_hash { + return Ok(Some(header)) + } else { + // check next snapshot + } + Ok(None) + } + + fn header_by_number(&self, num: BlockNumber) -> RethResult> { + Header::decompress( + self.cursor()? + .row_by_number_with_cols::<0b01, 2>( + (num - self.user_header().block_start()) as usize, + )? + .ok_or(ProviderError::HeaderNotFound(num.into()))?[0], + ) + .map(Some) + .map_err(Into::into) + } + + fn header_td(&self, block_hash: &BlockHash) -> RethResult> { + // WIP + let mut cursor = NippyJarCursor::new(self.value(), Some(self.mmap_handle()))?; + + let row = cursor.row_by_key_with_cols::<0b11, 2>(&block_hash.0).unwrap().unwrap(); + + let header = Header::decompress(row[0]).unwrap(); + let td = ::Value::decompress(row[1]).unwrap(); + + if &header.hash_slow() == block_hash { + return Ok(Some(td.0)) + } else { + // check next snapshot + } + Ok(None) + } + + fn header_td_by_number(&self, _number: BlockNumber) -> RethResult> { + unimplemented!(); + } + + fn headers_range(&self, _range: impl RangeBounds) -> RethResult> { + unimplemented!(); + } + + fn sealed_headers_range( + &self, + _range: impl RangeBounds, + ) -> RethResult> { + unimplemented!(); + } + + fn sealed_header(&self, _number: BlockNumber) -> RethResult> { + unimplemented!(); + } +} + +impl<'a> BlockHashReader for SnapshotJarProvider<'a> { + fn block_hash(&self, _number: u64) -> RethResult> { + todo!() + } + + fn canonical_hashes_range( + &self, + _start: BlockNumber, + _end: BlockNumber, + ) -> RethResult> { + todo!() + } +} + +impl<'a> BlockNumReader for SnapshotJarProvider<'a> { + fn chain_info(&self) -> RethResult { + todo!() + } + + fn best_block_number(&self) -> RethResult { + todo!() + } + + fn last_block_number(&self) -> RethResult { + todo!() + } + + fn block_number(&self, _hash: B256) -> RethResult> { + todo!() + } +} + +impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { + fn transaction_id(&self, _tx_hash: TxHash) -> RethResult> { + todo!() + } + + fn transaction_by_id(&self, num: TxNumber) -> RethResult> { + TransactionSignedNoHash::decompress( + self.cursor()? + .row_by_number_with_cols::<0b1, 1>((num - self.user_header().tx_start()) as usize)? + .ok_or(ProviderError::TransactionNotFound(num.into()))?[0], + ) + .map(Into::into) + .map(Some) + .map_err(Into::into) + } + + fn transaction_by_id_no_hash( + &self, + _id: TxNumber, + ) -> RethResult> { + todo!() + } + + fn transaction_by_hash(&self, hash: TxHash) -> RethResult> { + // WIP + let mut cursor = self.cursor()?; + + let tx = TransactionSignedNoHash::decompress( + cursor.row_by_key_with_cols::<0b1, 1>(&hash.0).unwrap().unwrap()[0], + ) + .unwrap() + .with_hash(); + + if tx.hash() == hash { + return Ok(Some(tx)) + } else { + // check next snapshot + } + Ok(None) + } + + fn transaction_by_hash_with_meta( + &self, + _hash: TxHash, + ) -> RethResult> { + todo!() + } + + fn transaction_block(&self, _id: TxNumber) -> RethResult> { + todo!() + } + + fn transactions_by_block( + &self, + _block_id: BlockHashOrNumber, + ) -> RethResult>> { + todo!() + } + + fn transactions_by_block_range( + &self, + _range: impl RangeBounds, + ) -> RethResult>> { + todo!() + } + + fn senders_by_tx_range(&self, _range: impl RangeBounds) -> RethResult> { + todo!() + } + + fn transactions_by_tx_range( + &self, + _range: impl RangeBounds, + ) -> RethResult> { + todo!() + } + + fn transaction_sender(&self, _id: TxNumber) -> RethResult> { + todo!() + } +} diff --git a/crates/storage/provider/src/providers/snapshot/manager.rs b/crates/storage/provider/src/providers/snapshot/manager.rs new file mode 100644 index 00000000000..0ef1818415c --- /dev/null +++ b/crates/storage/provider/src/providers/snapshot/manager.rs @@ -0,0 +1,177 @@ +use super::{LoadedJar, SnapshotJarProvider}; +use crate::{BlockHashReader, BlockNumReader, HeaderProvider, TransactionsProvider}; +use dashmap::DashMap; +use reth_interfaces::RethResult; +use reth_nippy_jar::NippyJar; +use reth_primitives::{ + snapshot::SNAPSHOT_BLOCK_NUMBER_CHUNKS, Address, BlockHash, BlockHashOrNumber, BlockNumber, + ChainInfo, Header, SealedHeader, SnapshotSegment, TransactionMeta, TransactionSigned, + TransactionSignedNoHash, TxHash, TxNumber, B256, U256, +}; +use std::{ops::RangeBounds, path::PathBuf}; + +/// SnapshotProvider +#[derive(Debug, Default)] +pub struct SnapshotProvider { + /// Maintains a map which allows for concurrent access to different `NippyJars`, over different + /// segments and ranges. + map: DashMap<(BlockNumber, SnapshotSegment), LoadedJar>, +} + +impl SnapshotProvider { + /// Gets the provider of the requested segment and range. + pub fn get_segment_provider( + &self, + segment: SnapshotSegment, + block: BlockNumber, + mut path: Option, + ) -> RethResult> { + // TODO this invalidates custom length snapshots. + let snapshot = block / SNAPSHOT_BLOCK_NUMBER_CHUNKS; + let key = (snapshot, segment); + + if let Some(jar) = self.map.get(&key) { + return Ok(jar.into()) + } + + if let Some(path) = &path { + self.map.insert(key, LoadedJar::new(NippyJar::load(path)?)?); + } else { + path = Some(segment.filename( + &((snapshot * SNAPSHOT_BLOCK_NUMBER_CHUNKS)..= + ((snapshot + 1) * SNAPSHOT_BLOCK_NUMBER_CHUNKS - 1)), + )); + } + + self.get_segment_provider(segment, block, path) + } +} + +impl HeaderProvider for SnapshotProvider { + fn header(&self, _block_hash: &BlockHash) -> RethResult> { + todo!() + } + + fn header_by_number(&self, num: BlockNumber) -> RethResult> { + self.get_segment_provider(SnapshotSegment::Headers, num, None)?.header_by_number(num) + } + + fn header_td(&self, _block_hash: &BlockHash) -> RethResult> { + todo!() + } + + fn header_td_by_number(&self, _number: BlockNumber) -> RethResult> { + todo!(); + } + + fn headers_range(&self, _range: impl RangeBounds) -> RethResult> { + todo!(); + } + + fn sealed_headers_range( + &self, + _range: impl RangeBounds, + ) -> RethResult> { + todo!(); + } + + fn sealed_header(&self, _number: BlockNumber) -> RethResult> { + todo!(); + } +} + +impl BlockHashReader for SnapshotProvider { + fn block_hash(&self, _number: u64) -> RethResult> { + todo!() + } + + fn canonical_hashes_range( + &self, + _start: BlockNumber, + _end: BlockNumber, + ) -> RethResult> { + todo!() + } +} + +impl BlockNumReader for SnapshotProvider { + fn chain_info(&self) -> RethResult { + todo!() + } + + fn best_block_number(&self) -> RethResult { + todo!() + } + + fn last_block_number(&self) -> RethResult { + todo!() + } + + fn block_number(&self, _hash: B256) -> RethResult> { + todo!() + } +} + +impl TransactionsProvider for SnapshotProvider { + fn transaction_id(&self, _tx_hash: TxHash) -> RethResult> { + todo!() + } + + fn transaction_by_id(&self, num: TxNumber) -> RethResult> { + // TODO `num` is provided after checking the index + let block_num = num; + self.get_segment_provider(SnapshotSegment::Transactions, block_num, None)? + .transaction_by_id(num) + } + + fn transaction_by_id_no_hash( + &self, + _id: TxNumber, + ) -> RethResult> { + todo!() + } + + fn transaction_by_hash(&self, _hash: TxHash) -> RethResult> { + todo!() + } + + fn transaction_by_hash_with_meta( + &self, + _hash: TxHash, + ) -> RethResult> { + todo!() + } + + fn transaction_block(&self, _id: TxNumber) -> RethResult> { + todo!() + } + + fn transactions_by_block( + &self, + _block_id: BlockHashOrNumber, + ) -> RethResult>> { + todo!() + } + + fn transactions_by_block_range( + &self, + _range: impl RangeBounds, + ) -> RethResult>> { + todo!() + } + + fn senders_by_tx_range(&self, _range: impl RangeBounds) -> RethResult> { + todo!() + } + + fn transactions_by_tx_range( + &self, + _range: impl RangeBounds, + ) -> RethResult> { + todo!() + } + + fn transaction_sender(&self, _id: TxNumber) -> RethResult> { + todo!() + } +} diff --git a/crates/storage/provider/src/providers/snapshot/mod.rs b/crates/storage/provider/src/providers/snapshot/mod.rs new file mode 100644 index 00000000000..2f0dad3c582 --- /dev/null +++ b/crates/storage/provider/src/providers/snapshot/mod.rs @@ -0,0 +1,162 @@ +mod manager; +pub use manager::SnapshotProvider; + +mod jar; +pub use jar::SnapshotJarProvider; + +use reth_interfaces::RethResult; +use reth_nippy_jar::NippyJar; +use reth_primitives::{snapshot::SegmentHeader, SnapshotSegment}; +use std::ops::Deref; + +/// Alias type for each specific `NippyJar`. +type LoadedJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), LoadedJar>; + +/// Helper type to reuse an associated snapshot mmap handle on created cursors. +#[derive(Debug)] +pub struct LoadedJar { + jar: NippyJar, + mmap_handle: reth_nippy_jar::MmapHandle, +} + +impl LoadedJar { + fn new(jar: NippyJar) -> RethResult { + let mmap_handle = jar.open_data()?; + Ok(Self { jar, mmap_handle }) + } + + /// Returns a clone of the mmap handle that can be used to instantiate a cursor. + fn mmap_handle(&self) -> reth_nippy_jar::MmapHandle { + self.mmap_handle.clone() + } +} + +impl Deref for LoadedJar { + type Target = NippyJar; + fn deref(&self) -> &Self::Target { + &self.jar + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{HeaderProvider, ProviderFactory}; + use rand::{self, seq::SliceRandom}; + use reth_db::{ + cursor::DbCursorRO, + database::Database, + snapshot::create_snapshot_T1_T2, + test_utils::create_test_rw_db, + transaction::{DbTx, DbTxMut}, + CanonicalHeaders, DatabaseError, HeaderNumbers, HeaderTD, Headers, RawTable, + }; + use reth_interfaces::test_utils::generators::{self, random_header_range}; + use reth_nippy_jar::NippyJar; + use reth_primitives::{BlockNumber, B256, MAINNET, U256}; + + #[test] + fn test_snap() { + // Ranges + let row_count = 100u64; + let range = 0..=(row_count - 1); + let segment_header = SegmentHeader::new(range.clone(), range.clone()); + + // Data sources + let db = create_test_rw_db(); + let factory = ProviderFactory::new(&db, MAINNET.clone()); + let snap_file = tempfile::NamedTempFile::new().unwrap(); + + // Setup data + let mut headers = random_header_range( + &mut generators::rng(), + *range.start()..(*range.end() + 1), + B256::random(), + ); + + db.update(|tx| -> Result<(), DatabaseError> { + let mut td = U256::ZERO; + for header in headers.clone() { + td += header.header.difficulty; + let hash = header.hash(); + + tx.put::(header.number, hash)?; + tx.put::(header.number, header.clone().unseal())?; + tx.put::(header.number, td.into())?; + tx.put::(hash, header.number)?; + } + Ok(()) + }) + .unwrap() + .unwrap(); + + // Create Snapshot + { + let with_compression = true; + let with_filter = true; + + let mut nippy_jar = NippyJar::new(2, snap_file.path(), segment_header); + + if with_compression { + nippy_jar = nippy_jar.with_zstd(false, 0); + } + + if with_filter { + nippy_jar = nippy_jar.with_cuckoo_filter(row_count as usize + 10).with_fmph(); + } + + let tx = db.tx().unwrap(); + + // Hacky type inference. TODO fix + let mut none_vec = Some(vec![vec![vec![0u8]].into_iter()]); + let _ = none_vec.take(); + + // Generate list of hashes for filters & PHF + let mut cursor = tx.cursor_read::>().unwrap(); + let hashes = cursor + .walk(None) + .unwrap() + .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())); + + create_snapshot_T1_T2::( + &tx, + range, + None, + none_vec, + Some(hashes), + row_count as usize, + &mut nippy_jar, + ) + .unwrap(); + } + + // Use providers to query Header data and compare if it matches + { + let db_provider = factory.provider().unwrap(); + let manager = SnapshotProvider::default(); + let jar_provider = manager + .get_segment_provider(SnapshotSegment::Headers, 0, Some(snap_file.path().into())) + .unwrap(); + + assert!(!headers.is_empty()); + + // Shuffled for chaos. + headers.shuffle(&mut generators::rng()); + + for header in headers { + let header_hash = header.hash(); + let header = header.unseal(); + + // Compare Header + assert_eq!(header, db_provider.header(&header_hash).unwrap().unwrap()); + assert_eq!(header, jar_provider.header(&header_hash).unwrap().unwrap()); + + // Compare HeaderTD + assert_eq!( + db_provider.header_td(&header_hash).unwrap().unwrap(), + jar_provider.header_td(&header_hash).unwrap().unwrap() + ); + } + } + } +} From 6bf07fda278162f7eecd8fac2b72553696a8637f Mon Sep 17 00:00:00 2001 From: joshieDo Date: Wed, 25 Oct 2023 10:11:42 +0000 Subject: [PATCH 40/44] add NippyJarCursor::with_handle --- crates/storage/nippy-jar/src/cursor.rs | 20 ++++++++++++------- crates/storage/nippy-jar/src/lib.rs | 10 +++++----- .../provider/src/providers/snapshot/jar.rs | 4 ++-- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/crates/storage/nippy-jar/src/cursor.rs b/crates/storage/nippy-jar/src/cursor.rs index 96a4d73f83d..010bb1df2a0 100644 --- a/crates/storage/nippy-jar/src/cursor.rs +++ b/crates/storage/nippy-jar/src/cursor.rs @@ -33,16 +33,22 @@ impl<'a, H> NippyJarCursor<'a, H> where H: Send + Sync + Serialize + for<'b> Deserialize<'b> + std::fmt::Debug + 'static, { - pub fn new( + pub fn new(jar: &'a NippyJar) -> Result { + let max_row_size = jar.max_row_size; + Ok(NippyJarCursor { + jar, + mmap_handle: jar.open_data()?, + // Makes sure that we have enough buffer capacity to decompress any row of data. + internal_buffer: Vec::with_capacity(max_row_size), + row: 0, + }) + } + + pub fn with_handle( jar: &'a NippyJar, - mmap_handle: Option, + mmap_handle: MmapHandle, ) -> Result { let max_row_size = jar.max_row_size; - let mmap_handle = match mmap_handle { - Some(h) => h, - None => jar.open_data()?, - }; - Ok(NippyJarCursor { jar, mmap_handle, diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index 47e1b066d64..248d7eaa3f1 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -700,7 +700,7 @@ mod tests { if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { assert!(zstd.use_dict); - let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -735,7 +735,7 @@ mod tests { assert_eq!(nippy, loaded_nippy); if let Some(Compressors::Lz4(_)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -773,7 +773,7 @@ mod tests { if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() { assert!(!zstd.use_dict); - let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_index = 0usize; @@ -830,7 +830,7 @@ mod tests { assert_eq!(loaded_nippy.user_header().block_start, block_start); if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Iterate over compressed values and compare let mut row_num = 0usize; @@ -895,7 +895,7 @@ mod tests { let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() { - let mut cursor = NippyJarCursor::new(&loaded_nippy, None).unwrap(); + let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap(); // Shuffled for chaos. let mut data = col1.iter().zip(col2.iter()).enumerate().collect::>(); diff --git a/crates/storage/provider/src/providers/snapshot/jar.rs b/crates/storage/provider/src/providers/snapshot/jar.rs index a0e8883ce3a..4dd8099cf6c 100644 --- a/crates/storage/provider/src/providers/snapshot/jar.rs +++ b/crates/storage/provider/src/providers/snapshot/jar.rs @@ -36,7 +36,7 @@ impl<'a> SnapshotJarProvider<'a> { where 'b: 'a, { - Ok(NippyJarCursor::new(self.value(), Some(self.mmap_handle()))?) + Ok(NippyJarCursor::with_handle(self.value(), self.mmap_handle())?) } } @@ -72,7 +72,7 @@ impl<'a> HeaderProvider for SnapshotJarProvider<'a> { fn header_td(&self, block_hash: &BlockHash) -> RethResult> { // WIP - let mut cursor = NippyJarCursor::new(self.value(), Some(self.mmap_handle()))?; + let mut cursor = NippyJarCursor::with_handle(self.value(), self.mmap_handle())?; let row = cursor.row_by_key_with_cols::<0b11, 2>(&block_hash.0).unwrap().unwrap(); From e92031845eaeee3450c4ae2e211811e986aecb2f Mon Sep 17 00:00:00 2001 From: joshieDo Date: Wed, 25 Oct 2023 10:18:20 +0000 Subject: [PATCH 41/44] fix doc --- crates/storage/provider/src/providers/database/provider.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index 9d6773d2d30..f92a37c4be1 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -1142,7 +1142,7 @@ impl BlockReader for DatabaseProvider { } impl TransactionsProviderExt for DatabaseProvider { - /// Recovers transaction hashes by walking through [`crate::tables::Transactions`] table and + /// Recovers transaction hashes by walking through `Transactions` table and /// calculating them in a parallel manner. Returned unsorted. fn transaction_hashes_by_range( &self, From efeb602918c4c708c334b7ac217f1ac9ae3b01b2 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 26 Oct 2023 11:47:55 +0000 Subject: [PATCH 42/44] fmt --- crates/interfaces/src/provider.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/interfaces/src/provider.rs b/crates/interfaces/src/provider.rs index cbc6a02b12b..71b1371f34c 100644 --- a/crates/interfaces/src/provider.rs +++ b/crates/interfaces/src/provider.rs @@ -42,7 +42,7 @@ pub enum ProviderError { /// when required header related data was not found but was required. #[error("no header found for {0:?}")] HeaderNotFound(BlockHashOrNumber), - /// The specific transaction is missing. + /// The specific transaction is missing. #[error("no transaction found for {0:?}")] TransactionNotFound(TxHashOrNumber), /// The specific receipt is missing From 2e2cabd9e92cb696f52002c3c2804f10d41cf3b8 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Thu, 26 Oct 2023 12:07:54 +0000 Subject: [PATCH 43/44] use AsRef Path instead --- crates/storage/nippy-jar/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index 5918e1ace75..b1f932bece6 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -506,7 +506,7 @@ pub struct MmapHandle { } impl MmapHandle { - pub fn new(path: PathBuf) -> Result { + pub fn new(path: impl AsRef) -> Result { let file = File::open(path)?; // SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle. From 8f9cbdb16d8e7467336f1d1ed8c95ac1bcba6030 Mon Sep 17 00:00:00 2001 From: joshieDo Date: Fri, 27 Oct 2023 09:56:36 +0000 Subject: [PATCH 44/44] use BLOCKS_PER_SNAPSHOT instead --- crates/primitives/src/snapshot/mod.rs | 2 +- crates/storage/provider/src/providers/snapshot.rs | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/primitives/src/snapshot/mod.rs b/crates/primitives/src/snapshot/mod.rs index 76b56e7d0b6..d8fc8db5362 100644 --- a/crates/primitives/src/snapshot/mod.rs +++ b/crates/primitives/src/snapshot/mod.rs @@ -9,4 +9,4 @@ pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; pub use segment::{SegmentHeader, SnapshotSegment}; /// Default snapshot block count. -pub const SNAPSHOT_BLOCK_NUMBER_CHUNKS: u64 = 500_000; +pub const BLOCKS_PER_SNAPSHOT: u64 = 500_000; diff --git a/crates/storage/provider/src/providers/snapshot.rs b/crates/storage/provider/src/providers/snapshot.rs index 76e34c1efa1..8087eb159c2 100644 --- a/crates/storage/provider/src/providers/snapshot.rs +++ b/crates/storage/provider/src/providers/snapshot.rs @@ -7,7 +7,7 @@ use reth_db::{ use reth_interfaces::{provider::ProviderError, RethResult}; use reth_nippy_jar::{NippyJar, NippyJarCursor}; use reth_primitives::{ - snapshot::{SegmentHeader, SNAPSHOT_BLOCK_NUMBER_CHUNKS}, + snapshot::{SegmentHeader, BLOCKS_PER_SNAPSHOT}, Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, SealedHeader, SnapshotSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256, @@ -35,7 +35,7 @@ impl SnapshotProvider { mut path: Option, ) -> RethResult> { // TODO this invalidates custom length snapshots. - let snapshot = block / SNAPSHOT_BLOCK_NUMBER_CHUNKS; + let snapshot = block / BLOCKS_PER_SNAPSHOT; let key = (snapshot, segment); if let Some(jar) = self.map.get(&key) { @@ -47,8 +47,7 @@ impl SnapshotProvider { self.map.insert(key, jar); } else { path = Some(segment.filename( - &((snapshot * SNAPSHOT_BLOCK_NUMBER_CHUNKS)..= - ((snapshot + 1) * SNAPSHOT_BLOCK_NUMBER_CHUNKS - 1)), + &((snapshot * BLOCKS_PER_SNAPSHOT)..=((snapshot + 1) * BLOCKS_PER_SNAPSHOT - 1)), )); }