From 74c6e61741ac751cea6c055b99081ca70a036623 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:22:25 +0100 Subject: [PATCH 01/14] refactor: move `CompressionAlgorithm` in a dedicated module --- .../src/entities/compression_algorithm.rs | 39 +++++++++++++++++ mithril-common/src/entities/mod.rs | 4 +- mithril-common/src/entities/snapshot.rs | 43 ++----------------- 3 files changed, 46 insertions(+), 40 deletions(-) create mode 100644 mithril-common/src/entities/compression_algorithm.rs diff --git a/mithril-common/src/entities/compression_algorithm.rs b/mithril-common/src/entities/compression_algorithm.rs new file mode 100644 index 00000000000..b35cca04ad4 --- /dev/null +++ b/mithril-common/src/entities/compression_algorithm.rs @@ -0,0 +1,39 @@ +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, IntoEnumIterator}; + +/// Compression algorithm for the snapshot archive artifacts. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default, EnumIter, Display)] +#[serde(rename_all = "lowercase")] +pub enum CompressionAlgorithm { + /// Gzip compression format + #[default] + Gzip, + /// Zstandard compression format + Zstandard, +} + +impl CompressionAlgorithm { + /// Get the extension associated to tar archive using the current algorithm. + pub fn tar_file_extension(&self) -> String { + match self { + CompressionAlgorithm::Gzip => "tar.gz".to_owned(), + CompressionAlgorithm::Zstandard => "tar.zst".to_owned(), + } + } + + /// List all the available [algorithms][CompressionAlgorithm]. + pub fn list() -> Vec { + Self::iter().collect() + } + + /// Those ratio will be multiplied by the snapshot size to check if the available + /// disk space is sufficient to store the archive plus the extracted files. + /// If the available space is lower than that, a warning is raised. + /// Those ratio have been experimentally established. + pub fn free_space_snapshot_ratio(&self) -> f64 { + match self { + CompressionAlgorithm::Gzip => 2.5, + CompressionAlgorithm::Zstandard => 4.0, + } + } +} diff --git a/mithril-common/src/entities/mod.rs b/mithril-common/src/entities/mod.rs index a8dbfc24c57..d784e6fbb3e 100644 --- a/mithril-common/src/entities/mod.rs +++ b/mithril-common/src/entities/mod.rs @@ -13,6 +13,7 @@ mod cardano_transactions_snapshot; mod certificate; mod certificate_metadata; mod certificate_pending; +mod compression_algorithm; mod epoch; mod http_server_error; mod mithril_stake_distribution; @@ -40,6 +41,7 @@ pub use cardano_transactions_snapshot::CardanoTransactionsSnapshot; pub use certificate::{Certificate, CertificateSignature}; pub use certificate_metadata::{CertificateMetadata, StakeDistributionParty}; pub use certificate_pending::CertificatePending; +pub use compression_algorithm::*; pub use epoch::{Epoch, EpochError}; pub use http_server_error::{ClientError, ServerError}; pub use mithril_stake_distribution::MithrilStakeDistribution; @@ -51,6 +53,6 @@ pub use signed_entity_type::*; pub use signer::{Signer, SignerWithStake}; pub use single_signatures::*; pub use slot_number::SlotNumber; -pub use snapshot::{CompressionAlgorithm, Snapshot}; +pub use snapshot::Snapshot; pub use time_point::*; pub use type_alias::*; diff --git a/mithril-common/src/entities/snapshot.rs b/mithril-common/src/entities/snapshot.rs index 5e1e73a8cc1..9a4f7ad91f4 100644 --- a/mithril-common/src/entities/snapshot.rs +++ b/mithril-common/src/entities/snapshot.rs @@ -1,7 +1,9 @@ -use crate::{entities::CardanoDbBeacon, signable_builder::Artifact}; +use crate::{ + entities::{CardanoDbBeacon, CompressionAlgorithm}, + signable_builder::Artifact, +}; use semver::Version; use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, IntoEnumIterator}; /// Snapshot represents a snapshot file and its metadata #[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)] @@ -28,43 +30,6 @@ pub struct Snapshot { pub cardano_node_version: String, } -/// Compression algorithm for the snapshot archive artifacts. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default, EnumIter, Display)] -#[serde(rename_all = "lowercase")] -pub enum CompressionAlgorithm { - /// Gzip compression format - #[default] - Gzip, - /// Zstandard compression format - Zstandard, -} - -impl CompressionAlgorithm { - /// Get the extension associated to tar archive using the current algorithm. - pub fn tar_file_extension(&self) -> String { - match self { - CompressionAlgorithm::Gzip => "tar.gz".to_owned(), - CompressionAlgorithm::Zstandard => "tar.zst".to_owned(), - } - } - - /// List all the available [algorithms][CompressionAlgorithm]. - pub fn list() -> Vec { - Self::iter().collect() - } - - /// Those ratio will be multiplied by the snapshot size to check if the available - /// disk space is sufficient to store the archive plus the extracted files. - /// If the available space is lower than that, a warning is raised. - /// Those ratio have been experimentally established. - pub fn free_space_snapshot_ratio(&self) -> f64 { - match self { - CompressionAlgorithm::Gzip => 2.5, - CompressionAlgorithm::Zstandard => 4.0, - } - } -} - impl Snapshot { /// Snapshot factory pub fn new>( From 4bddc114e5c46250601f76bbe06c25f3ae09c8e9 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Mon, 2 Dec 2024 20:06:38 +0100 Subject: [PATCH 02/14] feat: add `CardanoDatabase` entity --- .../src/entities/cardano_database.rs | 83 +++++++++++++++++++ mithril-common/src/entities/mod.rs | 2 + 2 files changed, 85 insertions(+) create mode 100644 mithril-common/src/entities/cardano_database.rs diff --git a/mithril-common/src/entities/cardano_database.rs b/mithril-common/src/entities/cardano_database.rs new file mode 100644 index 00000000000..6b462a6a58c --- /dev/null +++ b/mithril-common/src/entities/cardano_database.rs @@ -0,0 +1,83 @@ +use semver::Version; +use serde::{Deserialize, Serialize}; + +use crate::{ + entities::{CardanoDbBeacon, CompressionAlgorithm}, + signable_builder::Artifact, +}; + +/// Cardano database incremental. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct CardanoDatabase { + /// Merkle root of the Cardano database. + pub merkle_root: String, + + /// Mithril beacon on the Cardano chain. + pub beacon: CardanoDbBeacon, + + /// Size of the uncompressed Cardano database (including the ledger and volatile) in Bytes. + pub total_db_size_uncompressed: u64, + + /// Locations of the Cardano database artifacts. + pub locations: ArtifactsLocations, + + /// Compression algorithm of the Cardano database archives + pub compression_algorithm: CompressionAlgorithm, + + /// Version of the Cardano node used to create the archives. + pub cardano_node_version: String, +} + +impl CardanoDatabase { + /// [CardanoDatabase] factory + pub fn new( + merkle_root: String, + beacon: CardanoDbBeacon, + total_db_size_uncompressed: u64, + locations: ArtifactsLocations, + compression_algorithm: CompressionAlgorithm, + cardano_node_version: &Version, + ) -> Self { + let cardano_node_version = format!("{cardano_node_version}"); + + Self { + merkle_root, + beacon, + locations, + total_db_size_uncompressed, + compression_algorithm, + cardano_node_version, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ArtifactLocationType { + Aggregator, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct ArtifactLocationEntry { + #[serde(rename = "type")] + pub location_type: ArtifactLocationType, + pub uri: String, +} + +/// Locations of the Cardano database related files. +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct ArtifactsLocations { + /// Locations of the file containing the digests of the immutable files. + pub digests: Vec, + /// Locations of the immutable files. + pub immutables: Vec, + /// Locations of the ancillary files (ledger and volatile). + pub ancillary: Vec, +} + +#[typetag::serde] +impl Artifact for CardanoDatabase { + fn get_id(&self) -> String { + self.merkle_root.clone() + } +} diff --git a/mithril-common/src/entities/mod.rs b/mithril-common/src/entities/mod.rs index d784e6fbb3e..42574b96acd 100644 --- a/mithril-common/src/entities/mod.rs +++ b/mithril-common/src/entities/mod.rs @@ -4,6 +4,7 @@ pub(crate) mod arithmetic_operation_wrapper; mod block_number; mod block_range; mod cardano_chain_point; +mod cardano_database; mod cardano_db_beacon; mod cardano_network; mod cardano_stake_distribution; @@ -32,6 +33,7 @@ mod type_alias; pub use block_number::BlockNumber; pub use block_range::{BlockRange, BlockRangeLength, BlockRangesSequence}; pub use cardano_chain_point::{BlockHash, ChainPoint}; +pub use cardano_database::{ArtifactsLocations, CardanoDatabase}; pub use cardano_db_beacon::CardanoDbBeacon; pub use cardano_network::CardanoNetwork; pub use cardano_stake_distribution::CardanoStakeDistribution; From 92ac8ca015fa11c6bdc7770a842d36dd8214ee2b Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Mon, 2 Dec 2024 20:05:37 +0100 Subject: [PATCH 03/14] test: add support for ledger and volatile files management in `DummyImmutablesDbBuilder` --- .../digesters/cardano_immutable_digester.rs | 2 +- .../digesters/dummy_immutable_db_builder.rs | 68 ++++++++++++++++--- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/mithril-common/src/digesters/cardano_immutable_digester.rs b/mithril-common/src/digesters/cardano_immutable_digester.rs index 4fe2f417b83..4b6d88c9a53 100644 --- a/mithril-common/src/digesters/cardano_immutable_digester.rs +++ b/mithril-common/src/digesters/cardano_immutable_digester.rs @@ -617,7 +617,7 @@ mod tests { let immutable_db = db_builder("hash_computation_is_quicker_with_a_full_cache") .with_immutables(&(1..=50).collect::>()) .append_immutable_trio() - .set_file_size(65536) + .set_immutable_file_size(65536) .build(); let cache = MemoryImmutableFileDigestCacheProvider::default(); let logger = TestLogger::stdout(); diff --git a/mithril-common/src/digesters/dummy_immutable_db_builder.rs b/mithril-common/src/digesters/dummy_immutable_db_builder.rs index 881204d13a8..2afd6e2f4fc 100644 --- a/mithril-common/src/digesters/dummy_immutable_db_builder.rs +++ b/mithril-common/src/digesters/dummy_immutable_db_builder.rs @@ -12,7 +12,11 @@ pub struct DummyImmutablesDbBuilder { immutables_to_write: Vec, non_immutables_to_write: Vec, append_uncompleted_trio: bool, - file_size: Option, + immutable_file_size: Option, + ledger_files_to_write: Vec, + ledger_file_size: Option, + volatile_files_to_write: Vec, + volatile_file_size: Option, } /// A dummy cardano immutable db. @@ -51,7 +55,11 @@ impl DummyImmutablesDbBuilder { immutables_to_write: vec![], non_immutables_to_write: vec![], append_uncompleted_trio: false, - file_size: None, + immutable_file_size: None, + ledger_files_to_write: vec![], + ledger_file_size: None, + volatile_files_to_write: vec![], + volatile_file_size: None, } } @@ -68,6 +76,30 @@ impl DummyImmutablesDbBuilder { self } + /// Set ledger files to write to the db in the 'ledger' subdirectory. + pub fn with_ledger_files(&mut self, files: Vec) -> &mut Self { + self.ledger_files_to_write = files; + self + } + + /// Set the size of all ledger files written by [build][Self::build] to the given `file_size` in bytes. + pub fn set_ledger_file_size(&mut self, file_size: u64) -> &mut Self { + self.ledger_file_size = Some(file_size); + self + } + + /// Set volatile files to write to the db in the 'volatile' subdirectory. + pub fn with_volatile_files(&mut self, files: Vec) -> &mut Self { + self.volatile_files_to_write = files; + self + } + + /// Set the size of all volatile files written by [build][Self::build] to the given `file_size` in bytes. + pub fn set_volatile_file_size(&mut self, file_size: u64) -> &mut Self { + self.volatile_file_size = Some(file_size); + self + } + /// Makes [build][Self::build] add another trio of immutables file, that won't be included /// in its returned vec, to simulate the last 3 'uncompleted / wip' files that can be found in /// a cardano immutable db. @@ -76,11 +108,11 @@ impl DummyImmutablesDbBuilder { self } - /// Set the size of all files written by [build][Self::build] to the given `file_size` in bytes. + /// Set the size of all immutable files written by [build][Self::build] to the given `file_size` in bytes. /// /// Note: by default the size of the produced files is less than a 1kb. - pub fn set_file_size(&mut self, file_size: u64) -> &mut Self { - self.file_size = Some(file_size); + pub fn set_immutable_file_size(&mut self, file_size: u64) -> &mut Self { + self.immutable_file_size = Some(file_size); self } @@ -92,7 +124,7 @@ impl DummyImmutablesDbBuilder { if self.append_uncompleted_trio { write_immutable_trio( - self.file_size, + self.immutable_file_size, &self.dir, match immutable_numbers.last() { None => 0, @@ -102,14 +134,34 @@ impl DummyImmutablesDbBuilder { } for non_immutable in &self.non_immutables_to_write { - non_immutables_files.push(write_dummy_file(self.file_size, &self.dir, non_immutable)); + non_immutables_files.push(write_dummy_file( + self.immutable_file_size, + &self.dir, + non_immutable, + )); + } + + if !self.ledger_files_to_write.is_empty() { + let ledger_dir = self.dir.parent().unwrap().join("ledger"); + std::fs::create_dir_all(&ledger_dir).unwrap(); + for filename in &self.ledger_files_to_write { + write_dummy_file(self.ledger_file_size, &ledger_dir, filename); + } + }; + + if !self.volatile_files_to_write.is_empty() { + let volatile_dir = self.dir.parent().unwrap().join("volatile"); + std::fs::create_dir_all(&volatile_dir).unwrap(); + for filename in &self.volatile_files_to_write { + write_dummy_file(self.volatile_file_size, &volatile_dir, filename); + } } DummyImmutableDb { dir: self.dir.clone(), immutables_files: immutable_numbers .into_iter() - .flat_map(|ifn| write_immutable_trio(self.file_size, &self.dir, ifn)) + .flat_map(|ifn| write_immutable_trio(self.immutable_file_size, &self.dir, ifn)) .collect::>(), non_immutables_files, } From 6cb0be12b9e6ffad4e270668ec5273b0c95b70bf Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Mon, 2 Dec 2024 20:15:09 +0100 Subject: [PATCH 04/14] feat: implement artifact builder for `CardanoDatabase` signed entity type --- .../src/artifact_builder/cardano_database.rs | 207 ++++++++++++++++++ .../src/artifact_builder/mod.rs | 2 + 2 files changed, 209 insertions(+) create mode 100644 mithril-aggregator/src/artifact_builder/cardano_database.rs diff --git a/mithril-aggregator/src/artifact_builder/cardano_database.rs b/mithril-aggregator/src/artifact_builder/cardano_database.rs new file mode 100644 index 00000000000..e95d4241690 --- /dev/null +++ b/mithril-aggregator/src/artifact_builder/cardano_database.rs @@ -0,0 +1,207 @@ +use std::path::{Path, PathBuf}; + +use anyhow::{anyhow, Context}; +use async_trait::async_trait; +use semver::Version; + +use mithril_common::{ + entities::{ + ArtifactsLocations, CardanoDatabase, CardanoDbBeacon, Certificate, CompressionAlgorithm, + ProtocolMessagePartKey, SignedEntityType, + }, + StdResult, +}; + +use crate::artifact_builder::ArtifactBuilder; + +pub struct CardanoDatabaseArtifactBuilder { + db_directory: PathBuf, // TODO: temporary, will be accessed through another dependency instead of direct path. + cardano_node_version: Version, + compression_algorithm: CompressionAlgorithm, +} + +impl CardanoDatabaseArtifactBuilder { + pub fn new( + db_directory: PathBuf, + cardano_node_version: &Version, + compression_algorithm: CompressionAlgorithm, + ) -> Self { + Self { + db_directory, + cardano_node_version: cardano_node_version.clone(), + compression_algorithm, + } + } +} + +#[async_trait] +impl ArtifactBuilder for CardanoDatabaseArtifactBuilder { + async fn compute_artifact( + &self, + beacon: CardanoDbBeacon, + certificate: &Certificate, + ) -> StdResult { + let merkle_root = certificate + .protocol_message + .get_message_part(&ProtocolMessagePartKey::CardanoDatabaseMerkleRoot) + .ok_or(anyhow!( + "Can not find CardanoDatabaseMerkleRoot protocol message part in certificate" + )) + .with_context(|| { + format!( + "Can not compute CardanoDatabase artifact for signed_entity: {:?}", + SignedEntityType::CardanoDatabase(beacon.clone()) + ) + })?; + let total_db_size_uncompressed = compute_uncompressed_database_size(&self.db_directory)?; + + let cardano_database = CardanoDatabase::new( + merkle_root.to_string(), + beacon, + total_db_size_uncompressed, + ArtifactsLocations::default(), // TODO: temporary default locations, will be injected in next PR. + self.compression_algorithm, + &self.cardano_node_version, + ); + + Ok(cardano_database) + } +} + +// Return the sum of the files size contained in the subdirectories 'immutable', 'ledger' and 'volatile'. +fn compute_uncompressed_database_size(db_directory: &Path) -> StdResult { + let subdirs = ["immutable", "ledger", "volatile"]; + + let mut total_db_size_uncompressed = 0; + for subdir in subdirs { + let dir_path = db_directory.join(subdir); + + total_db_size_uncompressed += get_directory_size(&dir_path) + .with_context(|| format!("Failed to read metadata for directory: {:?}", dir_path))?; + } + + Ok(total_db_size_uncompressed) +} + +fn get_directory_size(path: &Path) -> StdResult { + let entries = + std::fs::read_dir(path).with_context(|| format!("Failed to read directory: {:?}", path))?; + + let mut directory_size = 0; + for entry in entries { + let path = entry + .with_context(|| format!("Failed to read directory entry in {:?}", path))? + .path(); + + if path.is_file() { + let metadata = std::fs::metadata(&path) + .with_context(|| format!("Failed to read metadata for file: {:?}", path))?; + directory_size += metadata.len(); + } else if path.is_dir() { + directory_size += get_directory_size(&path)?; + } + } + + Ok(directory_size) +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use mithril_common::{ + digesters::DummyImmutablesDbBuilder, + entities::{ProtocolMessage, ProtocolMessagePartKey}, + test_utils::{fake_data, TempDir}, + }; + + use super::*; + + fn get_test_directory(dir_name: &str) -> PathBuf { + TempDir::create("cardano_database", dir_name) + } + + #[test] + fn should_compute_the_size_of_the_uncompressed_database_only_immutable_ledger_and_volatile() { + let test_dir = get_test_directory("should_compute_the_size_of_the_uncompressed_database_only_immutable_ledger_and_volatile"); + + DummyImmutablesDbBuilder::new(test_dir.as_os_str().to_str().unwrap()) + .with_immutables(&[1, 2]) + .set_immutable_file_size(1000) + .with_ledger_files(vec!["blocks-0.dat".to_string()]) + .set_ledger_file_size(5000) + .with_volatile_files(vec!["437".to_string(), "537".to_string()]) + .set_volatile_file_size(2000) + .build(); + // Number of immutable files = 2 × 3 ('chunk', 'primary' and 'secondary'). + let expected_total_size = 2 * 3 * 1000 + 5000 + 2000 * 2; + + std::fs::write(test_dir.join("non_computed_file.txt"), "file inside root").unwrap(); + let non_computed_dir = test_dir.join("non_computed_dir"); + std::fs::create_dir(&non_computed_dir).unwrap(); + std::fs::write( + non_computed_dir.join("another_non_computed_file.txt"), + "file inside a non computed directory", + ) + .unwrap(); + + let total_size = compute_uncompressed_database_size(&test_dir).unwrap(); + + assert_eq!(expected_total_size, total_size); + } + + #[tokio::test] + async fn should_compute_valid_artifact() { + let test_dir = get_test_directory("should_compute_valid_artifact"); + + DummyImmutablesDbBuilder::new(test_dir.as_os_str().to_str().unwrap()) + .with_immutables(&[1, 2]) + .set_immutable_file_size(1000) + .with_ledger_files(vec!["blocks-0.dat".to_string()]) + .set_ledger_file_size(5000) + .with_volatile_files(vec!["437".to_string(), "537".to_string()]) + .set_volatile_file_size(2000) + .build(); + // Number of immutable files = 2 × 3 ('chunk', 'primary' and 'secondary'). + let expected_total_size = 2 * 3 * 1000 + 5000 + 2000 * 2; + + let cardano_database_artifact_builder = CardanoDatabaseArtifactBuilder::new( + test_dir, + &Version::parse("1.0.0").unwrap(), + CompressionAlgorithm::Zstandard, + ); + + let beacon = fake_data::beacon(); + let certificate_with_merkle_root = { + let mut protocol_message = ProtocolMessage::new(); + protocol_message.set_message_part( + ProtocolMessagePartKey::CardanoDatabaseMerkleRoot, + "merkleroot".to_string(), + ); + Certificate { + protocol_message, + ..fake_data::certificate("certificate-123".to_string()) + } + }; + + let artifact = cardano_database_artifact_builder + .compute_artifact(beacon.clone(), &certificate_with_merkle_root) + .await + .unwrap(); + + let artifact_expected = CardanoDatabase::new( + "merkleroot".to_string(), + beacon, + expected_total_size, + ArtifactsLocations { + digests: vec![], + immutables: vec![], + ancillary: vec![], + }, + CompressionAlgorithm::Zstandard, + &Version::parse("1.0.0").unwrap(), + ); + + assert_eq!(artifact_expected, artifact); + } +} diff --git a/mithril-aggregator/src/artifact_builder/mod.rs b/mithril-aggregator/src/artifact_builder/mod.rs index ea7ce80a6b7..45c7facc815 100644 --- a/mithril-aggregator/src/artifact_builder/mod.rs +++ b/mithril-aggregator/src/artifact_builder/mod.rs @@ -1,10 +1,12 @@ //! The module used for building artifact +mod cardano_database; mod cardano_immutable_files_full; mod cardano_stake_distribution; mod cardano_transactions; mod interface; mod mithril_stake_distribution; +pub use cardano_database::*; pub use cardano_immutable_files_full::*; pub use cardano_stake_distribution::*; pub use cardano_transactions::*; From c16d697165ec2f8fe47683ddceea977fcf1646bb Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:53:39 +0100 Subject: [PATCH 05/14] feat: integrate `CardanoDatabase` artifact builder into the aggregator `SignedEntityService` --- .../src/dependency_injection/builder.rs | 11 ++- .../src/services/signed_entity.rs | 79 +++++++++++++++---- mithril-common/src/test_utils/fake_data.rs | 29 ++++++- 3 files changed, 100 insertions(+), 19 deletions(-) diff --git a/mithril-aggregator/src/dependency_injection/builder.rs b/mithril-aggregator/src/dependency_injection/builder.rs index d95ecc8f5c0..c09d6fb19ea 100644 --- a/mithril-aggregator/src/dependency_injection/builder.rs +++ b/mithril-aggregator/src/dependency_injection/builder.rs @@ -52,8 +52,9 @@ use mithril_persistence::{ use super::{DependenciesBuilderError, EpochServiceWrapper, Result}; use crate::{ artifact_builder::{ - CardanoImmutableFilesFullArtifactBuilder, CardanoStakeDistributionArtifactBuilder, - CardanoTransactionsArtifactBuilder, MithrilStakeDistributionArtifactBuilder, + CardanoDatabaseArtifactBuilder, CardanoImmutableFilesFullArtifactBuilder, + CardanoStakeDistributionArtifactBuilder, CardanoTransactionsArtifactBuilder, + MithrilStakeDistributionArtifactBuilder, }, configuration::ExecutionEnvironment, database::repository::{ @@ -1208,11 +1209,17 @@ impl DependenciesBuilder { let stake_store = self.get_stake_store().await?; let cardano_stake_distribution_artifact_builder = Arc::new(CardanoStakeDistributionArtifactBuilder::new(stake_store)); + let cardano_database_artifact_builder = Arc::new(CardanoDatabaseArtifactBuilder::new( + self.configuration.db_directory.clone(), + &cardano_node_version, + self.configuration.snapshot_compression_algorithm, + )); let dependencies = SignedEntityServiceArtifactsDependencies::new( mithril_stake_distribution_artifact_builder, cardano_immutable_files_full_artifact_builder, cardano_transactions_artifact_builder, cardano_stake_distribution_artifact_builder, + cardano_database_artifact_builder, ); let signed_entity_service = Arc::new(MithrilSignedEntityService::new( signed_entity_storer, diff --git a/mithril-aggregator/src/services/signed_entity.rs b/mithril-aggregator/src/services/signed_entity.rs index 57b41565f05..ae7d796aba5 100644 --- a/mithril-aggregator/src/services/signed_entity.rs +++ b/mithril-aggregator/src/services/signed_entity.rs @@ -11,9 +11,9 @@ use tokio::task::JoinHandle; use mithril_common::{ entities::{ - BlockNumber, CardanoDbBeacon, CardanoStakeDistribution, CardanoTransactionsSnapshot, - Certificate, Epoch, MithrilStakeDistribution, SignedEntity, SignedEntityType, - SignedEntityTypeDiscriminants, Snapshot, + BlockNumber, CardanoDatabase, CardanoDbBeacon, CardanoStakeDistribution, + CardanoTransactionsSnapshot, Certificate, Epoch, MithrilStakeDistribution, SignedEntity, + SignedEntityType, SignedEntityTypeDiscriminants, Snapshot, }, logging::LoggerExtensions, signable_builder::Artifact, @@ -89,6 +89,7 @@ pub struct MithrilSignedEntityService { signed_entity_type_lock: Arc, cardano_stake_distribution_artifact_builder: Arc>, + cardano_database_artifact_builder: Arc>, metrics_service: Arc, logger: Logger, } @@ -103,6 +104,7 @@ pub struct SignedEntityServiceArtifactsDependencies { Arc>, cardano_stake_distribution_artifact_builder: Arc>, + cardano_database_artifact_builder: Arc>, } impl SignedEntityServiceArtifactsDependencies { @@ -120,12 +122,16 @@ impl SignedEntityServiceArtifactsDependencies { cardano_stake_distribution_artifact_builder: Arc< dyn ArtifactBuilder, >, + cardano_database_artifact_builder: Arc< + dyn ArtifactBuilder, + >, ) -> Self { Self { mithril_stake_distribution_artifact_builder, cardano_immutable_files_full_artifact_builder, cardano_transactions_artifact_builder, cardano_stake_distribution_artifact_builder, + cardano_database_artifact_builder, } } } @@ -149,6 +155,7 @@ impl MithrilSignedEntityService { .cardano_transactions_artifact_builder, cardano_stake_distribution_artifact_builder: dependencies .cardano_stake_distribution_artifact_builder, + cardano_database_artifact_builder: dependencies.cardano_database_artifact_builder, signed_entity_type_lock, metrics_service, logger: logger.new_with_component_name::(), @@ -247,11 +254,16 @@ impl MithrilSignedEntityService { ) })?, )), - SignedEntityType::CardanoDatabase(_) => { - Err(anyhow::anyhow!( - "Signable builder service can not compute artifact for Cardano database because it is not yet implemented." - )) - } + SignedEntityType::CardanoDatabase(beacon) => Ok(Arc::new( + self.cardano_database_artifact_builder + .compute_artifact(beacon, certificate) + .await + .with_context(|| { + format!( + "Signed Entity Service can not compute artifact for entity type: '{signed_entity_type}'" + ) + })? + )), } } @@ -510,6 +522,8 @@ mod tests { MockArtifactBuilder, mock_cardano_stake_distribution_artifact_builder: MockArtifactBuilder, + mock_cardano_database_artifact_builder: + MockArtifactBuilder, } impl MockDependencyInjector { @@ -532,6 +546,10 @@ mod tests { Epoch, CardanoStakeDistribution, >::new(), + mock_cardano_database_artifact_builder: MockArtifactBuilder::< + CardanoDbBeacon, + CardanoDatabase, + >::new(), } } @@ -541,6 +559,7 @@ mod tests { Arc::new(self.mock_cardano_immutable_files_full_artifact_builder), Arc::new(self.mock_cardano_transactions_artifact_builder), Arc::new(self.mock_cardano_stake_distribution_artifact_builder), + Arc::new(self.mock_cardano_database_artifact_builder), ); MithrilSignedEntityService::new( Arc::new(self.mock_signed_entity_storer), @@ -597,6 +616,7 @@ mod tests { Arc::new(cardano_immutable_files_full_long_artifact_builder), Arc::new(self.mock_cardano_transactions_artifact_builder), Arc::new(self.mock_cardano_stake_distribution_artifact_builder), + Arc::new(self.mock_cardano_database_artifact_builder), ); MithrilSignedEntityService::new( Arc::new(self.mock_signed_entity_storer), @@ -822,17 +842,48 @@ mod tests { } #[tokio::test] - async fn build_cardano_database_artifact_when_given_cardano_database_entity_type_return_error() - { - let mock_container = MockDependencyInjector::new(); + async fn build_cardano_database_artifact_when_given_cardano_database_entity_type() { + let mut mock_container = MockDependencyInjector::new(); + + let cardano_database_expected = fake_data::cardano_database_entities(1) + .first() + .unwrap() + .to_owned(); + + mock_container + .mock_cardano_database_artifact_builder + .expect_compute_artifact() + .times(1) + .returning(|_, _| { + Ok(fake_data::cardano_database_entities(1) + .first() + .unwrap() + .to_owned()) + }); + let artifact_builder_service = mock_container.build_artifact_builder_service(); + let certificate = fake_data::certificate("hash".to_string()); let signed_entity_type = SignedEntityType::CardanoDatabase(CardanoDbBeacon::default()); - - artifact_builder_service + let artifact = artifact_builder_service .compute_artifact(signed_entity_type.clone(), &certificate) .await - .expect_err("Should return error because CardanoDatabase is not implemented yet."); + .unwrap(); + + assert_expected(&cardano_database_expected, &artifact); + } + + #[tokio::test] + async fn should_store_the_artifact_when_creating_artifact_for_a_cardano_database() { + generic_test_that_the_artifact_is_stored( + SignedEntityType::CardanoDatabase(CardanoDbBeacon::default()), + fake_data::cardano_database_entities(1) + .first() + .unwrap() + .to_owned(), + &|mock_injector| &mut mock_injector.mock_cardano_database_artifact_builder, + ) + .await; } async fn generic_test_that_the_artifact_is_stored< diff --git a/mithril-common/src/test_utils/fake_data.rs b/mithril-common/src/test_utils/fake_data.rs index 25fc0a312ee..32d5bd16ada 100644 --- a/mithril-common/src/test_utils/fake_data.rs +++ b/mithril-common/src/test_utils/fake_data.rs @@ -5,9 +5,9 @@ use semver::Version; use crate::crypto_helper::{self, ProtocolMultiSignature}; use crate::entities::{ - self, BlockNumber, CertificateMetadata, CertificateSignature, CompressionAlgorithm, Epoch, - LotteryIndex, ProtocolMessage, ProtocolMessagePartKey, SignedEntityType, SingleSignatures, - SlotNumber, StakeDistribution, StakeDistributionParty, + self, ArtifactsLocations, BlockNumber, CertificateMetadata, CertificateSignature, + CompressionAlgorithm, Epoch, LotteryIndex, ProtocolMessage, ProtocolMessagePartKey, + SignedEntityType, SingleSignatures, SlotNumber, StakeDistribution, StakeDistributionParty, }; use crate::test_utils::MithrilFixtureBuilder; @@ -277,3 +277,26 @@ pub fn cardano_stake_distribution(epoch: Epoch) -> entities::CardanoStakeDistrib stake_distribution, } } + +/// Fake Cardano Database entities +pub fn cardano_database_entities(total: u64) -> Vec { + (1..total + 1) + .map(|cardano_database_id| { + let merkle_root = format!("1{cardano_database_id}").repeat(20); + let mut beacon = beacon(); + beacon.immutable_file_number += cardano_database_id; + let total_db_size_uncompressed = cardano_database_id * 100000; + let cardano_node_version = Version::parse("1.0.0").unwrap(); + let locations = ArtifactsLocations::default(); + + entities::CardanoDatabase::new( + merkle_root, + beacon, + total_db_size_uncompressed, + locations, + CompressionAlgorithm::Gzip, + &cardano_node_version, + ) + }) + .collect::>() +} From 41f2c6b49d7d09bc0ecda6367e89c277f826e75a Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:19:24 +0100 Subject: [PATCH 06/14] refactor: simplify and enhance database size computation and improve tests clarity --- .../src/artifact_builder/cardano_database.rs | 80 ++++++++++--------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/mithril-aggregator/src/artifact_builder/cardano_database.rs b/mithril-aggregator/src/artifact_builder/cardano_database.rs index e95d4241690..f07c22b8a42 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database.rs @@ -68,41 +68,43 @@ impl ArtifactBuilder for CardanoDatabaseArtifa } } -// Return the sum of the files size contained in the subdirectories 'immutable', 'ledger' and 'volatile'. +/// Return the sum of the files size contained in the subdirectories 'immutable', 'ledger' and 'volatile'. fn compute_uncompressed_database_size(db_directory: &Path) -> StdResult { let subdirs = ["immutable", "ledger", "volatile"]; - let mut total_db_size_uncompressed = 0; - for subdir in subdirs { - let dir_path = db_directory.join(subdir); + subdirs + .iter() + .map(|subdir| { + let dir_path = db_directory.join(subdir); + compute_fs_entry_size(&dir_path) + .with_context(|| format!("Failed to read metadata for directory: {:?}", dir_path)) + }) + .sum() +} - total_db_size_uncompressed += get_directory_size(&dir_path) - .with_context(|| format!("Failed to read metadata for directory: {:?}", dir_path))?; - } +fn compute_fs_entry_size(path: &Path) -> StdResult { + if path.is_file() { + let metadata = std::fs::metadata(path) + .with_context(|| format!("Failed to read metadata for file: {:?}", path))?; - Ok(total_db_size_uncompressed) -} + return Ok(metadata.len()); + } -fn get_directory_size(path: &Path) -> StdResult { - let entries = - std::fs::read_dir(path).with_context(|| format!("Failed to read directory: {:?}", path))?; - - let mut directory_size = 0; - for entry in entries { - let path = entry - .with_context(|| format!("Failed to read directory entry in {:?}", path))? - .path(); - - if path.is_file() { - let metadata = std::fs::metadata(&path) - .with_context(|| format!("Failed to read metadata for file: {:?}", path))?; - directory_size += metadata.len(); - } else if path.is_dir() { - directory_size += get_directory_size(&path)?; + if path.is_dir() { + let entries = std::fs::read_dir(path) + .with_context(|| format!("Failed to read directory: {:?}", path))?; + let mut directory_size = 0; + for entry in entries { + let path = entry + .with_context(|| format!("Failed to read directory entry in {:?}", path))? + .path(); + directory_size += compute_fs_entry_size(&path)?; } + + return Ok(directory_size); } - Ok(directory_size) + Ok(0) } #[cfg(test)] @@ -125,16 +127,20 @@ mod tests { fn should_compute_the_size_of_the_uncompressed_database_only_immutable_ledger_and_volatile() { let test_dir = get_test_directory("should_compute_the_size_of_the_uncompressed_database_only_immutable_ledger_and_volatile"); + let immutable_file_size = 777; + let ledger_file_size = 6666; + let volatile_file_size = 99; DummyImmutablesDbBuilder::new(test_dir.as_os_str().to_str().unwrap()) .with_immutables(&[1, 2]) - .set_immutable_file_size(1000) + .set_immutable_file_size(immutable_file_size) .with_ledger_files(vec!["blocks-0.dat".to_string()]) - .set_ledger_file_size(5000) + .set_ledger_file_size(ledger_file_size) .with_volatile_files(vec!["437".to_string(), "537".to_string()]) - .set_volatile_file_size(2000) + .set_volatile_file_size(volatile_file_size) .build(); // Number of immutable files = 2 × 3 ('chunk', 'primary' and 'secondary'). - let expected_total_size = 2 * 3 * 1000 + 5000 + 2000 * 2; + let expected_total_size = + (2 * 3 * immutable_file_size) + ledger_file_size + (2 * volatile_file_size); std::fs::write(test_dir.join("non_computed_file.txt"), "file inside root").unwrap(); let non_computed_dir = test_dir.join("non_computed_dir"); @@ -155,15 +161,15 @@ mod tests { let test_dir = get_test_directory("should_compute_valid_artifact"); DummyImmutablesDbBuilder::new(test_dir.as_os_str().to_str().unwrap()) - .with_immutables(&[1, 2]) - .set_immutable_file_size(1000) + .with_immutables(&[1]) + .set_immutable_file_size(100) .with_ledger_files(vec!["blocks-0.dat".to_string()]) - .set_ledger_file_size(5000) - .with_volatile_files(vec!["437".to_string(), "537".to_string()]) - .set_volatile_file_size(2000) + .set_ledger_file_size(100) + .with_volatile_files(vec!["437".to_string()]) + .set_volatile_file_size(100) .build(); - // Number of immutable files = 2 × 3 ('chunk', 'primary' and 'secondary'). - let expected_total_size = 2 * 3 * 1000 + 5000 + 2000 * 2; + // Number of immutable files = 1 × 3 ('chunk', 'primary' and 'secondary'). + let expected_total_size = (3 * 100) + 100 + 100; let cardano_database_artifact_builder = CardanoDatabaseArtifactBuilder::new( test_dir, From 6b27c8b10bea98ecca1276a72f40179c9d299024 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:46:05 +0100 Subject: [PATCH 07/14] refactor: use a more appropriate type to declare an artifact location --- mithril-common/src/entities/cardano_database.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/mithril-common/src/entities/cardano_database.rs b/mithril-common/src/entities/cardano_database.rs index 6b462a6a58c..26147bfd046 100644 --- a/mithril-common/src/entities/cardano_database.rs +++ b/mithril-common/src/entities/cardano_database.rs @@ -53,26 +53,19 @@ impl CardanoDatabase { #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] -pub enum ArtifactLocationType { - Aggregator, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct ArtifactLocationEntry { - #[serde(rename = "type")] - pub location_type: ArtifactLocationType, - pub uri: String, +pub enum ArtifactLocation { + Aggregator(String), } /// Locations of the Cardano database related files. #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct ArtifactsLocations { /// Locations of the file containing the digests of the immutable files. - pub digests: Vec, + pub digests: Vec, /// Locations of the immutable files. - pub immutables: Vec, + pub immutables: Vec, /// Locations of the ancillary files (ledger and volatile). - pub ancillary: Vec, + pub ancillary: Vec, } #[typetag::serde] From 1cd900e166d38ee2103edace1815f05d793ab963 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:00:36 +0100 Subject: [PATCH 08/14] refactor: rename `CardanoDatabase` to `CardanoDatabaseSnapshot` and documentation enhancements --- .../src/artifact_builder/cardano_database.rs | 14 ++++++------ .../src/services/signed_entity.rs | 20 +++++++++-------- .../src/entities/cardano_database.rs | 22 +++++++++---------- mithril-common/src/entities/mod.rs | 2 +- mithril-common/src/test_utils/fake_data.rs | 8 +++---- 5 files changed, 34 insertions(+), 32 deletions(-) diff --git a/mithril-aggregator/src/artifact_builder/cardano_database.rs b/mithril-aggregator/src/artifact_builder/cardano_database.rs index f07c22b8a42..a68f292cfb3 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database.rs @@ -6,8 +6,8 @@ use semver::Version; use mithril_common::{ entities::{ - ArtifactsLocations, CardanoDatabase, CardanoDbBeacon, Certificate, CompressionAlgorithm, - ProtocolMessagePartKey, SignedEntityType, + ArtifactsLocations, CardanoDatabaseSnapshot, CardanoDbBeacon, Certificate, + CompressionAlgorithm, ProtocolMessagePartKey, SignedEntityType, }, StdResult, }; @@ -35,12 +35,12 @@ impl CardanoDatabaseArtifactBuilder { } #[async_trait] -impl ArtifactBuilder for CardanoDatabaseArtifactBuilder { +impl ArtifactBuilder for CardanoDatabaseArtifactBuilder { async fn compute_artifact( &self, beacon: CardanoDbBeacon, certificate: &Certificate, - ) -> StdResult { + ) -> StdResult { let merkle_root = certificate .protocol_message .get_message_part(&ProtocolMessagePartKey::CardanoDatabaseMerkleRoot) @@ -55,7 +55,7 @@ impl ArtifactBuilder for CardanoDatabaseArtifa })?; let total_db_size_uncompressed = compute_uncompressed_database_size(&self.db_directory)?; - let cardano_database = CardanoDatabase::new( + let cardano_database = CardanoDatabaseSnapshot::new( merkle_root.to_string(), beacon, total_db_size_uncompressed, @@ -195,12 +195,12 @@ mod tests { .await .unwrap(); - let artifact_expected = CardanoDatabase::new( + let artifact_expected = CardanoDatabaseSnapshot::new( "merkleroot".to_string(), beacon, expected_total_size, ArtifactsLocations { - digests: vec![], + digest: vec![], immutables: vec![], ancillary: vec![], }, diff --git a/mithril-aggregator/src/services/signed_entity.rs b/mithril-aggregator/src/services/signed_entity.rs index ae7d796aba5..83dacd8d385 100644 --- a/mithril-aggregator/src/services/signed_entity.rs +++ b/mithril-aggregator/src/services/signed_entity.rs @@ -11,7 +11,7 @@ use tokio::task::JoinHandle; use mithril_common::{ entities::{ - BlockNumber, CardanoDatabase, CardanoDbBeacon, CardanoStakeDistribution, + BlockNumber, CardanoDatabaseSnapshot, CardanoDbBeacon, CardanoStakeDistribution, CardanoTransactionsSnapshot, Certificate, Epoch, MithrilStakeDistribution, SignedEntity, SignedEntityType, SignedEntityTypeDiscriminants, Snapshot, }, @@ -89,7 +89,8 @@ pub struct MithrilSignedEntityService { signed_entity_type_lock: Arc, cardano_stake_distribution_artifact_builder: Arc>, - cardano_database_artifact_builder: Arc>, + cardano_database_artifact_builder: + Arc>, metrics_service: Arc, logger: Logger, } @@ -104,7 +105,8 @@ pub struct SignedEntityServiceArtifactsDependencies { Arc>, cardano_stake_distribution_artifact_builder: Arc>, - cardano_database_artifact_builder: Arc>, + cardano_database_artifact_builder: + Arc>, } impl SignedEntityServiceArtifactsDependencies { @@ -123,7 +125,7 @@ impl SignedEntityServiceArtifactsDependencies { dyn ArtifactBuilder, >, cardano_database_artifact_builder: Arc< - dyn ArtifactBuilder, + dyn ArtifactBuilder, >, ) -> Self { Self { @@ -523,7 +525,7 @@ mod tests { mock_cardano_stake_distribution_artifact_builder: MockArtifactBuilder, mock_cardano_database_artifact_builder: - MockArtifactBuilder, + MockArtifactBuilder, } impl MockDependencyInjector { @@ -548,7 +550,7 @@ mod tests { >::new(), mock_cardano_database_artifact_builder: MockArtifactBuilder::< CardanoDbBeacon, - CardanoDatabase, + CardanoDatabaseSnapshot, >::new(), } } @@ -845,7 +847,7 @@ mod tests { async fn build_cardano_database_artifact_when_given_cardano_database_entity_type() { let mut mock_container = MockDependencyInjector::new(); - let cardano_database_expected = fake_data::cardano_database_entities(1) + let cardano_database_expected = fake_data::cardano_database_snapshots(1) .first() .unwrap() .to_owned(); @@ -855,7 +857,7 @@ mod tests { .expect_compute_artifact() .times(1) .returning(|_, _| { - Ok(fake_data::cardano_database_entities(1) + Ok(fake_data::cardano_database_snapshots(1) .first() .unwrap() .to_owned()) @@ -877,7 +879,7 @@ mod tests { async fn should_store_the_artifact_when_creating_artifact_for_a_cardano_database() { generic_test_that_the_artifact_is_stored( SignedEntityType::CardanoDatabase(CardanoDbBeacon::default()), - fake_data::cardano_database_entities(1) + fake_data::cardano_database_snapshots(1) .first() .unwrap() .to_owned(), diff --git a/mithril-common/src/entities/cardano_database.rs b/mithril-common/src/entities/cardano_database.rs index 26147bfd046..bcfd72bf208 100644 --- a/mithril-common/src/entities/cardano_database.rs +++ b/mithril-common/src/entities/cardano_database.rs @@ -6,30 +6,30 @@ use crate::{ signable_builder::Artifact, }; -/// Cardano database incremental. +/// Cardano database snapshot. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct CardanoDatabase { - /// Merkle root of the Cardano database. +pub struct CardanoDatabaseSnapshot { + /// Merkle root of the Cardano database snapshot. pub merkle_root: String, /// Mithril beacon on the Cardano chain. pub beacon: CardanoDbBeacon, - /// Size of the uncompressed Cardano database (including the ledger and volatile) in Bytes. + /// Size of the uncompressed Cardano database files. pub total_db_size_uncompressed: u64, /// Locations of the Cardano database artifacts. pub locations: ArtifactsLocations, - /// Compression algorithm of the Cardano database archives + /// Compression algorithm of the Cardano database artifacts. pub compression_algorithm: CompressionAlgorithm, - /// Version of the Cardano node used to create the archives. + /// Version of the Cardano node used to create the snapshot. pub cardano_node_version: String, } -impl CardanoDatabase { - /// [CardanoDatabase] factory +impl CardanoDatabaseSnapshot { + /// [CardanoDatabaseSnapshot] factory pub fn new( merkle_root: String, beacon: CardanoDbBeacon, @@ -60,8 +60,8 @@ pub enum ArtifactLocation { /// Locations of the Cardano database related files. #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct ArtifactsLocations { - /// Locations of the file containing the digests of the immutable files. - pub digests: Vec, + /// Locations of the the immutable file digests. + pub digest: Vec, /// Locations of the immutable files. pub immutables: Vec, /// Locations of the ancillary files (ledger and volatile). @@ -69,7 +69,7 @@ pub struct ArtifactsLocations { } #[typetag::serde] -impl Artifact for CardanoDatabase { +impl Artifact for CardanoDatabaseSnapshot { fn get_id(&self) -> String { self.merkle_root.clone() } diff --git a/mithril-common/src/entities/mod.rs b/mithril-common/src/entities/mod.rs index 42574b96acd..ee48fb0e0dd 100644 --- a/mithril-common/src/entities/mod.rs +++ b/mithril-common/src/entities/mod.rs @@ -33,7 +33,7 @@ mod type_alias; pub use block_number::BlockNumber; pub use block_range::{BlockRange, BlockRangeLength, BlockRangesSequence}; pub use cardano_chain_point::{BlockHash, ChainPoint}; -pub use cardano_database::{ArtifactsLocations, CardanoDatabase}; +pub use cardano_database::{ArtifactsLocations, CardanoDatabaseSnapshot}; pub use cardano_db_beacon::CardanoDbBeacon; pub use cardano_network::CardanoNetwork; pub use cardano_stake_distribution::CardanoStakeDistribution; diff --git a/mithril-common/src/test_utils/fake_data.rs b/mithril-common/src/test_utils/fake_data.rs index 32d5bd16ada..a976b4e1bac 100644 --- a/mithril-common/src/test_utils/fake_data.rs +++ b/mithril-common/src/test_utils/fake_data.rs @@ -278,8 +278,8 @@ pub fn cardano_stake_distribution(epoch: Epoch) -> entities::CardanoStakeDistrib } } -/// Fake Cardano Database entities -pub fn cardano_database_entities(total: u64) -> Vec { +/// Fake Cardano Database snapshots +pub fn cardano_database_snapshots(total: u64) -> Vec { (1..total + 1) .map(|cardano_database_id| { let merkle_root = format!("1{cardano_database_id}").repeat(20); @@ -289,7 +289,7 @@ pub fn cardano_database_entities(total: u64) -> Vec { let cardano_node_version = Version::parse("1.0.0").unwrap(); let locations = ArtifactsLocations::default(); - entities::CardanoDatabase::new( + entities::CardanoDatabaseSnapshot::new( merkle_root, beacon, total_db_size_uncompressed, @@ -298,5 +298,5 @@ pub fn cardano_database_entities(total: u64) -> Vec { &cardano_node_version, ) }) - .collect::>() + .collect::>() } From 92d63e33dcb0456db925ae644cca1c7e6065d53c Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:11:16 +0100 Subject: [PATCH 09/14] refactor: update `ArtifactLocation` enum to use an internally tagged representation --- mithril-common/src/entities/cardano_database.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mithril-common/src/entities/cardano_database.rs b/mithril-common/src/entities/cardano_database.rs index bcfd72bf208..34d36d76cd0 100644 --- a/mithril-common/src/entities/cardano_database.rs +++ b/mithril-common/src/entities/cardano_database.rs @@ -52,9 +52,10 @@ impl CardanoDatabaseSnapshot { } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] +#[serde(rename_all = "snake_case", tag = "type")] pub enum ArtifactLocation { - Aggregator(String), + Aggregator { uri: String }, + CloudStorage { uri: String }, } /// Locations of the Cardano database related files. From f9d537915b683ed4d52aa35fba77cfca8a496ab2 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 18:00:47 +0100 Subject: [PATCH 10/14] refactor: use specific types for each `ArtifactsLocations` field --- .../src/artifact_builder/cardano_database.rs | 6 +---- .../src/entities/cardano_database.rs | 22 ++++++++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/mithril-aggregator/src/artifact_builder/cardano_database.rs b/mithril-aggregator/src/artifact_builder/cardano_database.rs index a68f292cfb3..68bc96924df 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database.rs @@ -199,11 +199,7 @@ mod tests { "merkleroot".to_string(), beacon, expected_total_size, - ArtifactsLocations { - digest: vec![], - immutables: vec![], - ancillary: vec![], - }, + ArtifactsLocations::default(), CompressionAlgorithm::Zstandard, &Version::parse("1.0.0").unwrap(), ); diff --git a/mithril-common/src/entities/cardano_database.rs b/mithril-common/src/entities/cardano_database.rs index 34d36d76cd0..9c6b5f0284e 100644 --- a/mithril-common/src/entities/cardano_database.rs +++ b/mithril-common/src/entities/cardano_database.rs @@ -53,20 +53,32 @@ impl CardanoDatabaseSnapshot { #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case", tag = "type")] -pub enum ArtifactLocation { +enum DigestLocation { Aggregator { uri: String }, CloudStorage { uri: String }, } +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", tag = "type")] +enum ImmutablesLocation { + CloudStorage { uri: String }, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", tag = "type")] +enum AncillaryLocation { + CloudStorage { uri: String }, +} + /// Locations of the Cardano database related files. #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct ArtifactsLocations { /// Locations of the the immutable file digests. - pub digest: Vec, + digest: Vec, /// Locations of the immutable files. - pub immutables: Vec, - /// Locations of the ancillary files (ledger and volatile). - pub ancillary: Vec, + immutables: Vec, + /// Locations of the ancillary files. + ancillary: Vec, } #[typetag::serde] From c0587d07c7ed8f71f6303aaec454e9ac6968cc66 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 18:12:20 +0100 Subject: [PATCH 11/14] refactor: streamline `compute_uncompressed_database_size` --- .../src/artifact_builder/cardano_database.rs | 27 ++----------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/mithril-aggregator/src/artifact_builder/cardano_database.rs b/mithril-aggregator/src/artifact_builder/cardano_database.rs index 68bc96924df..5fd03acea57 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database.rs @@ -68,21 +68,7 @@ impl ArtifactBuilder for CardanoDataba } } -/// Return the sum of the files size contained in the subdirectories 'immutable', 'ledger' and 'volatile'. -fn compute_uncompressed_database_size(db_directory: &Path) -> StdResult { - let subdirs = ["immutable", "ledger", "volatile"]; - - subdirs - .iter() - .map(|subdir| { - let dir_path = db_directory.join(subdir); - compute_fs_entry_size(&dir_path) - .with_context(|| format!("Failed to read metadata for directory: {:?}", dir_path)) - }) - .sum() -} - -fn compute_fs_entry_size(path: &Path) -> StdResult { +fn compute_uncompressed_database_size(path: &Path) -> StdResult { if path.is_file() { let metadata = std::fs::metadata(path) .with_context(|| format!("Failed to read metadata for file: {:?}", path))?; @@ -98,7 +84,7 @@ fn compute_fs_entry_size(path: &Path) -> StdResult { let path = entry .with_context(|| format!("Failed to read directory entry in {:?}", path))? .path(); - directory_size += compute_fs_entry_size(&path)?; + directory_size += compute_uncompressed_database_size(&path)?; } return Ok(directory_size); @@ -142,15 +128,6 @@ mod tests { let expected_total_size = (2 * 3 * immutable_file_size) + ledger_file_size + (2 * volatile_file_size); - std::fs::write(test_dir.join("non_computed_file.txt"), "file inside root").unwrap(); - let non_computed_dir = test_dir.join("non_computed_dir"); - std::fs::create_dir(&non_computed_dir).unwrap(); - std::fs::write( - non_computed_dir.join("another_non_computed_file.txt"), - "file inside a non computed directory", - ) - .unwrap(); - let total_size = compute_uncompressed_database_size(&test_dir).unwrap(); assert_eq!(expected_total_size, total_size); From 3d0a0c1ecae40583e8f125370e8ac607d5040c36 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 18:42:11 +0100 Subject: [PATCH 12/14] refactor: update `DummyImmutablesDbBuilder` to simplify its usage --- .../src/artifact_builder/cardano_database.rs | 27 ++++++++++--------- .../digesters/cardano_immutable_digester.rs | 2 +- .../digesters/dummy_immutable_db_builder.rs | 17 +++++++----- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/mithril-aggregator/src/artifact_builder/cardano_database.rs b/mithril-aggregator/src/artifact_builder/cardano_database.rs index 5fd03acea57..1e6f54b24a9 100644 --- a/mithril-aggregator/src/artifact_builder/cardano_database.rs +++ b/mithril-aggregator/src/artifact_builder/cardano_database.rs @@ -113,20 +113,19 @@ mod tests { fn should_compute_the_size_of_the_uncompressed_database_only_immutable_ledger_and_volatile() { let test_dir = get_test_directory("should_compute_the_size_of_the_uncompressed_database_only_immutable_ledger_and_volatile"); - let immutable_file_size = 777; + let immutable_trio_file_size = 777; let ledger_file_size = 6666; let volatile_file_size = 99; DummyImmutablesDbBuilder::new(test_dir.as_os_str().to_str().unwrap()) .with_immutables(&[1, 2]) - .set_immutable_file_size(immutable_file_size) - .with_ledger_files(vec!["blocks-0.dat".to_string()]) + .set_immutable_trio_file_size(immutable_trio_file_size) + .with_ledger_files(&["blocks-0.dat", "blocks-1.dat", "blocks-2.dat"]) .set_ledger_file_size(ledger_file_size) - .with_volatile_files(vec!["437".to_string(), "537".to_string()]) + .with_volatile_files(&["437", "537", "637", "737"]) .set_volatile_file_size(volatile_file_size) .build(); - // Number of immutable files = 2 × 3 ('chunk', 'primary' and 'secondary'). let expected_total_size = - (2 * 3 * immutable_file_size) + ledger_file_size + (2 * volatile_file_size); + (2 * immutable_trio_file_size) + (3 * ledger_file_size) + (4 * volatile_file_size); let total_size = compute_uncompressed_database_size(&test_dir).unwrap(); @@ -137,16 +136,18 @@ mod tests { async fn should_compute_valid_artifact() { let test_dir = get_test_directory("should_compute_valid_artifact"); + let immutable_trio_file_size = 777; + let ledger_file_size = 6666; + let volatile_file_size = 99; DummyImmutablesDbBuilder::new(test_dir.as_os_str().to_str().unwrap()) .with_immutables(&[1]) - .set_immutable_file_size(100) - .with_ledger_files(vec!["blocks-0.dat".to_string()]) - .set_ledger_file_size(100) - .with_volatile_files(vec!["437".to_string()]) - .set_volatile_file_size(100) + .set_immutable_trio_file_size(immutable_trio_file_size) + .with_ledger_files(&["blocks-0.dat"]) + .set_ledger_file_size(ledger_file_size) + .with_volatile_files(&["437"]) + .set_volatile_file_size(volatile_file_size) .build(); - // Number of immutable files = 1 × 3 ('chunk', 'primary' and 'secondary'). - let expected_total_size = (3 * 100) + 100 + 100; + let expected_total_size = immutable_trio_file_size + ledger_file_size + volatile_file_size; let cardano_database_artifact_builder = CardanoDatabaseArtifactBuilder::new( test_dir, diff --git a/mithril-common/src/digesters/cardano_immutable_digester.rs b/mithril-common/src/digesters/cardano_immutable_digester.rs index 4b6d88c9a53..d27c26d3568 100644 --- a/mithril-common/src/digesters/cardano_immutable_digester.rs +++ b/mithril-common/src/digesters/cardano_immutable_digester.rs @@ -617,7 +617,7 @@ mod tests { let immutable_db = db_builder("hash_computation_is_quicker_with_a_full_cache") .with_immutables(&(1..=50).collect::>()) .append_immutable_trio() - .set_immutable_file_size(65536) + .set_immutable_trio_file_size(65538) .build(); let cache = MemoryImmutableFileDigestCacheProvider::default(); let logger = TestLogger::stdout(); diff --git a/mithril-common/src/digesters/dummy_immutable_db_builder.rs b/mithril-common/src/digesters/dummy_immutable_db_builder.rs index 2afd6e2f4fc..df5fbe02638 100644 --- a/mithril-common/src/digesters/dummy_immutable_db_builder.rs +++ b/mithril-common/src/digesters/dummy_immutable_db_builder.rs @@ -77,8 +77,8 @@ impl DummyImmutablesDbBuilder { } /// Set ledger files to write to the db in the 'ledger' subdirectory. - pub fn with_ledger_files(&mut self, files: Vec) -> &mut Self { - self.ledger_files_to_write = files; + pub fn with_ledger_files(&mut self, files: &[&str]) -> &mut Self { + self.ledger_files_to_write = files.iter().map(|name| name.to_string()).collect(); self } @@ -89,8 +89,8 @@ impl DummyImmutablesDbBuilder { } /// Set volatile files to write to the db in the 'volatile' subdirectory. - pub fn with_volatile_files(&mut self, files: Vec) -> &mut Self { - self.volatile_files_to_write = files; + pub fn with_volatile_files(&mut self, files: &[&str]) -> &mut Self { + self.volatile_files_to_write = files.iter().map(|f| f.to_string()).collect(); self } @@ -111,8 +111,13 @@ impl DummyImmutablesDbBuilder { /// Set the size of all immutable files written by [build][Self::build] to the given `file_size` in bytes. /// /// Note: by default the size of the produced files is less than a 1kb. - pub fn set_immutable_file_size(&mut self, file_size: u64) -> &mut Self { - self.immutable_file_size = Some(file_size); + pub fn set_immutable_trio_file_size(&mut self, trio_file_size: u64) -> &mut Self { + assert!( + trio_file_size % 3 == 0, + "'trio_file_size' must be a multiple of 3" + ); + + self.immutable_file_size = Some(trio_file_size / 3); self } From 0beef3132d5e75fcab770927999fceecaa5a79d9 Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:22:11 +0100 Subject: [PATCH 13/14] refactor: secure and simplify `/ledger` and `/volatile` directories handling in `DummyImmutablesDbBuilder` --- .../digesters/dummy_immutable_db_builder.rs | 57 +++++++++++-------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/mithril-common/src/digesters/dummy_immutable_db_builder.rs b/mithril-common/src/digesters/dummy_immutable_db_builder.rs index df5fbe02638..9e5ca6d750f 100644 --- a/mithril-common/src/digesters/dummy_immutable_db_builder.rs +++ b/mithril-common/src/digesters/dummy_immutable_db_builder.rs @@ -6,6 +6,10 @@ use std::{ path::{Path, PathBuf}, }; +const IMMUTABLE_DIR: &str = "immutable"; +const LEDGER_DIR: &str = "ledger"; +const VOLATILE_DIR: &str = "volatile"; + /// A [DummyImmutableDb] builder. pub struct DummyImmutablesDbBuilder { dir: PathBuf, @@ -51,7 +55,7 @@ impl DummyImmutablesDbBuilder { /// system temp directory, if it exists already it will be cleaned. pub fn new(dir_name: &str) -> Self { Self { - dir: Self::get_test_dir(dir_name), + dir: get_test_dir(dir_name), immutables_to_write: vec![], non_immutables_to_write: vec![], append_uncompleted_trio: false, @@ -130,7 +134,7 @@ impl DummyImmutablesDbBuilder { if self.append_uncompleted_trio { write_immutable_trio( self.immutable_file_size, - &self.dir, + &self.dir.join(IMMUTABLE_DIR), match immutable_numbers.last() { None => 0, Some(last) => last + 1, @@ -141,40 +145,38 @@ impl DummyImmutablesDbBuilder { for non_immutable in &self.non_immutables_to_write { non_immutables_files.push(write_dummy_file( self.immutable_file_size, - &self.dir, + &self.dir.join(IMMUTABLE_DIR), non_immutable, )); } - if !self.ledger_files_to_write.is_empty() { - let ledger_dir = self.dir.parent().unwrap().join("ledger"); - std::fs::create_dir_all(&ledger_dir).unwrap(); - for filename in &self.ledger_files_to_write { - write_dummy_file(self.ledger_file_size, &ledger_dir, filename); - } - }; - - if !self.volatile_files_to_write.is_empty() { - let volatile_dir = self.dir.parent().unwrap().join("volatile"); - std::fs::create_dir_all(&volatile_dir).unwrap(); - for filename in &self.volatile_files_to_write { - write_dummy_file(self.volatile_file_size, &volatile_dir, filename); - } + for filename in &self.ledger_files_to_write { + write_dummy_file(self.ledger_file_size, &self.dir.join(LEDGER_DIR), filename); + } + + for filename in &self.volatile_files_to_write { + write_dummy_file( + self.volatile_file_size, + &self.dir.join(VOLATILE_DIR), + filename, + ); } DummyImmutableDb { - dir: self.dir.clone(), + dir: self.dir.join(IMMUTABLE_DIR), immutables_files: immutable_numbers .into_iter() - .flat_map(|ifn| write_immutable_trio(self.immutable_file_size, &self.dir, ifn)) + .flat_map(|ifn| { + write_immutable_trio( + self.immutable_file_size, + &self.dir.join(IMMUTABLE_DIR), + ifn, + ) + }) .collect::>(), non_immutables_files, } } - - fn get_test_dir(subdir_name: &str) -> PathBuf { - TempDir::create(subdir_name, "immutable") - } } fn write_immutable_trio( @@ -213,3 +215,12 @@ fn write_dummy_file(optional_size: Option, dir: &Path, filename: &str) -> P file } + +fn get_test_dir(subdir_name: &str) -> PathBuf { + let db_dir = TempDir::create("test_cardano_db", subdir_name); + for subdir_name in [LEDGER_DIR, IMMUTABLE_DIR, VOLATILE_DIR] { + std::fs::create_dir(db_dir.join(subdir_name)).unwrap(); + } + + db_dir +} From d27a9a63e645fd4b78d0c1caf8075192dd26e38d Mon Sep 17 00:00:00 2001 From: Damien Lachaume <135982616+dlachaume@users.noreply.github.com> Date: Thu, 5 Dec 2024 17:51:44 +0100 Subject: [PATCH 14/14] chore: upgrade crate versions * mithril-aggregator from `0.5.119` to `0.5.120` * mithril-common from `0.4.92` to `0.4.93` --- Cargo.lock | 4 ++-- mithril-aggregator/Cargo.toml | 2 +- mithril-common/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d45fae07a7..2a53a0de93e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3588,7 +3588,7 @@ dependencies = [ [[package]] name = "mithril-aggregator" -version = "0.5.119" +version = "0.5.120" dependencies = [ "anyhow", "async-trait", @@ -3745,7 +3745,7 @@ dependencies = [ [[package]] name = "mithril-common" -version = "0.4.92" +version = "0.4.93" dependencies = [ "anyhow", "async-trait", diff --git a/mithril-aggregator/Cargo.toml b/mithril-aggregator/Cargo.toml index 30bc1fdb7a7..7758bc8539d 100644 --- a/mithril-aggregator/Cargo.toml +++ b/mithril-aggregator/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mithril-aggregator" -version = "0.5.119" +version = "0.5.120" description = "A Mithril Aggregator server" authors = { workspace = true } edition = { workspace = true } diff --git a/mithril-common/Cargo.toml b/mithril-common/Cargo.toml index c74863fa44d..7ca31a7b93f 100644 --- a/mithril-common/Cargo.toml +++ b/mithril-common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mithril-common" -version = "0.4.92" +version = "0.4.93" description = "Common types, interfaces, and utilities for Mithril nodes." authors = { workspace = true } edition = { workspace = true }