diff --git a/Cargo.lock b/Cargo.lock index 743e8f8c7b5..4605b64f597 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2122,6 +2122,15 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "hermit-abi" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "725cf19794cf90aa94e65050cb4191ff5d8fa87a498383774c47b332e3af952e" +dependencies = [ + "libc", +] + [[package]] name = "hex-literal" version = "0.2.1" @@ -3134,10 +3143,11 @@ checksum = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" [[package]] name = "num_cpus" -version = "1.10.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcef43580c035376c0705c42792c294b66974abbfd2789b511784023f71f3273" +checksum = "46203554f085ff89c235cd12f7075f3233af9b11ed7c9e16dfe2560d03313ce6" dependencies = [ + "hermit-abi", "libc", ] @@ -3176,6 +3186,7 @@ checksum = "93f5bb2e8e8dec81642920ccff6b61f1eb94fa3020c5a325c9851ff604152409" name = "open-ethereum" version = "2.8.0" dependencies = [ + "account-state", "ansi_term", "atty", "blooms-db", @@ -3183,6 +3194,7 @@ dependencies = [ "cli-signer", "client-traits", "common-types", + "crossbeam-utils 0.7.2", "ctrlc", "dir", "docopt", @@ -3191,6 +3203,7 @@ dependencies = [ "ethcore", "ethcore-accounts", "ethcore-blockchain", + "ethcore-bloom-journal", "ethcore-call-contract", "ethcore-db", "ethcore-io", @@ -3232,6 +3245,7 @@ dependencies = [ "parity-util-mem", "parity-version", "parking_lot 0.10.0", + "patricia-trie-ethereum", "pretty_assertions", "regex", "registrar", @@ -3245,10 +3259,12 @@ dependencies = [ "serde_json", "snapshot", "spec", + "state-db", "tempdir", "term_size", "textwrap 0.9.0", "toml", + "trie-db", "verification", "winapi 0.3.8", ] diff --git a/Cargo.toml b/Cargo.toml index 37388ad3e5b..4ee6c19413d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ license = "GPL-3.0" authors = ["Open Ethereum developers", "Parity Technologies "] [dependencies] 
+account-state = {path = "ethcore/account-state" } ansi_term = "0.11" atty = "0.2.8" blooms-db = { path = "util/blooms-db" } @@ -14,6 +15,7 @@ clap = "2" cli-signer= { path = "cli-signer" } client-traits = { path = "ethcore/client-traits" } common-types = { path = "ethcore/types" } +crossbeam-utils = "0.7.2" ctrlc = { git = "https://github.com/paritytech/rust-ctrlc.git" } dir = { path = "util/dir" } docopt = "1.0" @@ -21,6 +23,7 @@ engine = { path = "ethcore/engine" } ethabi = { version = "9.0.1", optional = true } ethcore = { path = "ethcore", features = ["parity"] } ethcore-accounts = { path = "accounts", optional = true } +ethcore-bloom-journal = { path = "util/bloom" } ethcore-blockchain = { path = "ethcore/blockchain" } ethcore-call-contract = { path = "ethcore/call-contract", optional = true } ethcore-db = { path = "ethcore/db" } @@ -45,7 +48,7 @@ kvdb-rocksdb = "0.6.0" log = "0.4" migration-rocksdb = { path = "util/migration-rocksdb" } node-filter = { path = "ethcore/node-filter" } -num_cpus = "1.2" +num_cpus = "1.12" number_prefix = "0.2" panic_hook = { path = "util/panic-hook" } parity-bytes = "0.1" @@ -61,6 +64,7 @@ parity-updater = { path = "updater" } parity-util-mem = { version = "0.5.1", features = ["jemalloc-global"] } parity-version = { path = "util/version" } parking_lot = "0.10.0" +patricia-trie-ethereum = { path = "util/patricia-trie-ethereum" } regex = "1.0" registrar = { path = "util/registrar" } rlp = "0.4.0" @@ -72,9 +76,11 @@ serde_derive = "1.0" serde_json = "1.0" snapshot = { path = "ethcore/snapshot" } spec = { path = "ethcore/spec" } +state-db = { path = "ethcore/state-db" } term_size = "0.3" textwrap = "0.9" toml = "0.5.6" +trie-db = "0.20.0" verification = { path = "ethcore/verification" } [build-dependencies] diff --git a/accounts/ethstore/cli/Cargo.toml b/accounts/ethstore/cli/Cargo.toml index 4bc70ce16a2..1ff7d36fe45 100644 --- a/accounts/ethstore/cli/Cargo.toml +++ b/accounts/ethstore/cli/Cargo.toml @@ -7,7 +7,7 @@ authors = 
["Parity Technologies "] [dependencies] docopt = "1.0" env_logger = "0.5" -num_cpus = "1.6" +num_cpus = "1.12" rustc-hex = "2.1.0" serde = "1.0" serde_derive = "1.0" diff --git a/ethcore/state-db/src/lib.rs b/ethcore/state-db/src/lib.rs index 60548da352c..0fb973025b6 100644 --- a/ethcore/state-db/src/lib.rs +++ b/ethcore/state-db/src/lib.rs @@ -24,7 +24,7 @@ use ethereum_types::{Address, H256}; use hash_db::HashDB; use keccak_hash::keccak; use kvdb::{DBTransaction, DBValue, KeyValueDB}; -use log::trace; +use log::{debug, trace, warn}; use lru_cache::LruCache; use parking_lot::Mutex; @@ -36,24 +36,25 @@ use journaldb::JournalDB; use keccak_hasher::KeccakHasher; use memory_cache::MemoryLruCache; -/// Value used to initialize bloom bitmap size. -/// -/// Bitmap size is the size in bytes (not bits) that will be allocated in memory. -pub const ACCOUNT_BLOOM_SPACE: usize = 1048576; - -/// Value used to initialize bloom items count. +/// Value used to initialize the bloom items count for new DBs /// /// Items count is an estimation of the maximum number of items to store. -pub const DEFAULT_ACCOUNT_PRESET: usize = 1000000; - -/// Key for a value storing amount of hashes -pub const ACCOUNT_BLOOM_HASHCOUNT_KEY: &'static [u8] = b"account_hash_count"; +// todo[dvdplm] Determine the best value here. Should probably be twice as big. +pub const ACCOUNTS_BLOOM_ITEM_COUNT: u64 = 100_000_000; +/// False positive rate for the accounts bloom filter: 1 in 100. +pub const ACCOUNTS_BLOOM_FP_RATE: f64 = 0.01; +/// Key storing the number of hash functions used in the accounts bloom. +pub const ACCOUNTS_BLOOM_HASHCOUNT_KEY: &'static [u8] = b"account_hash_count"; +/// Key storing number of items the accounts bloom was built to contain. +pub const ACCOUNTS_BLOOM_ITEM_COUNT_KEY: &'static [u8] = b"accounts_bloom_item_count"; const STATE_CACHE_BLOCKS: usize = 12; // The percentage of supplied cache size to go to accounts. 
const ACCOUNT_CACHE_RATIO: usize = 90; +const DB_ERROR: &'static str = "Low-level database error"; + /// Shared canonical state cache. struct AccountCache { /// DB Account cache. `None` indicates that account is known to be missing. @@ -159,43 +160,92 @@ impl StateDB { } } - /// Loads accounts bloom from the database - /// This bloom is used to handle request for the non-existent account fast - pub fn load_bloom(db: &dyn KeyValueDB) -> Bloom { - let hash_count_entry = db.get(COL_ACCOUNT_BLOOM, ACCOUNT_BLOOM_HASHCOUNT_KEY) - .expect("Low-level database error"); - - let hash_count_bytes = match hash_count_entry { - Some(bytes) => bytes, - None => return Bloom::new(ACCOUNT_BLOOM_SPACE, DEFAULT_ACCOUNT_PRESET), - }; - - assert_eq!(hash_count_bytes.len(), 1); - let hash_count = hash_count_bytes[0]; - - let mut bloom_parts = vec![0u64; ACCOUNT_BLOOM_SPACE / 8]; - for i in 0..ACCOUNT_BLOOM_SPACE / 8 { - let key: [u8; 8] = (i as u64).to_le_bytes(); - bloom_parts[i] = db.get(COL_ACCOUNT_BLOOM, &key).expect("low-level database error") - .map(|val| { - assert_eq!(val.len(), 8, "low-level database error"); + fn fetch_bloom_parts(db: &dyn KeyValueDB, bitmap_size: u64) -> Vec<u64> { + let nr_parts = bitmap_size / 8; + let mut bloom_parts = vec![0u64; nr_parts as usize]; + trace!(target: "accounts_bloom", "Fetching bloom from disk. bitmap_size={}, nr_parts={}", bitmap_size, nr_parts); + + let start = std::time::Instant::now(); + for (k, v) in db.iter(COL_ACCOUNT_BLOOM) { + // The only keys in the `COL_ACCOUNT_BLOOM` that are not `u64`s are + // the two keys where we store the number of hash functions for + // legacy blooms (ACCOUNTS_BLOOM_HASHCOUNT_KEY) and the number of + // estimated items for the bloom (ACCOUNTS_BLOOM_ITEM_COUNT_KEY). 
+ if k.len() == 8 { + let part_idx = { let mut buff = [0u8; 8]; - buff.copy_from_slice(&*val); + buff.copy_from_slice(&*k); u64::from_le_bytes(buff) - }) - .unwrap_or(0u64); + }; + if part_idx >= nr_parts { + warn!(target: "accounts_bloom", "Accounts bloom DB has a key out of bounds: {}/{:?}. Expected {} bloom parts.", part_idx, k, nr_parts); + } else { + bloom_parts[part_idx as usize] = { + let mut buff = [0u8; 8]; + buff.copy_from_slice(&*v); + u64::from_le_bytes(buff) + }; + } + } else { + assert!( + &*k == ACCOUNTS_BLOOM_HASHCOUNT_KEY || &*k == ACCOUNTS_BLOOM_ITEM_COUNT_KEY, + "Expect the DB to contain `u64`s or the above two keys – corrupt db?" + ) + } + } + debug!(target: "accounts_bloom", "Fetched the bloom from the DB in {:?}. bloom_parts.len={}", start.elapsed(), bloom_parts.len()); + bloom_parts + } + + /// Loads accounts bloom from the database + /// This bloom is used to quickly handle requests for non-existent accounts. + pub fn load_bloom(db: &dyn KeyValueDB) -> Bloom { + let (bloom, item_count) = + if db.get(COL_ACCOUNT_BLOOM, ACCOUNTS_BLOOM_HASHCOUNT_KEY) + .expect(DB_ERROR) + .is_some() { + // The legacy values for bitmap size and hash function count + // (ACCOUNT_BLOOM_SPACE, DEFAULT_ACCOUNT_PRESET) are not + // optimal, so we can't calculate them. 
+ let parts = Self::fetch_bloom_parts(db, 1048576); + (Bloom::from_parts_legacy(parts, 6), 1_000_000) + } else { + let item_count = + db.get(COL_ACCOUNT_BLOOM, ACCOUNTS_BLOOM_ITEM_COUNT_KEY) + .expect(DB_ERROR) + .and_then(|bytes| { + assert_eq!(bytes.len(), 8, "Expected a u64"); + let mut buf = [0u8; 8]; + buf.copy_from_slice(&*bytes); + let val = u64::from_le_bytes(buf); + trace!(target: "accounts_bloom", "DB has a value under 'accounts_bloom_item_count': {}", val); + Some(val) + }) + // Assume this is a new DB + .unwrap_or_else(|| { + trace!(target: "accounts_bloom", "New database, building default bloom with space for {} accounts", ACCOUNTS_BLOOM_ITEM_COUNT); + let mut tx = DBTransaction::new(); + tx.put(COL_ACCOUNT_BLOOM, ACCOUNTS_BLOOM_ITEM_COUNT_KEY, &ACCOUNTS_BLOOM_ITEM_COUNT.to_le_bytes()); + db.write(tx).expect(DB_ERROR); + ACCOUNTS_BLOOM_ITEM_COUNT + }); + let bitmap_size = Bloom::compute_bitmap_size(item_count, ACCOUNTS_BLOOM_FP_RATE); + let parts = Self::fetch_bloom_parts(db, bitmap_size); + (Bloom::from_parts(parts, item_count), item_count) + }; + + debug!(target: "accounts_bloom", "Bloom saturation: {:?}, hash functions: {:?}, bitmap size: {} bits", + bloom.saturation(), bloom.number_of_hash_functions(), bloom.number_of_bits()); + if bloom.saturation() > 0.9 { + warn!("Your accounts bloom is almost full ({}). Please rebuild it with more space. 
Your current filter uses {} bits and was built for {} accounts.", + bloom.saturation(), bloom.number_of_bits(), item_count); } - let bloom = Bloom::from_parts(&bloom_parts, hash_count as u32); - trace!(target: "account_bloom", "Bloom is {:?} full, hash functions count = {:?}", bloom.saturation(), hash_count); bloom } /// Commit blooms journal to the database transaction pub fn commit_bloom(batch: &mut DBTransaction, journal: BloomJournal) -> io::Result<()> { - assert!(journal.hash_functions <= 255); - batch.put(COL_ACCOUNT_BLOOM, ACCOUNT_BLOOM_HASHCOUNT_KEY, &[journal.hash_functions as u8]); - for (bloom_part_index, bloom_part_value) in journal.entries { let key: [u8; 8] = (bloom_part_index as u64).to_le_bytes(); let val: [u8; 8] = bloom_part_value.to_le_bytes(); @@ -463,13 +513,13 @@ impl account_state::Backend for StateDB { } fn note_non_null_account(&self, address: &Address) { - trace!(target: "account_bloom", "Note account bloom: {:?}", address); + trace!(target: "accounts_bloom", "Note account bloom: {:?}", address); let mut bloom = self.account_bloom.lock(); bloom.set(keccak(address).as_bytes()); } fn is_known_null(&self, address: &Address) -> bool { - trace!(target: "account_bloom", "Check account bloom: {:?}", address); + trace!(target: "accounts_bloom", "Check account bloom: {:?}", address); let bloom = self.account_bloom.lock(); let is_null = !bloom.check(keccak(address).as_bytes()); is_null diff --git a/parity/blockchain.rs b/parity/blockchain.rs index 78730a89f29..dd91c1c61b2 100644 --- a/parity/blockchain.rs +++ b/parity/blockchain.rs @@ -55,7 +55,9 @@ pub enum BlockchainCmd { Import(ImportBlockchain), Export(ExportBlockchain), ExportState(ExportState), - Reset(ResetBlockchain) + Reset(ResetBlockchain), + RebuildAccountsBloom(RebuildAccountsBloom), + RestoreAccountsBloom(RestoreAccountsBloom), } #[derive(Debug, PartialEq)] @@ -79,6 +81,27 @@ pub struct KillBlockchain { pub pruning: Pruning, } +#[derive(Debug, PartialEq)] +/// Rebuild the accounts 
existence-test bloom filter. +pub struct RebuildAccountsBloom { + pub spec: SpecType, + pub dirs: Directories, + pub pruning: Pruning, + pub compaction: DatabaseCompactionProfile, + pub backup_path: Option, + pub account_count: u64, +} + +#[derive(Debug, PartialEq)] +/// Restore the accounts existence-test bloom filter from a backup on disk +pub struct RestoreAccountsBloom { + pub spec: SpecType, + pub dirs: Directories, + pub pruning: Pruning, + pub compaction: DatabaseCompactionProfile, + pub backup_path: String, +} + #[derive(Debug, PartialEq)] pub struct ImportBlockchain { pub spec: SpecType, @@ -152,6 +175,8 @@ pub fn execute(cmd: BlockchainCmd) -> Result<(), String> { BlockchainCmd::Export(export_cmd) => execute_export(export_cmd), BlockchainCmd::ExportState(export_cmd) => execute_export_state(export_cmd), BlockchainCmd::Reset(reset_cmd) => execute_reset(reset_cmd), + BlockchainCmd::RebuildAccountsBloom(cmd) => rebuild_accounts_bloom(cmd), + BlockchainCmd::RestoreAccountsBloom(cmd) => restore_accounts_bloom(cmd), } } @@ -308,8 +333,6 @@ fn execute_import_light(cmd: ImportBlockchain) -> Result<(), String> { } fn execute_import(cmd: ImportBlockchain) -> Result<(), String> { - let timer = Instant::now(); - // load spec file let spec = cmd.spec.spec(&cmd.dirs.cache)?; @@ -405,19 +428,20 @@ fn execute_import(cmd: ImportBlockchain) -> Result<(), String> { )); service.register_io_handler(informant).map_err(|_| "Unable to register informant handler".to_owned())?; - + let timer = Instant::now(); client.import_blocks(instream, cmd.format)?; - + let elapsed = timer.elapsed(); + let ms = timer.elapsed().as_millis(); // save user defaults user_defaults.pruning = algorithm; user_defaults.tracing = tracing; user_defaults.fat_db = fat_db; user_defaults.save(&user_defaults_path)?; + std::thread::sleep(Duration::from_secs(1)); let report = client.report(); - let elapsed = timer.elapsed(); - let ms = timer.elapsed().as_millis(); - info!("Import completed in {} seconds, {} 
blocks, {} blk/s, {} transactions, {} tx/s, {} Mgas, {} Mgas/s", + + info!("Import completed in {} seconds, {} blocks, {} blk/s, {} transactions, {} tx/s, {} Mgas, {} Mgas/s, {} state DB memory", elapsed.as_secs(), report.blocks_imported, (report.blocks_imported as u128 * 1000) / ms, @@ -425,6 +449,16 @@ fn execute_import(cmd: ImportBlockchain) -> Result<(), String> { (report.transactions_applied as u128 * 1000) / ms, report.gas_processed / 1_000_000, report.gas_processed / (ms * 1000), + report.state_db_mem, + ); + info!("Import i/o stats. {} reads, {} bytes read, {} cached reads, {} bytes cached reads, {} writes, {} bytes written, {} db transactions", + report.io_stats.reads, + report.io_stats.bytes_read, + report.io_stats.cache_reads, + report.io_stats.cache_read_bytes, + report.io_stats.writes, + report.io_stats.bytes_written, + report.io_stats.transactions, ); Ok(()) } @@ -674,6 +708,53 @@ pub fn kill_db(cmd: KillBlockchain) -> Result<(), String> { Ok(()) } +pub fn rebuild_accounts_bloom(cmd: RebuildAccountsBloom) -> Result<(), String> { + use super::rebuild_accounts_bloom::rebuild_accounts_bloom; + info!("Rebuilding accounts bloom"); + let spec = cmd.spec.spec(&cmd.dirs.cache)?; + let genesis_hash = spec.genesis_header().hash(); + let db_dirs = cmd.dirs.database(genesis_hash, None, spec.data_dir.clone()); + let user_defaults_path = db_dirs.user_defaults_path(); + let user_defaults = UserDefaults::load(&user_defaults_path)?; + let algorithm = cmd.pruning.to_algorithm(&user_defaults); + let db_path = db_dirs.client_path(algorithm); + let compaction = super::db::compaction_profile( + &cmd.compaction, + &db_path, + ); + + rebuild_accounts_bloom( + &db_path, + compaction, + cmd.backup_path, + cmd.account_count, + ).map_err(|e| e.to_string() )?; + Ok(()) +} + +pub fn restore_accounts_bloom(cmd: RestoreAccountsBloom) -> Result<(), String> { + use super::rebuild_accounts_bloom::restore_accounts_bloom; + info!("Restoring accounts bloom from backup at {}", 
cmd.backup_path); + let spec = cmd.spec.spec(&cmd.dirs.cache)?; + let genesis_hash = spec.genesis_header().hash(); + let db_dirs = cmd.dirs.database(genesis_hash, None, spec.data_dir.clone()); + let user_defaults_path = db_dirs.user_defaults_path(); + let user_defaults = UserDefaults::load(&user_defaults_path)?; + let algorithm = cmd.pruning.to_algorithm(&user_defaults); + let db_path = db_dirs.client_path(algorithm); + let compaction = super::db::compaction_profile( + &cmd.compaction, + &db_path, + ); + + restore_accounts_bloom( + &db_path, + compaction, + cmd.backup_path, + ).map_err(|e| e.to_string() )?; + Ok(()) +} + #[cfg(test)] mod test { use super::DataFormat; diff --git a/parity/cli/mod.rs b/parity/cli/mod.rs index 6f08a074722..566389faf16 100644 --- a/parity/cli/mod.rs +++ b/parity/cli/mod.rs @@ -195,7 +195,7 @@ usage! { "Restore the database of the given --chain (default: mainnet) from a snapshot file", ARG arg_restore_file: (Option) = None, - "[FILE]", + "", "Path to the file to restore from", } @@ -229,6 +229,23 @@ usage! { "Number of blocks to revert", } + CMD cmd_db_rebuild_accounts_bloom { + "Rebuild the accounts bloom filter. Iterate over all accounts in the state db and add its address to the bloom filter. Can take a very long time for big databases. The old bloom is backed up before starting the rebuilding process. To restore from a backup, use `db restore-accounts-bloom`, but be aware that you can only restore a bloom from backup if the target db is at the same block as the backup file.", + // todo[dvdplm]: how to make this optional? It doesn't seem possible atm. + ARG arg_db_rebuild_accounts_bloom_backup_path: (Option) = None, + "--backup-path=", "Path to accounts bloom backup", + + ARG arg_db_rebuild_accounts_bloom_account_count: (u64) = 100_000_000u64, + "--accounts=", "The number of accounts the bloom should handle", + } + + CMD cmd_db_restore_accounts_bloom { + "Restore the accounts bloom filter from a backup file on disk. 
This is a destructive operation. Make sure that the chain you are restoring to is at the same block as the backup or verification errors are nigh inevitable.", + ARG arg_db_restore_accounts_bloom_backup_path: (String) = "", + "--backup-path=", + "Path to accounts bloom backup file", + } + } CMD cmd_export_hardcoded_sync @@ -1692,6 +1709,8 @@ mod tests { cmd_db: false, cmd_db_kill: false, cmd_db_reset: false, + cmd_db_rebuild_accounts_bloom: false, + cmd_db_restore_accounts_bloom: false, cmd_export_hardcoded_sync: false, // Arguments @@ -1704,6 +1723,9 @@ mod tests { arg_export_state_format: None, arg_snapshot_file: None, arg_restore_file: None, + arg_db_rebuild_accounts_bloom_backup_path: None, + arg_db_rebuild_accounts_bloom_account_count: 100000000u64, + arg_db_restore_accounts_bloom_backup_path: "".to_string(), arg_tools_hash_file: None, arg_enable_signing_queue: false, diff --git a/parity/cli/usage.rs b/parity/cli/usage.rs index 26b5fa728d8..a7320b46ce4 100644 --- a/parity/cli/usage.rs +++ b/parity/cli/usage.rs @@ -679,21 +679,21 @@ macro_rules! 
usage { // Subcommand arguments $( raw_args.$subc_arg = return_if_parse_error!(if_option!( + $($subc_arg_type_tt)+, + THEN { + if_option_vec!( $($subc_arg_type_tt)+, - THEN { - if_option_vec!( - $($subc_arg_type_tt)+, - THEN { values_t!(submatches, stringify!($subc_arg), inner_option_vec_type!($($subc_arg_type_tt)+)) } - ELSE { value_t!(submatches, stringify!($subc_arg), inner_option_type!($($subc_arg_type_tt)+)) } - ) - } - ELSE { - if_vec!( - $($subc_arg_type_tt)+, - THEN { values_t!(submatches, stringify!($subc_arg), inner_vec_type!($($subc_arg_type_tt)+)) } - ELSE { value_t!(submatches, stringify!($subc_arg), $($subc_arg_type_tt)+) } - ) - } + THEN { values_t!(submatches, stringify!($subc_arg), inner_option_vec_type!($($subc_arg_type_tt)+)) } + ELSE { value_t!(submatches, stringify!($subc_arg), inner_option_type!($($subc_arg_type_tt)+)) } + ) + } + ELSE { + if_vec!( + $($subc_arg_type_tt)+, + THEN { values_t!(submatches, stringify!($subc_arg), inner_vec_type!($($subc_arg_type_tt)+)) } + ELSE { value_t!(submatches, stringify!($subc_arg), $($subc_arg_type_tt)+) } + ) + } )); )* // Sub-subcommands diff --git a/parity/configuration.rs b/parity/configuration.rs index eabe46cef94..91436e1df2e 100644 --- a/parity/configuration.rs +++ b/parity/configuration.rs @@ -47,7 +47,7 @@ use secretstore::{NodeSecretKey, Configuration as SecretStoreConfiguration, Cont use updater::{UpdatePolicy, UpdateFilter, ReleaseTrack}; use run::RunCmd; use types::data_format::DataFormat; -use blockchain::{BlockchainCmd, ImportBlockchain, ExportBlockchain, KillBlockchain, ExportState, ResetBlockchain}; +use blockchain::{BlockchainCmd, ImportBlockchain, ExportBlockchain, KillBlockchain, ExportState, ResetBlockchain, RebuildAccountsBloom, RestoreAccountsBloom}; use export_hardcoded_sync::ExportHsyncCmd; use presale::ImportWallet; use account::{AccountCmd, NewAccount, ListAccounts, ImportAccounts, ImportFromGethAccounts}; @@ -194,16 +194,33 @@ impl Configuration { })) } else if self.args.cmd_db 
&& self.args.cmd_db_kill { Cmd::Blockchain(BlockchainCmd::Kill(KillBlockchain { - spec: spec, - dirs: dirs, - pruning: pruning, + spec, + dirs, + pruning, + })) + } else if self.args.cmd_db && self.args.cmd_db_rebuild_accounts_bloom { + Cmd::Blockchain(BlockchainCmd::RebuildAccountsBloom(RebuildAccountsBloom { + spec, + dirs, + pruning, + compaction, + backup_path: self.args.arg_db_rebuild_accounts_bloom_backup_path, + account_count: self.args.arg_db_rebuild_accounts_bloom_account_count, + })) + } else if self.args.cmd_db && self.args.cmd_db_restore_accounts_bloom { + Cmd::Blockchain(BlockchainCmd::RestoreAccountsBloom(RestoreAccountsBloom { + spec, + dirs, + pruning, + compaction, + backup_path: self.args.arg_db_restore_accounts_bloom_backup_path, })) } else if self.args.cmd_account { let account_cmd = if self.args.cmd_account_new { let new_acc = NewAccount { iterations: key_iterations, path: dirs.keys, - spec: spec, + spec, password_file: self.accounts_config()?.password_files.first().map(|x| x.to_owned()), }; AccountCmd::New(new_acc) diff --git a/parity/db/mod.rs b/parity/db/mod.rs index 9edc444a96f..378ab8b35a2 100644 --- a/parity/db/mod.rs +++ b/parity/db/mod.rs @@ -19,4 +19,4 @@ #[path="rocksdb/mod.rs"] mod impls; -pub use self::impls::{open_db_light, restoration_db_handler, migrate}; +pub use self::impls::{open_db_light, restoration_db_handler, migrate, compaction_profile}; diff --git a/parity/db/rocksdb/mod.rs b/parity/db/rocksdb/mod.rs index f1818b735bc..be277c0bb13 100644 --- a/parity/db/rocksdb/mod.rs +++ b/parity/db/rocksdb/mod.rs @@ -38,6 +38,7 @@ mod migration; mod helpers; pub use self::migration::migrate; +pub use self::helpers::compaction_profile; struct AppDB { key_value: Arc, diff --git a/parity/lib.rs b/parity/lib.rs index 82599dc5c34..a16e7ba7df8 100644 --- a/parity/lib.rs +++ b/parity/lib.rs @@ -104,14 +104,16 @@ mod blockchain; mod cache; mod cli; mod configuration; -mod export_hardcoded_sync; +mod db; mod deprecated; +mod 
export_hardcoded_sync; mod helpers; mod informant; mod light_helpers; mod modules; mod params; mod presale; +mod rebuild_accounts_bloom; mod rpc; mod rpc_apis; mod run; @@ -120,7 +122,6 @@ mod signer; mod snapshot_cmd; mod upgrade; mod user_defaults; -mod db; use std::fs::File; use std::io::BufReader; diff --git a/parity/rebuild_accounts_bloom.rs b/parity/rebuild_accounts_bloom.rs new file mode 100644 index 00000000000..24a73d66630 --- /dev/null +++ b/parity/rebuild_accounts_bloom.rs @@ -0,0 +1,330 @@ +// Copyright 2015-2020 Parity Technologies (UK) Ltd. +// This file is part of Open Ethereum. + +// Open Ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Open Ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Open Ethereum. If not, see <http://www.gnu.org/licenses/>. + +//! Resize the accounts bloom filter for modern times. The accounts bloom +//! filter provides a way to check if a given account (`Address`) exists or not +//! without touching the database. The filter cannot be resized with less than a +//! complete rebuild, i.e. iterate over all accounts in the state database and +//! mark each account in the bloom bitmap. At the time of writing the number of +//! ethereum accounts is ~85M and increasing. This module implements backing up, +//! clearing, rebuilding and restoring the accounts bloom filter. 
+ +extern crate account_state; +extern crate ethcore_bloom_journal as accounts_bloom; +extern crate kvdb_rocksdb; +extern crate patricia_trie_ethereum as ethtrie; +extern crate state_db; +extern crate trie_db; +extern crate crossbeam_utils; + +use std::{ + path::Path, + sync::{Arc, atomic::{AtomicU64, Ordering}}, +}; + +use ethcore_db::{COL_EXTRA, COL_HEADERS, COL_STATE, COL_ACCOUNT_BLOOM}; +use ethereum_types::{H256, U256}; +use journaldb; +use kvdb::DBTransaction; +use parking_lot::Mutex; +use self::{ + account_state::account::Account as StateAccount, + accounts_bloom::Bloom, // todo[dvdplm] rename this crate + crossbeam_utils::thread, + ethtrie::TrieDB, + kvdb_rocksdb::{CompactionProfile, Database, DatabaseConfig}, + state_db::{StateDB, ACCOUNTS_BLOOM_ITEM_COUNT_KEY}, + trie_db::Trie, +}; +use types::{ + BlockNumber, + errors::EthcoreError as Error, + views::{HeaderView, ViewRlp}, +}; +use rlp::{RlpStream, Rlp}; + +pub fn rebuild_accounts_bloom>( + db_path: P, + compaction: CompactionProfile, + backup_path: Option, + account_count: u64, +) -> Result<(), Error> { + let db_config = DatabaseConfig { + compaction, + columns: ethcore_db::NUM_COLUMNS, + ..Default::default() + }; + let db_path_str = db_path.as_ref().to_string_lossy(); + let db = Arc::new(Database::open(&db_config, &db_path_str)?); + + let (state_root, best_block) = load_state_root(db.clone())?; + + // todo[dvdplm] I can't make the `--backup-path` optional with the `usage!` + // macro so having `Option` here is pretty useless – it must be + // specified. For the time being we'll always make a backup. 
+ if let Some(backup_path) = backup_path { + let backup_path = dir::helpers::replace_home("", &backup_path); + let backup_path = Path::new(&backup_path); + backup_bloom(&backup_path, db.clone(), best_block)?; + } + + rebuild_bloom(db, account_count, state_root, best_block)?; + Ok(()) +} + +pub fn restore_accounts_bloom>( + db_path: P, + compaction: CompactionProfile, + backup_path: String, +) -> Result<(), Error> { + let db_config = DatabaseConfig { + compaction, + columns: ethcore_db::NUM_COLUMNS, + ..Default::default() + }; + let db_path_str = db_path.as_ref().to_string_lossy(); + let db = Arc::new(Database::open(&db_config, &db_path_str)?); + + let backup_path = dir::helpers::replace_home("", &backup_path); + restore_bloom(&backup_path, db.clone())?; + Ok(()) +} + +fn load_state_root(db: Arc) -> Result<(H256, BlockNumber), Error> { + let best_block_hash = match db.get(COL_EXTRA, b"best")? { + None => { + warn!(target: "migration", "No best block hash, skipping"); + return Err(Error::Msg("No best block hash in the DB.".to_owned())); + }, + Some(hash) => hash, + }; + let best_block_header = match db.get(COL_HEADERS, &best_block_hash)? { + // no best block, nothing to do + None => { + warn!(target: "migration", "No best block header, skipping"); + return Err(Error::Msg("No best block header in the DB.".to_owned())); + }, + Some(x) => x, + }; + let view = ViewRlp::new(&best_block_header, "", 1); + let header = HeaderView::new(view); + let best_block_nr = header.number(); + let state_root = header.state_root(); + Ok((state_root, best_block_nr)) +} + +fn backup_bloom>( + bloom_backup_path: &P, + source: Arc, + best_block: BlockNumber, +) -> Result<(), Error> { + let num_keys = source.num_keys(COL_ACCOUNT_BLOOM)? 
/ 2; + if num_keys == 0 { + warn!("No bloom in the DB to back up"); + return Ok(()) + } + + let mut bloom_backup = std::fs::File::create(bloom_backup_path) + .map_err(|_| format!("Cannot write to file at path: {}", bloom_backup_path.as_ref().display()))?; + + info!("Saving old bloom as of block #{} to '{}'", best_block, bloom_backup_path.as_ref().display()); + let mut stream = RlpStream::new(); + stream.begin_unbounded_list(); + for (n, (k, v)) in source.iter(COL_ACCOUNT_BLOOM).enumerate() { + stream + .begin_list(2) + .append(&k.to_vec()) + .append(&v.to_vec()); + if n > 0 && n % 50_000 == 0 { + info!(" Bloom entries processed: {}", n); + } + } + stream.finalize_unbounded_list(); + + use std::io::Write; + let written = bloom_backup.write(&stream.out())?; + info!("Saved old bloom as of block #{} to '{}' ({} bytes, {} keys)", best_block, bloom_backup_path.as_ref().display(), written, num_keys); + Ok(()) +} + +fn restore_bloom>( + bloom_backup_path: &P, + db: Arc +) -> Result<(), Error> { + let mut bloom_backup = std::fs::File::open(bloom_backup_path)?; + info!("Restoring bloom from '{}'", bloom_backup_path.as_ref().display()); + let mut buf = Vec::with_capacity(10_000_000); + use std::io::Read; + // todo[dvdplm]: this is a little terrible – what's the better way? 
+ let bytes_read = bloom_backup.read_to_end(&mut buf)?; + let rlp = Rlp::new(&buf); + let item_count = rlp.item_count()?; + info!("{} bloom key/values and {} bytes read from disk", item_count, bytes_read); + + let mut batch = DBTransaction::with_capacity(item_count); + for (n, kv_rlp) in rlp.iter().enumerate() { + let kv: Vec> = kv_rlp.as_list()?; + assert_eq!(kv.len(), 2); + batch.put(COL_ACCOUNT_BLOOM, &kv[0], &kv[1]); + if n > 0 && n % 10_000 == 0 { + info!(" Bloom entries prepared for restoration: {}", n); + } + } + clear_bloom(db.clone())?; + db.write(batch)?; + db.flush()?; + info!("Bloom restored (wrote {} entries, {} bytes)", item_count, bytes_read); + Ok(()) +} + +fn clear_bloom(db: Arc) -> Result<(), Error> { + let num_keys = db.num_keys(COL_ACCOUNT_BLOOM)? / 2; + info!("Clearing out old accounts bloom ({} keys)", num_keys); + let mut batch = DBTransaction::with_capacity(num_keys as usize); + for (n, (k,_)) in db.iter(COL_ACCOUNT_BLOOM).enumerate() { + batch.delete(COL_ACCOUNT_BLOOM, &k); + if n > 0 && n % 50_000 == 0 { + info!(" Bloom entries queued for deletion: {}", n); + } + } + let deletions = batch.ops.len(); + db.write(batch)?; + db.flush().map_err(|e| Error::StdIo(e))?; + info!("Deleted {} old bloom items from the DB", deletions); + Ok(()) +} + +/// Rebuild the account bloom. +fn rebuild_bloom( + source: Arc, + account_count: u64, + state_root: H256, + best_block: BlockNumber, +) -> Result<(), Error> { + let num_keys = source.num_keys(COL_STATE)? / 2; + info!(target: "migration", "Accounts bloom rebuild started for chain at #{}. 
There are {} accounts in the DB (estimate).", best_block, num_keys); + if account_count <= num_keys { + warn!("Rebuilding the bloom with space for {} accounts when the DB contains {} keys is not a good idea: the bloom filter will be saturated right away.", + account_count, num_keys + ); + } + clear_bloom(source.clone())?; + + + // Progress counters + let empty_accounts = Arc::new(AtomicU64::new(0)); + let non_empty_accounts = Arc::new(AtomicU64::new(0)); + let total_accounts = Arc::new(AtomicU64::new(0)); + + let state_db = journaldb::new( + source.clone(), + // It does not matter which `journaldb::Algorithm` is used since + // there will be no writes to the state column. + journaldb::Algorithm::OverlayRecent, + COL_STATE); + + let db = state_db.as_hash_db(); + let start = std::time::Instant::now(); + + // 1 thread: 49627s –> ~14h + // 4 threads: 10825s –> ~3h + // 6 threads: 9399s –> ~2.6h + // 12 threads: 9401s -> ~2.6h (slightly bigger chain though) + // 16 threads: 8805s –> ~2.45h + let threads = num_cpus::get(); + // Chunk up the state in this many parts; each thread will be assigned one part at a time. 
+ const STATE_SUBPARTS: usize = 16; + let bloom_result = thread::scope(|scope| -> Result>, Error> { + let bloom = Bloom::new_for_fp_rate(account_count, 0.01); + let bloom = Arc::new(Mutex::new(bloom)); + for thr_idx in 0..threads { + let tb = scope.builder().name(format!("accounts worker #{}", thr_idx).to_string()); + let my_bloom = bloom.clone(); + let my_total_accounts = total_accounts.clone(); + let my_non_empty_accounts = non_empty_accounts.clone(); + let my_empty_accounts = empty_accounts.clone(); + tb.spawn(move |_| -> Result<(), Error> { + let mut part_start = std::time::Instant::now(); + // Don't insert empty accounts into the bloom + let empty_account_rlp = StateAccount::new_basic(U256::zero(), U256::zero()).rlp(); + for part in (thr_idx..STATE_SUBPARTS).step_by(threads) { + info!("Processing part {} of the accounts in thread {}", part, thr_idx); + let account_trie = TrieDB::new(&db, &state_root)?; + let mut account_iter = account_trie.iter()?; + + // Seek to the start of this data segment + let mut seek_from = vec![0; 32]; + seek_from[0] = (part * STATE_SUBPARTS) as u8; + account_iter.seek(&seek_from)?; + // Set the upper-bound for this section of the data (but let the last part finish the whole range). + let seek_to = + if part < STATE_SUBPARTS - 1 { + Some(((part + 1) * STATE_SUBPARTS) as u8) + } else { + None + }; + let mut batch_start = std::time::Instant::now(); + for (n, (account_key, account_data)) in account_iter.filter_map(Result::ok).enumerate() { + if seek_to.map_or(false, |seek_to| account_key[0] >= seek_to) { + my_total_accounts.fetch_add(n as u64, Ordering::Relaxed); + let sat = my_bloom.lock().saturation(); + info!(" {} accounts processed in {:?} – end of part {} by thread {}. 
Bloom saturation: {}", n, part_start.elapsed(), part, thr_idx, sat); + part_start = std::time::Instant::now(); + + break; + } + if n > 0 && n % 50_000 == 0 { + info!(" Accounts processed: {} in {:?} by thread {}", n, batch_start.elapsed(), thr_idx); + batch_start = std::time::Instant::now(); + } + if account_data != empty_account_rlp { + my_bloom.lock().set(&account_key); + my_non_empty_accounts.fetch_add(1, Ordering::Relaxed); + } else { + my_empty_accounts.fetch_add(1, Ordering::Relaxed); + } + } + } + Ok(()) + })?; + } + Ok(bloom) + }); + + let bloom = match bloom_result { + Ok(bloom_arc) => bloom_arc?, + Err(e) => { + warn!("One of the bloom-building threads panicked: {:?}", e); + return Err("One of the bloom-building threads panicked".into()) + } + }; + let mut bloom = bloom.lock(); + info!("Finished iterating over {} accounts as of block #{} in: {:?}. Bloom saturation: {}", + total_accounts.load(Ordering::Relaxed), best_block, start.elapsed(), bloom.saturation()); + let bloom_journal = bloom.drain_journal(); + info!(target: "migration", "Generated {} bloom entries; the DB has {} empty accounts and {} non-empty accounts", + bloom_journal.entries.len(), empty_accounts.load(Ordering::Relaxed), non_empty_accounts.load(Ordering::Relaxed)); + info!(target: "migration", "New bloom has {} k_bits (aka 'hash functions') and a bitmap size of {} bits", bloom_journal.hash_functions, bloom.number_of_bits()); + let mut batch = DBTransaction::new(); + StateDB::commit_bloom(&mut batch, bloom_journal)?; + // Write the size of the bloom we just built to the db so we can load&rebuild the bloom at startup + batch.put(COL_ACCOUNT_BLOOM, ACCOUNTS_BLOOM_ITEM_COUNT_KEY, &account_count.to_le_bytes()); + source.write(batch)?; + source.flush()?; + info!(target: "migration", "Finished bloom update for chain at #{}", best_block); + + Ok(()) +} diff --git a/util/bloom/src/lib.rs b/util/bloom/src/lib.rs index 35aab538b3b..fa00a3ca0db 100644 --- a/util/bloom/src/lib.rs +++ 
b/util/bloom/src/lib.rs @@ -17,6 +17,7 @@ use std::{cmp, mem, f64}; use std::hash::{Hash, Hasher}; use std::collections::HashSet; +use std::f64::consts::LN_2; use siphasher::sip::SipHasher; /// BitVec structure with journalling @@ -28,7 +29,7 @@ struct BitVecJournal { } impl BitVecJournal { - pub fn new(size: usize) -> BitVecJournal { + fn new(size: usize) -> BitVecJournal { let extra = if size % 64 > 0 { 1 } else { 0 }; BitVecJournal { elems: vec![0u64; size / 64 + extra], @@ -36,14 +37,14 @@ impl BitVecJournal { } } - pub fn from_parts(parts: &[u64]) -> BitVecJournal { + fn from_parts(parts: Vec) -> BitVecJournal { BitVecJournal { - elems: parts.to_vec(), + elems: parts, journal: HashSet::new(), } } - pub fn set(&mut self, index: usize) { + fn set(&mut self, index: usize) { let e_index = index / 64; let bit_index = index % 64; let val = self.elems.get_mut(e_index).unwrap(); @@ -51,18 +52,18 @@ impl BitVecJournal { self.journal.insert(e_index); } - pub fn get(&self, index: usize) -> bool { + fn get(&self, index: usize) -> bool { let e_index = index / 64; let bit_index = index % 64; self.elems[e_index] & (1 << bit_index) != 0 } - pub fn drain(&mut self) -> Vec<(usize, u64)> { + fn drain(&mut self) -> Vec<(usize, u64)> { let journal = mem::replace(&mut self.journal, HashSet::new()).into_iter(); journal.map(|idx| (idx, self.elems[idx])).collect::>() } - pub fn saturation(&self) -> f64 { + fn saturation(&self) -> f64 { self.elems.iter().fold(0u64, |acc, e| acc + e.count_ones() as u64) as f64 / (self.elems.len() * 64) as f64 } } @@ -76,12 +77,12 @@ pub struct Bloom { impl Bloom { /// Create a new bloom filter structure. - /// bitmap_size is the size in bytes (not bits) that will be allocated in memory - /// items_count is an estimation of the maximum number of items to store. 
- pub fn new(bitmap_size: usize, items_count: usize) -> Bloom { - assert!(bitmap_size > 0 && items_count > 0); - let bitmap_bits = (bitmap_size as u64) * 8u64; - let k_num = Bloom::optimal_k_num(bitmap_bits, items_count); + /// `bitmap_size` is the size in bytes (not bits) that will be allocated in memory + /// `items_count` is an estimation of the maximum number of items to store. + fn new(bitmap_size: u64, item_count: u64) -> Bloom { + assert!(bitmap_size > 0 && item_count > 0); + let bitmap_bits = bitmap_size * 8; + let k_num = Bloom::optimal_k_num(bitmap_bits, item_count); let bitmap = BitVecJournal::new(bitmap_bits as usize); Bloom { bitmap, @@ -90,35 +91,42 @@ impl Bloom { } } - /// Initializes bloom filter from saved state - pub fn from_parts(parts: &[u64], k_num: u32) -> Bloom { + /// The legacy accounts bloom filter used non-optimal parameters that cannot + /// be calculated with the facilities in this crate, hence this method that + /// allows the instantiation of a non-optimal filter so that older databases + /// can continue to work. DO NOT USE FOR OTHER PURPOSES. + pub fn from_parts_legacy(parts: Vec, k_num: u32) -> Bloom { + let bitmap_bits = parts.len() as u64 * 64 ; + let bitmap = BitVecJournal::from_parts(parts); + Bloom { bitmap, bitmap_bits, k_num } + } + + /// Initializes a bloom filter from saved state + pub fn from_parts(parts: Vec, item_count: u64) -> Bloom { let bitmap_size = parts.len() * 8; let bitmap_bits = (bitmap_size as u64) * 8u64; let bitmap = BitVecJournal::from_parts(parts); - Bloom { - bitmap, - bitmap_bits, - k_num, - } + let k_num = Self::optimal_k_num(bitmap_bits, item_count); + Bloom { bitmap, bitmap_bits, k_num } } /// Create a new bloom filter structure. - /// items_count is an estimation of the maximum number of items to store. 
- /// fp_p is the wanted rate of false positives, in ]0.0, 1.0[ - pub fn new_for_fp_rate(items_count: usize, fp_p: f64) -> Bloom { - let bitmap_size = Bloom::compute_bitmap_size(items_count, fp_p); - Bloom::new(bitmap_size, items_count) + /// `item_count` is an estimation of the maximum number of items to store. + /// `fp_p` is the desired false positives rate, in ]0.0, 1.0[ + pub fn new_for_fp_rate(item_count: u64, fp_p: f64) -> Bloom { + let bitmap_size = Bloom::compute_bitmap_size(item_count, fp_p); + Bloom::new(bitmap_size, item_count) } - /// Compute a recommended bitmap size for items_count items + /// Compute a recommended Bloom bitmap size in bytes for `items_count` items /// and a fp_p rate of false positives. - /// fp_p obviously has to be within the ]0.0, 1.0[ range. - pub fn compute_bitmap_size(items_count: usize, fp_p: f64) -> usize { - assert!(items_count > 0); + /// `fp_p` obviously has to be within the ]0.0, 1.0[ range. + pub fn compute_bitmap_size(item_count: u64, fp_p: f64) -> u64 { + assert!(item_count > 0); assert!(fp_p > 0.0 && fp_p < 1.0); - let log2 = f64::consts::LN_2; - let log2_2 = log2 * log2; - ((items_count as f64) * f64::ln(fp_p) / (-8.0 * log2_2)).ceil() as usize + let bitmap_size = ((item_count as f64) * f64::ln(fp_p) / (-8.0 * LN_2 * LN_2)).ceil() as u64; + // Round up to nearest multiple of 8 because we need to use this to index u64s + ((bitmap_size + 7) / 8) * 8 } /// Records the presence of an item. @@ -157,10 +165,12 @@ impl Bloom { self.k_num } - fn optimal_k_num(bitmap_bits: u64, items_count: usize) -> u32 { + /// The optimal number of hash functions for a given bitmap size and item + /// count is calculated as `bits-per-item * ln(2)`. 
+ fn optimal_k_num(bitmap_bits: u64, item_count: u64) -> u32 { let m = bitmap_bits as f64; - let n = items_count as f64; - let k_num = (m / n * f64::ln(2.0f64)).ceil() as u32; + let n = item_count as f64; + let k_num = (m / n * LN_2).ceil() as u32; cmp::max(k_num, 1) } @@ -218,25 +228,42 @@ mod tests { #[test] fn journalling() { + // Set up bloom a with 512 bits and 120 estimated items stored; we'll get a `k` of 3… let initial = vec![0u64; 8]; - let mut bloom = Bloom::from_parts(&initial, 3); + let mut bloom = Bloom::from_parts(initial, 120); + // …which will cause this particular key… bloom.set(&vec![5u8, 4]); let drain = bloom.drain_journal(); - + // …to set one bit in two different entries. assert_eq!(2, drain.entries.len()) } #[test] fn saturation() { + // Set up bloom a with 512 bits and 120 estimated items stored; we'll get a `k` of 3… let initial = vec![0u64; 8]; - let mut bloom = Bloom::from_parts(&initial, 3); + let mut bloom = Bloom::from_parts(initial, 120); + // …which will cause this particular key to set one bit in two different entries. 
bloom.set(&vec![5u8, 4]); let full = bloom.saturation(); - // 2/8/64 = 0.00390625 + // 2 bits touched, over 8 entries where each entry has 64 bits, so 2/8/64 = 0.00390625 assert!(full >= 0.0039f64 && full <= 0.004f64); } + #[test] + fn test_compute_bitmap_size() { + use std::f64::consts::LN_2; + let bitmap_size = Bloom::compute_bitmap_size(10_000_000, 0.01); + // ~12Mbytes + let expected_size_in_bits = (-(10_000_000 as f64 * f64::ln(0.01)) / ( LN_2 * LN_2)).ceil() as u64; + // rounded up to nearest multiple of 8 + let expected_size_in_bytes = (((expected_size_in_bits / 8) + 7) / 8) * 8; + assert_eq!(bitmap_size, expected_size_in_bytes); + let bloom = Bloom::new( bitmap_size,10_000_000); + assert_eq!(bloom.number_of_hash_functions(), 7); + } + #[test] fn hash_backward_compatibility_for_new() { let ss = vec!["you", "should", "not", "break", "hash", "backward", "compatibility"]; @@ -255,7 +282,7 @@ mod tests { fn hash_backward_compatibility_for_from_parts() { let stored_state = vec![2094615114573771027u64, 244675582389208413u64]; let k_num = 12; - let bloom = Bloom::from_parts(&stored_state, k_num); + let bloom = Bloom::from_parts(stored_state, k_num); let ss = vec!["you", "should", "not", "break", "hash", "backward", "compatibility"]; let tt = vec!["this", "doesnot", "exist"]; diff --git a/util/dir/src/helpers.rs b/util/dir/src/helpers.rs index 7cdf3565090..ec0dd845430 100644 --- a/util/dir/src/helpers.rs +++ b/util/dir/src/helpers.rs @@ -23,6 +23,8 @@ pub fn replace_home(base: &str, arg: &str) -> String { // We use an `if` so that we don't need to call `home_dir()` if not necessary. let r = if arg.contains("$HOME") { arg.replace("$HOME", home_dir().expect("$HOME isn't defined").to_str().unwrap()) + } else if arg.contains("~") { + arg.replace("~", home_dir().expect("$HOME isn't defined").to_str().unwrap()) } else { arg.to_owned() };