From a2a3257ef3e73d21392e0437106cf09f956554a8 Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 23 Oct 2019 15:28:42 +0200 Subject: [PATCH 01/27] Comments and todos Use `snapshot_sync` as logging target --- Cargo.toml | 2 +- ethcore/snapshot/src/traits.rs | 4 +-- ethcore/sync/src/chain/handler.rs | 44 ++++++++++++++++++------------- ethcore/sync/src/chain/mod.rs | 30 +++++++++++---------- ethcore/sync/src/snapshot_sync.rs | 42 ++++++++++++++++++++++------- 5 files changed, 77 insertions(+), 45 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 407f0aa4a69..514b3fdf261 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,7 +120,7 @@ name = "parity" [profile.release] debug = false -lto = true +lto = false [workspace] # This should only list projects that are not diff --git a/ethcore/snapshot/src/traits.rs b/ethcore/snapshot/src/traits.rs index b72d6b7afe0..4e064d20c59 100644 --- a/ethcore/snapshot/src/traits.rs +++ b/ethcore/snapshot/src/traits.rs @@ -51,8 +51,8 @@ pub trait SnapshotService : Sync + Send { fn status(&self) -> RestorationStatus; /// Begin snapshot restoration. - /// If restoration in-progress, this will reset it. - /// From this point on, any previous snapshot may become unavailable. + /// If a restoration is in progress, this will reset it. + /// From this point on, any previous snapshot may become unavailable. todo[dvdplm] what does this mean? "unavailable" how? to others? fn begin_restore(&self, manifest: ManifestData); /// Abort an in-progress restoration if there is one. diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index 08ed6503511..7d513ee56f5 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -117,14 +117,13 @@ impl SyncHandler { sync.active_peers.remove(&peer_id); if sync.state == SyncState::SnapshotManifest { - // Check if we are asking other peers for - // the snapshot manifest as well. 
- // If not, return to initial state - let still_asking_manifest = sync.peers.iter() + // Check if we are asking other peers for a snapshot manifest as well. If not, + // set our state to initial state (`Idle` or `WaitingPeers`). + let still_seeking_manifest = sync.peers.iter() .filter(|&(id, p)| sync.active_peers.contains(id) && p.asking == PeerAsking::SnapshotManifest) - .next().is_none(); + .next().is_some(); - if still_asking_manifest { + if !still_seeking_manifest { sync.state = ChainSync::get_init_state(sync.warp_sync, io.chain()); } } @@ -466,12 +465,12 @@ impl SyncHandler { /// Called when snapshot manifest is downloaded from a peer. fn on_snapshot_manifest(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> { if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) { - trace!(target: "sync", "Ignoring snapshot manifest from unconfirmed peer {}", peer_id); + trace!(target: "snapshot_sync", "Ignoring snapshot manifest from unconfirmed peer {}", peer_id); return Ok(()); } sync.clear_peer_download(peer_id); if !sync.reset_peer_asking(peer_id, PeerAsking::SnapshotManifest) || sync.state != SyncState::SnapshotManifest { - trace!(target: "sync", "{}: Ignored unexpected/expired manifest", peer_id); + trace!(target: "snapshot_sync", "{}: Ignored unexpected/expired manifest", peer_id); return Ok(()); } @@ -482,10 +481,12 @@ impl SyncHandler { .map_or(false, |(l, h)| manifest.version >= l && manifest.version <= h); if !is_supported_version { - trace!(target: "sync", "{}: Snapshot manifest version not supported: {}", peer_id, manifest.version); + warn!(target: "snapshot_sync", "{}: Snapshot manifest version not supported: {}", peer_id, manifest.version); return Err(DownloaderImportError::Invalid); } sync.snapshot.reset_to(&manifest, &keccak(manifest_rlp.as_raw())); + debug!(target: "snapshot_sync", "{}: Peer sent a snapshot manifest we can use. 
Block number #{}, block chunks: {}, state chunks: {}", + manifest.block_number, manifest.block_hashes.len(), manifest.state_hashes.len()); io.snapshot_service().begin_restore(manifest); sync.state = SyncState::SnapshotData; @@ -495,12 +496,12 @@ impl SyncHandler { /// Called when snapshot data is downloaded from a peer. fn on_snapshot_data(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> { if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) { - trace!(target: "sync", "Ignoring snapshot data from unconfirmed peer {}", peer_id); + trace!(target: "snapshot_sync", "Ignoring snapshot data from unconfirmed peer {}", peer_id); return Ok(()); } sync.clear_peer_download(peer_id); if !sync.reset_peer_asking(peer_id, PeerAsking::SnapshotData) || (sync.state != SyncState::SnapshotData && sync.state != SyncState::SnapshotWaiting) { - trace!(target: "sync", "{}: Ignored unexpected snapshot data", peer_id); + trace!(target: "snapshot_sync", "{}: Ignored unexpected snapshot data", peer_id); return Ok(()); } @@ -508,12 +509,13 @@ impl SyncHandler { let status = io.snapshot_service().status(); match status { RestorationStatus::Inactive | RestorationStatus::Failed => { - trace!(target: "sync", "{}: Snapshot restoration aborted", peer_id); + trace!(target: "snapshot_sync", "{}: Snapshot restoration aborted", peer_id); sync.state = SyncState::WaitingPeers; // only note bad if restoration failed. if let (Some(hash), RestorationStatus::Failed) = (sync.snapshot.snapshot_hash(), status) { - trace!(target: "sync", "Noting snapshot hash {} as bad", hash); + // todo[dvdplm]: how do we know we `Failed` because of them and not because something happened on our end, say disk was full? + debug!(target: "snapshot_sync", "Marking snapshot hash {} as bad", hash); sync.snapshot.note_bad(hash); } @@ -521,30 +523,34 @@ impl SyncHandler { return Ok(()); }, RestorationStatus::Initializing { .. 
} => { - trace!(target: "warp", "{}: Snapshot restoration is initializing", peer_id); + trace!(target: "snapshot_sync", "{}: Snapshot restoration is initializing", peer_id); return Ok(()); } RestorationStatus::Finalizing => { - trace!(target: "warp", "{}: Snapshot finalizing restoration", peer_id); + trace!(target: "snapshot_sync", "{}: Snapshot finalizing restoration", peer_id); return Ok(()); } RestorationStatus::Ongoing { .. } => { - trace!(target: "sync", "{}: Snapshot restoration is ongoing", peer_id); + trace!(target: "snapshot_sync", "{}: Snapshot restoration is ongoing", peer_id); }, } let snapshot_data: Bytes = r.val_at(0)?; match sync.snapshot.validate_chunk(&snapshot_data) { Ok(ChunkType::Block(hash)) => { - trace!(target: "sync", "{}: Processing block chunk", peer_id); + trace!(target: "snapshot_sync", "{}: Processing block chunk", peer_id); io.snapshot_service().restore_block_chunk(hash, snapshot_data); } Ok(ChunkType::State(hash)) => { - trace!(target: "sync", "{}: Processing state chunk", peer_id); + trace!(target: "snapshot_sync", "{}: Processing state chunk", peer_id); io.snapshot_service().restore_state_chunk(hash, snapshot_data); } Err(()) => { - trace!(target: "sync", "{}: Got bad snapshot chunk", peer_id); + // todo[dvdplm] this seems wrong: we'll get `Err(())` back even if we happened to + // have seen this chunk already. Should a peer be disconnected if they happen to + // send us the same chunk twice? Can't we be downloading chunks from more than one + // peer? 
+ trace!(target: "snapshot_sync", "{}: Got bad snapshot chunk", peer_id); io.disconnect_peer(peer_id); return Ok(()); } diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 780bda34d66..d867444833a 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -176,6 +176,7 @@ const MAX_TRANSACTION_PACKET_SIZE: usize = 5 * 1024 * 1024; const SNAPSHOT_RESTORE_THRESHOLD: BlockNumber = 30000; const SNAPSHOT_MIN_PEERS: usize = 3; +// todo[dvdplm] One problem with warp-sync is when the remote peer we're downloading from goes offline. Would it make sense to increase the number of chunks here so we download as much data as we can? const MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD: usize = 3; const WAIT_PEERS_TIMEOUT: Duration = Duration::from_secs(5); @@ -319,7 +320,7 @@ pub struct PeerInfo { latest_hash: H256, /// Peer total difficulty if known difficulty: Option, - /// Type of data currenty being requested from peer. + /// Type of data currently being requested from peer. asking: PeerAsking, /// A set of block numbers being requested asking_blocks: Vec, @@ -787,7 +788,7 @@ impl ChainSync { pub fn reset_and_continue(&mut self, io: &mut dyn SyncIo) { trace!(target: "sync", "Restarting"); if self.state == SyncState::SnapshotData { - debug!(target:"sync", "Aborting snapshot restore"); + debug!(target:"snapshot_sync", "Aborting snapshot restore"); io.snapshot_service().abort_restore(); } self.snapshot.clear(); @@ -804,11 +805,11 @@ impl ChainSync { fn maybe_start_snapshot_sync(&mut self, io: &mut dyn SyncIo) { if !self.warp_sync.is_enabled() || io.snapshot_service().supported_versions().is_none() { - trace!(target: "sync", "Skipping warp sync. Disabled or not supported."); + trace!(target: "snapshot_sync", "Skipping warp sync. Disabled or not supported."); return; } if self.state != SyncState::WaitingPeers && self.state != SyncState::Blocks && self.state != SyncState::Waiting { - trace!(target: "sync", "Skipping warp sync. 
State: {:?}", self.state); + trace!(target: "snapshot_sync", "Skipping warp sync. State: {:?}", self.state); return; } // Make sure the snapshot block is not too far away from best block and network best block and @@ -824,7 +825,7 @@ impl ChainSync { //collect snapshot infos from peers let snapshots = self.peers.iter() .filter(|&(_, p)| p.is_allowed() && p.snapshot_number.map_or(false, |sn| - // Snapshot must be old enough that it's usefull to sync with it + // Snapshot must be old enough that it's useful to sync with it our_best_block < sn && (sn - our_best_block) > SNAPSHOT_RESTORE_THRESHOLD && // Snapshot must have been taken after the Fork sn > fork_block && @@ -856,14 +857,14 @@ impl ChainSync { if let (Some(hash), Some(peers)) = (best_hash, best_hash.map_or(None, |h| snapshot_peers.get(&h))) { if max_peers >= SNAPSHOT_MIN_PEERS { - trace!(target: "sync", "Starting confirmed snapshot sync {:?} with {:?}", hash, peers); + debug!(target: "snapshot_sync", "Starting confirmed snapshot sync {:?} with {:?}", hash, peers); self.start_snapshot_sync(io, peers); } else if timeout { - trace!(target: "sync", "Starting unconfirmed snapshot sync {:?} with {:?}", hash, peers); + debug!(target: "snapshot_sync", "Starting unconfirmed snapshot sync {:?} with {:?}", hash, peers); self.start_snapshot_sync(io, peers); } } else if timeout && !self.warp_sync.is_warp_only() { - trace!(target: "sync", "No snapshots found, starting full sync"); + debug!(target: "snapshot_sync", "No snapshots found, starting full sync"); self.set_state(SyncState::Idle); self.continue_sync(io); } @@ -927,7 +928,7 @@ impl ChainSync { ).collect(); if peers.len() > 0 { - trace!( + debug!( target: "sync", "Syncing with peers: {} active, {} available, {} total", self.active_peers.len(), peers.len(), self.peers.len() @@ -989,8 +990,8 @@ impl ChainSync { match self.state { SyncState::WaitingPeers => { trace!( - target: "sync", - "Checking snapshot sync: {} vs {} (peer: {})", + target: "snapshot_sync", + "Can we 
snapshot from them? Their highest block: #{} vs our highest: #{} (peer: {})", peer_snapshot_number, chain_info.best_block_number, peer_id @@ -1042,13 +1043,14 @@ impl ChainSync { // Initialize the snapshot if not already done self.snapshot.initialize(io.snapshot_service()); if self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize > MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD { - trace!(target: "sync", "Snapshot queue full, pausing sync"); + trace!(target: "snapshot_sync", "Snapshot queue full, pausing sync"); self.set_state(SyncState::SnapshotWaiting); return; } }, - RestorationStatus::Initializing { .. } => { - trace!(target: "warp", "Snapshot is stil initializing."); + RestorationStatus::Initializing { state_chunks, block_chunks, chunks_done } => { + debug!(target: "snapshot_sync", "Snapshot is still initializing: state chunks={}, block chunks={}, chunks done={}", + state_chunks, block_chunks, chunks_done); return; }, _ => { diff --git a/ethcore/sync/src/snapshot_sync.rs b/ethcore/sync/src/snapshot_sync.rs index 84b7b3e71a3..ae6d51bd5f4 100644 --- a/ethcore/sync/src/snapshot_sync.rs +++ b/ethcore/sync/src/snapshot_sync.rs @@ -24,18 +24,30 @@ use snapshot::SnapshotService; use common_types::snapshot::ManifestData; #[derive(PartialEq, Eq, Debug)] +/// The type of data contained in a chunk: state or block. pub enum ChunkType { + /// The chunk contains state data (aka account data). State(H256), + /// The chunk contains block data. Block(H256), } #[derive(Default, MallocSizeOf)] pub struct Snapshot { + /// List of hashes of the state chunks we need to complete the warp sync from this snapshot. + /// These hashes are contained in the Manifest we downloaded from the peer(s). pending_state_chunks: Vec, + /// List of hashes of the block chunks we need to complete the warp sync from this snapshot. + /// These hashes are contained in the Manifest we downloaded from the peer(s). 
pending_block_chunks: Vec, + /// Set of hashes of chunks we are currently downloading. downloading_chunks: HashSet, + /// The set of chunks (block or state) that we have successfully downloaded. completed_chunks: HashSet, + /// The hash of the `ManifestData` RLP that we're downloading. snapshot_hash: Option, + /// Set of chunk hashes we failed to import. + // todo[dvdplm]: check ^^^ bad_hashes: HashSet, initialized: bool, } @@ -57,7 +69,7 @@ impl Snapshot { } trace!( - target: "snapshot", + target: "snapshot_sync", "Snapshot is now initialized with {} completed chunks.", self.completed_chunks.len(), ); @@ -65,7 +77,7 @@ impl Snapshot { self.initialized = true; } - /// Clear everything. + /// Clear everything and set `initialized` to false. pub fn clear(&mut self) { self.pending_state_chunks.clear(); self.pending_block_chunks.clear(); @@ -75,12 +87,13 @@ impl Snapshot { self.initialized = false; } - /// Check if currently downloading a snapshot. + /// Check if we're currently downloading a snapshot. pub fn have_manifest(&self) -> bool { self.snapshot_hash.is_some() } - /// Reset collection for a manifest RLP + /// Clear the `Snapshot` and reset it with data from a `ManifestData` (i.e. the lists of + /// block&state chunk hashes contained in the `ManifestData`). pub fn reset_to(&mut self, manifest: &ManifestData, hash: &H256) { self.clear(); self.pending_state_chunks = manifest.state_hashes.clone(); @@ -88,11 +101,13 @@ impl Snapshot { self.snapshot_hash = Some(hash.clone()); } - /// Validate chunk and mark it as downloaded + /// Check if the chunk is known, i.e. downloaded already or currently downloading; if so add + /// it to the `completed_chunks` set. + /// Returns a `ChunkType` with the hash of the chunk.
pub fn validate_chunk(&mut self, chunk: &[u8]) -> Result { let hash = keccak(chunk); if self.completed_chunks.contains(&hash) { - trace!(target: "sync", "Ignored proccessed chunk: {:x}", hash); + trace!(target: "snapshot_sync", "Already processed chunk {:x}. Ignoring.", hash); return Err(()); } self.downloading_chunks.remove(&hash); @@ -104,11 +119,12 @@ impl Snapshot { self.completed_chunks.insert(hash.clone()); return Ok(ChunkType::State(hash)); } - trace!(target: "sync", "Ignored unknown chunk: {:x}", hash); + trace!(target: "snapshot_sync", "Ignoring unknown chunk: {:x}", hash); Err(()) } /// Find a chunk to download + // todo[dvdplm] plenty of iterating through the pending `Vec`s here (which are never deleted from btw) pub fn needed_chunk(&mut self) -> Option { // Find next needed chunk: first block, then state chunks let chunk = { @@ -136,32 +152,40 @@ impl Snapshot { chunk } + /// Remove a chunk from the set of chunks we're interested in downloading. pub fn clear_chunk_download(&mut self, hash: &H256) { self.downloading_chunks.remove(hash); } - // note snapshot hash as bad. + /// Mark a snapshot hash as bad. pub fn note_bad(&mut self, hash: H256) { self.bad_hashes.insert(hash); } - // whether snapshot hash is known to be bad. + /// Whether snapshot hash is known to be bad. pub fn is_known_bad(&self, hash: &H256) -> bool { self.bad_hashes.contains(hash) } + /// Hash of the snapshot we're currently downloading/importing. + // todo[dvdplm]: verify ^^^ pub fn snapshot_hash(&self) -> Option { self.snapshot_hash } + /// Total number of chunks in the snapshot we're currently working on (state + block chunks). + // todo[dvdplm]: verify ^^^ pub fn total_chunks(&self) -> usize { self.pending_block_chunks.len() + self.pending_state_chunks.len() } + /// Number of chunks we've processed so far (state and block chunks). + // todo[dvdplm]: verify ^^^ pub fn done_chunks(&self) -> usize { self.completed_chunks.len() } + /// Are we done downloading all chunks?
pub fn is_complete(&self) -> bool { self.total_chunks() == self.completed_chunks.len() } From 86f04508f66f361dffed615d596868f1319eb0fe Mon Sep 17 00:00:00 2001 From: David Palm Date: Thu, 24 Oct 2019 13:36:16 +0200 Subject: [PATCH 02/27] fix compilation --- ethcore/sync/src/chain/handler.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index 7d513ee56f5..7834250a26e 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -43,7 +43,7 @@ use ethereum_types::{H256, U256}; use keccak_hash::keccak; use network::PeerId; use network::client_version::ClientVersion; -use log::{debug, trace, error}; +use log::{debug, trace, error, warn}; use rlp::Rlp; use common_types::{ BlockNumber, @@ -486,7 +486,7 @@ impl SyncHandler { } sync.snapshot.reset_to(&manifest, &keccak(manifest_rlp.as_raw())); debug!(target: "snapshot_sync", "{}: Peer sent a snapshot manifest we can use. Block number #{}, block chunks: {}, state chunks: {}", - manifest.block_number, manifest.block_hashes.len(), manifest.state_hashes.len()); + peer_id, manifest.block_number, manifest.block_hashes.len(), manifest.state_hashes.len()); io.snapshot_service().begin_restore(manifest); sync.state = SyncState::SnapshotData; From e250df2d886125e615d001d0564b85748ebd2876 Mon Sep 17 00:00:00 2001 From: David Palm Date: Thu, 24 Oct 2019 19:46:25 +0200 Subject: [PATCH 03/27] More todos, more logs --- ethcore/sync/src/api.rs | 2 +- ethcore/sync/src/block_sync.rs | 2 +- ethcore/sync/src/chain/handler.rs | 8 ++--- ethcore/sync/src/chain/mod.rs | 48 ++++++++++++++++++++--------- ethcore/sync/src/chain/requester.rs | 4 +-- ethcore/sync/src/sync_io.rs | 32 +++++++++++++------ ethcore/types/src/snapshot.rs | 6 ++-- 7 files changed, 66 insertions(+), 36 deletions(-) diff --git a/ethcore/sync/src/api.rs b/ethcore/sync/src/api.rs index 17c32eca5de..a8a7a99fc3c 100644 --- a/ethcore/sync/src/api.rs +++ 
b/ethcore/sync/src/api.rs @@ -295,7 +295,7 @@ pub struct EthSync { light_subprotocol_name: [u8; 3], /// Priority tasks notification channel priority_tasks: Mutex>, - /// for state tracking + /// Track the sync state: are we importing or verifying blocks? is_major_syncing: Arc } diff --git a/ethcore/sync/src/block_sync.rs b/ethcore/sync/src/block_sync.rs index 8c9ede44767..7eb324a0689 100644 --- a/ethcore/sync/src/block_sync.rs +++ b/ethcore/sync/src/block_sync.rs @@ -309,7 +309,7 @@ impl BlockDownloader { } } } - + // Update the highest block number seen on the network from the header. if let Some((number, _)) = last_header { if self.highest_block.as_ref().map_or(true, |n| number > *n) { self.highest_block = Some(number); diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index 7834250a26e..81a22d85c8e 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -509,13 +509,13 @@ impl SyncHandler { let status = io.snapshot_service().status(); match status { RestorationStatus::Inactive | RestorationStatus::Failed => { - trace!(target: "snapshot_sync", "{}: Snapshot restoration aborted", peer_id); + trace!(target: "snapshot_sync", "{}: Snapshot restoration status: {:?}", peer_id, status); sync.state = SyncState::WaitingPeers; // only note bad if restoration failed. if let (Some(hash), RestorationStatus::Failed) = (sync.snapshot.snapshot_hash(), status) { // todo[dvdplm]: how do we know we `Failed` because of them and not because something happened on our end, say disk was full? - debug!(target: "snapshot_sync", "Marking snapshot hash {} as bad", hash); + debug!(target: "snapshot_sync", "Marking snapshot manifest hash {} as bad", hash); sync.snapshot.note_bad(hash); } @@ -523,11 +523,11 @@ impl SyncHandler { return Ok(()); }, RestorationStatus::Initializing { .. 
} => { - trace!(target: "snapshot_sync", "{}: Snapshot restoration is initializing", peer_id); + trace!(target: "snapshot_sync", "{}: Snapshot restoration is initializing. Can't accept data right now.", peer_id); return Ok(()); } RestorationStatus::Finalizing => { - trace!(target: "snapshot_sync", "{}: Snapshot finalizing restoration", peer_id); + trace!(target: "snapshot_sync", "{}: Snapshot finalizing restoration. Can't accept data right now.", peer_id); return Ok(()); } RestorationStatus::Ongoing { .. } => { diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 72cd2738959..3d9b8b2fe53 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -277,7 +277,7 @@ impl SyncStatus { } #[derive(PartialEq, Eq, Debug, Clone)] -/// Peer data type requested +/// Peer data type requested from a peer by us. pub enum PeerAsking { Nothing, ForkHeader, @@ -297,7 +297,7 @@ pub enum BlockSet { /// Missing old blocks OldBlocks, } -#[derive(Clone, Eq, PartialEq)] +#[derive(Clone, Eq, PartialEq, Debug)] pub enum ForkConfirmation { /// Fork block confirmation pending. Unconfirmed, @@ -307,7 +307,7 @@ pub enum ForkConfirmation { Confirmed, } -#[derive(Clone)] +#[derive(Clone, Debug)] /// Syncing peer information pub struct PeerInfo { /// eth protocol version @@ -320,6 +320,7 @@ pub struct PeerInfo { latest_hash: H256, /// Peer total difficulty if known difficulty: Option, + // todo[dvdplm]: clarify if these "asking*" fields are things **we** ask **them** for or vice versa /// Type of data currently being requested from peer. asking: PeerAsking, /// A set of block numbers being requested @@ -616,7 +617,7 @@ pub struct ChainSync { state: SyncState, /// Last block number for the start of sync starting_block: BlockNumber, - /// Highest block number seen + /// Highest block number seen on the network. 
highest_block: Option, /// All connected peers peers: Peers, @@ -803,9 +804,10 @@ impl ChainSync { self.active_peers.remove(&peer_id); } + // Called once per second. fn maybe_start_snapshot_sync(&mut self, io: &mut dyn SyncIo) { if !self.warp_sync.is_enabled() || io.snapshot_service().supported_versions().is_none() { - trace!(target: "snapshot_sync", "Skipping warp sync. Disabled or not supported."); +// trace!(target: "snapshot_sync", "Skipping warp sync. Disabled or not supported."); return; } if self.state != SyncState::WaitingPeers && self.state != SyncState::Blocks && self.state != SyncState::Waiting { @@ -822,22 +824,33 @@ impl ChainSync { WarpSync::OnlyAndAfter(block) => block, _ => 0, }; - //collect snapshot infos from peers + // Collect snapshot info from peers: filter out expired peers and peers from whom we do + // not have fork confirmation. let snapshots = self.peers.iter() .filter(|&(_, p)| p.is_allowed() && p.snapshot_number.map_or(false, |sn| - // Snapshot must be old enough that it's useful to sync with it + // Snapshot must be sufficiently better than what we have that it's useful to + // sync with it: more than 30k blocks beyond our best block our_best_block < sn && (sn - our_best_block) > SNAPSHOT_RESTORE_THRESHOLD && - // Snapshot must have been taken after the Fork + // Snapshot must have been taken after the fork block (if any is configured) sn > fork_block && // Snapshot must be greater than the warp barrier if any sn > expected_warp_block && - // If we know a highest block, snapshot must be recent enough + // If we know what the highest block on the network is, snapshot must be recent + // enough: either it's better than the highest we've seen on the network or it's + // within 30k blocks of the best we've seen. 
self.highest_block.map_or(true, |highest| { highest < sn || (highest - sn) <= SNAPSHOT_RESTORE_THRESHOLD }) )) .filter_map(|(p, peer)| peer.snapshot_hash.map(|hash| (p, hash.clone()))) - .filter(|&(_, ref hash)| !self.snapshot.is_known_bad(hash)); + .filter(|&(p, ref hash)| { + if self.snapshot.is_known_bad(hash) { + trace!(target: "snapshot_sync", "{}: Snapshot with hash {:?} is known to have failed before; not trying again", p, hash); + false + } else { + true + } + }); let mut snapshot_peers = HashMap::new(); let mut max_peers: usize = 0; @@ -870,6 +883,9 @@ impl ChainSync { } } + /// Start a snapshot with all peers that we are not currently asking something else from. If + /// we're already snapshotting with a peer, set sync state to `SnapshotData` and continue + /// fetching the snapshot. fn start_snapshot_sync(&mut self, io: &mut dyn SyncIo, peers: &[PeerId]) { if !self.snapshot.have_manifest() { for p in peers { @@ -878,10 +894,10 @@ impl ChainSync { } } self.set_state(SyncState::SnapshotManifest); - trace!(target: "sync", "New snapshot sync with {:?}", peers); + trace!(target: "snapshot_sync", "New snapshot sync with {:?}", peers); } else { self.set_state(SyncState::SnapshotData); - trace!(target: "sync", "Resumed snapshot sync with {:?}", peers); + trace!(target: "snapshot_sync", "Resumed snapshot sync with {:?}", peers); } } @@ -991,10 +1007,11 @@ impl ChainSync { SyncState::WaitingPeers => { trace!( target: "snapshot_sync", - "Can we snapshot from them? Their highest block: #{} vs our highest: #{} (peer: {})", + "Can we snapshot from them? Their highest block: #{} vs our highest: #{} (peer: {}, {})", peer_snapshot_number, chain_info.best_block_number, - peer_id + peer_id, + io.peer_enode(peer_id), ); self.maybe_start_snapshot_sync(io); }, @@ -1073,6 +1090,7 @@ impl ChainSync { } /// Clear all blocks/headers marked as being downloaded by a peer. + // todo[dvdplm] ^^^ should be **from a peer** I think? 
fn clear_peer_download(&mut self, peer_id: PeerId) { if let Some(ref peer) = self.peers.get(&peer_id) { match peer.asking { @@ -1152,7 +1170,7 @@ impl ChainSync { peer.expired = false; peer.block_set = None; if peer.asking != asking { - trace!(target:"sync", "Asking {:?} while expected {:?}", peer.asking, asking); + trace!(target:"sync", "{}: Asking {:?} while expected {:?}", peer_id, peer.asking, asking); peer.asking = PeerAsking::Nothing; return false; } else { diff --git a/ethcore/sync/src/chain/requester.rs b/ethcore/sync/src/chain/requester.rs index c090f9ca9f7..f4b974a3557 100644 --- a/ethcore/sync/src/chain/requester.rs +++ b/ethcore/sync/src/chain/requester.rs @@ -100,8 +100,8 @@ impl SyncRequester { /// Request snapshot manifest from a peer. pub fn request_snapshot_manifest(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId) { - trace!(target: "sync", "{} <- GetSnapshotManifest", peer_id); - let rlp = RlpStream::new_list(0); + trace!(target: "sync", "{}: requesting a snapshot manifest", peer_id); + let rlp = RlpStream::new_list(0); // todo[dvdplm]: make this a const SyncRequester::send_request(sync, io, peer_id, PeerAsking::SnapshotManifest, GetSnapshotManifestPacket, rlp.out()); } diff --git a/ethcore/sync/src/sync_io.rs b/ethcore/sync/src/sync_io.rs index 50fc74afcf7..47cd9184b45 100644 --- a/ethcore/sync/src/sync_io.rs +++ b/ethcore/sync/src/sync_io.rs @@ -50,6 +50,8 @@ pub trait SyncIo { fn peer_version(&self, peer_id: PeerId) -> ClientVersion { ClientVersion::from(peer_id.to_string()) } + /// Returns the peer enode string + fn peer_enode(&self, peer_id: PeerId) -> String; /// Returns information on p2p session fn peer_session_info(&self, peer_id: PeerId) -> Option; /// Maximum mutually supported ETH protocol version @@ -115,10 +117,6 @@ impl<'s> SyncIo for NetSyncIo<'s> { self.chain } - fn chain_overlay(&self) -> &RwLock> { - self.chain_overlay - } - fn snapshot_service(&self) -> &dyn SnapshotService { self.snapshot_service } @@ -127,12 
+125,22 @@ impl<'s> SyncIo for NetSyncIo<'s> { self.private_state.clone() } - fn peer_session_info(&self, peer_id: PeerId) -> Option { - self.network.session_info(peer_id) + fn peer_version(&self, peer_id: PeerId) -> ClientVersion { + self.network.peer_client_version(peer_id) } - fn is_expired(&self) -> bool { - self.network.is_expired() + fn peer_enode(&self, peer_id: PeerId) -> String { + self.network.session_info(peer_id).map_or("enode://???".into(), |info| { + if let Some(id) = info.id { + format!("enode://{}@{}", id, info.remote_address) + } else { + format!("enode://???@{}", info.remote_address) + } + }) + } + + fn peer_session_info(&self, peer_id: PeerId) -> Option { + self.network.session_info(peer_id) } fn eth_protocol_version(&self, peer_id: PeerId) -> u8 { @@ -143,8 +151,12 @@ impl<'s> SyncIo for NetSyncIo<'s> { self.network.protocol_version(*protocol, peer_id).unwrap_or(0) } - fn peer_version(&self, peer_id: PeerId) -> ClientVersion { - self.network.peer_client_version(peer_id) + fn is_expired(&self) -> bool { + self.network.is_expired() + } + + fn chain_overlay(&self) -> &RwLock> { + self.chain_overlay } fn payload_soft_limit(&self) -> usize { diff --git a/ethcore/types/src/snapshot.rs b/ethcore/types/src/snapshot.rs index ed305d68919..ea2a8a1ebe7 100644 --- a/ethcore/types/src/snapshot.rs +++ b/ethcore/types/src/snapshot.rs @@ -142,9 +142,9 @@ pub type ChunkSink<'a> = dyn FnMut(&[u8]) -> std::io::Result<()> + 'a; /// Statuses for snapshot restoration. #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub enum RestorationStatus { - /// No restoration. + /// No restoration activity currently. Inactive, - /// Restoration is initializing + /// Restoration is initializing. Initializing { /// Total number of state chunks. state_chunks: u32, @@ -164,7 +164,7 @@ pub enum RestorationStatus { /// Number of block chunks completed. block_chunks_done: u32, }, - /// Finalizing restoration + /// Finalizing restoration. Finalizing, /// Failed restoration.
Failed, From 56552858b31710b5ecbd7672d8bca027ea0e95fc Mon Sep 17 00:00:00 2001 From: David Palm Date: Fri, 25 Oct 2019 15:57:37 +0200 Subject: [PATCH 04/27] Fix picking snapshot peer: prefer the one with the highest block number More docs, comments, todos --- ethcore/sync/src/chain/mod.rs | 101 +++++++++++++++++++++----------- util/network-devp2p/src/host.rs | 1 - 2 files changed, 67 insertions(+), 35 deletions(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 3d9b8b2fe53..787632b4b17 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -115,7 +115,7 @@ use ethereum_types::{H256, U256}; use fastmap::{H256FastMap, H256FastSet}; use futures::sync::mpsc as futures_mpsc; use keccak_hash::keccak; -use log::{error, trace, debug}; +use log::{error, trace, debug, warn}; use network::client_version::ClientVersion; use network::{self, PeerId, PacketId}; use parity_util_mem::{MallocSizeOfExt, malloc_size_of_is_0}; @@ -179,13 +179,18 @@ const SNAPSHOT_MIN_PEERS: usize = 3; // todo[dvdplm] One problem with warp-sync is when the remote peer we're downloading from goes offline. Would it make sense to increase the number of chunks here so we download as much data as we can? const MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD: usize = 3; -const WAIT_PEERS_TIMEOUT: Duration = Duration::from_secs(5); -const STATUS_TIMEOUT: Duration = Duration::from_secs(5); +/// Time to wait for snapshotting peers to show up with a snapshot we want to use. Beyond this time, +/// a single peer is enough to start downloading. Beware that if WAIT_PEERS_TIMEOUT is longer than +/// STATUS_TIMEOUT, peers will start to get disconnected. +const WAIT_PEERS_TIMEOUT: Duration = Duration::from_secs(8); +/// Time to wait for a peer to start being useful to us in some form. After this they are disconnected. 
+const STATUS_TIMEOUT: Duration = Duration::from_secs(10); const HEADERS_TIMEOUT: Duration = Duration::from_secs(15); const BODIES_TIMEOUT: Duration = Duration::from_secs(20); const RECEIPTS_TIMEOUT: Duration = Duration::from_secs(10); const FORK_HEADER_TIMEOUT: Duration = Duration::from_secs(3); -const SNAPSHOT_MANIFEST_TIMEOUT: Duration = Duration::from_secs(5); +/// Max time to wait for a peer to send us a snapshot manifest. +const SNAPSHOT_MANIFEST_TIMEOUT: Duration = Duration::from_secs(15); const SNAPSHOT_DATA_TIMEOUT: Duration = Duration::from_secs(120); const PRIVATE_STATE_TIMEOUT: Duration = Duration::from_secs(120); @@ -804,13 +809,13 @@ impl ChainSync { self.active_peers.remove(&peer_id); } - // Called once per second. + /// Decide if we should start downloading a snapshot and from who. Called once per second. fn maybe_start_snapshot_sync(&mut self, io: &mut dyn SyncIo) { if !self.warp_sync.is_enabled() || io.snapshot_service().supported_versions().is_none() { -// trace!(target: "snapshot_sync", "Skipping warp sync. Disabled or not supported."); return; } - if self.state != SyncState::WaitingPeers && self.state != SyncState::Blocks && self.state != SyncState::Waiting { + use SyncState::*; + if self.state != WaitingPeers && self.state != Blocks && self.state != Waiting { trace!(target: "snapshot_sync", "Skipping warp sync. 
State: {:?}", self.state); return; } @@ -819,14 +824,14 @@ impl ChainSync { let our_best_block = io.chain().chain_info().best_block_number; let fork_block = self.fork_block.map_or(0, |(n, _)| n); - let (best_hash, max_peers, snapshot_peers) = { - let expected_warp_block = match self.warp_sync { - WarpSync::OnlyAndAfter(block) => block, - _ => 0, - }; + let expected_warp_block = match self.warp_sync { + WarpSync::OnlyAndAfter(block) => block, + _ => 0, + }; + let (best_snapshot_block, best_hash, max_peers, snapshot_peers) = { // Collect snapshot info from peers: filter out expired peers and peers from whom we do // not have fork confirmation. - let snapshots = self.peers.iter() + let mut snapshots = self.peers.iter() .filter(|&(_, p)| p.is_allowed() && p.snapshot_number.map_or(false, |sn| // Snapshot must be sufficiently better than what we have that it's useful to // sync with it: more than 30k blocks beyond our best block @@ -842,44 +847,71 @@ impl ChainSync { highest < sn || (highest - sn) <= SNAPSHOT_RESTORE_THRESHOLD }) )) - .filter_map(|(p, peer)| peer.snapshot_hash.map(|hash| (p, hash.clone()))) - .filter(|&(p, ref hash)| { + .filter_map(|(p, peer)| { + peer.snapshot_hash.map(|hash| (p, hash.clone())) + .and_then(|(p, hash)| peer.snapshot_number.map(|n| (*p, n, hash) ) ) + }) + .filter(|&(p, snapshot_block, ref hash)| { if self.snapshot.is_known_bad(hash) { - trace!(target: "snapshot_sync", "{}: Snapshot with hash {:?} is known to have failed before; not trying again", p, hash); + trace!(target: "snapshot_sync", "{}: Snapshot at #{} with hash {:?} is known to have failed before; not trying again", p, snapshot_block, hash); false } else { true } - }); + }) + .collect::>(); + // Sort collection of peers by highest block number + snapshots.sort_by(|&(_, ref b1, _), &(_, ref b2, _)| b2.cmp(b1) ); let mut snapshot_peers = HashMap::new(); let mut max_peers: usize = 0; let mut best_hash = None; - for (p, hash) in snapshots { + let mut best_snapshot_block = None; + 
// todo[dvdplm] I don't understand what this tries to achieve. It looks like it's written expecting multiple candidates with the same snapshot and aims to pick the one with the most seeders. + for (p, snapshot_block, hash) in snapshots { let peers = snapshot_peers.entry(hash).or_insert_with(Vec::new); - peers.push(*p); + peers.push(p); if peers.len() > max_peers { + trace!(target: "snapshot_sync", "{} is the new best snapshotting peer, has snapshot at block #{}/{}", p, snapshot_block, hash); max_peers = peers.len(); best_hash = Some(hash); + best_snapshot_block = Some(snapshot_block); } } - (best_hash, max_peers, snapshot_peers) + (best_snapshot_block, best_hash, max_peers, snapshot_peers) }; - - let timeout = (self.state == SyncState::WaitingPeers) && self.sync_start_time.map_or(false, |t| t.elapsed() > WAIT_PEERS_TIMEOUT); - - if let (Some(hash), Some(peers)) = (best_hash, best_hash.map_or(None, |h| snapshot_peers.get(&h))) { + // If we've waited long enough (8sec), a single peer will have to be enough for the snapshot sync to start. 
+ let timeout = (self.state == WaitingPeers) && + self.sync_start_time.map_or(false, |t| t.elapsed() > WAIT_PEERS_TIMEOUT); + + + if let (Some(block), Some(hash), Some(peers)) = ( + best_snapshot_block, + best_hash, + best_hash.map_or(None, |h| snapshot_peers.get(&h)) + ) { + trace!(target: "snapshot_sync", "We can sync a snapshot at #{:?}/{:?} from {} peer(s): {:?}", + best_snapshot_block, best_hash, max_peers, snapshot_peers.values()); if max_peers >= SNAPSHOT_MIN_PEERS { - debug!(target: "snapshot_sync", "Starting confirmed snapshot sync {:?} with {:?}", hash, peers); + debug!(target: "snapshot_sync", "Starting confirmed snapshot sync for a snapshot at #{}/{:?} with peer {:?}", block, hash, peers); self.start_snapshot_sync(io, peers); } else if timeout { - debug!(target: "snapshot_sync", "Starting unconfirmed snapshot sync {:?} with {:?}", hash, peers); + debug!(target: "snapshot_sync", "Starting unconfirmed snapshot sync for a snapshot at #{}/{:?} with peer {:?}", block, hash, peers); self.start_snapshot_sync(io, peers); + } else { + trace!(target: "snapshot_sync", "Waiting a little more to let more snapshot peers connect.") } - } else if timeout && !self.warp_sync.is_warp_only() { - debug!(target: "snapshot_sync", "No snapshots found, starting full sync"); - self.set_state(SyncState::Idle); - self.continue_sync(io); + } else if timeout { + if !self.warp_sync.is_warp_only() { + debug!(target: "snapshot_sync", "No snapshots found, starting full sync"); + self.set_state(SyncState::Idle); + self.continue_sync(io); + } else { + warn!(target: "snapshot_sync", "No snapshots available at #{}. 
Try using a smaller value for --warp-barrier", expected_warp_block); + } + } else { + // todo[dvdplm] remove this, it's debug only + trace!(target: "snapshot_sync", "Time to wait for snapshot peers is NOT expired and we do NOT have a snapshot peer (best_block={:?}, best_hash={:?}, peers.len={:?}).", best_snapshot_block, best_hash, snapshot_peers.len()) } } @@ -1004,13 +1036,13 @@ impl ChainSync { let higher_difficulty = peer_difficulty.map_or(true, |pd| pd > syncing_difficulty); if force || higher_difficulty || self.old_blocks.is_some() { match self.state { - SyncState::WaitingPeers => { + SyncState::WaitingPeers if peer_snapshot_number > 0 => { trace!( target: "snapshot_sync", - "Can we snapshot from them? Their highest block: #{} vs our highest: #{} (peer: {}, {})", + "{}: Potential snapshot sync peer; their highest block: #{} vs our highest: #{} (peer: {})", + peer_id, peer_snapshot_number, chain_info.best_block_number, - peer_id, io.peer_enode(peer_id), ); self.maybe_start_snapshot_sync(io); @@ -1082,7 +1114,8 @@ impl ChainSync { }, SyncState::SnapshotManifest | //already downloading from other peer SyncState::Waiting | - SyncState::SnapshotWaiting => () + SyncState::SnapshotWaiting => (), + _ => () } } else { trace!(target: "sync", "Skipping peer {}, force={}, td={:?}, our td={}, state={:?}", peer_id, force, peer_difficulty, syncing_difficulty, self.state); diff --git a/util/network-devp2p/src/host.rs b/util/network-devp2p/src/host.rs index dba69e33773..47c2d5321c9 100644 --- a/util/network-devp2p/src/host.rs +++ b/util/network-devp2p/src/host.rs @@ -37,7 +37,6 @@ use mio::{ use parity_path::restrict_permissions_owner; use parking_lot::{Mutex, RwLock}; use rlp::{Encodable, RlpStream}; -use rustc_hex::ToHex; use ethcore_io::{IoContext, IoHandler, IoManager, StreamToken, TimerToken}; use parity_crypto::publickey::{Generator, KeyPair, Random, Secret}; From 9e79bc0c301f3846b4236cefa54b6a3e908eb667 Mon Sep 17 00:00:00 2001 From: David Palm Date: Fri, 25 Oct 2019 
22:31:03 +0200 Subject: [PATCH 05/27] Adjust WAIT_PEERS_TIMEOUT to be a multiple of MAINTAIN_SYNC_TIMER to try to fix snapshot startup problems Docs, todos, comments --- ethcore/sync/src/api.rs | 38 +++++++++++++++++++++++++++++------ ethcore/sync/src/chain/mod.rs | 38 +++++++++++++++++++++++++---------- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/ethcore/sync/src/api.rs b/ethcore/sync/src/api.rs index a8a7a99fc3c..6a5d49cd83a 100644 --- a/ethcore/sync/src/api.rs +++ b/ethcore/sync/src/api.rs @@ -529,13 +529,39 @@ impl NetworkProtocolHandler for SyncProtocolHandler { trace_time!("sync::timeout"); let mut io = NetSyncIo::new(io, &*self.chain, &*self.snapshot_service, &self.overlay, self.private_state.clone()); match timer { - PEERS_TIMER => self.sync.write().maintain_peers(&mut io), - MAINTAIN_SYNC_TIMER => self.sync.write().maintain_sync(&mut io), - CONTINUE_SYNC_TIMER => self.sync.write().continue_sync(&mut io), - TX_TIMER => self.sync.write().propagate_new_transactions(&mut io), - PRIORITY_TIMER => self.sync.process_priority_queue(&mut io), - _ => warn!("Unknown timer {} triggered.", timer), + PEERS_TIMER =>{ + trace!(target:"dp", " PEERS_TIMER –> maintain_peers()"); + self.sync.write().maintain_peers(&mut io) + }, + MAINTAIN_SYNC_TIMER =>{ + trace!(target:"dp", " MAINTAIN_SYNC_TIMER -> maintain_sync"); + self.sync.write().maintain_sync(&mut io) + } + CONTINUE_SYNC_TIMER =>{ + trace!(target:"dp", " CONTINUE_SYNC_TIMER -> continue_sync"); + self.sync.write().continue_sync(&mut io) + } + TX_TIMER =>{ + trace!(target:"dp", " TX_TIMER -> propagate tx"); + self.sync.write().propagate_new_transactions(&mut io) + } + PRIORITY_TIMER =>{ + trace!(target:"dp", " PRIORITY_TIMER -> process_priority_queue"); + self.sync.process_priority_queue(&mut io) + } + _ =>{ + warn!("Unknown timer {} triggered.", timer) + } } +// todo[dvdplm]: put back +// match timer { +// PEERS_TIMER => self.sync.write().maintain_peers(&mut io), +// MAINTAIN_SYNC_TIMER => 
self.sync.write().maintain_sync(&mut io), +// CONTINUE_SYNC_TIMER => self.sync.write().continue_sync(&mut io), +// TX_TIMER => self.sync.write().propagate_new_transactions(&mut io), +// PRIORITY_TIMER => self.sync.process_priority_queue(&mut io), +// _ => warn!("Unknown timer {} triggered.", timer), +// } } } diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 787632b4b17..7888b3d2728 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -182,8 +182,12 @@ const MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD: usize = 3; /// Time to wait for snapshotting peers to show up with a snapshot we want to use. Beyond this time, /// a single peer is enough to start downloading. Beware that if WAIT_PEERS_TIMEOUT is longer than /// STATUS_TIMEOUT, peers will start to get disconnected. -const WAIT_PEERS_TIMEOUT: Duration = Duration::from_secs(8); -/// Time to wait for a peer to start being useful to us in some form. After this they are disconnected. +/// +/// In general, be cautious about how these intervals interact with the timers set up in `SyncApi` +/// (`PEERS_TIMER`, `CONTINUE_SYNC_TIMER`, `MAINTAIN_SYNC_TIMER`). +const WAIT_PEERS_TIMEOUT: Duration = Duration::from_millis(7500); // todo[dvdplm]: have seen very unreliable behaviour, possibly related to this timer. Sometimes all peers seem gone juuuust when the peers-collection-phase is over and we're stuck believing there are no peers to snapshot from. It is not repeatable reliably but my best hypothesis is it is connected to the duration of the MAINTAIN_SYNC_TIMER set to 2.5sec. Snapshot seems to start reliably if this is set to a multiple of 2.5. But who knows, there are plenty of timers firing from several threads. Locks are involved. +/// Time to wait for a peer to start being useful to us in some form. After this they are +/// disconnected. 
const STATUS_TIMEOUT: Duration = Duration::from_secs(10); const HEADERS_TIMEOUT: Duration = Duration::from_secs(15); const BODIES_TIMEOUT: Duration = Duration::from_secs(20); @@ -462,6 +466,7 @@ impl ChainSyncApi { /// /// NOTE This method should only handle stuff that can be canceled and would reach other peers /// by other means. + /// Called every `PRIORITY_TIMER` (0.25sec) pub fn process_priority_queue(&self, io: &mut dyn SyncIo) { fn check_deadline(deadline: Instant) -> Option { let now = Instant::now(); @@ -752,7 +757,7 @@ impl ChainSync { receiver } - /// notify all subscibers of a new SyncState + /// Notify all subscribers of a new SyncState fn notify_sync_state(&mut self, state: SyncState) { // remove any sender whose receiving end has been dropped self.status_sinks.retain(|sender| { @@ -847,10 +852,12 @@ impl ChainSync { highest < sn || (highest - sn) <= SNAPSHOT_RESTORE_THRESHOLD }) )) +// .inspect(|(p, peer_info)| trace!(target: "dp", "{} has #{:?} (passed block height checks)", p, peer_info.snapshot_number)) .filter_map(|(p, peer)| { peer.snapshot_hash.map(|hash| (p, hash.clone())) .and_then(|(p, hash)| peer.snapshot_number.map(|n| (*p, n, hash) ) ) }) +// .inspect(|(p, b, h)| trace!(target: "dp", "{} has #{} (passed transform into tuple)", p, b)) .filter(|&(p, snapshot_block, ref hash)| { if self.snapshot.is_known_bad(hash) { trace!(target: "snapshot_sync", "{}: Snapshot at #{} with hash {:?} is known to have failed before; not trying again", p, snapshot_block, hash); @@ -859,10 +866,12 @@ impl ChainSync { true } }) +// .inspect(|(p, b, h)| trace!(target: "dp", "{} has #{} (passed known-bad checks)", p, b)) .collect::>(); // Sort collection of peers by highest block number snapshots.sort_by(|&(_, ref b1, _), &(_, ref b2, _)| b2.cmp(b1) ); + trace!(target:"dp", "Snapshot candidates={:?}", snapshots); let mut snapshot_peers = HashMap::new(); let mut max_peers: usize = 0; let mut best_hash = None; @@ -908,10 +917,12 @@ impl ChainSync { 
self.continue_sync(io); } else { warn!(target: "snapshot_sync", "No snapshots available at #{}. Try using a smaller value for --warp-barrier", expected_warp_block); + // todo[dvdplm] remove before merge, debug only + trace!(target: "dp", "No snapshots available at #{}; time to wait for snapshot peers is up and we do NOT have a snapshot peer: (best_block={:?}, best_hash={:?}, peers.len={:?}, state={:?}, peers.len={}).", expected_warp_block, best_snapshot_block, best_hash, snapshot_peers.len(), self.state, self.peers.len()) } } else { // todo[dvdplm] remove this, it's debug only - trace!(target: "snapshot_sync", "Time to wait for snapshot peers is NOT expired and we do NOT have a snapshot peer (best_block={:?}, best_hash={:?}, peers.len={:?}).", best_snapshot_block, best_hash, snapshot_peers.len()) + trace!(target: "snapshot_sync", "Time to wait for snapshot peers is NOT expired and we do NOT have a snapshot peer (best_block={:?}, best_hash={:?}, peers.len={:?}, state={:?}, peers.len={}).", best_snapshot_block, best_hash, snapshot_peers.len(), self.state, self.peers.len()); } } @@ -959,7 +970,8 @@ impl ChainSync { } } - /// Resume downloading + /// Resume downloading. 
+ /// Called every `CONTINUE_SYNC_TIMER` (2.5sec) pub fn continue_sync(&mut self, io: &mut dyn SyncIo) { if self.state == SyncState::Waiting { trace!(target: "sync", "Waiting for the block queue"); @@ -992,9 +1004,8 @@ impl ChainSync { } } - if - (self.state == SyncState::Blocks || self.state == SyncState::NewBlocks) && - !self.peers.values().any(|p| p.asking != PeerAsking::Nothing && p.block_set != Some(BlockSet::OldBlocks) && p.can_sync()) + if (self.state == SyncState::Blocks || self.state == SyncState::NewBlocks) + && !self.peers.values().any(|p| p.asking != PeerAsking::Nothing && p.block_set != Some(BlockSet::OldBlocks) && p.can_sync()) { self.complete_sync(io); } @@ -1243,6 +1254,9 @@ impl ChainSync { io.respond(StatusPacket.id(), packet.out()) } + /// Check if any tasks we have on-going with a peer is taking too long (if so, disconnect them). + /// Also checks handshaking peers. + /// Called every `PEERS_TIMER` (0.7sec). pub fn maintain_peers(&mut self, io: &mut dyn SyncIo) { let tick = Instant::now(); let mut aborting = Vec::new(); @@ -1259,7 +1273,7 @@ impl ChainSync { PeerAsking::PrivateState => elapsed > PRIVATE_STATE_TIMEOUT, }; if timeout { - debug!(target:"sync", "Timeout {}", peer_id); + debug!(target:"sync", "Peer {} timeout while we were asking them for {:?}; disconnecting.", peer_id, peer.asking); io.disconnect_peer(*peer_id); aborting.push(*peer_id); } @@ -1375,7 +1389,8 @@ impl ChainSync { ).collect() } - /// Maintain other peers. Send out any new blocks and transactions + /// Maintain other peers. Send out any new blocks and transactions. Called every + /// `MAINTAIN_SYNC_TIMER` (1.1sec). pub fn maintain_sync(&mut self, io: &mut dyn SyncIo) { self.maybe_start_snapshot_sync(io); self.check_resume(io); @@ -1422,7 +1437,8 @@ impl ChainSync { SyncHandler::on_peer_connected(self, io, peer); } - /// propagates new transactions to all peers + /// Propagates new transactions to all peers. + /// Called every `TX_TIMER` (1.3sec). 
pub fn propagate_new_transactions(&mut self, io: &mut dyn SyncIo) { let deadline = Instant::now() + Duration::from_millis(500); SyncPropagator::propagate_new_transactions(self, io, || { From ec6e4a56e2ccfda48a592c47994b52bdeba2401d Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 18:46:36 +0100 Subject: [PATCH 06/27] Tabs --- util/network/src/lib.rs | 64 ++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/util/network/src/lib.rs b/util/network/src/lib.rs index 42bc8d6c3ee..e8ec1cf75a7 100644 --- a/util/network/src/lib.rs +++ b/util/network/src/lib.rs @@ -393,42 +393,42 @@ impl NonReservedPeerMode { #[derive(Clone, Debug, PartialEq, Eq)] pub struct IpFilter { - pub predefined: AllowIP, - pub custom_allow: Vec, - pub custom_block: Vec, + pub predefined: AllowIP, + pub custom_allow: Vec, + pub custom_block: Vec, } impl Default for IpFilter { - fn default() -> Self { - IpFilter { - predefined: AllowIP::All, - custom_allow: vec![], - custom_block: vec![], - } - } + fn default() -> Self { + IpFilter { + predefined: AllowIP::All, + custom_allow: vec![], + custom_block: vec![], + } + } } impl IpFilter { - /// Attempt to parse the peer mode from a string. - pub fn parse(s: &str) -> Result { - let mut filter = IpFilter::default(); - for f in s.split_whitespace() { - match f { - "all" => filter.predefined = AllowIP::All, - "private" => filter.predefined = AllowIP::Private, - "public" => filter.predefined = AllowIP::Public, - "none" => filter.predefined = AllowIP::None, - custom => { - if custom.starts_with("-") { - filter.custom_block.push(IpNetwork::from_str(&custom.to_owned().split_off(1))?) - } else { - filter.custom_allow.push(IpNetwork::from_str(custom)?) - } - } - } - } - Ok(filter) - } + /// Attempt to parse the peer mode from a string. 
+ pub fn parse(s: &str) -> Result { + let mut filter = IpFilter::default(); + for f in s.split_whitespace() { + match f { + "all" => filter.predefined = AllowIP::All, + "private" => filter.predefined = AllowIP::Private, + "public" => filter.predefined = AllowIP::Public, + "none" => filter.predefined = AllowIP::None, + custom => { + if custom.starts_with("-") { + filter.custom_block.push(IpNetwork::from_str(&custom.to_owned().split_off(1))?) + } else { + filter.custom_allow.push(IpNetwork::from_str(custom)?) + } + } + } + } + Ok(filter) + } } /// IP fiter @@ -440,6 +440,6 @@ pub enum AllowIP { Private, /// Connect to public network only Public, - /// Block all addresses - None, + /// Block all addresses + None, } From 74ce5e8c479f36cdc3783ce99dc994e73f76c8ae Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 18:47:06 +0100 Subject: [PATCH 07/27] Formatting --- ethcore/sync/src/tests/helpers.rs | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/ethcore/sync/src/tests/helpers.rs b/ethcore/sync/src/tests/helpers.rs index 2e91e2490c2..bf7c155bd2a 100644 --- a/ethcore/sync/src/tests/helpers.rs +++ b/ethcore/sync/src/tests/helpers.rs @@ -114,25 +114,17 @@ impl<'p, C> SyncIo for TestIo<'p, C> where C: FlushingBlockChainClient, C: 'p { self.to_disconnect.insert(peer_id); } - fn is_expired(&self) -> bool { - false - } - fn respond(&mut self, packet_id: PacketId, data: Vec) -> Result<(), network::Error> { - self.packets.push(TestPacket { - data: data, - packet_id: packet_id, - recipient: self.sender.unwrap() - }); + self.packets.push( + TestPacket { data, packet_id, recipient: self.sender.unwrap() } + ); Ok(()) } fn send(&mut self,peer_id: PeerId, packet_id: SyncPacket, data: Vec) -> Result<(), network::Error> { - self.packets.push(TestPacket { - data, - packet_id: packet_id.id(), - recipient: peer_id, - }); + self.packets.push( + TestPacket { data, packet_id: packet_id.id(), recipient: peer_id } + ); Ok(()) } @@ 
-140,6 +132,14 @@ impl<'p, C> SyncIo for TestIo<'p, C> where C: FlushingBlockChainClient, C: 'p { &*self.chain } + fn snapshot_service(&self) -> &dyn SnapshotService { + self.snapshot_service + } + + fn private_state(&self) -> Option> { + self.private_state_db.clone() + } + fn peer_version(&self, peer_id: PeerId) -> ClientVersion { self.peers_info.get(&peer_id) .cloned() @@ -147,12 +147,8 @@ impl<'p, C> SyncIo for TestIo<'p, C> where C: FlushingBlockChainClient, C: 'p { .into() } - fn snapshot_service(&self) -> &dyn SnapshotService { - self.snapshot_service - } - - fn private_state(&self) -> Option> { - self.private_state_db.clone() + fn peer_enode(&self, _peer_id: usize) -> String { + unimplemented!() } fn peer_session_info(&self, _peer_id: PeerId) -> Option { @@ -167,6 +163,10 @@ impl<'p, C> SyncIo for TestIo<'p, C> where C: FlushingBlockChainClient, C: 'p { if protocol == &WARP_SYNC_PROTOCOL_ID { PAR_PROTOCOL_VERSION_4.0 } else { self.eth_protocol_version(peer_id) } } + fn is_expired(&self) -> bool { + false + } + fn chain_overlay(&self) -> &RwLock> { &self.overlay } From 241675f3e97422dabef1864e6bef358db345869d Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 18:48:00 +0100 Subject: [PATCH 08/27] Don't build new rlp::EMPTY_LIST_RLP instances --- ethcore/sync/src/chain/requester.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ethcore/sync/src/chain/requester.rs b/ethcore/sync/src/chain/requester.rs index f4b974a3557..59a2aaf2fb1 100644 --- a/ethcore/sync/src/chain/requester.rs +++ b/ethcore/sync/src/chain/requester.rs @@ -91,7 +91,7 @@ impl SyncRequester { pub fn request_snapshot_data(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId) { // find chunk data to download if let Some(hash) = sync.snapshot.needed_chunk() { - if let Some(ref mut peer) = sync.peers.get_mut(&peer_id) { + if let Some(mut peer) = sync.peers.get_mut(&peer_id) { peer.asking_snapshot_data = Some(hash.clone()); } 
SyncRequester::request_snapshot_chunk(sync, io, peer_id, &hash); @@ -101,8 +101,7 @@ impl SyncRequester { /// Request snapshot manifest from a peer. pub fn request_snapshot_manifest(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId) { trace!(target: "sync", "{}: requesting a snapshot manifest", peer_id); - let rlp = RlpStream::new_list(0); // todo[dvdplm]: make this a const - SyncRequester::send_request(sync, io, peer_id, PeerAsking::SnapshotManifest, GetSnapshotManifestPacket, rlp.out()); + SyncRequester::send_request(sync, io, peer_id, PeerAsking::SnapshotManifest, GetSnapshotManifestPacket, rlp::EMPTY_LIST_RLP.to_vec()); } pub fn request_private_state(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, hash: &H256) { From b8a150437de0aa1607a1eebb4dfce276d9c5acb4 Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 18:49:00 +0100 Subject: [PATCH 09/27] Dial down debug logging --- ethcore/sync/src/chain/supplier.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethcore/sync/src/chain/supplier.rs b/ethcore/sync/src/chain/supplier.rs index cd322106247..4bc407ea286 100644 --- a/ethcore/sync/src/chain/supplier.rs +++ b/ethcore/sync/src/chain/supplier.rs @@ -116,7 +116,7 @@ impl SyncSupplier { debug!(target:"sync", "Unexpected packet {} from unregistered peer: {}:{}", packet_id, peer, io.peer_version(peer)); return; } - debug!(target: "sync", "{} -> Dispatching packet: {}", peer, packet_id); + trace!(target: "sync", "{} -> Dispatching packet: {}", peer, packet_id); match id { ConsensusDataPacket => { From 956e2379a0475a9696813ef7182cfc4412b81f89 Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 18:50:10 +0100 Subject: [PATCH 10/27] Don't warn about missing hashes in the manifest: it's normal Log client version on peer connect --- ethcore/snapshot/src/service.rs | 1 - ethcore/sync/src/chain/handler.rs | 8 +++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ethcore/snapshot/src/service.rs 
b/ethcore/snapshot/src/service.rs index da84482db71..112be08db3b 100644 --- a/ethcore/snapshot/src/service.rs +++ b/ethcore/snapshot/src/service.rs @@ -676,7 +676,6 @@ impl Service where C: SnapshotClient + ChainInfo { } else if manifest.state_hashes.contains(&hash) { true } else { - warn!(target: "snapshot", "Hash of the content of {:?} not present in the manifest block/state hashes.", path); return Ok(false); }; diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index 81a22d85c8e..e7e67c1c97a 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -572,7 +572,7 @@ impl SyncHandler { let warp_protocol = warp_protocol_version != 0; let private_tx_protocol = warp_protocol_version >= PAR_PROTOCOL_VERSION_3.0; let peer = PeerInfo { - protocol_version: protocol_version, + protocol_version, network_id: r.val_at(1)?, difficulty: Some(r.val_at(2)?), latest_hash: r.val_at(3)?, @@ -601,7 +601,8 @@ impl SyncHandler { latest:{}, \ genesis:{}, \ snapshot:{:?}, \ - private_tx_enabled:{})", + private_tx_enabled:{}, \ + client_version: {})", peer_id, peer.protocol_version, peer.network_id, @@ -609,7 +610,8 @@ impl SyncHandler { peer.latest_hash, peer.genesis, peer.snapshot_number, - peer.private_tx_enabled + peer.private_tx_enabled, + peer.client_version, ); if io.is_expired() { trace!(target: "sync", "Status packet from expired session {}:{}", peer_id, io.peer_version(peer_id)); From f7c7a8a753bafc6782d7e0e2b1886d3761e2245b Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 19:47:40 +0100 Subject: [PATCH 11/27] Cleanup --- ethcore/sync/src/api.rs | 38 ++++++-------------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/ethcore/sync/src/api.rs b/ethcore/sync/src/api.rs index 6a5d49cd83a..a8a7a99fc3c 100644 --- a/ethcore/sync/src/api.rs +++ b/ethcore/sync/src/api.rs @@ -529,39 +529,13 @@ impl NetworkProtocolHandler for SyncProtocolHandler { trace_time!("sync::timeout"); 
let mut io = NetSyncIo::new(io, &*self.chain, &*self.snapshot_service, &self.overlay, self.private_state.clone()); match timer { - PEERS_TIMER =>{ - trace!(target:"dp", " PEERS_TIMER –> maintain_peers()"); - self.sync.write().maintain_peers(&mut io) - }, - MAINTAIN_SYNC_TIMER =>{ - trace!(target:"dp", " MAINTAIN_SYNC_TIMER -> maintain_sync"); - self.sync.write().maintain_sync(&mut io) - } - CONTINUE_SYNC_TIMER =>{ - trace!(target:"dp", " CONTINUE_SYNC_TIMER -> continue_sync"); - self.sync.write().continue_sync(&mut io) - } - TX_TIMER =>{ - trace!(target:"dp", " TX_TIMER -> propagate tx"); - self.sync.write().propagate_new_transactions(&mut io) - } - PRIORITY_TIMER =>{ - trace!(target:"dp", " PRIORITY_TIMER -> process_priority_queue"); - self.sync.process_priority_queue(&mut io) - } - _ =>{ - warn!("Unknown timer {} triggered.", timer) - } + PEERS_TIMER => self.sync.write().maintain_peers(&mut io), + MAINTAIN_SYNC_TIMER => self.sync.write().maintain_sync(&mut io), + CONTINUE_SYNC_TIMER => self.sync.write().continue_sync(&mut io), + TX_TIMER => self.sync.write().propagate_new_transactions(&mut io), + PRIORITY_TIMER => self.sync.process_priority_queue(&mut io), + _ => warn!("Unknown timer {} triggered.", timer), } -// todo[dvdplm]: put back -// match timer { -// PEERS_TIMER => self.sync.write().maintain_peers(&mut io), -// MAINTAIN_SYNC_TIMER => self.sync.write().maintain_sync(&mut io), -// CONTINUE_SYNC_TIMER => self.sync.write().continue_sync(&mut io), -// TX_TIMER => self.sync.write().propagate_new_transactions(&mut io), -// PRIORITY_TIMER => self.sync.process_priority_queue(&mut io), -// _ => warn!("Unknown timer {} triggered.", timer), -// } } } From 3ab901785ce82a2dc7b15229f0d2a235b2ca55b5 Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 19:47:51 +0100 Subject: [PATCH 12/27] Do not skip snapshots further away than 30k block from the highest block seen Currently we look for peers that seed snapshots that are close to the highest block seen on the 
network (where "close" means within 30k blocks). When a node starts up we wait for some time (5sec, increased here to 10sec) to let peers connect and if we have found a suitable peer to sync a snapshot from at the end of that delay, we start the download; if none is found and --warp-barrier is used we stall, otherwise we start a slow-sync. When looking for a suitable snapshot, we use the highest block seen on the network to check if a peer has a snapshot that is within 30k blocks of that highest block number. This means that in a situation where all available snapshots are older than that, we will often fail to start a snapshot at all. What's worse is that the longer we delay starting a snapshot sync (to let more peers connect, in the hope of finding a good snapshot), the more likely we are to have seen a high block and thus the more likely we become to accept a snapshot. This commit removes this comparison with the highest block number criterion entirely and picks the best snapshot we find in 10sec. --- ethcore/sync/src/chain/mod.rs | 88 +++++++++++++++-------------------- 1 file changed, 37 insertions(+), 51 deletions(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 7888b3d2728..07179c26ad3 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -115,7 +115,7 @@ use ethereum_types::{H256, U256}; use fastmap::{H256FastMap, H256FastSet}; use futures::sync::mpsc as futures_mpsc; use keccak_hash::keccak; -use log::{error, trace, debug, warn}; +use log::{error, trace, debug}; use network::client_version::ClientVersion; use network::{self, PeerId, PacketId}; use parity_util_mem::{MallocSizeOfExt, malloc_size_of_is_0}; @@ -180,12 +180,8 @@ const SNAPSHOT_MIN_PEERS: usize = 3; const MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD: usize = 3; /// Time to wait for snapshotting peers to show up with a snapshot we want to use. Beyond this time,
Beware that if WAIT_PEERS_TIMEOUT is longer than -/// STATUS_TIMEOUT, peers will start to get disconnected. -/// -/// In general, be cautious about how these intervals interact with the timers set up in `SyncApi` -/// (`PEERS_TIMER`, `CONTINUE_SYNC_TIMER`, `MAINTAIN_SYNC_TIMER`). -const WAIT_PEERS_TIMEOUT: Duration = Duration::from_millis(7500); // todo[dvdplm]: have seen very unreliable behaviour, possibly related to this timer. Sometimes all peers seem gone juuuust when the peers-collection-phase is over and we're stuck believing there are no peers to snapshot from. It is not repeatable reliably but my best hypothesis is it is connected to the duration of the MAINTAIN_SYNC_TIMER set to 2.5sec. Snapshot seems to start reliably if this is set to a multiple of 2.5. But who knows, there are plenty of timers firing from several threads. Locks are involved. +/// a single peer is enough to start downloading. +const WAIT_PEERS_TIMEOUT: Duration = Duration::from_secs(10); /// Time to wait for a peer to start being useful to us in some form. After this they are /// disconnected. const STATUS_TIMEOUT: Duration = Duration::from_secs(10); @@ -329,8 +325,7 @@ pub struct PeerInfo { latest_hash: H256, /// Peer total difficulty if known difficulty: Option, - // todo[dvdplm]: clarify if these "asking*" fields are things **we** ask **them** for or vice versa - /// Type of data currently being requested from peer. + /// Type of data currently being requested by us from a peer. asking: PeerAsking, /// A set of block numbers being requested asking_blocks: Vec, @@ -810,7 +805,7 @@ impl ChainSync { /// Remove peer from active peer set. Peer will be reactivated on the next sync /// round. 
fn deactivate_peer(&mut self, _io: &mut dyn SyncIo, peer_id: PeerId) { - trace!(target: "sync", "Deactivating peer {}", peer_id); + debug!(target: "sync", "Deactivating peer {}", peer_id); self.active_peers.remove(&peer_id); } @@ -821,7 +816,6 @@ impl ChainSync { } use SyncState::*; if self.state != WaitingPeers && self.state != Blocks && self.state != Waiting { - trace!(target: "snapshot_sync", "Skipping warp sync. State: {:?}", self.state); return; } // Make sure the snapshot block is not too far away from best block and network best block and @@ -833,50 +827,38 @@ impl ChainSync { WarpSync::OnlyAndAfter(block) => block, _ => 0, }; + // Collect snapshot info from peers and check if we can use their snapshots to sync. let (best_snapshot_block, best_hash, max_peers, snapshot_peers) = { - // Collect snapshot info from peers: filter out expired peers and peers from whom we do - // not have fork confirmation. let mut snapshots = self.peers.iter() - .filter(|&(_, p)| p.is_allowed() && p.snapshot_number.map_or(false, |sn| - // Snapshot must be sufficiently better than what we have that it's useful to - // sync with it: more than 30k blocks beyond our best block - our_best_block < sn && (sn - our_best_block) > SNAPSHOT_RESTORE_THRESHOLD && - // Snapshot must have been taken after the fork block (if any is configured) - sn > fork_block && - // Snapshot must be greater than the warp barrier if any - sn > expected_warp_block && - // If we know what the highest block on the network is, snapshot must be recent - // enough: either it's better than the highest we've seen on the network or it's - // within 30k blocks of the best we've seen. 
- self.highest_block.map_or(true, |highest| { - highest < sn || (highest - sn) <= SNAPSHOT_RESTORE_THRESHOLD - }) - )) -// .inspect(|(p, peer_info)| trace!(target: "dp", "{} has #{:?} (passed block height checks)", p, peer_info.snapshot_number)) + .filter(|&(_, p)| + // filter out expired peers and peers from whom we do not have fork confirmation. + p.is_allowed() && + p.snapshot_number.map_or(false, |sn| + // Snapshot must be sufficiently better than what we have that it's useful to + // sync with it: more than 30k blocks beyond our best block + our_best_block < sn && (sn - our_best_block) > SNAPSHOT_RESTORE_THRESHOLD && + // Snapshot must have been taken after the fork block (if any is configured) + sn > fork_block && + // Snapshot must be greater than the warp barrier, if any + sn > expected_warp_block + ) + ) .filter_map(|(p, peer)| { - peer.snapshot_hash.map(|hash| (p, hash.clone())) + peer.snapshot_hash.map(|hash| (p, hash)) + .filter(|(_, hash)| !self.snapshot.is_known_bad(&hash) ) .and_then(|(p, hash)| peer.snapshot_number.map(|n| (*p, n, hash) ) ) }) -// .inspect(|(p, b, h)| trace!(target: "dp", "{} has #{} (passed transform into tuple)", p, b)) - .filter(|&(p, snapshot_block, ref hash)| { - if self.snapshot.is_known_bad(hash) { - trace!(target: "snapshot_sync", "{}: Snapshot at #{} with hash {:?} is known to have failed before; not trying again", p, snapshot_block, hash); - false - } else { - true - } - }) -// .inspect(|(p, b, h)| trace!(target: "dp", "{} has #{} (passed known-bad checks)", p, b)) .collect::>(); - // Sort collection of peers by highest block number + // Sort collection of peers by highest block number. snapshots.sort_by(|&(_, ref b1, _), &(_, ref b2, _)| b2.cmp(b1) ); - trace!(target:"dp", "Snapshot candidates={:?}", snapshots); + let mut snapshot_peers = HashMap::new(); let mut max_peers: usize = 0; let mut best_hash = None; let mut best_snapshot_block = None; - // todo[dvdplm] I don't understand what this tries to achieve. 
It looks like it's written expecting multiple candidates with the same snapshot and aims to pick the one with the most seeders. + // Of the available snapshots, find the one seeded by the most peers. On a tie, the + // snapshot closest to the tip will be used (unfortunately this is the common case). for (p, snapshot_block, hash) in snapshots { let peers = snapshot_peers.entry(hash).or_insert_with(Vec::new); peers.push(p); @@ -889,7 +871,7 @@ impl ChainSync { } (best_snapshot_block, best_hash, max_peers, snapshot_peers) }; - // If we've waited long enough (8sec), a single peer will have to be enough for the snapshot sync to start. + // If we've waited long enough (10sec), a single peer will have to be enough for the snapshot sync to start. let timeout = (self.state == WaitingPeers) && self.sync_start_time.map_or(false, |t| t.elapsed() > WAIT_PEERS_TIMEOUT); @@ -916,9 +898,11 @@ impl ChainSync { self.set_state(SyncState::Idle); self.continue_sync(io); } else { - warn!(target: "snapshot_sync", "No snapshots available at #{}. Try using a smaller value for --warp-barrier", expected_warp_block); - // todo[dvdplm] remove before merge, debug only - trace!(target: "dp", "No snapshots available at #{}; time to wait for snapshot peers is up and we do NOT have a snapshot peer: (best_block={:?}, best_hash={:?}, peers.len={:?}, state={:?}, peers.len={}).", expected_warp_block, best_snapshot_block, best_hash, snapshot_peers.len(), self.state, self.peers.len()) + // warn!(target: "snapshot_sync", "No snapshots currently available at #{}. Try using a smaller value for --warp-barrier", expected_warp_block); + // todo[dvdplm] remove before merge, debug only, then put the `warn!` line back. 
+ trace!(target: "snapshot_sync", "No snapshots currently available at #{}; time to wait for snapshot peers is up and we do NOT have a snapshot peer: \ + (best_block={:?}, best_hash={:?}, snapshot_peers.len={:?}, state={:?}, peers.len={}).", + expected_warp_block, best_snapshot_block, best_hash, snapshot_peers.len(), self.state, self.peers.len()) } } else { // todo[dvdplm] remove this, it's debug only @@ -928,11 +912,14 @@ impl ChainSync { /// Start a snapshot with all peers that we are not currently asking something else from. If /// we're already snapshotting with a peer, set sync state to `SnapshotData` and continue - /// fetching the snapshot. + /// fetching the snapshot. Note that we only ever sync snapshots from one peer so here we send + /// out the request for a manifest to all the peers that have it and start syncing the snapshot + /// with the first that responds. fn start_snapshot_sync(&mut self, io: &mut dyn SyncIo, peers: &[PeerId]) { if !self.snapshot.have_manifest() { for p in peers { if self.peers.get(p).map_or(false, |p| p.asking == PeerAsking::Nothing) { + // When we get a response we call `SyncHandler::on_snapshot_manifest` SyncRequester::request_snapshot_manifest(self, io, *p); } } @@ -1133,10 +1120,9 @@ impl ChainSync { } } - /// Clear all blocks/headers marked as being downloaded by a peer. - // todo[dvdplm] ^^^ should be **from a peer** I think? + /// Clear all blocks/headers marked as being downloaded by us from a peer. 
fn clear_peer_download(&mut self, peer_id: PeerId) { - if let Some(ref peer) = self.peers.get(&peer_id) { + if let Some(peer) = self.peers.get(&peer_id) { match peer.asking { PeerAsking::BlockHeaders => { if let Some(ref hash) = peer.asking_hash { From 34cef68889d7219e28728408174158d394f4dc47 Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 19:58:02 +0100 Subject: [PATCH 13/27] lockfile --- Cargo.lock | 80 +++++++++++++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 823091e8b15..8aae4548df4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,7 +145,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "ascii" -version = "0.7.1" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -189,7 +189,7 @@ dependencies = [ "ethjson 0.1.0", "itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "lru-cache 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "machine 0.1.0", @@ -392,7 +392,7 @@ source = "git+https://github.com/paritytech/bn#6beba2ed6c9351622f9e948ccee406384 dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "crunchy 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-hex 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -402,7 +402,7 @@ name = "bstr" 
version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)", @@ -442,7 +442,7 @@ name = "c2-chacha" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -549,7 +549,7 @@ dependencies = [ "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "ethjson 0.1.0", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "lru-cache 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "machine 0.1.0", @@ -582,10 +582,10 @@ dependencies = [ [[package]] name = "combine" -version = "3.6.1" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "ascii 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "ascii 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -644,7 +644,7 @@ dependencies = [ "criterion-plot 0.4.0 
(registry+https://github.com/rust-lang/crates.io-index)", "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_os 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -692,7 +692,7 @@ dependencies = [ "arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "memoffset 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -716,7 +716,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -801,7 +801,7 @@ name = "derive_more" version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 0.4.20 (registry+https://github.com/rust-lang/crates.io-index)", "quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 
(registry+https://github.com/rust-lang/crates.io-index)", @@ -845,7 +845,7 @@ name = "docopt" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)", @@ -876,7 +876,7 @@ dependencies = [ "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lunarity-lexer 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-hex 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1077,7 +1077,7 @@ dependencies = [ "kvdb 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "kvdb-memorydb 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "kvdb-rocksdb 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "machine 0.1.0", "macros 0.1.0", @@ -1281,7 +1281,7 @@ dependencies = [ "arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.5.13 
(registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1332,7 +1332,7 @@ dependencies = [ "ethcore-io 1.12.0", "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "ipnetwork 0.12.8 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", "parity-crypto 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "parity-snappy 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1449,7 +1449,7 @@ dependencies = [ "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "kvdb 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "kvdb-rocksdb 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "parity-bytes 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "parity-crypto 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1650,7 +1650,7 @@ dependencies = [ "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "hex-literal 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "memory-cache 0.1.0", "parity-bytes 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1815,7 +1815,7 @@ name = "fs-swap" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2242,7 +2242,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cesu8 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "combine 3.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "jni-sys 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2343,7 +2343,7 @@ dependencies = [ "bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)", "globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "jsonrpc-core 14.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "tokio 0.1.22 (registry+https://github.com/rust-lang/crates.io-index)", "tokio-codec 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)", @@ -2449,7 +2449,7 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -2963,7 +2963,7 @@ dependencies = [ "digest 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "hmac 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "parity-secp256k1 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "pbkdf2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3319,7 +3319,7 @@ dependencies = [ "ethcore-sync 1.12.0", "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "parity-bytes 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3372,7 +3372,7 @@ name = "parity-wordlist" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -3892,7 +3892,7 @@ dependencies = [ "crossbeam-deque 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-queue 
0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.10.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -3967,7 +3967,7 @@ version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", "spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "untrusted 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3997,7 +3997,7 @@ name = "rlp_compress" version = "0.1.0" dependencies = [ "elastic-array 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "rlp 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -4274,7 +4274,7 @@ dependencies = [ "keccak-hasher 0.1.1", "kvdb 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "kvdb-rocksdb 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.10.1 (registry+https://github.com/rust-lang/crates.io-index)", "parity-bytes 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -4320,7 +4320,7 @@ dependencies = [ "keccak-hasher 0.1.1", "kvdb 0.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", "kvdb-rocksdb 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "parity-bytes 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "parity-crypto 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -4574,7 +4574,7 @@ name = "thread_local" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -4714,7 +4714,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "mio 0.6.19 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.10.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -4774,7 +4774,7 @@ dependencies = [ "crossbeam-queue 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.10.1 
(registry+https://github.com/rust-lang/crates.io-index)", "slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5071,7 +5071,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5098,7 +5098,7 @@ dependencies = [ "executive-state 0.1.0", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "kvdb 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "machine 0.1.0", "memory-cache 0.1.0", @@ -5120,7 +5120,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "if_chain 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 0.4.20 (registry+https://github.com/rust-lang/crates.io-index)", "quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5390,7 +5390,7 @@ dependencies = [ "checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" "checksum arrayvec 0.4.11 
(registry+https://github.com/rust-lang/crates.io-index)" = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" "checksum arrayvec 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" -"checksum ascii 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae7d751998c189c1d4468cf0a39bb2eae052a9c58d50ebb3b9591ee3813ad50" +"checksum ascii 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "eab1c04a571841102f5345a8fc0f6bb3d31c315dec879b5c6e42e40ce7ffa34e" "checksum assert_matches 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7deb0a829ca7bcfaf5da70b073a8d128619259a7be8216a355e23f00763059e5" "checksum attohttpc 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "eaf0ec4b0e00f61ee75556ca027485b7b354f4a714d88cc03f4468abd9378c86" "checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" @@ -5428,7 +5428,7 @@ dependencies = [ "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum cmake 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "6ec65ee4f9c9d16f335091d23693457ed4928657ba4982289d7fafee03bc614a" -"checksum combine 3.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fc1d011beeed29187b8db2ac3925c8dd4d3e87db463dc9d2d2833985388fc5bc" +"checksum combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "da3da6baa321ec19e1cc41d31bf599f00c783d0517095cdaf0332e3fe8d20680" "checksum const-random 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7b641a8c9867e341f3295564203b1c250eb8ce6cb6126e007941f78c4d2ed7fe" "checksum 
const-random-macro 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c750ec12b83377637110d5a57f5ae08e895b06c4b16e2bdbf1a94ef717428c59" "checksum criterion 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "938703e165481c8d612ea3479ac8342e5615185db37765162e762ec3523e2fc6" @@ -5537,7 +5537,7 @@ dependencies = [ "checksum kvdb 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "72ae89206cea31c32014b39d5a454b96135894221610dbfd19cf4d2d044fa546" "checksum kvdb-memorydb 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "296c12309ed36cb74d59206406adbf1971c3baa56d5410efdb508d8f1c60a351" "checksum kvdb-rocksdb 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "96eb0e0112bb66fe5401294ca0f43c9cb771456af9270443545026e55fd00912" -"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" +"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum lazycell 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ddba4c30a78328befecec92fc94970e53b3ae385827d28620f0f5bb2493081e0" "checksum libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)" = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba" "checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2" From fff43cc62973d0b8626a353aef640f7cc3c5a44f Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 22:44:26 +0100 Subject: [PATCH 14/27] Add a `ChunkType::Dupe` variant so that we do not disconnect a peer if they happen to send us a duplicate chunk (just ignore the chunk and keep going) Resolve some documentation todos, add more --- ethcore/snapshot/src/service.rs | 1 + ethcore/snapshot/src/traits.rs | 3 +-- 
ethcore/sync/src/chain/handler.rs | 12 +++++++++++- ethcore/sync/src/chain/mod.rs | 5 +++++ ethcore/sync/src/snapshot_sync.rs | 32 ++++++++++++++++--------------- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/ethcore/snapshot/src/service.rs b/ethcore/snapshot/src/service.rs index 112be08db3b..32f2251ec5b 100644 --- a/ethcore/snapshot/src/service.rs +++ b/ethcore/snapshot/src/service.rs @@ -787,6 +787,7 @@ impl Service where C: SnapshotClient + ChainInfo { false => Ok(()) } } + // todo[dvdplm] aren't we tossing away any errors from feed_state/feed_blocks here? And why the explicit `drop` here? other => other.map(drop), }; (res, db) diff --git a/ethcore/snapshot/src/traits.rs b/ethcore/snapshot/src/traits.rs index 21a15673e9d..3ed29134100 100644 --- a/ethcore/snapshot/src/traits.rs +++ b/ethcore/snapshot/src/traits.rs @@ -51,8 +51,7 @@ pub trait SnapshotService : Sync + Send { fn status(&self) -> RestorationStatus; /// Begin snapshot restoration. - /// If a restoration is in progress, this will reset it. - /// From this point on, any previous snapshot may become unavailable. todo[dvdplm] what does this mean? "unavailable" how? to others? + /// If a restoration is in progress, this will reset it and clear all data. fn begin_restore(&self, manifest: ManifestData); /// Abort an in-progress restoration if there is one. diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index e7e67c1c97a..c87b9f86098 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -124,6 +124,7 @@ impl SyncHandler { .next().is_some(); if !still_seeking_manifest { + warn!(target: "snapshot_sync", "The peer we were downloading a snapshot from ({}) went away. Retrying.", peer_id); sync.state = ChainSync::get_init_state(sync.warp_sync, io.chain()); } } @@ -514,7 +515,13 @@ impl SyncHandler { // only note bad if restoration failed. 
if let (Some(hash), RestorationStatus::Failed) = (sync.snapshot.snapshot_hash(), status) { - // todo[dvdplm]: how do we know we `Failed` because of them and not because something happened on our end, say disk was full? + // todo[dvdplm]: how do we know we `Failed` because of them and not because + // something happened on our end, say disk was full? + // On a second look I think `feed_chunk_with_restoration()`, the + // `SnapshotService` implementation, is indeed tossing away some errors. I think + // it should be refactored to distinguish the two cases of: restoration failed + // because the snapshot data is bad (corrupted, partial, malicious) and errors + // caused by the local env (out of disk seems like the obvious one). debug!(target: "snapshot_sync", "Marking snapshot manifest hash {} as bad", hash); sync.snapshot.note_bad(hash); } @@ -545,6 +552,9 @@ impl SyncHandler { trace!(target: "snapshot_sync", "{}: Processing state chunk", peer_id); io.snapshot_service().restore_state_chunk(hash, snapshot_data); } + Ok(ChunkType::Dupe(hash)) => { + warn!(target: "snapshot_sync", "{}: Duplicate chunk (hash {:?}).", peer_id, hash); + } Err(()) => { // todo[dvdplm] this seems wrong: we'll get `Err(())` back even if we happened to // have seen this chunk already. Should a peer be disconnected if they happen to diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 07179c26ad3..0b0b9f0d592 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -596,6 +596,11 @@ impl ChainSync { peers } + /// Reset the client to its initial state: + /// - if warp sync is enabled, start looking for peers to sync a snapshot from + /// - if `--warp-barrier` is used, ensure we're not synced beyond the barrier and start + /// looking for peers to sync a snapshot from + /// - otherwise, go `Idle`. 
fn get_init_state(warp_sync: WarpSync, chain: &dyn BlockChainClient) -> SyncState { let best_block = chain.chain_info().best_block_number; match warp_sync { diff --git a/ethcore/sync/src/snapshot_sync.rs b/ethcore/sync/src/snapshot_sync.rs index ae6d51bd5f4..0b7b999d038 100644 --- a/ethcore/sync/src/snapshot_sync.rs +++ b/ethcore/sync/src/snapshot_sync.rs @@ -30,6 +30,8 @@ pub enum ChunkType { State(H256), /// The chunk contains block data. Block(H256), + /// Already seen chunk + Dupe(H256) } #[derive(Default, MallocSizeOf)] @@ -46,8 +48,8 @@ pub struct Snapshot { completed_chunks: HashSet, /// The hash of the the `ManifestData` RLP that we're downloading. snapshot_hash: Option, - /// Set of chunk hashes we failed to import. - // todo[dvdplm]: check ^^^ + /// Set of snapshot hashes we failed to import. We will not try to sync with + /// this snapshot again until restart. bad_hashes: HashSet, initialized: bool, } @@ -108,7 +110,7 @@ impl Snapshot { let hash = keccak(chunk); if self.completed_chunks.contains(&hash) { trace!(target: "snapshot_sync", "Already proccessed chunk {:x}. Ignoring.", hash); - return Err(()); + return Ok(ChunkType::Dupe(hash)); } self.downloading_chunks.remove(&hash); if self.pending_block_chunks.iter().any(|h| h == &hash) { @@ -124,25 +126,28 @@ impl Snapshot { } /// Find a chunk to download - // todo[dvdplm] plenty of iterating through the pending `Vec`s here (which are never deleted from btw) + // todo[dvdplm] plenty of iterating through the pending `Vec`s here (which are never deleted + // from btw). Why not `pop()` the chunk hash from the pending_*_chunks? Audit the usage of those + // fields and see if we rely on them staying their initial size for some reason. + // `total_chunks()` do use them, need to re-work that. Same for `is_complete()`. 
pub fn needed_chunk(&mut self) -> Option { // Find next needed chunk: first block, then state chunks let chunk = { - let chunk_filter = |h| !self.downloading_chunks.contains(h) && !self.completed_chunks.contains(h); + let filter = |h| !self.downloading_chunks.contains(h) && !self.completed_chunks.contains(h); - let needed_block_chunk = self.pending_block_chunks.iter() - .filter(|&h| chunk_filter(h)) + let block_chunk = self.pending_block_chunks.iter() + .filter(|&h| filter(h)) .map(|h| *h) .next(); - // If no block chunks to download, get the state chunks - if needed_block_chunk.is_none() { + // If there is no block chunk to download, try the state chunks. + if block_chunk.is_none() { self.pending_state_chunks.iter() - .filter(|&h| chunk_filter(h)) + .filter(|&h| filter(h)) .map(|h| *h) .next() } else { - needed_block_chunk + block_chunk } }; @@ -162,25 +167,22 @@ impl Snapshot { self.bad_hashes.insert(hash); } - /// Whether snapshot hash is known to be bad. + /// Whether a snapshot hash is known to be bad. pub fn is_known_bad(&self, hash: &H256) -> bool { self.bad_hashes.contains(hash) } /// Hash of the snapshot we're currently downloading/importing. - // todo[dvdplm]: verify ^^^ pub fn snapshot_hash(&self) -> Option { self.snapshot_hash } /// Total number of chunks in the snapshot we're currently working on (state + block chunks). - // todo[dvdplm]: verify ^^^ pub fn total_chunks(&self) -> usize { self.pending_block_chunks.len() + self.pending_state_chunks.len() } /// Number of chunks we've processed so far (state and block chunks). 
- // todo[dvdplm]: verify ^^^ pub fn done_chunks(&self) -> usize { self.completed_chunks.len() } From f4ccf20d17be215648b4ecdc3950547b2520f3db Mon Sep 17 00:00:00 2001 From: David Palm Date: Mon, 28 Oct 2019 22:57:25 +0100 Subject: [PATCH 15/27] tweak log message --- ethcore/sync/src/chain/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 0b0b9f0d592..ff26e8088de 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -1101,7 +1101,7 @@ impl ChainSync { } }, RestorationStatus::Initializing { state_chunks, block_chunks, chunks_done } => { - debug!(target: "snapshot_sync", "Snapshot is still initializing: state chunks={}, block chunks={}, chunks done={}", + debug!(target: "snapshot_sync", "Snapshot is initializing: state chunks={}, block chunks={}, chunks done={}", state_chunks, block_chunks, chunks_done); return; }, From decb18c276c136363b4a89f3aec3b385f77810a3 Mon Sep 17 00:00:00 2001 From: David Palm Date: Tue, 29 Oct 2019 18:57:04 +0100 Subject: [PATCH 16/27] Don't warp sync twice Check if our own block is beyond the given warp barrier (can happen after we've completed a warp sync but are not quite yet synced up to the tip) and if so, don't sync. More docs, resolve todos. 
Dial down some `sync` debug level logging to trace --- ethcore/snapshot/src/lib.rs | 2 +- ethcore/sync/src/chain/handler.rs | 16 +++----- ethcore/sync/src/chain/mod.rs | 66 +++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 32 deletions(-) diff --git a/ethcore/snapshot/src/lib.rs b/ethcore/snapshot/src/lib.rs index 406aeeddb93..05e24ce830e 100644 --- a/ethcore/snapshot/src/lib.rs +++ b/ethcore/snapshot/src/lib.rs @@ -452,7 +452,7 @@ impl StateRebuilder { StateDB::commit_bloom(&mut batch, bloom_journal)?; self.db.inject(&mut batch)?; backing.write_buffered(batch); - trace!(target: "snapshot", "current state root: {:?}", self.state_root); + trace!(target: "snapshot", "Wrote {} bytes of state to db&disk. Current state root: {:?}", chunk.len(), self.state_root); Ok(()) } diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index c87b9f86098..cd06aea800f 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -76,14 +76,14 @@ impl SyncHandler { SignedPrivateTransactionPacket => SyncHandler::on_signed_private_transaction(sync, io, peer, &rlp), PrivateStatePacket => SyncHandler::on_private_state_data(sync, io, peer, &rlp), _ => { - debug!(target: "sync", "{}: Unknown packet {}", peer, packet_id.id()); + trace!(target: "sync", "{}: Unknown packet {}", peer, packet_id.id()); Ok(()) } }; match result { Err(DownloaderImportError::Invalid) => { - debug!(target:"sync", "{} -> Invalid packet {}", peer, packet_id.id()); + trace!(target:"sync", "{} -> Invalid packet {}", peer, packet_id.id()); io.disable_peer(peer); sync.deactivate_peer(io, peer); }, @@ -96,7 +96,7 @@ impl SyncHandler { }, } } else { - debug!(target: "sync", "{}: Unknown packet {}", peer, packet_id); + trace!(target: "sync", "{}: Unknown packet {}", peer, packet_id); } } @@ -371,18 +371,18 @@ impl SyncHandler { let block_set = sync.peers.get(&peer_id).and_then(|p| p.block_set).unwrap_or(BlockSet::NewBlocks); if 
!sync.reset_peer_asking(peer_id, PeerAsking::BlockHeaders) { - debug!(target: "sync", "{}: Ignored unexpected headers", peer_id); + trace!(target: "sync", "{}: Ignored unexpected headers", peer_id); return Ok(()); } let expected_hash = match expected_hash { Some(hash) => hash, None => { - debug!(target: "sync", "{}: Ignored unexpected headers (expected_hash is None)", peer_id); + trace!(target: "sync", "{}: Ignored unexpected headers (expected_hash is None)", peer_id); return Ok(()); } }; if !allowed { - debug!(target: "sync", "{}: Ignored unexpected headers (peer not allowed)", peer_id); + trace!(target: "sync", "{}: Ignored unexpected headers (peer not allowed)", peer_id); return Ok(()); } @@ -556,10 +556,6 @@ impl SyncHandler { warn!(target: "snapshot_sync", "{}: Duplicate chunk (hash {:?}).", peer_id, hash); } Err(()) => { - // todo[dvdplm] this seems wrong: we'll get `Err(())` back even if we happened to - // have seen this chunk already. Should a peer be disconnected if they happen to - // send us the same chunk twice? Can't we be downloading chunks from more than one - // peer? trace!(target: "snapshot_sync", "{}: Got bad snapshot chunk", peer_id); io.disconnect_peer(peer_id); return Ok(()); diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index ff26e8088de..b8167728fd4 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -172,12 +172,17 @@ const MAX_NEW_BLOCK_AGE: BlockNumber = 20; // maximal packet size with transactions (cannot be greater than 16MB - protocol limitation). // keep it under 8MB as well, cause it seems that it may result oversized after compression. const MAX_TRANSACTION_PACKET_SIZE: usize = 5 * 1024 * 1024; -// Min number of blocks to be behind for a snapshot sync +// Min number of blocks to be behind the tip for a snapshot sync to be considered useful to us. const SNAPSHOT_RESTORE_THRESHOLD: BlockNumber = 30000; +/// We prefer to sync snapshots that are available from this many peers. 
If we have not found a +/// snapshot available from `SNAPSHOT_MIN_PEERS` peers within `WAIT_PEERS_TIMEOUT`, then we make do +/// with a single peer to sync from. const SNAPSHOT_MIN_PEERS: usize = 3; - -// todo[dvdplm] One problem with warp-sync is when the remote peer we're downloading from goes offline. Would it make sense to increase the number of chunks here so we download as much data as we can? -const MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD: usize = 3; +/// To keep memory from growing uncontrollably we restore chunks as we download them and write them +/// to disk only after we have processed them; we also want to avoid pausing the chunk download too +/// often, so we allow a little bit of leeway here and let the downloading be +/// `MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD` chunks ahead of the restoration. +const MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD: usize = 5; /// Time to wait for snapshotting peers to show up with a snapshot we want to use. Beyond this time, /// a single peer is enough to start downloading. @@ -597,16 +602,25 @@ impl ChainSync { } /// Reset the client to its initial state: - /// - if warp sync is enabled, start looking for peers to sync a snapshot from - /// - if `--warp-barrier` is used, ensure we're not synced beyond the barrier and start - /// looking for peers to sync a snapshot from - /// - otherwise, go `Idle`. + /// - if warp sync is enabled, start looking for peers to sync a snapshot from + /// - if `--warp-barrier` is used, ensure we're not synced beyond the barrier and start + /// looking for peers to sync a snapshot from + /// - otherwise, go `Idle`. 
fn get_init_state(warp_sync: WarpSync, chain: &dyn BlockChainClient) -> SyncState { let best_block = chain.chain_info().best_block_number; match warp_sync { - WarpSync::Enabled => SyncState::WaitingPeers, - WarpSync::OnlyAndAfter(block) if block > best_block => SyncState::WaitingPeers, - _ => SyncState::Idle, + WarpSync::Enabled => { + debug!(target: "sync", "Setting the initial state to `WaitingPeers`. Our best block: #{}; warp_sync: {:?}", best_block, warp_sync); + SyncState::WaitingPeers + }, + WarpSync::OnlyAndAfter(block) if block > best_block => { + debug!(target: "sync", "Setting the initial state to `WaitingPeers`. Our best block: #{}; warp_sync: {:?}", best_block, warp_sync); + SyncState::WaitingPeers + }, + _ => { + debug!(target: "sync", "Setting the initial state to `Idle`. Our best block: #{}", best_block); + SyncState::Idle + }, } } } @@ -699,7 +713,7 @@ impl ChainSync { sync } - /// Returns synchonization status + /// Returns synchronization status pub fn status(&self) -> SyncStatus { let last_imported_number = self.new_blocks.last_imported_block_number(); SyncStatus { @@ -777,7 +791,7 @@ impl ChainSync { fn reset(&mut self, io: &mut dyn SyncIo, state: Option) { self.new_blocks.reset(); let chain_info = io.chain().chain_info(); - for (_, ref mut p) in &mut self.peers { + for (_, mut p) in &mut self.peers { if p.block_set != Some(BlockSet::OldBlocks) { p.reset_asking(); if p.difficulty.is_none() { @@ -803,6 +817,8 @@ impl ChainSync { io.snapshot_service().abort_restore(); } self.snapshot.clear(); + // Passing `None` here means we'll end up in either `SnapshotWaiting` or `Idle` depending on + // the warp sync settings. 
self.reset(io, None); self.continue_sync(io); } @@ -829,7 +845,15 @@ impl ChainSync { let fork_block = self.fork_block.map_or(0, |(n, _)| n); let expected_warp_block = match self.warp_sync { - WarpSync::OnlyAndAfter(block) => block, + WarpSync::OnlyAndAfter(warp_block) => { + if our_best_block >= warp_block { + trace!(target: "snapshot_sync", + "Our best block (#{}) is already beyond the warp barrier block (#{})", + our_best_block, warp_block); + return; + } + warp_block + }, _ => 0, }; // Collect snapshot info from peers and check if we can use their snapshots to sync. @@ -844,8 +868,8 @@ impl ChainSync { our_best_block < sn && (sn - our_best_block) > SNAPSHOT_RESTORE_THRESHOLD && // Snapshot must have been taken after the fork block (if any is configured) sn > fork_block && - // Snapshot must be greater than the warp barrier, if any - sn > expected_warp_block + // Snapshot must be greater or equal to the warp barrier, if any + sn >= expected_warp_block ) ) .filter_map(|(p, peer)| { @@ -1298,24 +1322,24 @@ impl ChainSync { SyncState::SnapshotWaiting => { match io.snapshot_service().status() { RestorationStatus::Inactive => { - trace!(target:"sync", "Snapshot restoration is complete"); + trace!(target:"snapshot_sync", "Snapshot restoration is complete"); self.restart(io); }, RestorationStatus::Initializing { .. } => { - trace!(target:"sync", "Snapshot restoration is initializing"); + trace!(target:"snapshot_sync", "Snapshot restoration is initializing"); }, RestorationStatus::Finalizing { .. } => { - trace!(target:"sync", "Snapshot finalizing restoration"); + trace!(target:"snapshot_sync", "Snapshot finalizing restoration"); }, RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. 
} => { if !self.snapshot.is_complete() && self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize <= MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD { - trace!(target:"sync", "Resuming snapshot sync"); + trace!(target:"snapshot_sync", "Resuming snapshot sync"); self.set_state(SyncState::SnapshotData); self.continue_sync(io); } }, RestorationStatus::Failed => { - trace!(target: "sync", "Snapshot restoration aborted"); + trace!(target: "snapshot_sync", "Snapshot restoration aborted"); self.set_state(SyncState::WaitingPeers); self.snapshot.clear(); self.continue_sync(io); From d79a4f44fb1bf5ac330991ffb34b291dfd6e6f19 Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 00:14:19 +0100 Subject: [PATCH 17/27] Avoid iterating over all snapshot block/state hashes to find the next work item Use a HashSet instead of a Vec and remove items from the set as chunks are processed. Calculate and store the total number of chunks in the `Snapshot` struct instead of counting pending chunks each time. --- ethcore/sync/src/chain/mod.rs | 4 +- ethcore/sync/src/snapshot_sync.rs | 97 ++++++++++++++++--------------- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index b8167728fd4..f688eb705e7 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -1115,9 +1115,9 @@ impl ChainSync { }, SyncState::SnapshotData => { match io.snapshot_service().status() { - RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. 
} => { + RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, state_chunks, block_chunks } => { // Initialize the snapshot if not already done - self.snapshot.initialize(io.snapshot_service()); + self.snapshot.initialize(io.snapshot_service(), block_chunks as usize + state_chunks as usize); if self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize > MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD { trace!(target: "snapshot_sync", "Snapshot queue full, pausing sync"); self.set_state(SyncState::SnapshotWaiting); diff --git a/ethcore/sync/src/snapshot_sync.rs b/ethcore/sync/src/snapshot_sync.rs index 0b7b999d038..f48211b3069 100644 --- a/ethcore/sync/src/snapshot_sync.rs +++ b/ethcore/sync/src/snapshot_sync.rs @@ -38,16 +38,18 @@ pub enum ChunkType { pub struct Snapshot { /// List of hashes of the state chunks we need to complete the warp sync from this snapshot. /// These hashes are contained in the Manifest we downloaded from the peer(s). - pending_state_chunks: Vec, + pending_state_chunks: HashSet, /// List of hashes of the block chunks we need to complete the warp sync from this snapshot. /// These hashes are contained in the Manifest we downloaded from the peer(s). - pending_block_chunks: Vec, + pending_block_chunks: HashSet, /// Set of hashes of chunks we are currently downloading. downloading_chunks: HashSet, /// The set of chunks (block or state) that we have successfully downloaded. completed_chunks: HashSet, /// The hash of the the `ManifestData` RLP that we're downloading. snapshot_hash: Option, + /// Total number of chunks in the current snapshot. + total_chunks: Option, /// Set of snapshot hashes we failed to import. We will not try to sync with /// this snapshot again until restart. 
bad_hashes: HashSet, @@ -61,7 +63,7 @@ impl Snapshot { } /// Sync the Snapshot completed chunks with the Snapshot Service - pub fn initialize(&mut self, snapshot_service: &dyn SnapshotService) { + pub fn initialize(&mut self, snapshot_service: &dyn SnapshotService, total_chunks: usize) { if self.initialized { return; } @@ -72,10 +74,10 @@ impl Snapshot { trace!( target: "snapshot_sync", - "Snapshot is now initialized with {} completed chunks.", - self.completed_chunks.len(), + "Snapshot initialized. {}/{} completed chunks.", + self.completed_chunks.len(), total_chunks ); - + self.total_chunks = Some(total_chunks); self.initialized = true; } @@ -86,6 +88,7 @@ impl Snapshot { self.downloading_chunks.clear(); self.completed_chunks.clear(); self.snapshot_hash = None; + self.total_chunks = None; self.initialized = false; } @@ -98,8 +101,9 @@ impl Snapshot { /// block&state chunk hashes contained in the `ManifestData`). pub fn reset_to(&mut self, manifest: &ManifestData, hash: &H256) { self.clear(); - self.pending_state_chunks = manifest.state_hashes.clone(); - self.pending_block_chunks = manifest.block_hashes.clone(); + self.pending_state_chunks = HashSet::from_iter(manifest.state_hashes.clone()); + self.pending_block_chunks = HashSet::from_iter(manifest.block_hashes.clone()); + self.total_chunks = Some(self.pending_block_chunks.len() + self.pending_state_chunks.len()); self.snapshot_hash = Some(hash.clone()); } @@ -113,44 +117,36 @@ impl Snapshot { return Ok(ChunkType::Dupe(hash)); } self.downloading_chunks.remove(&hash); - if self.pending_block_chunks.iter().any(|h| h == &hash) { - self.completed_chunks.insert(hash.clone()); - return Ok(ChunkType::Block(hash)); - } - if self.pending_state_chunks.iter().any(|h| h == &hash) { - self.completed_chunks.insert(hash.clone()); - return Ok(ChunkType::State(hash)); - } - trace!(target: "snapshot_sync", "Ignoring unknown chunk: {:x}", hash); - Err(()) + + self.pending_block_chunks.take(&hash) + .and_then(|h| { + 
self.completed_chunks.insert(h); + Some(ChunkType::Block(hash)) + }) + .or( + self.pending_state_chunks.take(&hash) + .and_then(|h| { + self.completed_chunks.insert(h); + Some(ChunkType::State(hash)) + }) + ).ok_or_else(|| { + trace!(target: "snapshot_sync", "Ignoring unknown chunk: {:x}", hash); + () + }) } /// Find a chunk to download - // todo[dvdplm] plenty of iterating through the pending `Vec`s here (which are never deleted - // from btw). Why not `pop()` the chunk hash from the pending_*_chunks? Audit the usage of those - // fields and see if we rely on them staying their initial size for some reason. - // `total_chunks()` do use them, need to re-work that. Same for `is_complete()`. pub fn needed_chunk(&mut self) -> Option { // Find next needed chunk: first block, then state chunks let chunk = { let filter = |h| !self.downloading_chunks.contains(h) && !self.completed_chunks.contains(h); - - let block_chunk = self.pending_block_chunks.iter() - .filter(|&h| filter(h)) + self.pending_block_chunks.iter() + .find(|&h| filter(h)) + .or(self.pending_state_chunks.iter() + .find(|&h| filter(h)) + ) .map(|h| *h) - .next(); - - // If there is no block chunk to download, try the state chunks. - if block_chunk.is_none() { - self.pending_state_chunks.iter() - .filter(|&h| filter(h)) - .map(|h| *h) - .next() - } else { - block_chunk - } }; - if let Some(hash) = chunk { self.downloading_chunks.insert(hash.clone()); } @@ -179,7 +175,7 @@ impl Snapshot { /// Total number of chunks in the snapshot we're currently working on (state + block chunks). pub fn total_chunks(&self) -> usize { - self.pending_block_chunks.len() + self.pending_state_chunks.len() + self.total_chunks.unwrap_or_default() } /// Number of chunks we've processed so far (state and block chunks). 
@@ -240,25 +236,30 @@ mod test { let mut snapshot = Snapshot::new(); let (manifest, mhash, state_chunks, block_chunks) = test_manifest(); snapshot.reset_to(&manifest, &mhash); - assert_eq!(snapshot.done_chunks(), 0); - assert!(snapshot.validate_chunk(&H256::random().as_bytes().to_vec()).is_err()); + assert_eq!(snapshot.done_chunks(), 0, "no chunks done at outset"); + assert!(snapshot.validate_chunk(&H256::random().as_bytes().to_vec()).is_err(), "random chunk is invalid"); + // request all 20 + 20 chunks let requested: Vec = (0..40).map(|_| snapshot.needed_chunk().unwrap()).collect(); - assert!(snapshot.needed_chunk().is_none()); + assert!(snapshot.needed_chunk().is_none(), "no chunks left after all are drained"); let requested_all_block_chunks = manifest.block_hashes.iter() .all(|h| requested.iter().any(|rh| rh == h)); - assert!(requested_all_block_chunks); + assert!(requested_all_block_chunks, "all block chunks in the manifest accounted for"); let requested_all_state_chunks = manifest.state_hashes.iter() .all(|h| requested.iter().any(|rh| rh == h)); - assert!(requested_all_state_chunks); + assert!(requested_all_state_chunks, "all state chunks in the manifest accounted for"); - assert_eq!(snapshot.downloading_chunks.len(), 40); + assert_eq!(snapshot.downloading_chunks.len(), 40, "all requested chunks are downloading"); - assert_eq!(snapshot.validate_chunk(&state_chunks[4]), Ok(ChunkType::State(manifest.state_hashes[4].clone()))); - assert_eq!(snapshot.completed_chunks.len(), 1); - assert_eq!(snapshot.downloading_chunks.len(), 39); + assert_eq!( + snapshot.validate_chunk(&state_chunks[4]), + Ok(ChunkType::State(manifest.state_hashes[4].clone())), + "4th state chunk hash validates as such" + ); + assert_eq!(snapshot.completed_chunks.len(), 1, "after validating a chunk, it's in the completed set"); + assert_eq!(snapshot.downloading_chunks.len(), 39, "after validating a chunk, there's one less in the downloading set"); 
assert_eq!(snapshot.validate_chunk(&block_chunks[10]), Ok(ChunkType::Block(manifest.block_hashes[10].clone()))); assert_eq!(snapshot.completed_chunks.len(), 2); @@ -276,7 +277,7 @@ mod test { } } - assert!(snapshot.is_complete()); + assert!(snapshot.is_complete(), "when all chunks have been validated, we're done"); assert_eq!(snapshot.done_chunks(), 40); assert_eq!(snapshot.done_chunks(), snapshot.total_chunks()); assert_eq!(snapshot.snapshot_hash(), Some(keccak(manifest.into_rlp()))); From ebab77888e69f94143855c8aed1b2c85d5faf89b Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 15:34:15 +0100 Subject: [PATCH 18/27] Address review grumbles --- Cargo.toml | 2 +- ethcore/snapshot/src/service.rs | 3 +-- ethcore/sync/src/chain/handler.rs | 7 ------- ethcore/sync/src/chain/mod.rs | 2 +- ethcore/sync/src/sync_io.rs | 14 ++++++-------- 5 files changed, 9 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 49e44cba16b..c81f5acb356 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,7 +121,7 @@ name = "parity" [profile.release] debug = false -lto = false +lto = true [workspace] # This should only list projects that are not diff --git a/ethcore/snapshot/src/service.rs b/ethcore/snapshot/src/service.rs index 32f2251ec5b..40ce19a50d9 100644 --- a/ethcore/snapshot/src/service.rs +++ b/ethcore/snapshot/src/service.rs @@ -787,8 +787,7 @@ impl Service where C: SnapshotClient + ChainInfo { false => Ok(()) } } - // todo[dvdplm] aren't we tossing away any errors from feed_state/feed_blocks here? And why the explicit `drop` here? - other => other.map(drop), + Err(e) => Err(e) }; (res, db) } diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index cd06aea800f..ffa1c46d765 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -515,13 +515,6 @@ impl SyncHandler { // only note bad if restoration failed. 
if let (Some(hash), RestorationStatus::Failed) = (sync.snapshot.snapshot_hash(), status) { - // todo[dvdplm]: how do we know we `Failed` because of them and not because - // something happened on our end, say disk was full? - // On a second look I think `feed_chunk_with_restoration()`, the - // `SnapshotService` implementation, is indeed tossing away some errors. I think - // it should be refactored to distinguish the two cases of: restoration failed - // because the snapshot data is bad (corrupted, partial, malicious) and errors - // caused by the local env (out of disk seems like the obvious one). debug!(target: "snapshot_sync", "Marking snapshot manifest hash {} as bad", hash); sync.snapshot.note_bad(hash); } diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index f688eb705e7..b20379d756e 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -1070,7 +1070,7 @@ impl ChainSync { peer_id, peer_snapshot_number, chain_info.best_block_number, - io.peer_enode(peer_id), + io.peer_enode(peer_id).unwrap_or_else(|| "enode://???".to_string()), ); self.maybe_start_snapshot_sync(io); }, diff --git a/ethcore/sync/src/sync_io.rs b/ethcore/sync/src/sync_io.rs index 47cd9184b45..15b99e649c3 100644 --- a/ethcore/sync/src/sync_io.rs +++ b/ethcore/sync/src/sync_io.rs @@ -51,7 +51,7 @@ pub trait SyncIo { ClientVersion::from(peer_id.to_string()) } /// Returns the peer enode string - fn peer_enode(&self, peer_id: PeerId) -> String; + fn peer_enode(&self, peer_id: PeerId) -> Option; /// Returns information on p2p session fn peer_session_info(&self, peer_id: PeerId) -> Option; /// Maximum mutually supported ETH protocol version @@ -129,13 +129,11 @@ impl<'s> SyncIo for NetSyncIo<'s> { self.network.peer_client_version(peer_id) } - fn peer_enode(&self, peer_id: PeerId) -> String { - self.network.session_info(peer_id).map_or("enode://???".into(), |info| { - if let Some(id) = info.id { - format!("enode:://{}@{}", id, info.remote_address) - 
} else { - format!("enode:://???@{}", info.remote_address) - } + fn peer_enode(&self, peer_id: PeerId) -> Option { + self.network.session_info(peer_id).and_then(|info| { + info.id.map(|node_id| { + format!("enode:://{}@{}", node_id, info.remote_address) + }) }) } From 2e75fb432742faf1966a289699ecc9efef8d41d9 Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 16:43:56 +0100 Subject: [PATCH 19/27] Log correct number of bytes written to disk --- ethcore/snapshot/src/lib.rs | 1 - ethcore/snapshot/src/service.rs | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ethcore/snapshot/src/lib.rs b/ethcore/snapshot/src/lib.rs index 05e24ce830e..a7f30cc6593 100644 --- a/ethcore/snapshot/src/lib.rs +++ b/ethcore/snapshot/src/lib.rs @@ -452,7 +452,6 @@ impl StateRebuilder { StateDB::commit_bloom(&mut batch, bloom_journal)?; self.db.inject(&mut batch)?; backing.write_buffered(batch); - trace!(target: "snapshot", "Wrote {} bytes of state to db&disk. Current state root: {:?}", chunk.len(), self.state_root); Ok(()) } diff --git a/ethcore/snapshot/src/service.rs b/ethcore/snapshot/src/service.rs index 40ce19a50d9..577b9c94bd8 100644 --- a/ethcore/snapshot/src/service.rs +++ b/ethcore/snapshot/src/service.rs @@ -161,6 +161,7 @@ impl Restoration { if let Some(ref mut writer) = self.writer.as_mut() { writer.write_state_chunk(hash, chunk)?; + trace!(target: "snapshot", "Wrote {}/{} bytes of state to db/disk. 
Current state root: {:?}", len, chunk.len(), self.state.state_root()); } self.state_chunks_left.remove(&hash); From 5970b80733da0eccf0ba6a8469bb80a25a3fdf86 Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 16:49:35 +0100 Subject: [PATCH 20/27] Revert ChunkType::Dup change --- ethcore/sync/src/chain/handler.rs | 3 --- ethcore/sync/src/snapshot_sync.rs | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index ffa1c46d765..18212ad1405 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -545,9 +545,6 @@ impl SyncHandler { trace!(target: "snapshot_sync", "{}: Processing state chunk", peer_id); io.snapshot_service().restore_state_chunk(hash, snapshot_data); } - Ok(ChunkType::Dupe(hash)) => { - warn!(target: "snapshot_sync", "{}: Duplicate chunk (hash {:?}).", peer_id, hash); - } Err(()) => { trace!(target: "snapshot_sync", "{}: Got bad snapshot chunk", peer_id); io.disconnect_peer(peer_id); diff --git a/ethcore/sync/src/snapshot_sync.rs b/ethcore/sync/src/snapshot_sync.rs index f48211b3069..6b33663ead8 100644 --- a/ethcore/sync/src/snapshot_sync.rs +++ b/ethcore/sync/src/snapshot_sync.rs @@ -30,8 +30,6 @@ pub enum ChunkType { State(H256), /// The chunk contains block data. Block(H256), - /// Already seen chunk - Dupe(H256) } #[derive(Default, MallocSizeOf)] @@ -114,7 +112,7 @@ impl Snapshot { let hash = keccak(chunk); if self.completed_chunks.contains(&hash) { trace!(target: "snapshot_sync", "Already proccessed chunk {:x}. 
Ignoring.", hash); - return Ok(ChunkType::Dupe(hash)); + return Err(()); } self.downloading_chunks.remove(&hash); From 100ca6b5f664034bfaf42ea72a162d5e93e84292 Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 16:51:31 +0100 Subject: [PATCH 21/27] whitespace grumble --- ethcore/types/src/snapshot.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethcore/types/src/snapshot.rs b/ethcore/types/src/snapshot.rs index 8f49a11ebaa..3e1be1d0eda 100644 --- a/ethcore/types/src/snapshot.rs +++ b/ethcore/types/src/snapshot.rs @@ -193,7 +193,7 @@ pub type ChunkSink<'a> = dyn FnMut(&[u8]) -> std::io::Result<()> + 'a; /// Statuses for snapshot restoration. #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub enum RestorationStatus { - /// No restoration activity currently. + /// No restoration activity currently. Inactive, /// Restoration is initializing. Initializing { From bd9fc2013274758a1ed6b7c2230b22abb8e95750 Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 16:52:40 +0100 Subject: [PATCH 22/27] Cleanup debugging code --- ethcore/sync/src/chain/mod.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index b20379d756e..7f618ab76ee 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -927,15 +927,8 @@ impl ChainSync { self.set_state(SyncState::Idle); self.continue_sync(io); } else { - // warn!(target: "snapshot_sync", "No snapshots currently available at #{}. Try using a smaller value for --warp-barrier", expected_warp_block); - // todo[dvdplm] remove before merge, debug only, then put the `warn!` line back. 
- trace!(target: "snapshot_sync", "No snapshots currently available at #{}; time to wait for snapshot peers is up and we do NOT have a snapshot peer: \ - (best_block={:?}, best_hash={:?}, snapshot_peers.len={:?}, state={:?}, peers.len={}).", - expected_warp_block, best_snapshot_block, best_hash, snapshot_peers.len(), self.state, self.peers.len()) + warn!(target: "snapshot_sync", "No snapshots currently available at #{}. Try using a smaller value for --warp-barrier", expected_warp_block); } - } else { - // todo[dvdplm] remove this, it's debug only - trace!(target: "snapshot_sync", "Time to wait for snapshot peers is NOT expired and we do NOT have a snapshot peer (best_block={:?}, best_hash={:?}, peers.len={:?}, state={:?}, peers.len={}).", best_snapshot_block, best_hash, snapshot_peers.len(), self.state, self.peers.len()); } } From 29a26c916ce9f712b9beac4ca10fac10c2027f1b Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 17:09:32 +0100 Subject: [PATCH 23/27] Fix docs --- ethcore/sync/src/chain/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 7f618ab76ee..79f1a61d020 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -194,7 +194,7 @@ const HEADERS_TIMEOUT: Duration = Duration::from_secs(15); const BODIES_TIMEOUT: Duration = Duration::from_secs(20); const RECEIPTS_TIMEOUT: Duration = Duration::from_secs(10); const FORK_HEADER_TIMEOUT: Duration = Duration::from_secs(3); -/// Max time to wait for a peer to send us a snapshot manifest. +/// Max time to wait for the Snapshot Manifest packet to arrive from a peer after it's being asked. 
const SNAPSHOT_MANIFEST_TIMEOUT: Duration = Duration::from_secs(15); const SNAPSHOT_DATA_TIMEOUT: Duration = Duration::from_secs(120); const PRIVATE_STATE_TIMEOUT: Duration = Duration::from_secs(120); From ac4cd1005f80fdc89c9aee35aba6e3e627c6172d Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 17:47:29 +0100 Subject: [PATCH 24/27] Fix import and a typo --- ethcore/sync/src/chain/mod.rs | 2 +- ethcore/sync/src/chain/requester.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 79f1a61d020..b875698b17c 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -115,7 +115,7 @@ use ethereum_types::{H256, U256}; use fastmap::{H256FastMap, H256FastSet}; use futures::sync::mpsc as futures_mpsc; use keccak_hash::keccak; -use log::{error, trace, debug}; +use log::{error, trace, debug, warn}; use network::client_version::ClientVersion; use network::{self, PeerId, PacketId}; use parity_util_mem::{MallocSizeOfExt, malloc_size_of_is_0}; diff --git a/ethcore/sync/src/chain/requester.rs b/ethcore/sync/src/chain/requester.rs index 59a2aaf2fb1..d91af8902ce 100644 --- a/ethcore/sync/src/chain/requester.rs +++ b/ethcore/sync/src/chain/requester.rs @@ -87,7 +87,7 @@ impl SyncRequester { SyncRequester::send_request(sync, io, peer_id, PeerAsking::ForkHeader, GetBlockHeadersPacket, rlp.out()); } - /// Find some headers or blocks to download for a peer. + /// Request a chunk of snapshot data to download from a peer. 
pub fn request_snapshot_data(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId) { // find chunk data to download if let Some(hash) = sync.snapshot.needed_chunk() { From 81d46cb9d7060779e7aba50828ebb634af4e720c Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 30 Oct 2019 18:02:04 +0100 Subject: [PATCH 25/27] Fix test impl --- ethcore/sync/src/tests/helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethcore/sync/src/tests/helpers.rs b/ethcore/sync/src/tests/helpers.rs index bf7c155bd2a..98a4adfe195 100644 --- a/ethcore/sync/src/tests/helpers.rs +++ b/ethcore/sync/src/tests/helpers.rs @@ -147,7 +147,7 @@ impl<'p, C> SyncIo for TestIo<'p, C> where C: FlushingBlockChainClient, C: 'p { .into() } - fn peer_enode(&self, _peer_id: usize) -> String { + fn peer_enode(&self, _peer_id: usize) -> Option { unimplemented!() } From d2c347af38e9dd8bb5fdf16a7e3eee6d795335a4 Mon Sep 17 00:00:00 2001 From: David Palm Date: Thu, 31 Oct 2019 14:15:11 +0100 Subject: [PATCH 26/27] Use `indexmap::IndexSet` to ensure chunk hashes are accessed in order --- Cargo.lock | 20 ++++++++++++-------- ethcore/sync/Cargo.toml | 1 + ethcore/sync/src/chain/mod.rs | 2 +- ethcore/sync/src/snapshot_sync.rs | 24 +++++++++++++++++++----- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8aae4548df4..f5e2c31bba2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -207,7 +207,7 @@ dependencies = [ [[package]] name = "autocfg" -version = "0.1.4" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -280,7 +280,7 @@ name = "bincode" version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.99 
(registry+https://github.com/rust-lang/crates.io-index)", ] @@ -873,7 +873,7 @@ dependencies = [ "ethabi 9.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1525,6 +1525,7 @@ dependencies = [ "ethereum-types 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "fastmap 0.1.0", "futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "keccak-hash 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "kvdb-memorydb 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1923,7 +1924,7 @@ dependencies = [ "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)", "http 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", - "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", "string 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1954,7 +1955,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "ahash 0.2.16 
(registry+https://github.com/rust-lang/crates.io-index)", - "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2145,8 +2146,11 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.0.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "instant-seal" @@ -5394,7 +5398,7 @@ dependencies = [ "checksum assert_matches 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7deb0a829ca7bcfaf5da70b073a8d128619259a7be8216a355e23f00763059e5" "checksum attohttpc 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "eaf0ec4b0e00f61ee75556ca027485b7b354f4a714d88cc03f4468abd9378c86" "checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" -"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf" +"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" "checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a" "checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0" "checksum base-x 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "5cda5d0f5584d129112ad8bf4775b9fd2b9f1e30738c7b1a25314ba2244d6a51" @@ -5510,7 +5514,7 @@ dependencies = [ "checksum impl-codec 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3fa0086251524c50fd53b32e7b05eb6d79e2f97221eaf0c53c0ca9c3096f21d3" "checksum impl-rlp 0.2.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "f39b9963cf5f12fcc4ae4b30a6927ed67d6b4ea4cbe7d17a41131163b401303b" "checksum impl-serde 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bbb1ea6188aca47a0eaeeb330d8a82f16cd500f30b897062d23922568727333a" -"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d" +"checksum indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712d7b3ea5827fcb9d4fda14bf4da5f136f0db2ae9c8f4bd4e2d1c6fde4e6db2" "checksum integer-encoding 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "26746cbc2e680af687e88d717f20ff90079bd10fc984ad57d277cd0e37309fa5" "checksum interleaved-ordered 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "141340095b15ed7491bd3d4ced9d20cebfb826174b6bb03386381f62b01e3d77" "checksum iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe6e417e7d0975db6512b90796e8ce223145ac4e33c377e4a42882a0e88bb08" diff --git a/ethcore/sync/Cargo.toml b/ethcore/sync/Cargo.toml index 8356249d914..db07e294f04 100644 --- a/ethcore/sync/Cargo.toml +++ b/ethcore/sync/Cargo.toml @@ -19,6 +19,7 @@ ethcore-private-tx = { path = "../private-tx" } ethereum-types = "0.8.0" fastmap = { path = "../../util/fastmap" } futures = "0.1" +indexmap = "1.3.0" keccak-hash = "0.4.0" light = { package = "ethcore-light", path = "../light" } log = "0.4" diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index b875698b17c..1d08aa199cf 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -923,7 +923,7 @@ impl ChainSync { } } else if timeout { if !self.warp_sync.is_warp_only() { - debug!(target: "snapshot_sync", "No snapshots found, starting full sync"); + debug!(target: "snapshot_sync", "Not syncing snapshots (or none found), proceeding with normal sync."); self.set_state(SyncState::Idle); self.continue_sync(io); } else 
{ diff --git a/ethcore/sync/src/snapshot_sync.rs b/ethcore/sync/src/snapshot_sync.rs index 6b33663ead8..71e1bd57f5c 100644 --- a/ethcore/sync/src/snapshot_sync.rs +++ b/ethcore/sync/src/snapshot_sync.rs @@ -22,6 +22,7 @@ use keccak_hash::keccak; use log::trace; use snapshot::SnapshotService; use common_types::snapshot::ManifestData; +use indexmap::IndexSet; #[derive(PartialEq, Eq, Debug)] /// The type of data contained in a chunk: state or block. @@ -36,10 +37,18 @@ pub enum ChunkType { pub struct Snapshot { /// List of hashes of the state chunks we need to complete the warp sync from this snapshot. /// These hashes are contained in the Manifest we downloaded from the peer(s). - pending_state_chunks: HashSet, + /// Note: this is an ordered set so that state restoration happens in order, which keeps + /// memory usage down. + // See https://github.com/paritytech/parity-common/issues/255 + #[ignore_malloc_size_of = "no impl for IndexSet (yet)"] + pending_state_chunks: IndexSet, /// List of hashes of the block chunks we need to complete the warp sync from this snapshot. /// These hashes are contained in the Manifest we downloaded from the peer(s). - pending_block_chunks: HashSet, + /// Note: this is an ordered set so that state restoration happens in order, which keeps + /// memory usage down. + // See https://github.com/paritytech/parity-common/issues/255 + #[ignore_malloc_size_of = "no impl for IndexSet (yet)"] + pending_block_chunks: IndexSet, /// Set of hashes of chunks we are currently downloading. downloading_chunks: HashSet, /// The set of chunks (block or state) that we have successfully downloaded. @@ -99,8 +108,8 @@ impl Snapshot { /// block&state chunk hashes contained in the `ManifestData`). 
pub fn reset_to(&mut self, manifest: &ManifestData, hash: &H256) { self.clear(); - self.pending_state_chunks = HashSet::from_iter(manifest.state_hashes.clone()); - self.pending_block_chunks = HashSet::from_iter(manifest.block_hashes.clone()); + self.pending_state_chunks = IndexSet::from_iter(manifest.state_hashes.clone()); + self.pending_block_chunks = IndexSet::from_iter(manifest.block_hashes.clone()); self.total_chunks = Some(self.pending_block_chunks.len() + self.pending_state_chunks.len()); self.snapshot_hash = Some(hash.clone()); } @@ -133,7 +142,12 @@ impl Snapshot { }) } - /// Find a chunk to download + /// Pick a chunk to download. + /// Note: the order in which chunks are processed is somewhat important. The account state + /// sometimes spills over into more than one chunk and the parts of state that are missing + /// pieces are held in memory while waiting for the next chunk(s) to show up. This means that + /// when chunks are processed out-of-order, memory usage goes up, sometimes significantly (see + /// e.g. https://github.com/paritytech/parity-ethereum/issues/8825). pub fn needed_chunk(&mut self) -> Option { // Find next needed chunk: first block, then state chunks let chunk = { From 1ebd11960abf442370ea1dfa9c41e095f6a151d8 Mon Sep 17 00:00:00 2001 From: David Palm Date: Thu, 31 Oct 2019 15:38:26 +0100 Subject: [PATCH 27/27] Revert increased SNAPSHOT_MANIFEST_TIMEOUT: 5sec should be enough --- ethcore/sync/src/chain/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 1d08aa199cf..488e358ced3 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -195,7 +195,7 @@ const BODIES_TIMEOUT: Duration = Duration::from_secs(20); const RECEIPTS_TIMEOUT: Duration = Duration::from_secs(10); const FORK_HEADER_TIMEOUT: Duration = Duration::from_secs(3); /// Max time to wait for the Snapshot Manifest packet to arrive from a peer after it has been asked. 
-const SNAPSHOT_MANIFEST_TIMEOUT: Duration = Duration::from_secs(15); +const SNAPSHOT_MANIFEST_TIMEOUT: Duration = Duration::from_secs(5); const SNAPSHOT_DATA_TIMEOUT: Duration = Duration::from_secs(120); const PRIVATE_STATE_TIMEOUT: Duration = Duration::from_secs(120);