From 5315731a7ac58c38c986502c447b27a4198d538a Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 20 Mar 2022 19:54:34 -0500 Subject: [PATCH 01/72] gossip-support: be explicit about dimensions --- node/network/gossip-support/src/lib.rs | 26 +++++++++--- node/network/gossip-support/src/tests.rs | 50 ++++++++++++++++-------- node/subsystem-types/src/messages.rs | 11 ++++-- 3 files changed, 63 insertions(+), 24 deletions(-) diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index 9db1e9050df2..6401d4133f09 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -493,16 +493,29 @@ where .expect("our_index < len; indices contains it; qed"); let neighbors = matrix_neighbors(our_shuffled_position, len); - let our_neighbors = neighbors.map(|i| authorities[indices[i]].clone()).collect(); - - ctx.send_message(NetworkBridgeMessage::NewGossipTopology { our_neighbors }) + let row_neighbors = neighbors.row_neighbors.map(|i| authorities[indices[i]].clone()).collect(); + let column_neighbors = + neighbors.column_neighbors.map(|i| authorities[indices[i]].clone()).collect(); + + ctx.send_message(NetworkBridgeMessage::NewGossipTopology { + our_neighbors_x: row_neighbors, + our_neighbors_y: column_neighbors, + }) .await; Ok(()) } +struct MatrixNeighbors { + row_neighbors: R, + column_neighbors: C, +} + /// Compute our row and column neighbors in a matrix -fn matrix_neighbors(our_index: usize, len: usize) -> impl Iterator { +fn matrix_neighbors( + our_index: usize, + len: usize, +) -> MatrixNeighbors, impl Iterator> { assert!(our_index < len, "our_index is computed using `enumerate`; qed"); // e.g. 
for size 11 the matrix would be @@ -520,7 +533,10 @@ fn matrix_neighbors(our_index: usize, len: usize) -> impl Iterator let row_neighbors = our_row * sqrt..std::cmp::min(our_row * sqrt + sqrt, len); let column_neighbors = (our_column..len).step_by(sqrt); - row_neighbors.chain(column_neighbors).filter(move |i| *i != our_index) + MatrixNeighbors { + row_neighbors: row_neighbors.filter(move |i| *i != our_index), + column_neighbors: column_neighbors.filter(move |i| *i != our_index), + } } impl overseer::Subsystem for GossipSupport diff --git a/node/network/gossip-support/src/tests.rs b/node/network/gossip-support/src/tests.rs index 6302772501ff..1760dccbc3ab 100644 --- a/node/network/gossip-support/src/tests.rs +++ b/node/network/gossip-support/src/tests.rs @@ -53,9 +53,18 @@ lazy_static! { Sr25519Keyring::One.public().into(), Sr25519Keyring::Two.public().into(), ]; - static ref NEIGHBORS: Vec = vec![ - Sr25519Keyring::Two.public().into(), + + // [2 6] + // [4 5] + // [1 3] + // [0 ] + + static ref ROW_NEIGHBORS: Vec = vec![ Sr25519Keyring::Charlie.public().into(), + ]; + + static ref COLUMN_NEIGHBORS: Vec = vec![ + Sr25519Keyring::Two.public().into(), Sr25519Keyring::Eve.public().into(), ]; } @@ -209,11 +218,15 @@ async fn test_neighbors(overseer: &mut VirtualOverseer) { assert_matches!( overseer_recv(overseer).await, AllMessages::NetworkBridge(NetworkBridgeMessage::NewGossipTopology { - our_neighbors, + our_neighbors_x, + our_neighbors_y, }) => { - let mut got: Vec<_> = our_neighbors.into_iter().collect(); - got.sort(); - assert_eq!(got, NEIGHBORS.clone()); + let mut got_row: Vec<_> = our_neighbors_x.into_iter().collect(); + let mut got_column: Vec<_> = our_neighbors_y.into_iter().collect(); + got_row.sort(); + got_column.sort(); + assert_eq!(got_row, ROW_NEIGHBORS.clone()); + assert_eq!(got_column, COLUMN_NEIGHBORS.clone()); } ); } @@ -504,18 +517,23 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() { #[test] fn test_matrix_neighbors() { - 
for (our_index, len, expected) in vec![ - (0usize, 1usize, vec![]), - (1, 2, vec![0usize]), - (0, 9, vec![1, 2, 3, 6]), - (9, 10, vec![0, 3, 6]), - (10, 11, vec![1, 4, 7, 9]), - (7, 11, vec![1, 4, 6, 8, 10]), + for (our_index, len, expected_row, expected_column) in vec![ + (0usize, 1usize, vec![], vec![]), + (1, 2, vec![], vec![0usize]), + (0, 9, vec![1, 2], vec![3, 6]), + (9, 10, vec![], vec![0, 3, 6]), + (10, 11, vec![9], vec![1, 4, 7]), + (7, 11, vec![6, 8], vec![1, 4, 10]), ] .into_iter() { - let mut result: Vec<_> = matrix_neighbors(our_index, len).collect(); - result.sort(); - assert_eq!(result, expected); + let matrix = matrix_neighbors(our_index, len); + let mut row_result: Vec<_> = matrix.row_neighbors.collect(); + let mut column_result: Vec<_> = matrix.column_neighbors.collect(); + row_result.sort(); + column_result.sort(); + + assert_eq!(row_result, expected_row); + assert_eq!(column_result, expected_column); } } diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 8657ec16283b..0939303617f0 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -378,9 +378,14 @@ pub enum NetworkBridgeMessage { /// Inform the distribution subsystems about the new /// gossip network topology formed. NewGossipTopology { - /// Ids of our neighbors in the new gossip topology. - /// We're not necessarily connected to all of them, but we should. - our_neighbors: HashSet, + /// Ids of our neighbors in the X dimensions of the new gossip topology. + /// We're not necessarily connected to all of them, but we should + /// try to be. + our_neighbors_x: HashSet, + /// Ids of our neighbors in the X dimensions of the new gossip topology. + /// We're not necessarily connected to all of them, but we should + /// try to be. 
+ our_neighbors_y: HashSet, }, } From e56a1d24da9056691d18cc1b79d36d3e780b9ff4 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 20 Mar 2022 20:10:09 -0500 Subject: [PATCH 02/72] some guide updates --- roadmap/implementers-guide/src/types/network.md | 5 ++++- .../implementers-guide/src/types/overseer-protocol.md | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/roadmap/implementers-guide/src/types/network.md b/roadmap/implementers-guide/src/types/network.md index b51015c3848e..a48177b60156 100644 --- a/roadmap/implementers-guide/src/types/network.md +++ b/roadmap/implementers-guide/src/types/network.md @@ -154,7 +154,10 @@ enum NetworkBridgeEvent { /// /// Note, that the distribution subsystems need to handle the last /// view update of the newly added gossip peers manually. - NewGossipTopology(HashSet), + NewGossipTopology { + our_neighbors_x: HashSet, + our_neighbors_y: HashSet, + }, /// We received a message from the given peer. PeerMessage(PeerId, M), /// The given peer has updated its description of its view. diff --git a/roadmap/implementers-guide/src/types/overseer-protocol.md b/roadmap/implementers-guide/src/types/overseer-protocol.md index 3fabdd59540f..6528e375fd3e 100644 --- a/roadmap/implementers-guide/src/types/overseer-protocol.md +++ b/roadmap/implementers-guide/src/types/overseer-protocol.md @@ -553,9 +553,12 @@ enum NetworkBridgeMessage { /// Inform the distribution subsystems about the new /// gossip network topology formed. NewGossipTopology { - /// Ids of our neighbors in the new gossip topology. - /// We're not necessarily connected to all of them, but we should. - our_neighbors: HashSet, + /// Ids of our neighbors in the X dimension of the new gossip topology. + /// We're not necessarily connected to all of them, but we should try to be. + our_neighbors_x: HashSet, + /// Ids of our neighbors in the Y dimension of the new gossip topology. 
+ /// We're not necessarily connected to all of them, but we should try to be. + our_neighbors_y: HashSet, } } ``` From a7f1bc04fc4ec44c541068429b75dc1349e6035d Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 20 Mar 2022 21:42:33 -0500 Subject: [PATCH 03/72] update network-bridge to distinguish x and y dimensions --- node/network/bridge/src/lib.rs | 30 +++++++++++++++---- .../src/messages/network_bridge_event.rs | 18 +++++++++-- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index b2129cdebbdf..ca734a91aaaa 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -590,30 +590,48 @@ where ).await; } NetworkBridgeMessage::NewGossipTopology { - our_neighbors, + our_neighbors_x, + our_neighbors_y, } => { gum::debug!( target: LOG_TARGET, action = "NewGossipTopology", - neighbors = our_neighbors.len(), + neighbors_x = our_neighbors_x.len(), + neighbors_y = our_neighbors_y.len(), "Gossip topology has changed", ); let ads = &mut authority_discovery_service; - let mut gossip_peers = HashSet::with_capacity(our_neighbors.len()); - for authority in our_neighbors { + let mut gossip_peers_x = HashSet::with_capacity(our_neighbors_x.len()); + let mut gossip_peers_y = HashSet::with_capacity(our_neighbors_y.len()); + + for authority in our_neighbors_x { + let addr = get_peer_id_by_authority_id( + ads, + authority.clone(), + ).await; + + if let Some(peer_id) = addr { + gossip_peers_x.insert(peer_id); + } + } + + for authority in our_neighbors_y { let addr = get_peer_id_by_authority_id( ads, authority.clone(), ).await; if let Some(peer_id) = addr { - gossip_peers.insert(peer_id); + gossip_peers_y.insert(peer_id); } } dispatch_validation_event_to_all_unbounded( - NetworkBridgeEvent::NewGossipTopology(gossip_peers), + NetworkBridgeEvent::NewGossipTopology { + our_neighbors_x: gossip_peers_x, + our_neighbors_y: gossip_peers_y, + }, ctx.sender(), ); } diff --git 
a/node/subsystem-types/src/messages/network_bridge_event.rs b/node/subsystem-types/src/messages/network_bridge_event.rs index b8fe157cbef0..dc032c2d0ca1 100644 --- a/node/subsystem-types/src/messages/network_bridge_event.rs +++ b/node/subsystem-types/src/messages/network_bridge_event.rs @@ -37,7 +37,12 @@ pub enum NetworkBridgeEvent { /// /// Note, that the distribution subsystems need to handle the last /// view update of the newly added gossip peers manually. - NewGossipTopology(HashSet), + NewGossipTopology { + /// Neighbors in the 'X' dimension of the grid. + our_neighbors_x: HashSet, + /// Neighbors in the 'Y' dimension of the grid. + our_neighbors_y: HashSet, + }, /// Peer has sent a message. PeerMessage(PeerId, M), @@ -77,8 +82,15 @@ impl NetworkBridgeEvent { NetworkBridgeEvent::PeerConnected(peer.clone(), role.clone(), authority_id.clone()), NetworkBridgeEvent::PeerDisconnected(ref peer) => NetworkBridgeEvent::PeerDisconnected(peer.clone()), - NetworkBridgeEvent::NewGossipTopology(ref peers) => - NetworkBridgeEvent::NewGossipTopology(peers.clone()), + NetworkBridgeEvent::NewGossipTopology { + ref our_neighbors_x, + ref our_neighbors_y, + } => { + NetworkBridgeEvent::NewGossipTopology { + our_neighbors_x: our_neighbors_x.clone(), + our_neighbors_y: our_neighbors_y.clone(), + } + }, NetworkBridgeEvent::PeerViewChange(ref peer, ref view) => NetworkBridgeEvent::PeerViewChange(peer.clone(), view.clone()), NetworkBridgeEvent::OurViewChange(ref view) => From 8c6b4a54ca83060ec09e53c398b241289942be94 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 20 Mar 2022 21:55:34 -0500 Subject: [PATCH 04/72] get everything to compile --- node/network/approval-distribution/src/lib.rs | 3 ++- node/network/bitfield-distribution/src/lib.rs | 3 ++- node/network/collator-protocol/src/collator_side/mod.rs | 2 +- node/network/collator-protocol/src/validator_side/mod.rs | 2 +- node/network/gossip-support/src/lib.rs | 6 +++--- node/network/gossip-support/src/tests.rs | 2 +- 
node/network/statement-distribution/src/lib.rs | 3 ++- node/subsystem-types/src/messages/network_bridge_event.rs | 8 ++------ 8 files changed, 14 insertions(+), 15 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index bf5835016ddb..b0c22d8c9bbd 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -216,7 +216,8 @@ impl State { entry.known_by.remove(&peer_id); }) }, - NetworkBridgeEvent::NewGossipTopology(peers) => { + NetworkBridgeEvent::NewGossipTopology { our_neighbors_x, our_neighbors_y } => { + let peers: HashSet<_> = our_neighbors_x.union(&our_neighbors_y).cloned().collect(); let newly_added: Vec = peers.difference(&self.gossip_peers).cloned().collect(); self.gossip_peers = peers; diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index befdec66b359..7b65a4aace3b 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -523,7 +523,8 @@ async fn handle_network_msg( // get rid of superfluous data state.peer_views.remove(&peer); }, - NetworkBridgeEvent::NewGossipTopology(peers) => { + NetworkBridgeEvent::NewGossipTopology { our_neighbors_x, our_neighbors_y } => { + let peers: HashSet<_> = our_neighbors_x.union(&our_neighbors_y).cloned().collect(); let newly_added: Vec = peers.difference(&state.gossip_peers).cloned().collect(); state.gossip_peers = peers; for new_peer in newly_added { diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index bb972598abe5..5a286bdf89a8 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -971,7 +971,7 @@ where PeerMessage(remote, msg) => { handle_incoming_peer_message(ctx, runtime, state, remote, msg).await?; }, - NewGossipTopology(..) 
=> { + NewGossipTopology { .. } => { // impossible! }, } diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index e3c188739148..f3e50a630097 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -1090,7 +1090,7 @@ where state.peer_data.remove(&peer_id); state.metrics.note_collator_peer_count(state.peer_data.len()); }, - NewGossipTopology(..) => { + NewGossipTopology { .. } => { // impossible! }, PeerViewChange(peer_id, view) => { diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index 6401d4133f09..e63b3187e321 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -378,7 +378,7 @@ where }, NetworkBridgeEvent::OurViewChange(_) => {}, NetworkBridgeEvent::PeerViewChange(_, _) => {}, - NetworkBridgeEvent::NewGossipTopology(_) => {}, + NetworkBridgeEvent::NewGossipTopology { .. } => {}, NetworkBridgeEvent::PeerMessage(_, v) => { match v {}; }, @@ -500,8 +500,8 @@ where ctx.send_message(NetworkBridgeMessage::NewGossipTopology { our_neighbors_x: row_neighbors, our_neighbors_y: column_neighbors, - }) - .await; + }) + .await; Ok(()) } diff --git a/node/network/gossip-support/src/tests.rs b/node/network/gossip-support/src/tests.rs index 1760dccbc3ab..506b089e3864 100644 --- a/node/network/gossip-support/src/tests.rs +++ b/node/network/gossip-support/src/tests.rs @@ -54,7 +54,7 @@ lazy_static! 
{ Sr25519Keyring::Two.public().into(), ]; - // [2 6] + // [2 6] // [4 5] // [1 3] // [0 ] diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 1931c545c0d5..6a92953c5704 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -1623,7 +1623,8 @@ async fn handle_network_update( }); } }, - NetworkBridgeEvent::NewGossipTopology(new_peers) => { + NetworkBridgeEvent::NewGossipTopology { our_neighbors_x, our_neighbors_y } => { + let new_peers: HashSet<_> = our_neighbors_x.union(&our_neighbors_y).cloned().collect(); let _ = metrics.time_network_bridge_update_v1("new_gossip_topology"); let newly_added: Vec = new_peers.difference(gossip_peers).cloned().collect(); *gossip_peers = new_peers; diff --git a/node/subsystem-types/src/messages/network_bridge_event.rs b/node/subsystem-types/src/messages/network_bridge_event.rs index dc032c2d0ca1..34a0957febfb 100644 --- a/node/subsystem-types/src/messages/network_bridge_event.rs +++ b/node/subsystem-types/src/messages/network_bridge_event.rs @@ -82,15 +82,11 @@ impl NetworkBridgeEvent { NetworkBridgeEvent::PeerConnected(peer.clone(), role.clone(), authority_id.clone()), NetworkBridgeEvent::PeerDisconnected(ref peer) => NetworkBridgeEvent::PeerDisconnected(peer.clone()), - NetworkBridgeEvent::NewGossipTopology { - ref our_neighbors_x, - ref our_neighbors_y, - } => { + NetworkBridgeEvent::NewGossipTopology { ref our_neighbors_x, ref our_neighbors_y } => NetworkBridgeEvent::NewGossipTopology { our_neighbors_x: our_neighbors_x.clone(), our_neighbors_y: our_neighbors_y.clone(), - } - }, + }, NetworkBridgeEvent::PeerViewChange(ref peer, ref view) => NetworkBridgeEvent::PeerViewChange(peer.clone(), view.clone()), NetworkBridgeEvent::OurViewChange(ref view) => From 38e998be04dc37f7ca616168502da9fecafd40ff Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Mon, 21 Mar 2022 18:15:44 -0500 Subject: [PATCH 05/72] 
beginnings --- node/network/approval-distribution/src/lib.rs | 78 +++++++++---------- .../approval-distribution/src/tests.rs | 10 +++ 2 files changed, 49 insertions(+), 39 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index b0c22d8c9bbd..49a58eab8f3a 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -66,6 +66,18 @@ pub struct ApprovalDistribution { metrics: Metrics, } +#[derive(Debug, Clone, Copy)] +enum GridDimension { + Row, + Column, +} + +#[derive(Default)] +struct PeerData { + view: View, + shared_dimension: Option, +} + /// The [`State`] struct is responsible for tracking the overall state of the subsystem. /// /// It tracks metadata about our view of the unfinalized chain, @@ -84,8 +96,8 @@ struct State { /// also a race that occurs typically on local networks. pending_known: HashMap>, - /// Peer view data is partially stored here, and partially inline within the [`BlockEntry`]s - peer_views: HashMap, + /// Peer data is partially stored here, and partially inline within the [`BlockEntry`]s + peer_data: HashMap, /// Track all our neighbors in the current gossip topology. /// We're not necessarily connected to all of them. @@ -154,18 +166,12 @@ enum ApprovalState { Approved(AssignmentCert, ValidatorSignature), } -#[derive(Debug, Clone, Copy)] -enum LocalSource { - Yes, - No, -} - /// Information about candidates in the context of a particular block they are included in. /// In other words, multiple `CandidateEntry`s may exist for the same candidate, /// if it is included by multiple blocks - this is likely the case when there are forks. 
#[derive(Debug, Default)] struct CandidateEntry { - approvals: HashMap, + approvals: HashMap, } #[derive(Debug, Clone)] @@ -181,13 +187,6 @@ impl MessageSource { Self::Local => None, } } - - fn as_local_source(&self) -> LocalSource { - match self { - Self::Local => LocalSource::Yes, - _ => LocalSource::No, - } - } } enum PendingMessage { @@ -207,11 +206,11 @@ impl State { NetworkBridgeEvent::PeerConnected(peer_id, role, _) => { // insert a blank view if none already present gum::trace!(target: LOG_TARGET, ?peer_id, ?role, "Peer connected"); - self.peer_views.entry(peer_id).or_default(); + self.peer_data.entry(peer_id).or_default(); }, NetworkBridgeEvent::PeerDisconnected(peer_id) => { gum::trace!(target: LOG_TARGET, ?peer_id, "Peer disconnected"); - self.peer_views.remove(&peer_id); + self.peer_data.remove(&peer_id); self.blocks.iter_mut().for_each(|(_hash, entry)| { entry.known_by.remove(&peer_id); }) @@ -222,8 +221,8 @@ impl State { peers.difference(&self.gossip_peers).cloned().collect(); self.gossip_peers = peers; for peer_id in newly_added { - if let Some(view) = self.peer_views.remove(&peer_id) { - self.handle_peer_view_change(ctx, metrics, peer_id, view).await; + if let Some(peer_data) = self.peer_data.remove(&peer_id) { + self.handle_peer_view_change(ctx, metrics, peer_id, peer_data.view).await; } } }, @@ -295,9 +294,10 @@ impl State { ); { - for (peer_id, view) in self.peer_views.iter() { - let intersection = view.iter().filter(|h| new_hashes.contains(h)); - let view_intersection = View::new(intersection.cloned(), view.finalized_number); + for (peer_id, peer_data) in self.peer_data.iter() { + let intersection = peer_data.view.iter().filter(|h| new_hashes.contains(h)); + let view_intersection = + View::new(intersection.cloned(), peer_data.view.finalized_number); Self::unify_with_peer( ctx, &self.gossip_peers, @@ -453,6 +453,8 @@ impl State { } } + // handle a peer view change: requires that the peer is already connected + // and has an entry in the 
`PeerData` struct. async fn handle_peer_view_change( &mut self, ctx: &mut (impl SubsystemContext @@ -463,7 +465,10 @@ impl State { ) { gum::trace!(target: LOG_TARGET, ?view, "Peer view change"); let finalized_number = view.finalized_number; - let old_view = self.peer_views.insert(peer_id.clone(), view.clone()); + let old_view = self + .peer_data + .get_mut(&peer_id) + .map(|d| std::mem::replace(&mut d.view, view.clone())); let old_finalized_number = old_view.map(|v| v.finalized_number).unwrap_or(0); // we want to prune every block known_by peer up to (including) view.finalized_number @@ -659,8 +664,6 @@ impl State { } } - let local_source = source.as_local_source(); - // Invariant: none of the peers except for the `source` know about the assignment. metrics.on_assignment_imported(); @@ -668,9 +671,10 @@ impl State { Some(candidate_entry) => { // set the approval state for validator_index to Assigned // unless the approval state is set already - candidate_entry.approvals.entry(validator_index).or_insert_with(|| { - (ApprovalState::Assigned(assignment.cert.clone()), local_source) - }); + candidate_entry + .approvals + .entry(validator_index) + .or_insert_with(|| ApprovalState::Assigned(assignment.cert.clone())); }, None => { gum::warn!( @@ -711,7 +715,7 @@ impl State { target: LOG_TARGET, ?block_hash, ?claimed_candidate_index, - ?local_source, + local = source.peer_id().is_none(), num_peers = peers.len(), "Sending an assignment to peers", ); @@ -860,8 +864,6 @@ impl State { } } - let local_source = source.as_local_source(); - // Invariant: none of the peers except for the `source` know about the approval. 
metrics.on_approval_imported(); @@ -870,13 +872,13 @@ impl State { // set the approval state for validator_index to Approved // it should be in assigned state already match candidate_entry.approvals.remove(&validator_index) { - Some((ApprovalState::Assigned(cert), _local)) => { + Some(ApprovalState::Assigned(cert)) => { candidate_entry.approvals.insert( validator_index, - (ApprovalState::Approved(cert, vote.signature.clone()), local_source), + ApprovalState::Approved(cert, vote.signature.clone()), ); }, - Some((ApprovalState::Approved(..), _)) => { + Some(ApprovalState::Approved(..)) => { unreachable!( "we only insert it after the fingerprint, checked the fingerprint above; qed" ); @@ -933,7 +935,7 @@ impl State { target: LOG_TARGET, ?block_hash, ?candidate_index, - ?local_source, + local = source.peer_id().is_none(), num_peers = peers.len(), "Sending an approval to peers", ); @@ -1052,9 +1054,7 @@ impl State { for (candidate_index, candidate_entry) in entry.candidates.iter().enumerate() { let candidate_index = candidate_index as u32; - for (validator_index, (approval_state, _is_local)) in - candidate_entry.approvals.iter() - { + for (validator_index, approval_state) in candidate_entry.approvals.iter() { let assignment_fingerprint = MessageFingerprint::Assignment( block.clone(), candidate_index, diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 6f08b2a8523a..93f5e5e2ca9f 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1052,3 +1052,13 @@ fn race_condition_in_local_vs_remote_view_update() { virtual_overseer }); } + +// TODO [now]: test propagation of message from issuer - unshared dimension + +// TODO [now]: test that messages are propagated to necessary peers after they connect + +// TODO [now]: test that first receipt of message broadcasts to random peers + +// TODO [now]: test that new gossip topology re-evaluates all messages + 
+// TODO [now]: test that when a block takes a long time to be finalized, we broadcast more aggressively. From 92925da812a5b6e5f07349ea23098ed966be58d3 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 18:35:26 -0500 Subject: [PATCH 06/72] some TODOs --- node/network/approval-distribution/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 49a58eab8f3a..71a4842ae986 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -216,6 +216,10 @@ impl State { }) }, NetworkBridgeEvent::NewGossipTopology { our_neighbors_x, our_neighbors_y } => { + // TODO [now]: update shared dimension of all peers. + // TODO [now]: update broadcast dimensions of all messages + // TODO [now]: broadcast messages along new dimensions. + let peers: HashSet<_> = our_neighbors_x.union(&our_neighbors_y).cloned().collect(); let newly_added: Vec = peers.difference(&self.gossip_peers).cloned().collect(); From d888b4bc235a9b07da604fa8116d131d73d8c308 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 20:19:09 -0500 Subject: [PATCH 07/72] polkadot runtime: use relevant_authorities --- runtime/polkadot/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/polkadot/src/lib.rs b/runtime/polkadot/src/lib.rs index 8cb75e3fe941..25a61d243f35 100644 --- a/runtime/polkadot/src/lib.rs +++ b/runtime/polkadot/src/lib.rs @@ -2021,7 +2021,7 @@ sp_api::impl_runtime_apis! 
{ impl authority_discovery_primitives::AuthorityDiscoveryApi for Runtime { fn authorities() -> Vec { - AuthorityDiscovery::authorities() + parachains_runtime_api_impl::relevant_authority_ids::() } } From 167468cd9e29493ec08f9dfd85b7e085ef8a36cd Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 21:03:07 -0500 Subject: [PATCH 08/72] make gossip topologies per-session --- node/subsystem-types/src/messages.rs | 16 +++++--- .../src/messages/network_bridge_event.rs | 41 ++++++++++++------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 0939303617f0..743e039456dd 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -49,7 +49,7 @@ use polkadot_primitives::v2::{ }; use polkadot_statement_table::v2::Misbehavior; use std::{ - collections::{BTreeMap, HashSet}, + collections::{BTreeMap, HashSet, HashMap}, sync::Arc, time::Duration, }; @@ -378,14 +378,20 @@ pub enum NetworkBridgeMessage { /// Inform the distribution subsystems about the new /// gossip network topology formed. NewGossipTopology { - /// Ids of our neighbors in the X dimensions of the new gossip topology. + /// The session index this gossip topology is concerned with. + session: SessionIndex, + /// Ids of our neighbors in the X dimension of the new gossip topology, + /// along with their validator indices within the session. + /// /// We're not necessarily connected to all of them, but we should /// try to be. - our_neighbors_x: HashSet, - /// Ids of our neighbors in the X dimensions of the new gossip topology. + our_neighbors_x: HashMap, + /// Ids of our neighbors in the Y dimension of the new gossip topology, + /// along with their validator indices within the session. + /// /// We're not necessarily connected to all of them, but we should /// try to be. 
- our_neighbors_y: HashSet, + our_neighbors_y: HashMap, }, } diff --git a/node/subsystem-types/src/messages/network_bridge_event.rs b/node/subsystem-types/src/messages/network_bridge_event.rs index 34a0957febfb..a5e9edf0f2c7 100644 --- a/node/subsystem-types/src/messages/network_bridge_event.rs +++ b/node/subsystem-types/src/messages/network_bridge_event.rs @@ -14,12 +14,33 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::{collections::HashSet, convert::TryFrom}; +use std::{collections::{HashMap, HashSet}, convert::TryFrom}; pub use sc_network::{PeerId, ReputationChange}; use polkadot_node_network_protocol::{ObservedRole, OurView, View, WrongVariant}; -use polkadot_primitives::v2::AuthorityDiscoveryId; +use polkadot_primitives::v2::{AuthorityDiscoveryId, SessionIndex, ValidatorIndex}; + +/// Information about a peer in the gossip topology for a session. +#[derive(Debug, Clone, PartialEq)] +pub struct TopologyPeerInfo { + /// The validator's known peer IDs. + pub peer_ids: Vec, + /// The index of the validator in the discovery keys of the corresponding + /// `SessionInfo`. + pub validator_index: ValidatorIndex, +} + +/// A struct indicating new gossip topology. +#[derive(Debug, Clone, PartialEq)] +pub struct NewGossipTopology { + /// The session index this topology corresponds to. + pub session: SessionIndex, + /// Neighbors in the 'X' dimension of the grid. + pub our_neighbors_x: HashMap, + /// Neighbors in the 'Y' dimension of the grid. + pub our_neighbors_y: HashMap, +} /// Events from network. #[derive(Debug, Clone, PartialEq)] @@ -30,19 +51,14 @@ pub enum NetworkBridgeEvent { /// A peer has disconnected. PeerDisconnected(PeerId), - /// Our neighbors in the new gossip topology. + /// Our neighbors in the new gossip topology for the session. /// We're not necessarily connected to all of them. /// /// This message is issued only on the validation peer set. 
/// /// Note, that the distribution subsystems need to handle the last /// view update of the newly added gossip peers manually. - NewGossipTopology { - /// Neighbors in the 'X' dimension of the grid. - our_neighbors_x: HashSet, - /// Neighbors in the 'Y' dimension of the grid. - our_neighbors_y: HashSet, - }, + NewGossipTopology(NewGossipTopology), /// Peer has sent a message. PeerMessage(PeerId, M), @@ -82,11 +98,8 @@ impl NetworkBridgeEvent { NetworkBridgeEvent::PeerConnected(peer.clone(), role.clone(), authority_id.clone()), NetworkBridgeEvent::PeerDisconnected(ref peer) => NetworkBridgeEvent::PeerDisconnected(peer.clone()), - NetworkBridgeEvent::NewGossipTopology { ref our_neighbors_x, ref our_neighbors_y } => - NetworkBridgeEvent::NewGossipTopology { - our_neighbors_x: our_neighbors_x.clone(), - our_neighbors_y: our_neighbors_y.clone(), - }, + NetworkBridgeEvent::NewGossipTopology(ref topology) => + NetworkBridgeEvent::NewGossipTopology(topology.clone()), NetworkBridgeEvent::PeerViewChange(ref peer, ref view) => NetworkBridgeEvent::PeerViewChange(peer.clone(), view.clone()), NetworkBridgeEvent::OurViewChange(ref view) => From 221ffb07d293d9092bc20f8b110c8fa425e0072c Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 21:12:19 -0500 Subject: [PATCH 09/72] better formatting --- node/subsystem-types/src/messages.rs | 2 +- node/subsystem-types/src/messages/network_bridge_event.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 743e039456dd..4451f7c4fd61 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -49,7 +49,7 @@ use polkadot_primitives::v2::{ }; use polkadot_statement_table::v2::Misbehavior; use std::{ - collections::{BTreeMap, HashSet, HashMap}, + collections::{BTreeMap, HashMap, HashSet}, sync::Arc, time::Duration, }; diff --git a/node/subsystem-types/src/messages/network_bridge_event.rs 
b/node/subsystem-types/src/messages/network_bridge_event.rs index a5e9edf0f2c7..86f449580487 100644 --- a/node/subsystem-types/src/messages/network_bridge_event.rs +++ b/node/subsystem-types/src/messages/network_bridge_event.rs @@ -14,7 +14,10 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::{collections::{HashMap, HashSet}, convert::TryFrom}; +use std::{ + collections::{HashMap, HashSet}, + convert::TryFrom, +}; pub use sc_network::{PeerId, ReputationChange}; From 66e79bf63e97d545a76e18ef69e1f740a41e5e77 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 21:12:31 -0500 Subject: [PATCH 10/72] gossip support: use current session validators --- node/network/gossip-support/src/lib.rs | 124 +++++++++++++++---------- 1 file changed, 75 insertions(+), 49 deletions(-) diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index e63b3187e321..9a3d1ff5a0a9 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -49,10 +49,12 @@ use polkadot_node_subsystem::{ RuntimeApiRequest, }, overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, - SubsystemError, SubsystemSender, + SubsystemError, }; use polkadot_node_subsystem_util as util; -use polkadot_primitives::v2::{AuthorityDiscoveryId, Hash, SessionIndex}; +use polkadot_primitives::v2::{ + AuthorityDiscoveryId, Hash, SessionIndex, SessionInfo, ValidatorIndex, +}; #[cfg(test)] mod tests; @@ -213,6 +215,24 @@ where if force_request { leaf_session } else { maybe_new_session }; if let Some((session_index, relay_parent)) = maybe_issue_connection { + let session_info = + util::request_session_info(leaf, session_index, ctx.sender()).await.await??; + + let session_info = match session_info { + Some(s) => s, + None => { + gum::warn!( + relay_parent = ?leaf, + session_index = self.last_session_index, + "Failed to get session 
info.", + ); + + continue + }, + }; + + // Note: we only update `last_session_index` once we've + // successfully gotten the `SessionInfo`. let is_new_session = maybe_new_session.is_some(); if is_new_session { gum::debug!( @@ -223,45 +243,44 @@ where self.last_session_index = Some(session_index); } - let all_authorities = determine_relevant_authorities(ctx, relay_parent).await?; - let our_index = ensure_i_am_an_authority(&self.keystore, &all_authorities).await?; - let other_authorities = { - let mut authorities = all_authorities.clone(); - authorities.swap_remove(our_index); - authorities - }; + let our_index = + ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await?; - self.issue_connection_request(ctx, other_authorities).await; + // Connect to authorities from the past/present/future + { + let mut connections = authorities_past_present_future(ctx, leaf).await?; + + // ... ignoring our own identity. + if let Some(pos) = connections + .iter() + .position(|v| v == &session_info.discovery_keys[our_index]) + { + connections.remove(pos); + } + + self.issue_connection_request(ctx, connections).await; + } if is_new_session { - update_gossip_topology(ctx, our_index, all_authorities, relay_parent).await?; - self.update_authority_status_metrics(leaf, ctx.sender()).await?; + self.update_authority_status_metrics(&session_info).await; + + update_gossip_topology( + ctx, + our_index, + session_info.discovery_keys, + relay_parent, + session_index, + ) + .await?; } } } Ok(()) } - async fn update_authority_status_metrics( - &mut self, - leaf: Hash, - sender: &mut impl SubsystemSender, - ) -> Result<(), util::Error> { - if let Some(session_info) = util::request_session_info( - leaf, - self.last_session_index - .expect("Last session index is always set on every session index change"), - sender, - ) - .await - .await?? 
- { - let maybe_index = match ensure_i_am_an_authority( - &self.keystore, - &session_info.discovery_keys, - ) - .await - { + async fn update_authority_status_metrics(&mut self, session_info: &SessionInfo) { + let maybe_index = + match ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await { Ok(index) => { self.metrics.on_is_authority(); Some(index) @@ -275,21 +294,19 @@ where Err(_) => None, }; - if let Some(validator_index) = maybe_index { - // The subset of authorities participating in parachain consensus. - let parachain_validators_this_session = session_info.validators; + if let Some(validator_index) = maybe_index { + // The subset of authorities participating in parachain consensus. + let parachain_validators_this_session = session_info.validators.len(); - // First `maxValidators` entries are the parachain validators. We'll check - // if our index is in this set to avoid searching for the keys. - // https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148 - if validator_index < parachain_validators_this_session.len() { - self.metrics.on_is_parachain_validator(); - } else { - self.metrics.on_is_not_parachain_validator(); - } + // First `maxValidators` entries are the parachain validators. We'll check + // if our index is in this set to avoid searching for the keys. + // https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148 + if validator_index < parachain_validators_this_session { + self.metrics.on_is_parachain_validator(); + } else { + self.metrics.on_is_not_parachain_validator(); } } - Ok(()) } async fn issue_connection_request( @@ -416,7 +433,8 @@ where } } -async fn determine_relevant_authorities( +// Get the authorities of the past, present, and future. 
+async fn authorities_past_present_future( ctx: &mut Context, relay_parent: Hash, ) -> Result, util::Error> @@ -428,7 +446,7 @@ where gum::debug!( target: LOG_TARGET, authority_count = ?authorities.len(), - "Determined relevant authorities", + "Determined past/present/future authorities", ); Ok(authorities) } @@ -460,6 +478,7 @@ async fn update_gossip_topology( our_index: usize, authorities: Vec, relay_parent: Hash, + session_index: SessionIndex, ) -> Result<(), util::Error> where Context: SubsystemContext, @@ -493,11 +512,18 @@ where .expect("our_index < len; indices contains it; qed"); let neighbors = matrix_neighbors(our_shuffled_position, len); - let row_neighbors = neighbors.row_neighbors.map(|i| authorities[indices[i]].clone()).collect(); - let column_neighbors = - neighbors.column_neighbors.map(|i| authorities[indices[i]].clone()).collect(); + let row_neighbors = neighbors + .row_neighbors + .map(|i| (authorities[indices[i]].clone(), ValidatorIndex::from(i as u32))) + .collect(); + + let column_neighbors = neighbors + .column_neighbors + .map(|i| (authorities[indices[i]].clone(), ValidatorIndex::from(i as u32))) + .collect(); ctx.send_message(NetworkBridgeMessage::NewGossipTopology { + session: session_index, our_neighbors_x: row_neighbors, our_neighbors_y: column_neighbors, }) From dd8a300dd7f80e5bef1374dcbe018f87e1e98576 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 21:14:47 -0500 Subject: [PATCH 11/72] expand in comment --- node/network/gossip-support/src/lib.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index 9a3d1ff5a0a9..420d971f3186 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -246,7 +246,17 @@ where let our_index = ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await?; - // Connect to authorities from the past/present/future + // 
Connect to authorities from the past/present/future. + // + // This is maybe not the right place for this logic to live, + // but at the moment we're limited by the network bridge's ability + // to handle connection requests (it only allows one, globally). + // + // Certain network protocols - mostly req/res, but some gossip, + // will require being connected to past/future validators as well + // as current. That is, the old authority sets are not made obsolete + // by virtue of a new session being entered. Therefore we maintain + // connections to a much broader set of validators. { let mut connections = authorities_past_present_future(ctx, leaf).await?; From cfa35322851b2ef2a69d0e8233a6c8d965ae20b5 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 22:18:06 -0500 Subject: [PATCH 12/72] adjust tests and fix index bug --- node/network/gossip-support/src/lib.rs | 6 +- node/network/gossip-support/src/tests.rs | 150 +++++++++++++++-------- 2 files changed, 100 insertions(+), 56 deletions(-) diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index 420d971f3186..05c7b3f22b79 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -524,12 +524,14 @@ where let neighbors = matrix_neighbors(our_shuffled_position, len); let row_neighbors = neighbors .row_neighbors - .map(|i| (authorities[indices[i]].clone(), ValidatorIndex::from(i as u32))) + .map(|i| indices[i]) + .map(|i| (authorities[i].clone(), ValidatorIndex::from(i as u32))) .collect(); let column_neighbors = neighbors .column_neighbors - .map(|i| (authorities[indices[i]].clone(), ValidatorIndex::from(i as u32))) + .map(|i| indices[i]) + .map(|i| (authorities[i].clone(), ValidatorIndex::from(i as u32))) .collect(); ctx.send_message(NetworkBridgeMessage::NewGossipTopology { diff --git a/node/network/gossip-support/src/tests.rs b/node/network/gossip-support/src/tests.rs index 506b089e3864..a85874b03a59 100644 --- 
a/node/network/gossip-support/src/tests.rs +++ b/node/network/gossip-support/src/tests.rs @@ -38,34 +38,39 @@ use test_helpers::mock::make_ferdie_keystore; use super::*; +const AUTHORITY_KEYRINGS: &[Sr25519Keyring] = &[ + Sr25519Keyring::Alice, + Sr25519Keyring::Bob, + Sr25519Keyring::Charlie, + Sr25519Keyring::Eve, + Sr25519Keyring::One, + Sr25519Keyring::Two, + Sr25519Keyring::Ferdie, +]; + lazy_static! { static ref MOCK_AUTHORITY_DISCOVERY: MockAuthorityDiscovery = MockAuthorityDiscovery::new(); - static ref AUTHORITIES: Vec = { - let mut authorities = OTHER_AUTHORITIES.clone(); - authorities.push(Sr25519Keyring::Ferdie.public().into()); - authorities + static ref AUTHORITIES: Vec = + AUTHORITY_KEYRINGS.iter().map(|k| k.public().into()).collect(); + + static ref OTHER_AUTHORITIES: Vec = { + let mut a = AUTHORITIES.clone(); + a.pop(); // remove FERDIE. + a }; - static ref OTHER_AUTHORITIES: Vec = vec![ - Sr25519Keyring::Alice.public().into(), - Sr25519Keyring::Bob.public().into(), - Sr25519Keyring::Charlie.public().into(), - Sr25519Keyring::Eve.public().into(), - Sr25519Keyring::One.public().into(), - Sr25519Keyring::Two.public().into(), - ]; // [2 6] // [4 5] // [1 3] // [0 ] - static ref ROW_NEIGHBORS: Vec = vec![ - Sr25519Keyring::Charlie.public().into(), + static ref ROW_NEIGHBORS: Vec<(AuthorityDiscoveryId, ValidatorIndex)> = vec![ + (Sr25519Keyring::Charlie.public().into(), ValidatorIndex::from(2)), ]; - static ref COLUMN_NEIGHBORS: Vec = vec![ - Sr25519Keyring::Two.public().into(), - Sr25519Keyring::Eve.public().into(), + static ref COLUMN_NEIGHBORS: Vec<(AuthorityDiscoveryId, ValidatorIndex)> = vec![ + (Sr25519Keyring::Two.public().into(), ValidatorIndex::from(5)), + (Sr25519Keyring::Eve.public().into(), ValidatorIndex::from(3)), ]; } @@ -188,13 +193,32 @@ async fn overseer_signal_active_leaves(overseer: &mut VirtualOverseer, leaf: Has .expect("signal send timeout"); } +fn make_session_info() -> SessionInfo { + let all_validator_indices: Vec<_> = 
(0..6).map(ValidatorIndex::from).collect(); + SessionInfo { + active_validator_indices: all_validator_indices.clone(), + random_seed: [0; 32], + dispute_period: 6, + validators: AUTHORITY_KEYRINGS.iter().map(|k| k.public().into()).collect(), + discovery_keys: AUTHORITIES.clone(), + assignment_keys: AUTHORITY_KEYRINGS.iter().map(|k| k.public().into()).collect(), + validator_groups: vec![all_validator_indices], + n_cores: 1, + zeroth_delay_tranche_width: 1, + relay_vrf_modulo_samples: 1, + n_delay_tranches: 1, + no_show_slots: 1, + needed_approvals: 1, + } +} + async fn overseer_recv(overseer: &mut VirtualOverseer) -> AllMessages { let msg = overseer.recv().timeout(TIMEOUT).await.expect("msg recv timeout"); msg } -async fn test_neighbors(overseer: &mut VirtualOverseer) { +async fn test_neighbors(overseer: &mut VirtualOverseer, expected_session: SessionIndex) { assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -218,9 +242,11 @@ async fn test_neighbors(overseer: &mut VirtualOverseer) { assert_matches!( overseer_recv(overseer).await, AllMessages::NetworkBridge(NetworkBridgeMessage::NewGossipTopology { + session: got_session, our_neighbors_x, our_neighbors_y, }) => { + assert_eq!(expected_session, got_session); let mut got_row: Vec<_> = our_neighbors_x.into_iter().collect(); let mut got_column: Vec<_> = our_neighbors_y.into_iter().collect(); got_row.sort(); @@ -248,6 +274,18 @@ fn issues_a_connection_request_on_new_session() { } ); + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + tx.send(Ok(Some(make_session_info()))).unwrap(); + } + ); + assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -270,18 +308,7 @@ fn issues_a_connection_request_on_new_session() { } ); - 
test_neighbors(overseer).await; - - assert_matches!( - overseer_recv(overseer).await, - AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::SessionInfo(1, sender), - )) => { - assert_eq!(relay_parent, hash); - sender.send(Ok(None)).unwrap(); - } - ); + test_neighbors(overseer, 1).await; virtual_overseer }); @@ -326,6 +353,18 @@ fn issues_a_connection_request_on_new_session() { } ); + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 2); + tx.send(Ok(Some(make_session_info()))).unwrap(); + } + ); + assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -348,18 +387,7 @@ fn issues_a_connection_request_on_new_session() { } ); - test_neighbors(overseer).await; - - assert_matches!( - overseer_recv(overseer).await, - AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::SessionInfo(2, sender), - )) => { - assert_eq!(relay_parent, hash); - sender.send(Ok(None)).unwrap(); - } - ); + test_neighbors(overseer, 2).await; virtual_overseer }); @@ -420,6 +448,18 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() { } ); + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + tx.send(Ok(Some(make_session_info()))).unwrap(); + } + ); + assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -446,18 +486,7 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() { } ); - test_neighbors(overseer).await; - - assert_matches!( - overseer_recv(overseer).await, - AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - 
RuntimeApiRequest::SessionInfo(1, sender), - )) => { - assert_eq!(relay_parent, hash); - sender.send(Ok(None)).unwrap(); - } - ); + test_neighbors(overseer, 1).await; virtual_overseer }) @@ -483,6 +512,19 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() { tx.send(Ok(1)).unwrap(); } ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + tx.send(Ok(Some(make_session_info()))).unwrap(); + } + ); + assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( From c934eefa124c266bb659f61a09544483e9fc6bf2 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 22:33:36 -0500 Subject: [PATCH 13/72] add past/present/future connection test and clean up code --- Cargo.lock | 1 + node/network/gossip-support/Cargo.toml | 1 + node/network/gossip-support/src/tests.rs | 99 +++++++++++++++++++++--- 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d18327a56656..b67e02aa9e25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6566,6 +6566,7 @@ dependencies = [ "rand_chacha 0.3.1", "sc-network", "sp-application-crypto", + "sp-authority-discovery", "sp-consensus-babe", "sp-core", "sp-keyring", diff --git a/node/network/gossip-support/Cargo.toml b/node/network/gossip-support/Cargo.toml index f37a530d03b3..e130e4777134 100644 --- a/node/network/gossip-support/Cargo.toml +++ b/node/network/gossip-support/Cargo.toml @@ -25,6 +25,7 @@ gum = { package = "tracing-gum", path = "../../gum" } sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-consensus-babe = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-authority-discovery = { git = 
"https://github.com/paritytech/substrate", branch = "master" } polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" } diff --git a/node/network/gossip-support/src/tests.rs b/node/network/gossip-support/src/tests.rs index a85874b03a59..7962903c5fb8 100644 --- a/node/network/gossip-support/src/tests.rs +++ b/node/network/gossip-support/src/tests.rs @@ -24,6 +24,8 @@ use futures::{executor, future, Future}; use lazy_static::lazy_static; use sc_network::multiaddr::Protocol; +use sp_authority_discovery::AuthorityPair as AuthorityDiscoveryPair; +use sp_core::crypto::Pair as PairT; use sp_consensus_babe::{AllowedSlots, BabeEpochConfiguration, Epoch as BabeEpoch}; use sp_keyring::Sr25519Keyring; @@ -53,12 +55,19 @@ lazy_static! { static ref AUTHORITIES: Vec = AUTHORITY_KEYRINGS.iter().map(|k| k.public().into()).collect(); - static ref OTHER_AUTHORITIES: Vec = { + static ref AUTHORITIES_WITHOUT_US: Vec = { let mut a = AUTHORITIES.clone(); a.pop(); // remove FERDIE. a }; + static ref PAST_PRESENT_FUTURE_AUTHORITIES: Vec = { + (0..50) + .map(|_| AuthorityDiscoveryPair::generate().0.public()) + .chain(AUTHORITIES.clone()) + .collect() + }; + // [2 6] // [4 5] // [1 3] @@ -85,7 +94,7 @@ struct MockAuthorityDiscovery { impl MockAuthorityDiscovery { fn new() -> Self { let authorities: HashMap<_, _> = - AUTHORITIES.clone().into_iter().map(|a| (PeerId::random(), a)).collect(); + PAST_PRESENT_FUTURE_AUTHORITIES.clone().into_iter().map(|a| (PeerId::random(), a)).collect(); let addrs = authorities .clone() .into_iter() @@ -117,10 +126,10 @@ impl AuthorityDiscovery for MockAuthorityDiscovery { } } -async fn get_other_authorities_addrs() -> Vec> { - let mut addrs = Vec::with_capacity(OTHER_AUTHORITIES.len()); +async fn get_multiaddrs(authorities: Vec) -> Vec> { + let mut addrs = Vec::with_capacity(authorities.len()); let mut discovery = MOCK_AUTHORITY_DISCOVERY.clone(); - for authority in OTHER_AUTHORITIES.iter().cloned() { + for authority in 
authorities.into_iter() { if let Some(addr) = discovery.get_addresses_by_authority_id(authority).await { addrs.push(addr); } @@ -128,10 +137,10 @@ async fn get_other_authorities_addrs() -> Vec> { addrs } -async fn get_other_authorities_addrs_map() -> HashMap> { - let mut addrs = HashMap::with_capacity(OTHER_AUTHORITIES.len()); +async fn get_address_map(authorities: Vec) -> HashMap> { + let mut addrs = HashMap::with_capacity(authorities.len()); let mut discovery = MOCK_AUTHORITY_DISCOVERY.clone(); - for authority in OTHER_AUTHORITIES.iter().cloned() { + for authority in authorities.into_iter() { if let Some(addr) = discovery.get_addresses_by_authority_id(authority.clone()).await { addrs.insert(authority, addr); } @@ -303,7 +312,7 @@ fn issues_a_connection_request_on_new_session() { validator_addrs, peer_set, }) => { - assert_eq!(validator_addrs, get_other_authorities_addrs().await); + assert_eq!(validator_addrs, get_multiaddrs(AUTHORITIES_WITHOUT_US.clone()).await); assert_eq!(peer_set, PeerSet::Validation); } ); @@ -382,7 +391,7 @@ fn issues_a_connection_request_on_new_session() { validator_addrs, peer_set, }) => { - assert_eq!(validator_addrs, get_other_authorities_addrs().await); + assert_eq!(validator_addrs, get_multiaddrs(AUTHORITIES_WITHOUT_US.clone()).await); assert_eq!(peer_set, PeerSet::Validation); } ); @@ -395,6 +404,72 @@ fn issues_a_connection_request_on_new_session() { assert!(state.last_failure.is_none()); } +#[test] +fn issues_connection_request_to_past_present_future() { + let hash = Hash::repeat_byte(0xAA); + test_harness(make_subsystem(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + overseer_signal_active_leaves(overseer, hash).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionIndexForChild(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(1)).unwrap(); + } + ); + + assert_matches!( + 
overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + tx.send(Ok(Some(make_session_info()))).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Authorities(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(PAST_PRESENT_FUTURE_AUTHORITIES.clone())).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToResolvedValidators { + validator_addrs, + peer_set, + }) => { + let all_without_ferdie: Vec<_> = PAST_PRESENT_FUTURE_AUTHORITIES + .iter() + .cloned() + .filter(|p| p != &Sr25519Keyring::Ferdie.public().into()) + .collect(); + + let addrs = get_multiaddrs(all_without_ferdie).await; + + assert_eq!(validator_addrs, addrs); + assert_eq!(peer_set, PeerSet::Validation); + } + ); + + // Ensure neighbors are unaffected + test_neighbors(overseer, 1).await; + + virtual_overseer + }); +} + #[test] fn test_log_output() { sp_tracing::try_init_simple(); @@ -477,7 +552,7 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() { validator_addrs, peer_set, }) => { - let mut expected = get_other_authorities_addrs_map().await; + let mut expected = get_address_map(AUTHORITIES_WITHOUT_US.clone()).await; expected.remove(&alice); expected.remove(&bob); let expected: HashSet = expected.into_iter().map(|(_,v)| v.into_iter()).flatten().collect(); @@ -542,7 +617,7 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() { validator_addrs, peer_set, }) => { - let mut expected = get_other_authorities_addrs_map().await; + let mut expected = get_address_map(AUTHORITIES_WITHOUT_US.clone()).await; expected.remove(&bob); let expected: HashSet = expected.into_iter().map(|(_,v)| v.into_iter()).flatten().collect(); 
assert_eq!(validator_addrs.into_iter().map(|v| v.into_iter()).flatten().collect::>(), expected); From 6531322aea8ec59f3915033d093af7bd96b1fdf8 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 22:38:04 -0500 Subject: [PATCH 14/72] fmt --- node/network/gossip-support/src/tests.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/node/network/gossip-support/src/tests.rs b/node/network/gossip-support/src/tests.rs index 7962903c5fb8..72c1dff85ad6 100644 --- a/node/network/gossip-support/src/tests.rs +++ b/node/network/gossip-support/src/tests.rs @@ -25,8 +25,8 @@ use lazy_static::lazy_static; use sc_network::multiaddr::Protocol; use sp_authority_discovery::AuthorityPair as AuthorityDiscoveryPair; -use sp_core::crypto::Pair as PairT; use sp_consensus_babe::{AllowedSlots, BabeEpochConfiguration, Epoch as BabeEpoch}; +use sp_core::crypto::Pair as PairT; use sp_keyring::Sr25519Keyring; use polkadot_node_subsystem::{ @@ -93,8 +93,11 @@ struct MockAuthorityDiscovery { impl MockAuthorityDiscovery { fn new() -> Self { - let authorities: HashMap<_, _> = - PAST_PRESENT_FUTURE_AUTHORITIES.clone().into_iter().map(|a| (PeerId::random(), a)).collect(); + let authorities: HashMap<_, _> = PAST_PRESENT_FUTURE_AUTHORITIES + .clone() + .into_iter() + .map(|a| (PeerId::random(), a)) + .collect(); let addrs = authorities .clone() .into_iter() @@ -137,7 +140,9 @@ async fn get_multiaddrs(authorities: Vec) -> Vec) -> HashMap> { +async fn get_address_map( + authorities: Vec, +) -> HashMap> { let mut addrs = HashMap::with_capacity(authorities.len()); let mut discovery = MOCK_AUTHORITY_DISCOVERY.clone(); for authority in authorities.into_iter() { From a3214b9752799ccb0bea740423ab47217efed1da Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 22:38:21 -0500 Subject: [PATCH 15/72] network bridge: updated types --- node/network/bridge/src/lib.rs | 37 +++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 12 
deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index ca734a91aaaa..d2df8bac5fd4 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -34,7 +34,10 @@ use polkadot_overseer::gen::{OverseerError, Subsystem}; use polkadot_primitives::v2::{BlockNumber, Hash}; use polkadot_subsystem::{ errors::{SubsystemError, SubsystemResult}, - messages::{AllMessages, CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage}, + messages::{ + network_bridge_event::{NewGossipTopology, TopologyPeerInfo}, + AllMessages, CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage, + }, overseer, ActivatedLeaf, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, SubsystemSender, }; @@ -45,7 +48,7 @@ use polkadot_subsystem::{ pub use polkadot_node_network_protocol::peer_set::{peer_sets_info, IsAuthority}; use std::{ - collections::{hash_map, HashMap, HashSet}, + collections::{hash_map, HashMap}, sync::Arc, }; @@ -590,6 +593,7 @@ where ).await; } NetworkBridgeMessage::NewGossipTopology { + session, our_neighbors_x, our_neighbors_y, } => { @@ -602,36 +606,45 @@ where ); let ads = &mut authority_discovery_service; - let mut gossip_peers_x = HashSet::with_capacity(our_neighbors_x.len()); - let mut gossip_peers_y = HashSet::with_capacity(our_neighbors_y.len()); + let mut gossip_peers_x = HashMap::with_capacity(our_neighbors_x.len()); + let mut gossip_peers_y = HashMap::with_capacity(our_neighbors_y.len()); - for authority in our_neighbors_x { + for (authority, validator_index) in our_neighbors_x { let addr = get_peer_id_by_authority_id( ads, authority.clone(), ).await; if let Some(peer_id) = addr { - gossip_peers_x.insert(peer_id); + gossip_peers_x.insert(authority, TopologyPeerInfo { + peer_ids: vec![peer_id], + validator_index, + }); } } - for authority in our_neighbors_y { + for (authority, validator_index) in our_neighbors_y { let addr = get_peer_id_by_authority_id( ads, 
authority.clone(), ).await; if let Some(peer_id) = addr { - gossip_peers_y.insert(peer_id); + gossip_peers_y.insert(authority, TopologyPeerInfo { + peer_ids: vec![peer_id], + validator_index, + }); } } dispatch_validation_event_to_all_unbounded( - NetworkBridgeEvent::NewGossipTopology { - our_neighbors_x: gossip_peers_x, - our_neighbors_y: gossip_peers_y, - }, + NetworkBridgeEvent::NewGossipTopology( + NewGossipTopology { + session, + our_neighbors_x: gossip_peers_x, + our_neighbors_y: gossip_peers_y, + } + ), ctx.sender(), ); } From 2b5c78765f47c9eeb1c25e69e3682033b1cc2fd0 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 22:44:18 -0500 Subject: [PATCH 16/72] update protocols to new gossip topology message --- node/network/approval-distribution/src/lib.rs | 8 ++++++-- node/network/bitfield-distribution/src/lib.rs | 8 ++++++-- node/network/statement-distribution/src/lib.rs | 8 ++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 71a4842ae986..9c1456f45a12 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -215,12 +215,16 @@ impl State { entry.known_by.remove(&peer_id); }) }, - NetworkBridgeEvent::NewGossipTopology { our_neighbors_x, our_neighbors_y } => { + NetworkBridgeEvent::NewGossipTopology(topology) => { // TODO [now]: update shared dimension of all peers. // TODO [now]: update broadcast dimensions of all messages // TODO [now]: broadcast messages along new dimensions. 
+ let peers: HashSet = topology.our_neighbors_x + .values() + .chain(topology.our_neighbors_y.values()) + .flat_map(|peer_info| peer_info.peer_ids.iter().cloned()) + .collect(); - let peers: HashSet<_> = our_neighbors_x.union(&our_neighbors_y).cloned().collect(); let newly_added: Vec = peers.difference(&self.gossip_peers).cloned().collect(); self.gossip_peers = peers; diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index 7b65a4aace3b..27e62f023b4f 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -523,8 +523,12 @@ async fn handle_network_msg( // get rid of superfluous data state.peer_views.remove(&peer); }, - NetworkBridgeEvent::NewGossipTopology { our_neighbors_x, our_neighbors_y } => { - let peers: HashSet<_> = our_neighbors_x.union(&our_neighbors_y).cloned().collect(); + NetworkBridgeEvent::NewGossipTopology(topology) => { + let peers: HashSet = topology.our_neighbors_x + .values() + .chain(topology.our_neighbors_y.values()) + .flat_map(|peer_info| peer_info.peer_ids.iter().cloned()) + .collect(); let newly_added: Vec = peers.difference(&state.gossip_peers).cloned().collect(); state.gossip_peers = peers; for new_peer in newly_added { diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 6a92953c5704..2dd30aecad1b 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -1623,8 +1623,12 @@ async fn handle_network_update( }); } }, - NetworkBridgeEvent::NewGossipTopology { our_neighbors_x, our_neighbors_y } => { - let new_peers: HashSet<_> = our_neighbors_x.union(&our_neighbors_y).cloned().collect(); + NetworkBridgeEvent::NewGossipTopology(topology) => { + let new_peers: HashSet = topology.our_neighbors_x + .values() + .chain(topology.our_neighbors_y.values()) + .flat_map(|peer_info| peer_info.peer_ids.iter().cloned()) + 
.collect(); let _ = metrics.time_network_bridge_update_v1("new_gossip_topology"); let newly_added: Vec = new_peers.difference(gossip_peers).cloned().collect(); *gossip_peers = new_peers; From 8a4d0ac2e76264e7498c993be77ccaba22f605ac Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 22 Mar 2022 22:46:49 -0500 Subject: [PATCH 17/72] guide updates --- .../implementers-guide/src/types/network.md | 22 +++++++++++++++---- .../src/types/overseer-protocol.md | 2 ++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/roadmap/implementers-guide/src/types/network.md b/roadmap/implementers-guide/src/types/network.md index a48177b60156..772f040b764a 100644 --- a/roadmap/implementers-guide/src/types/network.md +++ b/roadmap/implementers-guide/src/types/network.md @@ -142,6 +142,23 @@ enum CollationProtocolV1 { These updates are posted from the [Network Bridge Subsystem](../node/utility/network-bridge.md) to other subsystems based on registered listeners. ```rust +struct NewGossipTopology { + /// The session index this topology corresponds to. + session: SessionIndex, + /// Neighbors in the 'X' dimension of the grid. + our_neighbors_x: HashMap, + /// Neighbors in the 'Y' dimension of the grid. + our_neighbors_y: HashMap, +} + +struct TopologyPeerInfo { + /// The validator's known peer IDs. + peer_ids: Vec, + /// The index of the validator in the discovery keys of the corresponding + /// `SessionInfo`. + validator_index: ValidatorIndex, +} + enum NetworkBridgeEvent { /// A peer with given ID is now connected. PeerConnected(PeerId, ObservedRole, Option>), @@ -154,10 +171,7 @@ enum NetworkBridgeEvent { /// /// Note, that the distribution subsystems need to handle the last /// view update of the newly added gossip peers manually. - NewGossipTopology { - our_neighbors_x: HashSet, - our_neighbors_y: HashSet, - } + NewGossipTopology(NewGossipTopology), /// We received a message from the given peer. 
PeerMessage(PeerId, M), /// The given peer has updated its description of its view. diff --git a/roadmap/implementers-guide/src/types/overseer-protocol.md b/roadmap/implementers-guide/src/types/overseer-protocol.md index 6528e375fd3e..534b828a6c10 100644 --- a/roadmap/implementers-guide/src/types/overseer-protocol.md +++ b/roadmap/implementers-guide/src/types/overseer-protocol.md @@ -553,6 +553,8 @@ enum NetworkBridgeMessage { /// Inform the distribution subsystems about the new /// gossip network topology formed. NewGossipTopology { + /// The session this topology corresponds to. + session: SessionIndex, /// Ids of our neighbors in the X dimension of the new gossip topology. /// We're not necessarily connected to all of them, but we should try to be. our_neighbors_x: HashSet, From 3d32758ee7d534babd261a4abcc80fe35228ef59 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 10:22:24 -0500 Subject: [PATCH 18/72] add session to BlockApprovalMeta --- node/core/approval-voting/src/import.rs | 1 + node/core/approval-voting/src/lib.rs | 1 + node/network/approval-distribution/src/lib.rs | 4 +++- node/primitives/src/approval.rs | 4 +++- roadmap/implementers-guide/src/types/overseer-protocol.md | 2 ++ 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/node/core/approval-voting/src/import.rs b/node/core/approval-voting/src/import.rs index cb8dea17a51e..3af81db8784c 100644 --- a/node/core/approval-voting/src/import.rs +++ b/node/core/approval-voting/src/import.rs @@ -543,6 +543,7 @@ pub(crate) async fn handle_new_head( parent_hash: block_header.parent_hash, candidates: included_candidates.iter().map(|(hash, _, _, _)| *hash).collect(), slot, + session: session_index, }); imported_candidates.push(BlockImportedCandidates { diff --git a/node/core/approval-voting/src/lib.rs b/node/core/approval-voting/src/lib.rs index 6037abd2a66a..a4fd49636ed6 100644 --- a/node/core/approval-voting/src/lib.rs +++ b/node/core/approval-voting/src/lib.rs @@ -1006,6 
+1006,7 @@ fn distribution_messages_for_activation( parent_hash: block_entry.parent_hash(), candidates: block_entry.candidates().iter().map(|(_, c_hash)| *c_hash).collect(), slot: block_entry.slot(), + session: block_entry.session(), }); for (i, (_, candidate_hash)) in block_entry.candidates().iter().enumerate() { diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 4346dbb1e080..aad37b3a7e73 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -37,7 +37,7 @@ use polkadot_node_subsystem::{ }; use polkadot_node_subsystem_util::{self as util, MIN_GOSSIP_PEERS}; use polkadot_primitives::v2::{ - BlockNumber, CandidateIndex, Hash, ValidatorIndex, ValidatorSignature, + BlockNumber, CandidateIndex, Hash, ValidatorIndex, ValidatorSignature, SessionIndex, }; use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque}; @@ -184,6 +184,8 @@ struct BlockEntry { knowledge: Knowledge, /// A votes entry for each candidate indexed by [`CandidateIndex`]. candidates: Vec, + /// The session index of this block. + session: SessionIndex, } #[derive(Debug)] diff --git a/node/primitives/src/approval.rs b/node/primitives/src/approval.rs index d53a37ed4e7b..836cda12c8fb 100644 --- a/node/primitives/src/approval.rs +++ b/node/primitives/src/approval.rs @@ -22,7 +22,7 @@ pub use sp_consensus_vrf::schnorrkel::{Randomness, VRFOutput, VRFProof}; use parity_scale_codec::{Decode, Encode}; use polkadot_primitives::v2::{ BlockNumber, CandidateHash, CandidateIndex, CoreIndex, Hash, Header, ValidatorIndex, - ValidatorSignature, + ValidatorSignature, SessionIndex, }; use sp_application_crypto::ByteArray; use sp_consensus_babe as babe_primitives; @@ -128,6 +128,8 @@ pub struct BlockApprovalMeta { pub candidates: Vec, /// The consensus slot of the block. pub slot: Slot, + /// The session of the block. 
+ pub session: SessionIndex, } /// Errors that can occur during the approvals protocol. diff --git a/roadmap/implementers-guide/src/types/overseer-protocol.md b/roadmap/implementers-guide/src/types/overseer-protocol.md index 534b828a6c10..4180f0e20d01 100644 --- a/roadmap/implementers-guide/src/types/overseer-protocol.md +++ b/roadmap/implementers-guide/src/types/overseer-protocol.md @@ -175,6 +175,8 @@ struct BlockApprovalMeta { candidates: Vec, /// The consensus slot of the block. slot: Slot, + /// The session of the block. + session: SessionIndex, } enum ApprovalDistributionMessage { From a645fb3c7adcb1514d17d2d93d038552e622f3b7 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 10:24:57 -0500 Subject: [PATCH 19/72] add session to block info --- node/network/approval-distribution/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index aad37b3a7e73..9d2436389955 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -312,7 +312,9 @@ impl State { parent_hash: meta.parent_hash.clone(), knowledge: Knowledge::default(), candidates, + session: meta.session, }); + new_hashes.insert(meta.hash.clone()); // In case there are duplicates, we should only set this if the entry From 1ec49572d7a2203303e84de0e40a670e16b65bd4 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 11:24:47 -0500 Subject: [PATCH 20/72] refactor knowledge and remove most unify logic --- node/network/approval-distribution/src/lib.rs | 370 +++++------------- 1 file changed, 91 insertions(+), 279 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 9d2436389955..5b4dc1b86a24 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -130,32 +130,54 @@ struct State { 
recent_outdated_blocks: RecentlyOutdated, } -/// A short description of a validator's assignment or approval. -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -enum MessageFingerprint { - Assignment(Hash, CandidateIndex, ValidatorIndex), - Approval(Hash, CandidateIndex, ValidatorIndex), +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum MessageKind { + Assignment, + Approval, } +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +struct MessageSubject(Hash, CandidateIndex, ValidatorIndex); + #[derive(Debug, Clone, Default)] struct Knowledge { - known_messages: HashSet, + // When there is no entry, this means the message is unknown + // When there is an entry with `MessageKind::Assignment`, the assignment is known. + // When there is an entry with `MessageKind::Approval`, the assignment and approval are known. + known_messages: HashMap, } impl Knowledge { - fn contains(&self, fingerprint: &MessageFingerprint) -> bool { - self.known_messages.contains(fingerprint) + fn contains(&self, message: &MessageSubject, kind: MessageKind) -> bool { + match (kind, self.known_messages.get(message)) { + (_, None) => false, + (MessageKind::Assignment, Some(_)) => true, + (MessageKind::Approval, Some(MessageKind::Assignment)) => false, + (MessageKind::Approval, Some(MessageKind::Approval)) => true, + } } - fn insert(&mut self, fingerprint: MessageFingerprint) -> bool { - self.known_messages.insert(fingerprint) + fn insert(&mut self, message: MessageSubject, kind: MessageKind) -> bool { + match self.known_messages.entry(message) { + hash_map::Entry::Vacant(vacant) => { + vacant.insert(kind); + true + } + hash_map::Entry::Occupied(mut occupied) => { + match (*occupied.get(), kind) { + (MessageKind::Assignment, MessageKind::Assignment) => false, + (MessageKind::Approval, MessageKind::Approval) => false, + (MessageKind::Approval, MessageKind::Assignment) => false, + (MessageKind::Assignment, MessageKind::Approval) => { + *occupied.get_mut() = MessageKind::Approval; + true + } + } + } + } } } 
-/// The difference of our knowledge and peer's knowledge -/// that is used to send the missing information. -type MissingKnowledge = HashSet; - /// Information that has been circulated to and from a peer. #[derive(Debug, Clone, Default)] struct PeerKnowledge { @@ -166,8 +188,8 @@ struct PeerKnowledge { } impl PeerKnowledge { - fn contains(&self, fingerprint: &MessageFingerprint) -> bool { - self.sent.contains(fingerprint) || self.received.contains(fingerprint) + fn contains(&self, message: &MessageSubject, kind: MessageKind) -> bool { + self.sent.contains(message, kind) || self.received.contains(message, kind) } } @@ -337,8 +359,6 @@ impl State { let view_intersection = View::new(intersection.cloned(), peer_data.view.finalized_number); Self::unify_with_peer( - ctx, - &self.gossip_peers, metrics, &mut self.blocks, peer_id.clone(), @@ -421,7 +441,7 @@ impl State { ); for (assignment, claimed_index) in assignments.into_iter() { if let Some(pending) = self.pending_known.get_mut(&assignment.block_hash) { - let fingerprint = MessageFingerprint::Assignment( + let message_subject = MessageSubject( assignment.block_hash, claimed_index, assignment.validator, @@ -430,7 +450,7 @@ impl State { gum::trace!( target: LOG_TARGET, %peer_id, - ?fingerprint, + ?message_subject, "Pending assignment", ); @@ -461,7 +481,7 @@ impl State { ); for approval_vote in approvals.into_iter() { if let Some(pending) = self.pending_known.get_mut(&approval_vote.block_hash) { - let fingerprint = MessageFingerprint::Approval( + let message_subject = MessageSubject( approval_vote.block_hash, approval_vote.candidate_index, approval_vote.validator, @@ -470,7 +490,7 @@ impl State { gum::trace!( target: LOG_TARGET, %peer_id, - ?fingerprint, + ?message_subject, "Pending approval", ); @@ -495,7 +515,7 @@ impl State { // and has an entry in the `PeerData` struct. 
async fn handle_peer_view_change( &mut self, - ctx: &mut (impl SubsystemContext + _ctx: &mut (impl SubsystemContext + overseer::SubsystemContext), metrics: &Metrics, peer_id: PeerId, @@ -528,8 +548,6 @@ impl State { } Self::unify_with_peer( - ctx, - &self.gossip_peers, metrics, &mut self.blocks, peer_id.clone(), @@ -586,23 +604,22 @@ impl State { }, }; - // compute a fingerprint of the assignment - let fingerprint = - MessageFingerprint::Assignment(block_hash, claimed_candidate_index, validator_index); + // compute metadata on the assignment. + let message_subject = MessageSubject(block_hash, claimed_candidate_index, validator_index); + let message_kind = MessageKind::Assignment; if let Some(peer_id) = source.peer_id() { // check if our knowledge of the peer already contains this assignment match entry.known_by.entry(peer_id.clone()) { hash_map::Entry::Occupied(mut peer_knowledge) => { let peer_knowledge = peer_knowledge.get_mut(); - if peer_knowledge.contains(&fingerprint) { + if peer_knowledge.contains(&message_subject, message_kind) { // wasn't included before - if !peer_knowledge.received.insert(fingerprint.clone()) { + if !peer_knowledge.received.insert(message_subject.clone(), message_kind) { gum::debug!( target: LOG_TARGET, ?peer_id, - hash = ?block_hash, - ?fingerprint, + ?message_subject, "Duplicate assignment", ); modify_reputation(ctx, peer_id, COST_DUPLICATE_MESSAGE).await; @@ -614,8 +631,7 @@ impl State { gum::debug!( target: LOG_TARGET, ?peer_id, - hash = ?block_hash, - ?fingerprint, + ?message_subject, "Assignment from a peer is out of view", ); modify_reputation(ctx, peer_id.clone(), COST_UNEXPECTED_MESSAGE).await; @@ -623,11 +639,11 @@ impl State { } // if the assignment is known to be valid, reward the peer - if entry.knowledge.known_messages.contains(&fingerprint) { + if entry.knowledge.contains(&message_subject, message_kind) { modify_reputation(ctx, peer_id.clone(), BENEFIT_VALID_MESSAGE).await; if let Some(peer_knowledge) = 
entry.known_by.get_mut(&peer_id) { - gum::trace!(target: LOG_TARGET, ?peer_id, ?fingerprint, "Known assignment"); - peer_knowledge.received.insert(fingerprint.clone()); + gum::trace!(target: LOG_TARGET, ?peer_id, ?message_subject, "Known assignment"); + peer_knowledge.received.insert(message_subject, message_kind); } return } @@ -651,13 +667,13 @@ impl State { }; drop(timer); - gum::trace!(target: LOG_TARGET, hash = ?block_hash, ?source, ?fingerprint, ?result, "Checked assignment",); + gum::trace!(target: LOG_TARGET, ?source, ?message_subject, ?result, "Checked assignment",); match result { AssignmentCheckResult::Accepted => { modify_reputation(ctx, peer_id.clone(), BENEFIT_VALID_MESSAGE_FIRST).await; - entry.knowledge.known_messages.insert(fingerprint.clone()); + entry.knowledge.known_messages.insert(message_subject.clone(), message_kind); if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) { - peer_knowledge.received.insert(fingerprint.clone()); + peer_knowledge.received.insert(message_subject.clone(), message_kind); } }, AssignmentCheckResult::AcceptedDuplicate => { @@ -665,7 +681,7 @@ impl State { // There is more than one way each validator can be assigned to each core. // cf. 
https://github.com/paritytech/polkadot/pull/2160#discussion_r557628699 if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) { - peer_knowledge.received.insert(fingerprint); + peer_knowledge.received.insert(message_subject.clone(), message_kind); } gum::debug!( target: LOG_TARGET, @@ -698,20 +714,20 @@ impl State { }, } } else { - if !entry.knowledge.known_messages.insert(fingerprint.clone()) { + if !entry.knowledge.insert(message_subject.clone(), message_kind) { // if we already imported an assignment, there is no need to distribute it again gum::warn!( target: LOG_TARGET, - ?fingerprint, + ?message_subject, "Importing locally an already known assignment", ); return } else { - gum::debug!(target: LOG_TARGET, ?fingerprint, "Importing locally a new assignment",); + gum::debug!(target: LOG_TARGET, ?message_subject, "Importing locally a new assignment",); } } - // Invariant: none of the peers except for the `source` know about the assignment. + // Invariant: to our knowledge, none of the peers except for the `source` know about the assignment. metrics.on_assignment_imported(); match entry.candidates.get_mut(claimed_candidate_index as usize) { @@ -749,11 +765,11 @@ impl State { let peers = util::choose_random_subset(|e| gossip_peers.contains(e), peers, MIN_GOSSIP_PEERS); - // Add the fingerprint of the assignment to the knowledge of each peer. + // Add the metadata of the assignment to the knowledge of each peer. for peer in peers.iter() { // we already filtered peers above, so this should always be Some if let Some(peer_knowledge) = entry.known_by.get_mut(peer) { - peer_knowledge.sent.insert(fingerprint.clone()); + peer_knowledge.sent.insert(message_subject.clone(), message_kind); } } @@ -801,22 +817,16 @@ impl State { }, }; - // compute a fingerprint of the approval - let fingerprint = - MessageFingerprint::Approval(block_hash.clone(), candidate_index, validator_index); + // compute metadata on the assignment. 
+ let message_subject = MessageSubject(block_hash, candidate_index, validator_index); + let message_kind = MessageKind::Approval; if let Some(peer_id) = source.peer_id() { - let assignment_fingerprint = MessageFingerprint::Assignment( - block_hash.clone(), - candidate_index, - validator_index, - ); - - if !entry.knowledge.known_messages.contains(&assignment_fingerprint) { + if !entry.knowledge.contains(&message_subject, MessageKind::Assignment) { gum::debug!( target: LOG_TARGET, ?peer_id, - ?fingerprint, + ?message_subject, "Unknown approval assignment", ); modify_reputation(ctx, peer_id, COST_UNEXPECTED_MESSAGE).await; @@ -827,12 +837,12 @@ impl State { match entry.known_by.entry(peer_id.clone()) { hash_map::Entry::Occupied(mut knowledge) => { let peer_knowledge = knowledge.get_mut(); - if peer_knowledge.contains(&fingerprint) { - if !peer_knowledge.received.insert(fingerprint.clone()) { + if peer_knowledge.contains(&message_subject, message_kind) { + if !peer_knowledge.received.insert(message_subject.clone(), message_kind) { gum::debug!( target: LOG_TARGET, ?peer_id, - ?fingerprint, + ?message_subject, "Duplicate approval", ); @@ -845,7 +855,7 @@ impl State { gum::debug!( target: LOG_TARGET, ?peer_id, - ?fingerprint, + ?message_subject, "Approval from a peer is out of view", ); modify_reputation(ctx, peer_id.clone(), COST_UNEXPECTED_MESSAGE).await; @@ -853,11 +863,11 @@ impl State { } // if the approval is known to be valid, reward the peer - if entry.knowledge.contains(&fingerprint) { - gum::trace!(target: LOG_TARGET, ?peer_id, ?fingerprint, "Known approval"); + if entry.knowledge.contains(&message_subject, message_kind) { + gum::trace!(target: LOG_TARGET, ?peer_id, ?message_subject, "Known approval"); modify_reputation(ctx, peer_id.clone(), BENEFIT_VALID_MESSAGE).await; if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) { - peer_knowledge.received.insert(fingerprint.clone()); + peer_knowledge.received.insert(message_subject.clone(), message_kind); } 
return } @@ -877,14 +887,14 @@ impl State { }; drop(timer); - gum::trace!(target: LOG_TARGET, ?peer_id, ?fingerprint, ?result, "Checked approval",); + gum::trace!(target: LOG_TARGET, ?peer_id, ?message_subject, ?result, "Checked approval",); match result { ApprovalCheckResult::Accepted => { modify_reputation(ctx, peer_id.clone(), BENEFIT_VALID_MESSAGE_FIRST).await; - entry.knowledge.insert(fingerprint.clone()); + entry.knowledge.insert(message_subject.clone(), message_kind); if let Some(peer_knowledge) = entry.known_by.get_mut(&peer_id) { - peer_knowledge.received.insert(fingerprint.clone()); + peer_knowledge.received.insert(message_subject.clone(), message_kind); } }, ApprovalCheckResult::Bad(error) => { @@ -899,20 +909,20 @@ impl State { }, } } else { - if !entry.knowledge.insert(fingerprint.clone()) { + if !entry.knowledge.insert(message_subject.clone(), message_kind) { // if we already imported an approval, there is no need to distribute it again gum::warn!( target: LOG_TARGET, - ?fingerprint, + ?message_subject, "Importing locally an already known approval", ); return } else { - gum::debug!(target: LOG_TARGET, ?fingerprint, "Importing locally a new approval",); + gum::debug!(target: LOG_TARGET, ?message_subject, "Importing locally a new approval",); } } - // Invariant: none of the peers except for the `source` know about the approval. + // Invariant: to our knowledge, none of the peers except for the `source` know about the approval. 
metrics.on_approval_imported(); match entry.candidates.get_mut(candidate_index as usize) { @@ -928,7 +938,7 @@ impl State { }, Some(ApprovalState::Approved(..)) => { unreachable!( - "we only insert it after the fingerprint, checked the fingerprint above; qed" + "we only insert it after the metadata, checked the metadata above; qed" ); }, None => { @@ -969,11 +979,11 @@ impl State { let peers = util::choose_random_subset(|e| gossip_peers.contains(e), peers, MIN_GOSSIP_PEERS); - // Add the fingerprint of the assignment to the knowledge of each peer. + // Add the metadata of the assignment to the knowledge of each peer. for peer in peers.iter() { // we already filtered peers above, so this should always be Some if let Some(entry) = entry.known_by.get_mut(peer) { - entry.sent.insert(fingerprint.clone()); + entry.sent.insert(message_subject.clone(), message_kind); } } @@ -999,9 +1009,6 @@ impl State { } async fn unify_with_peer( - ctx: &mut (impl SubsystemContext - + overseer::SubsystemContext), - gossip_peers: &HashSet, metrics: &Metrics, entries: &mut HashMap, peer_id: PeerId, @@ -1009,217 +1016,22 @@ impl State { ) { metrics.on_unify_with_peer(); let _timer = metrics.time_unify_with_peer(); - let mut to_send: Vec<(Hash, MissingKnowledge)> = Vec::new(); let view_finalized_number = view.finalized_number; for head in view.into_iter() { let mut block = head; - let interesting_blocks = std::iter::from_fn(|| { - // step 2. + loop { + // TODO [now]: send messages based on required routing and grid dimension. + let entry = match entries.get_mut(&block) { Some(entry) if entry.number > view_finalized_number => entry, - _ => return None, - }; - let missing_knowledge = match entry.known_by.entry(peer_id.clone()) { - hash_map::Entry::Occupied(e) => { - let missing: MissingKnowledge = entry - .knowledge - .known_messages - .iter() - .filter(|m| !e.get().contains(m)) - .cloned() - .collect(); - // step 3. 
- // We assume if peer's knowledge is complete for block N, - // this is also true for its ancestors. - // This safeguard is needed primarily in case of long finality stalls - // so we don't waste time in a loop for every peer. - if missing.is_empty() { - gum::trace!( - target: LOG_TARGET, - ?block, - ?peer_id, - "Stopping at this block, because peer knows all", - ); - return None - } - missing - }, - // step 4. - hash_map::Entry::Vacant(vacant) => { - let knowledge = PeerKnowledge::default(); - vacant.insert(knowledge); - entry.knowledge.known_messages.clone() - }, + _ => break, }; - // step 5. - let interesting_block = block; - block = entry.parent_hash.clone(); - Some((interesting_block, missing_knowledge)) - }); - to_send.extend(interesting_blocks); - } - - let is_gossip_peer = gossip_peers.contains(&peer_id); - let lucky = is_gossip_peer || - util::gen_ratio( - util::MIN_GOSSIP_PEERS.saturating_sub(gossip_peers.len()), - util::MIN_GOSSIP_PEERS, - ); - if !lucky { - gum::trace!(target: LOG_TARGET, ?peer_id, "Unlucky peer"); - return - } - - // step 6. 
- // send all assignments and approvals for all candidates in those blocks to the peer - Self::send_gossip_messages_to_peer(entries, ctx, peer_id, to_send).await; - } - - async fn send_gossip_messages_to_peer( - entries: &mut HashMap, - ctx: &mut (impl SubsystemContext - + overseer::SubsystemContext), - peer_id: PeerId, - blocks: Vec<(Hash, MissingKnowledge)>, - ) { - let mut assignments = Vec::new(); - let mut approvals = Vec::new(); - let num_blocks = blocks.len(); - - for (block, missing) in blocks.into_iter() { - let entry = match entries.get_mut(&block) { - Some(entry) => entry, - None => continue, // should be unreachable - }; - gum::trace!( - target: LOG_TARGET, - "Sending all assignments and approvals in block {} to peer {}", - block, - peer_id, - ); - - for (candidate_index, candidate_entry) in entry.candidates.iter().enumerate() { - let candidate_index = candidate_index as u32; - for (validator_index, approval_state) in candidate_entry.approvals.iter() { - let assignment_fingerprint = MessageFingerprint::Assignment( - block.clone(), - candidate_index, - validator_index.clone(), - ); - - match approval_state { - ApprovalState::Assigned(cert) => { - if !missing.contains(&assignment_fingerprint) { - gum::trace!( - target: LOG_TARGET, - ?block, - ?validator_index, - ?candidate_index, - "Skipping sending known assignment", - ); - continue - } - if let Some(p) = entry.known_by.get_mut(&peer_id) { - p.sent.insert(assignment_fingerprint); - } - assignments.push(( - IndirectAssignmentCert { - block_hash: block.clone(), - validator: validator_index.clone(), - cert: cert.clone(), - }, - candidate_index.clone(), - )); - }, - ApprovalState::Approved(assignment_cert, signature) => { - let fingerprint = MessageFingerprint::Approval( - block.clone(), - candidate_index, - validator_index.clone(), - ); - if missing.contains(&assignment_fingerprint) { - if let Some(p) = entry.known_by.get_mut(&peer_id) { - p.sent.insert(assignment_fingerprint); - } - assignments.push(( - 
IndirectAssignmentCert { - block_hash: block.clone(), - validator: validator_index.clone(), - cert: assignment_cert.clone(), - }, - candidate_index.clone(), - )); - } else { - gum::trace!( - target: LOG_TARGET, - ?block, - ?validator_index, - ?candidate_index, - "Skipping sending known assignment", - ); - } - if missing.contains(&fingerprint) { - if let Some(p) = entry.known_by.get_mut(&peer_id) { - p.sent.insert(fingerprint); - } - approvals.push(IndirectSignedApprovalVote { - block_hash: block.clone(), - validator: validator_index.clone(), - candidate_index: candidate_index.clone(), - signature: signature.clone(), - }); - } else { - gum::trace!( - target: LOG_TARGET, - ?block, - ?validator_index, - ?candidate_index, - "Skipping sending known approval", - ); - } - }, - } - } + entry.known_by.entry(peer_id.clone()).or_default(); + block = entry.parent_hash.clone(); } } - - if !assignments.is_empty() { - gum::trace!( - target: LOG_TARGET, - num = assignments.len(), - ?num_blocks, - ?peer_id, - "Sending assignments to a peer", - ); - - ctx.send_message(NetworkBridgeMessage::SendValidationMessage( - vec![peer_id.clone()], - protocol_v1::ValidationProtocol::ApprovalDistribution( - protocol_v1::ApprovalDistributionMessage::Assignments(assignments), - ), - )) - .await; - } - - if !approvals.is_empty() { - gum::trace!( - target: LOG_TARGET, - num = approvals.len(), - ?num_blocks, - ?peer_id, - "Sending approvals to a peer", - ); - - ctx.send_message(NetworkBridgeMessage::SendValidationMessage( - vec![peer_id], - protocol_v1::ValidationProtocol::ApprovalDistribution( - protocol_v1::ApprovalDistributionMessage::Approvals(approvals), - ), - )) - .await; - } } } From 7d790cc4d94351f840bbe7768140ec7061211fbe Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 12:00:48 -0500 Subject: [PATCH 21/72] start replacing gossip_peers with new SessionTopologies --- node/network/approval-distribution/src/lib.rs | 80 +++++++++++++------ 1 file changed, 54 
insertions(+), 26 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 5b4dc1b86a24..4c64ed8875b9 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -32,12 +32,13 @@ use polkadot_node_subsystem::{ ApprovalCheckResult, ApprovalDistributionMessage, ApprovalVotingMessage, AssignmentCheckResult, NetworkBridgeEvent, NetworkBridgeMessage, }, + messages::network_bridge_event::TopologyPeerInfo, overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, SubsystemError, }; use polkadot_node_subsystem_util::{self as util, MIN_GOSSIP_PEERS}; use polkadot_primitives::v2::{ - BlockNumber, CandidateIndex, Hash, ValidatorIndex, ValidatorSignature, SessionIndex, + AuthorityDiscoveryId, BlockNumber, CandidateIndex, Hash, ValidatorIndex, ValidatorSignature, SessionIndex, }; use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque}; @@ -101,6 +102,43 @@ impl RecentlyOutdated { } } +struct SessionTopology { + our_neighbors_x: HashMap, + our_neighbors_y: HashMap, +} + +#[derive(Default)] +struct SessionTopologies { + inner: HashMap, usize)> +} + +impl SessionTopologies { + fn get_topology(&self, session: SessionIndex) -> Option<&SessionTopology> { + self.inner.get(&session).and_then(|val| val.0.as_ref()) + } + + fn inc_session_refs(&mut self, session: SessionIndex) { + self.inner.entry(session).or_insert((None, 0)).1 += 1; + } + + fn dec_session_refs(&mut self, session: SessionIndex) { + if let hash_map::Entry::Occupied(mut occupied) = self.inner.entry(session) { + occupied.get_mut().1 = occupied.get().1.saturating_sub(1); + if occupied.get().1 == 0 { + let _ = occupied.remove(); + } + } + } + + // No-op if already present. 
+ fn insert_topology(&mut self, session: SessionIndex, topology: SessionTopology) { + let entry = self.inner.entry(session).or_insert((None, 0)); + if entry.0.is_none() { + entry.0 = Some(topology); + } + } +} + /// The [`State`] struct is responsible for tracking the overall state of the subsystem. /// /// It tracks metadata about our view of the unfinalized chain, @@ -122,9 +160,8 @@ struct State { /// Peer data is partially stored here, and partially inline within the [`BlockEntry`]s peer_data: HashMap, - /// Track all our neighbors in the current gossip topology. - /// We're not necessarily connected to all of them. - gossip_peers: HashSet, + /// Topologies for various different sessions. + topologies: SessionTopologies, /// Tracks recently finalized blocks. recent_outdated_blocks: RecentlyOutdated, @@ -266,23 +303,9 @@ impl State { }) }, NetworkBridgeEvent::NewGossipTopology(topology) => { - // TODO [now]: update shared dimension of all peers. - // TODO [now]: update broadcast dimensions of all messages - // TODO [now]: broadcast messages along new dimensions. - let peers: HashSet = topology.our_neighbors_x - .values() - .chain(topology.our_neighbors_y.values()) - .flat_map(|peer_info| peer_info.peer_ids.iter().cloned()) - .collect(); - - let newly_added: Vec = - peers.difference(&self.gossip_peers).cloned().collect(); - self.gossip_peers = peers; - for peer_id in newly_added { - if let Some(peer_data) = self.peer_data.remove(&peer_id) { - self.handle_peer_view_change(ctx, metrics, peer_id, peer_data.view).await; - } - } + // TODO [now]: add to session topologies + // TODO [now]: iterate all blocks in the session and + // update required routing for all messages. and route to necessary peers. 
}, NetworkBridgeEvent::PeerViewChange(peer_id, view) => { self.handle_peer_view_change(ctx, metrics, peer_id, view).await; @@ -337,6 +360,8 @@ impl State { session: meta.session, }); + self.topologies.inc_session_refs(meta.session); + new_hashes.insert(meta.hash.clone()); // In case there are duplicates, we should only set this if the entry @@ -569,7 +594,9 @@ impl State { // now that we pruned `self.blocks_by_number`, let's clean up `self.blocks` too old_blocks.values().flatten().for_each(|relay_block| { self.recent_outdated_blocks.note_outdated(*relay_block); - self.blocks.remove(relay_block); + if let Some(block_entry) = self.blocks.remove(relay_block) { + self.topologies.dec_session_refs(block_entry.session); + } }); } @@ -761,9 +788,10 @@ impl State { .collect::>(); let assignments = vec![(assignment, claimed_candidate_index)]; - let gossip_peers = &self.gossip_peers; + + // TODO [now]: make use of topology let peers = - util::choose_random_subset(|e| gossip_peers.contains(e), peers, MIN_GOSSIP_PEERS); + util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); // Add the metadata of the assignment to the knowledge of each peer. for peer in peers.iter() { @@ -975,9 +1003,9 @@ impl State { .filter(|key| maybe_peer_id.as_ref().map_or(true, |id| id != key)) .collect::>(); - let gossip_peers = &self.gossip_peers; + // TODO [now]: just send to peers we've sent assignments to. let peers = - util::choose_random_subset(|e| gossip_peers.contains(e), peers, MIN_GOSSIP_PEERS); + util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); // Add the metadata of the assignment to the knowledge of each peer. 
for peer in peers.iter() { From 6d8717afc529263d9432094e2264332cab63fd90 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 12:39:58 -0500 Subject: [PATCH 22/72] add routing information to message state --- node/network/approval-distribution/src/lib.rs | 142 ++++++++++++------ node/network/bitfield-distribution/src/lib.rs | 3 +- .../network/statement-distribution/src/lib.rs | 3 +- node/primitives/src/approval.rs | 4 +- 4 files changed, 99 insertions(+), 53 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 4c64ed8875b9..ba8048ff7eee 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -29,16 +29,16 @@ use polkadot_node_primitives::approval::{ }; use polkadot_node_subsystem::{ messages::{ - ApprovalCheckResult, ApprovalDistributionMessage, ApprovalVotingMessage, - AssignmentCheckResult, NetworkBridgeEvent, NetworkBridgeMessage, + network_bridge_event::TopologyPeerInfo, ApprovalCheckResult, ApprovalDistributionMessage, + ApprovalVotingMessage, AssignmentCheckResult, NetworkBridgeEvent, NetworkBridgeMessage, }, - messages::network_bridge_event::TopologyPeerInfo, overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, SubsystemError, }; use polkadot_node_subsystem_util::{self as util, MIN_GOSSIP_PEERS}; use polkadot_primitives::v2::{ - AuthorityDiscoveryId, BlockNumber, CandidateIndex, Hash, ValidatorIndex, ValidatorSignature, SessionIndex, + AuthorityDiscoveryId, BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, + ValidatorSignature, }; use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque}; @@ -67,16 +67,9 @@ pub struct ApprovalDistribution { metrics: Metrics, } -#[derive(Debug, Clone, Copy)] -enum GridDimension { - Row, - Column, -} - #[derive(Default)] struct PeerData { view: View, - shared_dimension: Option, } /// Contains recently 
finalized @@ -109,7 +102,7 @@ struct SessionTopology { #[derive(Default)] struct SessionTopologies { - inner: HashMap, usize)> + inner: HashMap, usize)>, } impl SessionTopologies { @@ -199,18 +192,16 @@ impl Knowledge { hash_map::Entry::Vacant(vacant) => { vacant.insert(kind); true - } - hash_map::Entry::Occupied(mut occupied) => { - match (*occupied.get(), kind) { - (MessageKind::Assignment, MessageKind::Assignment) => false, - (MessageKind::Approval, MessageKind::Approval) => false, - (MessageKind::Approval, MessageKind::Assignment) => false, - (MessageKind::Assignment, MessageKind::Approval) => { - *occupied.get_mut() = MessageKind::Approval; - true - } - } - } + }, + hash_map::Entry::Occupied(mut occupied) => match (*occupied.get(), kind) { + (MessageKind::Assignment, MessageKind::Assignment) => false, + (MessageKind::Approval, MessageKind::Approval) => false, + (MessageKind::Approval, MessageKind::Assignment) => false, + (MessageKind::Assignment, MessageKind::Approval) => { + *occupied.get_mut() = MessageKind::Approval; + true + }, + }, } } } @@ -253,12 +244,38 @@ enum ApprovalState { Approved(AssignmentCert, ValidatorSignature), } +#[derive(Debug, Clone, Copy, PartialEq)] +enum RequiredRouting { + /// We don't know yet, because we're waiting for topology info + /// (race condition between learning about the first blocks in a new session + /// and getting the topology for that session) + PendingTopology, + /// Propagate to all peers sharing either the X or Y dimension of the grid. + GridXY, + /// Propagate to all peers sharing the X dimension of the grid. + GridX, + /// Propagate to all peers sharing the Y dimension of the grid. + GridY, + /// No required progation. + None, +} + +// routing state bundled with messages for the candidate. Corresponding assignments +// and approvals are stored together and should be routed in the same way, with +// assignments preceding approvals in all cases. 
+#[derive(Debug)] +struct MessageState { + required_routing: RequiredRouting, + random_routing: usize, // Number peers to target in random routing. + approval_state: ApprovalState, +} + /// Information about candidates in the context of a particular block they are included in. /// In other words, multiple `CandidateEntry`s may exist for the same candidate, /// if it is included by multiple blocks - this is likely the case when there are forks. #[derive(Debug, Default)] struct CandidateEntry { - approvals: HashMap, + messages: HashMap, } #[derive(Debug, Clone)] @@ -572,13 +589,7 @@ impl State { }); } - Self::unify_with_peer( - metrics, - &mut self.blocks, - peer_id.clone(), - view, - ) - .await; + Self::unify_with_peer(metrics, &mut self.blocks, peer_id.clone(), view).await; } fn handle_block_finalized(&mut self, finalized_number: BlockNumber) { @@ -694,7 +705,13 @@ impl State { }; drop(timer); - gum::trace!(target: LOG_TARGET, ?source, ?message_subject, ?result, "Checked assignment",); + gum::trace!( + target: LOG_TARGET, + ?source, + ?message_subject, + ?result, + "Checked assignment", + ); match result { AssignmentCheckResult::Accepted => { modify_reputation(ctx, peer_id.clone(), BENEFIT_VALID_MESSAGE_FIRST).await; @@ -750,7 +767,11 @@ impl State { ); return } else { - gum::debug!(target: LOG_TARGET, ?message_subject, "Importing locally a new assignment",); + gum::debug!( + target: LOG_TARGET, + ?message_subject, + "Importing locally a new assignment", + ); } } @@ -761,10 +782,14 @@ impl State { Some(candidate_entry) => { // set the approval state for validator_index to Assigned // unless the approval state is set already - candidate_entry - .approvals - .entry(validator_index) - .or_insert_with(|| ApprovalState::Assigned(assignment.cert.clone())); + candidate_entry.messages.entry(validator_index).or_insert_with(|| { + // TODO [now]: do routing. 
+ MessageState { + required_routing: RequiredRouting::None, + random_routing: 0, + approval_state: ApprovalState::Assigned(assignment.cert.clone()), + } + }); }, None => { gum::warn!( @@ -790,8 +815,7 @@ impl State { let assignments = vec![(assignment, claimed_candidate_index)]; // TODO [now]: make use of topology - let peers = - util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); + let peers = util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); // Add the metadata of the assignment to the knowledge of each peer. for peer in peers.iter() { @@ -915,7 +939,13 @@ impl State { }; drop(timer); - gum::trace!(target: LOG_TARGET, ?peer_id, ?message_subject, ?result, "Checked approval",); + gum::trace!( + target: LOG_TARGET, + ?peer_id, + ?message_subject, + ?result, + "Checked approval", + ); match result { ApprovalCheckResult::Accepted => { modify_reputation(ctx, peer_id.clone(), BENEFIT_VALID_MESSAGE_FIRST).await; @@ -946,7 +976,11 @@ impl State { ); return } else { - gum::debug!(target: LOG_TARGET, ?message_subject, "Importing locally a new approval",); + gum::debug!( + target: LOG_TARGET, + ?message_subject, + "Importing locally a new approval", + ); } } @@ -957,14 +991,25 @@ impl State { Some(candidate_entry) => { // set the approval state for validator_index to Approved // it should be in assigned state already - match candidate_entry.approvals.remove(&validator_index) { - Some(ApprovalState::Assigned(cert)) => { - candidate_entry.approvals.insert( + match candidate_entry.messages.remove(&validator_index) { + Some(MessageState { + approval_state: ApprovalState::Assigned(cert), + required_routing, + random_routing, + }) => { + candidate_entry.messages.insert( validator_index, - ApprovalState::Approved(cert, vote.signature.clone()), + MessageState { + approval_state: ApprovalState::Approved( + cert, + vote.signature.clone(), + ), + required_routing, + random_routing, + }, ); }, - Some(ApprovalState::Approved(..)) => { + Some(_) => { unreachable!( 
"we only insert it after the metadata, checked the metadata above; qed" ); @@ -1004,8 +1049,7 @@ impl State { .collect::>(); // TODO [now]: just send to peers we've sent assignments to. - let peers = - util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); + let peers = util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); // Add the metadata of the assignment to the knowledge of each peer. for peer in peers.iter() { diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index 27e62f023b4f..f01557781b30 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -524,7 +524,8 @@ async fn handle_network_msg( state.peer_views.remove(&peer); }, NetworkBridgeEvent::NewGossipTopology(topology) => { - let peers: HashSet = topology.our_neighbors_x + let peers: HashSet = topology + .our_neighbors_x .values() .chain(topology.our_neighbors_y.values()) .flat_map(|peer_info| peer_info.peer_ids.iter().cloned()) diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 2dd30aecad1b..d438af50a70c 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -1624,7 +1624,8 @@ async fn handle_network_update( } }, NetworkBridgeEvent::NewGossipTopology(topology) => { - let new_peers: HashSet = topology.our_neighbors_x + let new_peers: HashSet = topology + .our_neighbors_x .values() .chain(topology.our_neighbors_y.values()) .flat_map(|peer_info| peer_info.peer_ids.iter().cloned()) diff --git a/node/primitives/src/approval.rs b/node/primitives/src/approval.rs index 836cda12c8fb..ab9a4ce012bd 100644 --- a/node/primitives/src/approval.rs +++ b/node/primitives/src/approval.rs @@ -21,8 +21,8 @@ pub use sp_consensus_vrf::schnorrkel::{Randomness, VRFOutput, VRFProof}; use parity_scale_codec::{Decode, Encode}; use polkadot_primitives::v2::{ - BlockNumber, 
CandidateHash, CandidateIndex, CoreIndex, Hash, Header, ValidatorIndex, - ValidatorSignature, SessionIndex, + BlockNumber, CandidateHash, CandidateIndex, CoreIndex, Hash, Header, SessionIndex, + ValidatorIndex, ValidatorSignature, }; use sp_application_crypto::ByteArray; use sp_consensus_babe as babe_primitives; From 7da64debf70dd7d21b5d9d6fb67b4de5c090d66f Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 21:51:07 -0500 Subject: [PATCH 23/72] add some utilities to SessionTopology --- node/network/approval-distribution/src/lib.rs | 85 +++++++++++++++++-- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index ba8048ff7eee..1316a9caf5c6 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -29,7 +29,7 @@ use polkadot_node_primitives::approval::{ }; use polkadot_node_subsystem::{ messages::{ - network_bridge_event::TopologyPeerInfo, ApprovalCheckResult, ApprovalDistributionMessage, + network_bridge_event, ApprovalCheckResult, ApprovalDistributionMessage, ApprovalVotingMessage, AssignmentCheckResult, NetworkBridgeEvent, NetworkBridgeMessage, }, overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, @@ -96,8 +96,59 @@ impl RecentlyOutdated { } struct SessionTopology { - our_neighbors_x: HashMap, - our_neighbors_y: HashMap, + peers_x: HashSet, + validator_indices_x: HashSet, + peers_y: HashSet, + validator_indices_y: HashSet, +} + +impl SessionTopology { + fn required_routing_for(&self, validator_index: ValidatorIndex) -> RequiredRouting { + let grid_x = self.validator_indices_x.contains(&validator_index); + let grid_y = self.validator_indices_y.contains(&validator_index); + + match (grid_x, grid_y) { + (false, false) => RequiredRouting::None, + (true, false) => RequiredRouting::GridX, + (false, true) => RequiredRouting::GridY, + (true, true) => 
RequiredRouting::GridXY, // if the grid works as expected, this shouldn't happen. + } + } + + // Get a filter function based on this topology and the required routing + // which returns `true` for peers that are within the required routing set + // and false otherwise. + fn peer_filter<'a>( + &'a self, + required_routing: RequiredRouting, + ) -> impl Fn(&PeerId) -> bool + 'a { + let (grid_x, grid_y) = match required_routing { + RequiredRouting::GridX => (true, false), + RequiredRouting::GridY => (false, true), + RequiredRouting::GridXY => (true, true), + RequiredRouting::None | RequiredRouting::PendingTopology => (false, false), + }; + + move |peer| { + (grid_x && self.peers_x.contains(peer)) || (grid_y && self.peers_y.contains(peer)) + } + } +} + +impl From for SessionTopology { + fn from(topology: network_bridge_event::NewGossipTopology) -> Self { + let peers_x = + topology.our_neighbors_x.values().flat_map(|p| &p.peer_ids).cloned().collect(); + let peers_y = + topology.our_neighbors_y.values().flat_map(|p| &p.peer_ids).cloned().collect(); + + let validator_indices_x = + topology.our_neighbors_x.values().map(|p| p.validator_index.clone()).collect(); + let validator_indices_y = + topology.our_neighbors_y.values().map(|p| p.validator_index.clone()).collect(); + + SessionTopology { peers_x, peers_y, validator_indices_x, validator_indices_y } + } } #[derive(Default)] @@ -320,9 +371,14 @@ impl State { }) }, NetworkBridgeEvent::NewGossipTopology(topology) => { - // TODO [now]: add to session topologies - // TODO [now]: iterate all blocks in the session and - // update required routing for all messages. and route to necessary peers. 
+ let session = topology.session; + self.handle_new_session_topology( + ctx, + metrics, + session, + SessionTopology::from(topology), + ) + .await; }, NetworkBridgeEvent::PeerViewChange(peer_id, view) => { self.handle_peer_view_change(ctx, metrics, peer_id, view).await; @@ -465,6 +521,22 @@ impl State { } } + async fn handle_new_session_topology( + &mut self, + ctx: &mut (impl SubsystemContext + + overseer::SubsystemContext), + metrics: &Metrics, + session: SessionIndex, + topology: SessionTopology, + ) { + self.topologies.insert_topology(session, topology); + let topology = self.topologies.get_topology(session).expect("just inserted above; qed"); + + // TODO [now]: iterate all blocks in the session + // Update required routing for each. + // Send messages for all with changed required routing. + } + async fn process_incoming_peer_message( &mut self, ctx: &mut (impl SubsystemContext @@ -997,6 +1069,7 @@ impl State { required_routing, random_routing, }) => { + // TODO [now]: distribute candidate_entry.messages.insert( validator_index, MessageState { From 0a3ffa7ddb053095b0c240545dd411a2f1ad9210 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 22:17:38 -0500 Subject: [PATCH 24/72] implement new gossip topology logic --- node/network/approval-distribution/src/lib.rs | 129 +++++++++++++++++- 1 file changed, 126 insertions(+), 3 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 1316a9caf5c6..081a440b54cf 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -295,6 +295,22 @@ enum ApprovalState { Approved(AssignmentCert, ValidatorSignature), } +impl ApprovalState { + fn assignment_cert(&self) -> &AssignmentCert { + match *self { + ApprovalState::Assigned(ref cert) => cert, + ApprovalState::Approved(ref cert, _) => cert, + } + } + + fn approval_signature(&self) -> Option { + match *self { + 
ApprovalState::Assigned(_) => None, + ApprovalState::Approved(_, ref sig) => Some(sig.clone()), + } + } +} + #[derive(Debug, Clone, Copy, PartialEq)] enum RequiredRouting { /// We don't know yet, because we're waiting for topology info @@ -311,6 +327,16 @@ enum RequiredRouting { None, } +impl RequiredRouting { + // Whether the required routing set is definitely empty. + fn is_empty(self) -> bool { + match self { + RequiredRouting::PendingTopology | RequiredRouting::None => true, + _ => false, + } + } +} + // routing state bundled with messages for the candidate. Corresponding assignments // and approvals are stored together and should be routed in the same way, with // assignments preceding approvals in all cases. @@ -532,9 +558,106 @@ impl State { self.topologies.insert_topology(session, topology); let topology = self.topologies.get_topology(session).expect("just inserted above; qed"); - // TODO [now]: iterate all blocks in the session - // Update required routing for each. - // Send messages for all with changed required routing. + let mut peer_assignments = HashMap::new(); + let mut peer_approvals = HashMap::new(); + + // Iterate all blocks in the session, producing payloads + // for each connected peer. + for (block_hash, block_entry) in &mut self.blocks { + if block_entry.session != session { + continue + } + + // Iterate all messages in all candidates. + for (candidate_index, validator, message_state) in block_entry + .candidates + .iter_mut() + .enumerate() + .flat_map(|(c_i, c)| c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v))) + { + if message_state.required_routing == RequiredRouting::PendingTopology { + message_state.required_routing = + topology.required_routing_for(validator.clone()); + } + + if message_state.required_routing.is_empty() { + continue + } + + // Propagate the message to all peers in the required routing set. 
+ let peer_filter = topology.peer_filter(message_state.required_routing); + let message_subject = + MessageSubject(block_hash.clone(), candidate_index, validator.clone()); + + let assignment_message = ( + IndirectAssignmentCert { + block_hash: block_hash.clone(), + validator: validator.clone(), + cert: message_state.approval_state.assignment_cert().clone(), + }, + candidate_index, + ); + let approval_message = + message_state.approval_state.approval_signature().map(|signature| { + IndirectSignedApprovalVote { + block_hash: block_hash.clone(), + validator: validator.clone(), + candidate_index, + signature, + } + }); + + for (peer, peer_knowledge) in &mut block_entry.known_by { + if !peer_filter(peer) { + continue + } + + if !peer_knowledge.contains(&message_subject, MessageKind::Assignment) { + peer_knowledge + .sent + .insert(message_subject.clone(), MessageKind::Assignment); + peer_assignments + .entry(peer.clone()) + .or_insert_with(Vec::new) + .push(assignment_message.clone()); + } + + if let Some(approval_message) = approval_message.as_ref() { + if !peer_knowledge.contains(&message_subject, MessageKind::Approval) { + peer_knowledge + .sent + .insert(message_subject.clone(), MessageKind::Approval); + peer_approvals + .entry(peer.clone()) + .or_insert_with(Vec::new) + .push(approval_message.clone()); + } + } + } + } + } + + // Send messages in accumulated packets, assignments preceding approvals. 
+ + for (peer, assignments_packet) in peer_assignments { + ctx.send_message(NetworkBridgeMessage::SendValidationMessage( + vec![peer], + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(assignments_packet), + ), + )) + .await; + } + + for (peer, approvals_packet) in peer_approvals { + ctx.send_message(NetworkBridgeMessage::SendValidationMessage( + vec![peer], + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(approvals_packet), + ), + )) + .await; + } } async fn process_incoming_peer_message( From 476c0b0c5e3b014700eb8170509b65b81e05e33b Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 22:57:57 -0500 Subject: [PATCH 25/72] re-implement unify_with_peer --- node/network/approval-distribution/src/lib.rs | 121 +++++++++++++++++- 1 file changed, 116 insertions(+), 5 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 081a440b54cf..afc440334ba7 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -483,8 +483,10 @@ impl State { let view_intersection = View::new(intersection.cloned(), peer_data.view.finalized_number); Self::unify_with_peer( + ctx, metrics, &mut self.blocks, + &self.topologies, peer_id.clone(), view_intersection, ) @@ -752,7 +754,7 @@ impl State { // and has an entry in the `PeerData` struct. 
async fn handle_peer_view_change( &mut self, - _ctx: &mut (impl SubsystemContext + ctx: &mut (impl SubsystemContext + overseer::SubsystemContext), metrics: &Metrics, peer_id: PeerId, @@ -784,7 +786,15 @@ impl State { }); } - Self::unify_with_peer(metrics, &mut self.blocks, peer_id.clone(), view).await; + Self::unify_with_peer( + ctx, + metrics, + &mut self.blocks, + &self.topologies, + peer_id.clone(), + view, + ) + .await; } fn handle_block_finalized(&mut self, finalized_number: BlockNumber) { @@ -1277,29 +1287,130 @@ impl State { } async fn unify_with_peer( + ctx: &mut (impl SubsystemContext + + overseer::SubsystemContext), metrics: &Metrics, entries: &mut HashMap, + topologies: &SessionTopologies, peer_id: PeerId, view: View, ) { metrics.on_unify_with_peer(); let _timer = metrics.time_unify_with_peer(); + let mut assignments_to_send = Vec::new(); + let mut approvals_to_send = Vec::new(); + let view_finalized_number = view.finalized_number; for head in view.into_iter() { let mut block = head; loop { - // TODO [now]: send messages based on required routing and grid dimension. - + let sent_before = assignments_to_send.len() + approvals_to_send.len(); let entry = match entries.get_mut(&block) { Some(entry) if entry.number > view_finalized_number => entry, _ => break, }; - entry.known_by.entry(peer_id.clone()).or_default(); + let topology = match topologies.get_topology(entry.session) { + Some(t) => t, + None => { + // The gossip topology for a recently entered session might be missing + // as we're still awaiting it from the network subsystems. + // + // We'll send required messages when we get it. + + block = entry.parent_hash.clone(); + continue + }, + }; + + let peer_knowledge = entry.known_by.entry(peer_id.clone()).or_default(); + + // Iterate all messages in all candidates. 
+ for (candidate_index, validator, message_state) in + entry.candidates.iter_mut().enumerate().flat_map(|(c_i, c)| { + c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v)) + }) { + if message_state.required_routing.is_empty() { + continue + } + + // Propagate the message to all peers in the required routing set. + let peer_filter = topology.peer_filter(message_state.required_routing); + if !peer_filter(&peer_id) { + continue + } + + let message_subject = + MessageSubject(block.clone(), candidate_index, validator.clone()); + + let assignment_message = ( + IndirectAssignmentCert { + block_hash: block.clone(), + validator: validator.clone(), + cert: message_state.approval_state.assignment_cert().clone(), + }, + candidate_index, + ); + + let approval_message = + message_state.approval_state.approval_signature().map(|signature| { + IndirectSignedApprovalVote { + block_hash: block.clone(), + validator: validator.clone(), + candidate_index, + signature, + } + }); + + if !peer_knowledge.contains(&message_subject, MessageKind::Assignment) { + peer_knowledge + .sent + .insert(message_subject.clone(), MessageKind::Assignment); + assignments_to_send.push(assignment_message); + } + + if let Some(approval_message) = approval_message { + if !peer_knowledge.contains(&message_subject, MessageKind::Approval) { + peer_knowledge + .sent + .insert(message_subject.clone(), MessageKind::Approval); + approvals_to_send.push(approval_message); + } + } + } + + // If peer's knowledge is complete relative to our knowledge at one block, + // it's complete in its ancestors too. 
+ + let sent_after = assignments_to_send.len() + approvals_to_send.len(); + if sent_before == sent_after { + break + } + block = entry.parent_hash.clone(); } } + + if !assignments_to_send.is_empty() { + ctx.send_message(NetworkBridgeMessage::SendValidationMessage( + vec![peer_id.clone()], + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(assignments_to_send), + ), + )) + .await; + } + + if !approvals_to_send.is_empty() { + ctx.send_message(NetworkBridgeMessage::SendValidationMessage( + vec![peer_id.clone()], + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(approvals_to_send), + ), + )) + .await; + } } } From 6263ab13ab01d5f5f4998f33151083e1bf8e7b65 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 23:10:38 -0500 Subject: [PATCH 26/72] distribute assignments according to topology --- node/network/approval-distribution/src/lib.rs | 60 ++++++++++++------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index afc440334ba7..3230ea4038bc 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -37,8 +37,7 @@ use polkadot_node_subsystem::{ }; use polkadot_node_subsystem_util::{self as util, MIN_GOSSIP_PEERS}; use polkadot_primitives::v2::{ - AuthorityDiscoveryId, BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, - ValidatorSignature, + BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, ValidatorSignature, }; use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque}; @@ -355,7 +354,7 @@ struct CandidateEntry { messages: HashMap, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] enum MessageSource { Peer(PeerId), Local, @@ -398,13 +397,8 @@ impl State { }, NetworkBridgeEvent::NewGossipTopology(topology) => { let 
session = topology.session; - self.handle_new_session_topology( - ctx, - metrics, - session, - SessionTopology::from(topology), - ) - .await; + self.handle_new_session_topology(ctx, session, SessionTopology::from(topology)) + .await; }, NetworkBridgeEvent::PeerViewChange(peer_id, view) => { self.handle_peer_view_change(ctx, metrics, peer_id, view).await; @@ -553,7 +547,6 @@ impl State { &mut self, ctx: &mut (impl SubsystemContext + overseer::SubsystemContext), - metrics: &Metrics, session: SessionIndex, topology: SessionTopology, ) { @@ -983,14 +976,24 @@ impl State { // Invariant: to our knowledge, none of the peers except for the `source` know about the assignment. metrics.on_assignment_imported(); + let topology = self.topologies.get_topology(entry.session); + + let required_routing = if source == MessageSource::Local { + RequiredRouting::GridXY + } else { + topology.map_or(RequiredRouting::PendingTopology, |t| { + t.required_routing_for(validator_index) + }) + }; + match entry.candidates.get_mut(claimed_candidate_index as usize) { Some(candidate_entry) => { // set the approval state for validator_index to Assigned // unless the approval state is set already candidate_entry.messages.entry(validator_index).or_insert_with(|| { - // TODO [now]: do routing. + // TODO [now]: do random routing. MessageState { - required_routing: RequiredRouting::None, + required_routing, random_routing: 0, approval_state: ApprovalState::Assigned(assignment.cert.clone()), } @@ -1006,22 +1009,33 @@ impl State { }, } - // Dispatch a ApprovalDistributionV1Message::Assignment(assignment, candidate_index) - // to all peers in the BlockEntry's known_by set who know about the block, - // excluding the peer in the source, if source has kind MessageSource::Peer. 
- let maybe_peer_id = source.peer_id(); + let topology = match topology { + None => return, + Some(t) => t, + }; + + if required_routing.is_empty() { + return + } + + // Dispatch the message to all peers in the routing set which + // know the block. + // + // If the topology isn't known yet (race with networking subsystems) + // then messages will be sent when we get it. + + let assignments = vec![(assignment, claimed_candidate_index)]; + let topology_filter = topology.peer_filter(required_routing); + let source_peer = source.peer_id(); + let peers = entry .known_by .keys() + .filter(|p| topology_filter(p)) + .filter(|p| source_peer.as_ref().map_or(true, |source| &source != p)) .cloned() - .filter(|key| maybe_peer_id.as_ref().map_or(true, |id| id != key)) .collect::>(); - let assignments = vec![(assignment, claimed_candidate_index)]; - - // TODO [now]: make use of topology - let peers = util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); - // Add the metadata of the assignment to the knowledge of each peer. 
for peer in peers.iter() { // we already filtered peers above, so this should always be Some From 2e7d0470bce166f4a3b59712958d19c2e2bfd308 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 23 Mar 2022 23:20:41 -0500 Subject: [PATCH 27/72] finish grid topology implementation --- node/network/approval-distribution/src/lib.rs | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 3230ea4038bc..66477ff9c0d7 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -35,7 +35,6 @@ use polkadot_node_subsystem::{ overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, SubsystemError, }; -use polkadot_node_subsystem_util::{self as util, MIN_GOSSIP_PEERS}; use polkadot_primitives::v2::{ BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, ValidatorSignature, }; @@ -1206,7 +1205,7 @@ impl State { // Invariant: to our knowledge, none of the peers except for the `source` know about the approval. 
metrics.on_approval_imported(); - match entry.candidates.get_mut(candidate_index as usize) { + let required_routing = match entry.candidates.get_mut(candidate_index as usize) { Some(candidate_entry) => { // set the approval state for validator_index to Approved // it should be in assigned state already @@ -1216,7 +1215,6 @@ impl State { required_routing, random_routing, }) => { - // TODO [now]: distribute candidate_entry.messages.insert( validator_index, MessageState { @@ -1228,6 +1226,8 @@ impl State { random_routing, }, ); + + required_routing }, Some(_) => { unreachable!( @@ -1243,6 +1243,8 @@ impl State { ?validator_index, "Importing an approval we don't have an assignment for", ); + + return; }, } }, @@ -1254,23 +1256,33 @@ impl State { ?validator_index, "Expected a candidate entry on import_and_circulate_approval", ); + + return; }, - } + }; // Dispatch a ApprovalDistributionV1Message::Approval(vote) - // to all peers in the BlockEntry's known_by set who know about the block, - // excluding the peer in the source, if source has kind MessageSource::Peer. - let maybe_peer_id = source.peer_id(); + // to all peers required by the topology, with the exception of the source peer. + + let topology = match self.topologies.get_topology(entry.session) { + Some(t) => t, + None => return, + }; + + if required_routing.is_empty() { + return + } + + let topology_filter = topology.peer_filter(required_routing); + let source_peer = source.peer_id(); let peers = entry .known_by .keys() + .filter(|p| topology_filter(p)) + .filter(|p| source_peer.as_ref().map_or(true, |source| &source != p)) .cloned() - .filter(|key| maybe_peer_id.as_ref().map_or(true, |id| id != key)) .collect::>(); - // TODO [now]: just send to peers we've sent assignments to. - let peers = util::choose_random_subset(|e| true, peers, MIN_GOSSIP_PEERS); - // Add the metadata of the assignment to the knowledge of each peer. 
for peer in peers.iter() { // we already filtered peers above, so this should always be Some @@ -1279,8 +1291,8 @@ impl State { } } - let approvals = vec![vote]; if !peers.is_empty() { + let approvals = vec![vote]; gum::trace!( target: LOG_TARGET, ?block_hash, From 8e016137337fab8a8db7baf9f29e26615ed91237 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 12:59:59 -0500 Subject: [PATCH 28/72] refactor network bridge slightly --- node/network/bridge/src/lib.rs | 63 +++++++++++++++++----------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index d2df8bac5fd4..0ca57f044c8b 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -31,7 +31,7 @@ use polkadot_node_network_protocol::{ }; use polkadot_node_subsystem_util::metrics::{self, prometheus}; use polkadot_overseer::gen::{OverseerError, Subsystem}; -use polkadot_primitives::v2::{BlockNumber, Hash}; +use polkadot_primitives::v2::{AuthorityDiscoveryId, BlockNumber, Hash, ValidatorIndex}; use polkadot_subsystem::{ errors::{SubsystemError, SubsystemResult}, messages::{ @@ -49,6 +49,7 @@ pub use polkadot_node_network_protocol::peer_set::{peer_sets_info, IsAuthority}; use std::{ collections::{hash_map, HashMap}, + iter::ExactSizeIterator, sync::Arc, }; @@ -605,37 +606,15 @@ where "Gossip topology has changed", ); - let ads = &mut authority_discovery_service; - let mut gossip_peers_x = HashMap::with_capacity(our_neighbors_x.len()); - let mut gossip_peers_y = HashMap::with_capacity(our_neighbors_y.len()); - - for (authority, validator_index) in our_neighbors_x { - let addr = get_peer_id_by_authority_id( - ads, - authority.clone(), - ).await; - - if let Some(peer_id) = addr { - gossip_peers_x.insert(authority, TopologyPeerInfo { - peer_ids: vec![peer_id], - validator_index, - }); - } - } + let gossip_peers_x = update_gossip_peers_1d( + &mut authority_discovery_service, + our_neighbors_x, + 
).await; - for (authority, validator_index) in our_neighbors_y { - let addr = get_peer_id_by_authority_id( - ads, - authority.clone(), - ).await; - - if let Some(peer_id) = addr { - gossip_peers_y.insert(authority, TopologyPeerInfo { - peer_ids: vec![peer_id], - validator_index, - }); - } - } + let gossip_peers_y = update_gossip_peers_1d( + &mut authority_discovery_service, + our_neighbors_y, + ).await; dispatch_validation_event_to_all_unbounded( NetworkBridgeEvent::NewGossipTopology( @@ -655,6 +634,28 @@ where } } +async fn update_gossip_peers_1d( + ads: &mut AD, + neighbors: N, +) -> HashMap +where + AD: validator_discovery::AuthorityDiscovery, + N: IntoIterator, + N::IntoIter: std::iter::ExactSizeIterator, +{ + let neighbors = neighbors.into_iter(); + let mut peers = HashMap::with_capacity(neighbors.len()); + for (authority, validator_index) in neighbors { + let addr = get_peer_id_by_authority_id(ads, authority.clone()).await; + + if let Some(peer_id) = addr { + peers.insert(authority, TopologyPeerInfo { peer_ids: vec![peer_id], validator_index }); + } + } + + peers +} + async fn handle_network_messages( mut sender: impl SubsystemSender, mut network_service: impl Network, From d37f2f275280b1ec2419bab4e7cbf70dba424306 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 13:13:32 -0500 Subject: [PATCH 29/72] issue connection requests on all past/present/future --- node/network/gossip-support/src/lib.rs | 39 ++++++++++++++++++-------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index 05c7b3f22b79..cabd5c056b1d 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -243,9 +243,6 @@ where self.last_session_index = Some(session_index); } - let our_index = - ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await?; - // Connect to authorities from the past/present/future. 
// // This is maybe not the right place for this logic to live, @@ -260,17 +257,18 @@ where { let mut connections = authorities_past_present_future(ctx, leaf).await?; - // ... ignoring our own identity. - if let Some(pos) = connections - .iter() - .position(|v| v == &session_info.discovery_keys[our_index]) - { - connections.remove(pos); + // Remove all of our locally controlled validator indices so we don't connect to ourself. + // If we control none of them, don't issue connection requests - we're outside + // of the 'clique' of recent validators. + if remove_all_controlled(&self.keystore, &mut connections).await != 0 { + self.issue_connection_request(ctx, connections).await; } - - self.issue_connection_request(ctx, connections).await; } + // Gossip topology is only relevant for authorities in the current session. + let our_index = + ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await?; + if is_new_session { self.update_authority_status_metrics(&session_info).await; @@ -475,6 +473,25 @@ async fn ensure_i_am_an_authority( Err(util::Error::NotAValidator) } +/// Filter out all controlled keys in the given set. Returns the number of keys removed. +async fn remove_all_controlled( + keystore: &SyncCryptoStorePtr, + authorities: &mut Vec, +) -> usize { + let mut to_remove = Vec::new(); + for (i, v) in authorities.iter().enumerate() { + if CryptoStore::has_keys(&**keystore, &[(v.to_raw_vec(), AuthorityDiscoveryId::ID)]).await { + to_remove.push(i); + } + } + + for i in to_remove.iter().rev().copied() { + authorities.remove(i); + } + + to_remove.len() +} + /// We partition the list of all sorted `authorities` into `sqrt(len)` groups of `sqrt(len)` size /// and form a matrix where each validator is connected to all validators in its row and column. 
/// This is similar to `[web3]` research proposed topology, except for the groups are not parachain From 1c39537748c9a75a53115962721ebd89f772489b Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 13:13:36 -0500 Subject: [PATCH 30/72] fmt --- node/network/approval-distribution/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 66477ff9c0d7..9e3bd6e8c01a 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -1244,7 +1244,7 @@ impl State { "Importing an approval we don't have an assignment for", ); - return; + return }, } }, @@ -1257,7 +1257,7 @@ impl State { "Expected a candidate entry on import_and_circulate_approval", ); - return; + return }, }; From 3f5b6c3f3bb3030af04dc91c4f65ebfc94b5262e Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 13:15:21 -0500 Subject: [PATCH 31/72] address grumbles --- node/network/bitfield-distribution/src/lib.rs | 1 + node/network/statement-distribution/src/lib.rs | 1 + node/subsystem-types/src/messages/network_bridge_event.rs | 2 +- roadmap/implementers-guide/src/types/network.md | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index f01557781b30..c3225a7f7f33 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -524,6 +524,7 @@ async fn handle_network_msg( state.peer_views.remove(&peer); }, NetworkBridgeEvent::NewGossipTopology(topology) => { + // Combine all peers in the x & y direction as we don't make any distinction. 
let peers: HashSet = topology .our_neighbors_x .values() diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index d438af50a70c..2829e07f8e32 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -1624,6 +1624,7 @@ async fn handle_network_update( } }, NetworkBridgeEvent::NewGossipTopology(topology) => { + // Combine all peers in the x & y direction as we don't make any distinction. let new_peers: HashSet = topology .our_neighbors_x .values() diff --git a/node/subsystem-types/src/messages/network_bridge_event.rs b/node/subsystem-types/src/messages/network_bridge_event.rs index 86f449580487..91facbc6fe1f 100644 --- a/node/subsystem-types/src/messages/network_bridge_event.rs +++ b/node/subsystem-types/src/messages/network_bridge_event.rs @@ -30,7 +30,7 @@ pub struct TopologyPeerInfo { /// The validator's known peer IDs. pub peer_ids: Vec, /// The index of the validator in the discovery keys of the corresponding - /// `SessionInfo`. + /// `SessionInfo`. This can extend _beyond_ the set of active parachain validators. pub validator_index: ValidatorIndex, } diff --git a/roadmap/implementers-guide/src/types/network.md b/roadmap/implementers-guide/src/types/network.md index 772f040b764a..34875816ca34 100644 --- a/roadmap/implementers-guide/src/types/network.md +++ b/roadmap/implementers-guide/src/types/network.md @@ -155,7 +155,7 @@ struct TopologyPeerInfo { /// The validator's known peer IDs. peer_ids: Vec, /// The index of the validator in the discovery keys of the corresponding - /// `SessionInfo`. + /// `SessionInfo`. This can extend _beyond_ the set of active parachain validators. 
validator_index: ValidatorIndex, } From 481f72ca50d2e1138b86d90f736ce5564d5946ea Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 13:38:08 -0500 Subject: [PATCH 32/72] tighten invariants in unify_with_peer --- node/network/approval-distribution/src/lib.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 9e3bd6e8c01a..4a68ce56cdbc 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -1337,6 +1337,15 @@ impl State { _ => break, }; + // Any peer which is in the `known_by` set has already been + // sent all messages it's meant to get for that block and all + // in-scope prior blocks. + if entry.known_by.contains_key(&peer_id) { + break + } + + let peer_knowledge = entry.known_by.entry(peer_id.clone()).or_default(); + let topology = match topologies.get_topology(entry.session) { Some(t) => t, None => { @@ -1350,8 +1359,6 @@ impl State { }, }; - let peer_knowledge = entry.known_by.entry(peer_id.clone()).or_default(); - // Iterate all messages in all candidates. for (candidate_index, validator, message_state) in entry.candidates.iter_mut().enumerate().flat_map(|(c_i, c)| { @@ -1406,14 +1413,6 @@ impl State { } } - // If peer's knowledge is complete relative to our knowledge at one block, - // it's complete in its ancestors too. 
- - let sent_after = assignments_to_send.len() + approvals_to_send.len(); - if sent_before == sent_after { - break - } - block = entry.parent_hash.clone(); } } From fc43f4357c287d4e35a7188702e89eaf3fbc513e Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 15:27:57 -0500 Subject: [PATCH 33/72] implement random propagation --- Cargo.lock | 2 + node/network/approval-distribution/Cargo.toml | 2 + node/network/approval-distribution/src/lib.rs | 199 +++++++++++++----- 3 files changed, 147 insertions(+), 56 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 729c685a592f..665cba52dd78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6332,6 +6332,8 @@ dependencies = [ "polkadot-node-subsystem-test-helpers", "polkadot-node-subsystem-util", "polkadot-primitives", + "rand 0.8.5", + "rand_chacha 0.3.1", "rand_core 0.5.1", "schnorrkel", "sp-core", diff --git a/node/network/approval-distribution/Cargo.toml b/node/network/approval-distribution/Cargo.toml index 749e47227a12..9aa5f0bfbeed 100644 --- a/node/network/approval-distribution/Cargo.toml +++ b/node/network/approval-distribution/Cargo.toml @@ -10,6 +10,7 @@ polkadot-node-network-protocol = { path = "../protocol" } polkadot-node-subsystem = { path = "../../subsystem" } polkadot-node-subsystem-util = { path = "../../subsystem-util" } polkadot-primitives = { path = "../../../primitives" } +rand = "0.8" futures = "0.3.21" gum = { package = "tracing-gum", path = "../../gum" } @@ -23,5 +24,6 @@ polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" } assert_matches = "1.4.0" schnorrkel = { version = "0.9.1", default-features = false } rand_core = "0.5.1" # should match schnorrkel +rand_chacha = "0.3.1" env_logger = "0.9.0" log = "0.4.13" diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 4a68ce56cdbc..74d9c628cfd3 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs 
@@ -21,6 +21,7 @@ #![warn(missing_docs)] use futures::{channel::oneshot, FutureExt as _}; +use rand::{Rng, CryptoRng, SeedableRng}; use polkadot_node_network_protocol::{ v1 as protocol_v1, PeerId, UnifiedReputationChange as Rep, View, }; @@ -60,6 +61,19 @@ const BENEFIT_VALID_MESSAGE: Rep = Rep::BenefitMinor("Peer sent a valid message" const BENEFIT_VALID_MESSAGE_FIRST: Rep = Rep::BenefitMinorFirst("Valid message with new information"); + +/// The number of peers to randomly propagate messages to. +const RANDOM_CIRCULATION: usize = 8; +/// The sample rate for randomly propagating messages. This +/// reduces leftward skew of the binomial distribution but also +/// introduces a bias towards peers who get a block earlier +/// than others. +const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS; + +/// How far unfinalized a block must be before validators broadcast +/// their messages to all peers. +const AGGRESSIVE_PROPAGATION_THRESHOLD: usize = 10; + /// The Approval Distribution subsystem. pub struct ApprovalDistribution { metrics: Metrics, @@ -335,13 +349,37 @@ impl RequiredRouting { } } +#[derive(Debug, Default, Clone, Copy)] +struct RandomRouting { + // The number of peers to target. + target: usize, + // The number of peers this has been sent to. + sent: usize, +} + +impl RandomRouting { + fn sample(&self, n_peers_total: usize, rng: &mut (impl CryptoRng + Rng)) -> bool { + if n_peers_total == 0 || self.sent >= self.target { + false + } else if RANDOM_SAMPLE_RATE > n_peers_total { + true + } else { + rng.gen_ratio(RANDOM_SAMPLE_RATE as _, n_peers_total as _) + } + } + + fn inc_sent(&mut self) { + self.sent += 1 + } +} + // routing state bundled with messages for the candidate. Corresponding assignments // and approvals are stored together and should be routed in the same way, with // assignments preceding approvals in all cases. 
#[derive(Debug)] struct MessageState { required_routing: RequiredRouting, - random_routing: usize, // Number peers to target in random routing. + random_routing: RandomRouting, approval_state: ApprovalState, } @@ -380,6 +418,7 @@ impl State { + overseer::SubsystemContext), metrics: &Metrics, event: NetworkBridgeEvent, + rng: &mut (impl CryptoRng + Rng), ) { match event { NetworkBridgeEvent::PeerConnected(peer_id, role, _) => { @@ -400,7 +439,7 @@ impl State { .await; }, NetworkBridgeEvent::PeerViewChange(peer_id, view) => { - self.handle_peer_view_change(ctx, metrics, peer_id, view).await; + self.handle_peer_view_change(ctx, metrics, peer_id, view, rng).await; }, NetworkBridgeEvent::OurViewChange(view) => { gum::trace!(target: LOG_TARGET, ?view, "Own view change"); @@ -423,7 +462,7 @@ impl State { }); }, NetworkBridgeEvent::PeerMessage(peer_id, msg) => { - self.process_incoming_peer_message(ctx, metrics, peer_id, msg).await; + self.process_incoming_peer_message(ctx, metrics, peer_id, msg, rng).await; }, } } @@ -434,6 +473,7 @@ impl State { + overseer::SubsystemContext), metrics: &Metrics, metas: Vec, + rng: &mut (impl CryptoRng + Rng), ) { let mut new_hashes = HashSet::new(); for meta in &metas { @@ -480,8 +520,10 @@ impl State { metrics, &mut self.blocks, &self.topologies, + self.peer_data.len(), peer_id.clone(), view_intersection, + rng, ) .await; } @@ -524,6 +566,7 @@ impl State { MessageSource::Peer(peer_id), assignment, claimed_index, + rng, ) .await; }, @@ -661,6 +704,7 @@ impl State { metrics: &Metrics, peer_id: PeerId, msg: protocol_v1::ApprovalDistributionMessage, + rng: &mut (impl CryptoRng + Rng), ) { match msg { protocol_v1::ApprovalDistributionMessage::Assignments(assignments) => { @@ -699,6 +743,7 @@ impl State { MessageSource::Peer(peer_id.clone()), assignment, claimed_index, + rng, ) .await; } @@ -751,6 +796,7 @@ impl State { metrics: &Metrics, peer_id: PeerId, view: View, + rng: &mut (impl CryptoRng + Rng), ) { gum::trace!(target: LOG_TARGET, 
?view, "Peer view change"); let finalized_number = view.finalized_number; @@ -783,8 +829,10 @@ impl State { metrics, &mut self.blocks, &self.topologies, + self.peer_data.len(), peer_id.clone(), view, + rng, ) .await; } @@ -816,6 +864,7 @@ impl State { source: MessageSource, assignment: IndirectAssignmentCert, claimed_candidate_index: CandidateIndex, + rng: &mut (impl CryptoRng + Rng), ) { let block_hash = assignment.block_hash.clone(); let validator_index = assignment.validator; @@ -985,18 +1034,20 @@ impl State { }) }; - match entry.candidates.get_mut(claimed_candidate_index as usize) { + let message_state = match entry.candidates.get_mut(claimed_candidate_index as usize) { Some(candidate_entry) => { // set the approval state for validator_index to Assigned // unless the approval state is set already candidate_entry.messages.entry(validator_index).or_insert_with(|| { - // TODO [now]: do random routing. MessageState { required_routing, - random_routing: 0, + random_routing: RandomRouting { + target: RANDOM_CIRCULATION, + sent: 0, + }, approval_state: ApprovalState::Assigned(assignment.cert.clone()), } - }); + }) }, None => { gum::warn!( @@ -1005,18 +1056,11 @@ impl State { ?claimed_candidate_index, "Expected a candidate entry on import_and_circulate_assignment", ); - }, - } - let topology = match topology { - None => return, - Some(t) => t, + return + }, }; - if required_routing.is_empty() { - return - } - // Dispatch the message to all peers in the routing set which // know the block. // @@ -1024,14 +1068,35 @@ impl State { // then messages will be sent when we get it. 
let assignments = vec![(assignment, claimed_candidate_index)]; - let topology_filter = topology.peer_filter(required_routing); + let topology_filter = topology.as_ref().map(|t| t.peer_filter(required_routing)); + let n_peers_total = self.peer_data.len(); let source_peer = source.peer_id(); + let mut peer_filter = move |peer| { + if Some(peer) == source_peer.as_ref() { + return false + } + + if let Some(true) = topology_filter.as_ref().map(|f| f(peer)) { + return true + } + + // Note: at this point, we haven't received the message from any peers + // other than the source peer, and we just got it, so we haven't sent it + // to any peers either. + let route_random = message_state.random_routing.sample(n_peers_total, rng); + + if route_random { + message_state.random_routing.inc_sent(); + } + + route_random + }; + let peers = entry .known_by .keys() - .filter(|p| topology_filter(p)) - .filter(|p| source_peer.as_ref().map_or(true, |source| &source != p)) + .filter(|p| peer_filter(p)) .cloned() .collect::>(); @@ -1264,22 +1329,34 @@ impl State { // Dispatch a ApprovalDistributionV1Message::Approval(vote) // to all peers required by the topology, with the exception of the source peer. - let topology = match self.topologies.get_topology(entry.session) { - Some(t) => t, - None => return, - }; + let topology = self.topologies.get_topology(entry.session); + let topology_filter = topology.as_ref().map(|t| t.peer_filter(required_routing)); + let source_peer = source.peer_id(); - if required_routing.is_empty() { - return - } + let message_subject = &message_subject; + let peer_filter = move |peer, knowledge: &PeerKnowledge| { + if Some(peer) == source_peer.as_ref() { + return false; + } + + // Here we're leaning on a few behaviors of assignment propagation: + // 1. At this point, the only peer we're aware of which has the approval + // message is the source peer. + // 2. 
We have sent the assignment message to every peer in the required routing + // which is aware of this block _unless_ the peer we originally received the + // assignment from was part of the required routing. In that case, we've sent + // the assignment to all aware peers in the required routing _except_ the original + // source of the assignment. Hence the `in_topology_check`. + // 3. Any randomly selected peers have been sent the assignment already. + let in_topology = topology_filter.as_ref().map_or(false, |f| f(peer)); + in_topology || knowledge.sent.contains(message_subject, MessageKind::Assignment) + }; - let topology_filter = topology.peer_filter(required_routing); - let source_peer = source.peer_id(); let peers = entry .known_by - .keys() - .filter(|p| topology_filter(p)) - .filter(|p| source_peer.as_ref().map_or(true, |source| &source != p)) + .iter() + .filter(|(p, k)| peer_filter(p, k)) + .map(|(p, _)| p) .cloned() .collect::>(); @@ -1318,8 +1395,10 @@ impl State { metrics: &Metrics, entries: &mut HashMap, topologies: &SessionTopologies, + total_peers: usize, peer_id: PeerId, view: View, + rng: &mut (impl CryptoRng + Rng), ) { metrics.on_unify_with_peer(); let _timer = metrics.time_unify_with_peer(); @@ -1331,7 +1410,6 @@ impl State { for head in view.into_iter() { let mut block = head; loop { - let sent_before = assignments_to_send.len() + approvals_to_send.len(); let entry = match entries.get_mut(&block) { Some(entry) if entry.number > view_finalized_number => entry, _ => break, @@ -1346,32 +1424,35 @@ impl State { let peer_knowledge = entry.known_by.entry(peer_id.clone()).or_default(); - let topology = match topologies.get_topology(entry.session) { - Some(t) => t, - None => { - // The gossip topology for a recently entered session might be missing - // as we're still awaiting it from the network subsystems. - // - // We'll send required messages when we get it. 
- - block = entry.parent_hash.clone(); - continue - }, - }; + let topology = topologies.get_topology(entry.session); // Iterate all messages in all candidates. for (candidate_index, validator, message_state) in entry.candidates.iter_mut().enumerate().flat_map(|(c_i, c)| { c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v)) }) { - if message_state.required_routing.is_empty() { - continue - } + // Propagate the message to all peers in the required routing set OR + // randomly sample peers. + { + let topology_filter = topology.as_ref().map(|t| t.peer_filter(message_state.required_routing)); + + let random_routing = &mut message_state.random_routing; + let rng = &mut *rng; + let mut peer_filter = move |peer_id| { + let in_topology = topology_filter.as_ref().map_or(false, |f| f(peer_id)); + in_topology || { + let route_random = random_routing.sample(total_peers, rng); + if route_random { + random_routing.inc_sent(); + } + + route_random + } + }; - // Propagate the message to all peers in the required routing set. - let peer_filter = topology.peer_filter(message_state.required_routing); - if !peer_filter(&peer_id) { - continue + if !peer_filter(&peer_id) { + continue + } } let message_subject = @@ -1468,11 +1549,15 @@ impl ApprovalDistribution { Context: overseer::SubsystemContext, { let mut state = State::default(); - self.run_inner(ctx, &mut state).await + + // According to the docs of `rand`, this is a ChaCha12 RNG in practice + // and will always be chosen for strong performance and security properties. + let mut rng = rand::rngs::StdRng::from_entropy(); + self.run_inner(ctx, &mut state, &mut rng).await } /// Used for testing. 
- async fn run_inner(self, mut ctx: Context, state: &mut State) + async fn run_inner(self, mut ctx: Context, state: &mut State, rng: &mut (impl CryptoRng + Rng)) where Context: SubsystemContext, Context: overseer::SubsystemContext, @@ -1487,7 +1572,7 @@ impl ApprovalDistribution { }; match message { FromOverseer::Communication { msg } => - Self::handle_incoming(&mut ctx, state, msg, &self.metrics).await, + Self::handle_incoming(&mut ctx, state, msg, &self.metrics, rng).await, FromOverseer::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { .. })) => { @@ -1510,16 +1595,17 @@ impl ApprovalDistribution { state: &mut State, msg: ApprovalDistributionMessage, metrics: &Metrics, + rng: &mut (impl CryptoRng + Rng), ) where Context: SubsystemContext, Context: overseer::SubsystemContext, { match msg { ApprovalDistributionMessage::NetworkBridgeUpdateV1(event) => { - state.handle_network_msg(ctx, metrics, event).await; + state.handle_network_msg(ctx, metrics, event, rng).await; }, ApprovalDistributionMessage::NewBlocks(metas) => { - state.handle_new_blocks(ctx, metrics, metas).await; + state.handle_new_blocks(ctx, metrics, metas, rng).await; }, ApprovalDistributionMessage::DistributeAssignment(cert, candidate_index) => { gum::debug!( @@ -1536,6 +1622,7 @@ impl ApprovalDistribution { MessageSource::Local, cert, candidate_index, + rng, ) .await; }, From 023ed7abf3f0f275b8b56a62d66438a54b1ebc0d Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 15:42:18 -0500 Subject: [PATCH 34/72] refactor: extract required routing adjustment logic --- node/network/approval-distribution/src/lib.rs | 267 ++++++++++-------- 1 file changed, 146 insertions(+), 121 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 74d9c628cfd3..a1cf2aa7b39a 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -21,7 +21,6 @@ #![warn(missing_docs)] use 
futures::{channel::oneshot, FutureExt as _}; -use rand::{Rng, CryptoRng, SeedableRng}; use polkadot_node_network_protocol::{ v1 as protocol_v1, PeerId, UnifiedReputationChange as Rep, View, }; @@ -39,6 +38,7 @@ use polkadot_node_subsystem::{ use polkadot_primitives::v2::{ BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, ValidatorSignature, }; +use rand::{CryptoRng, Rng, SeedableRng}; use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque}; use self::metrics::Metrics; @@ -61,7 +61,6 @@ const BENEFIT_VALID_MESSAGE: Rep = Rep::BenefitMinor("Peer sent a valid message" const BENEFIT_VALID_MESSAGE_FIRST: Rep = Rep::BenefitMinorFirst("Valid message with new information"); - /// The number of peers to randomly propagate messages to. const RANDOM_CIRCULATION: usize = 8; /// The sample rate for randomly propagating messages. This @@ -595,106 +594,15 @@ impl State { self.topologies.insert_topology(session, topology); let topology = self.topologies.get_topology(session).expect("just inserted above; qed"); - let mut peer_assignments = HashMap::new(); - let mut peer_approvals = HashMap::new(); - - // Iterate all blocks in the session, producing payloads - // for each connected peer. - for (block_hash, block_entry) in &mut self.blocks { - if block_entry.session != session { - continue - } - - // Iterate all messages in all candidates. - for (candidate_index, validator, message_state) in block_entry - .candidates - .iter_mut() - .enumerate() - .flat_map(|(c_i, c)| c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v))) - { - if message_state.required_routing == RequiredRouting::PendingTopology { - message_state.required_routing = - topology.required_routing_for(validator.clone()); - } - - if message_state.required_routing.is_empty() { - continue - } - - // Propagate the message to all peers in the required routing set. 
- let peer_filter = topology.peer_filter(message_state.required_routing); - let message_subject = - MessageSubject(block_hash.clone(), candidate_index, validator.clone()); - - let assignment_message = ( - IndirectAssignmentCert { - block_hash: block_hash.clone(), - validator: validator.clone(), - cert: message_state.approval_state.assignment_cert().clone(), - }, - candidate_index, - ); - let approval_message = - message_state.approval_state.approval_signature().map(|signature| { - IndirectSignedApprovalVote { - block_hash: block_hash.clone(), - validator: validator.clone(), - candidate_index, - signature, - } - }); - - for (peer, peer_knowledge) in &mut block_entry.known_by { - if !peer_filter(peer) { - continue - } - - if !peer_knowledge.contains(&message_subject, MessageKind::Assignment) { - peer_knowledge - .sent - .insert(message_subject.clone(), MessageKind::Assignment); - peer_assignments - .entry(peer.clone()) - .or_insert_with(Vec::new) - .push(assignment_message.clone()); - } - - if let Some(approval_message) = approval_message.as_ref() { - if !peer_knowledge.contains(&message_subject, MessageKind::Approval) { - peer_knowledge - .sent - .insert(message_subject.clone(), MessageKind::Approval); - peer_approvals - .entry(peer.clone()) - .or_insert_with(Vec::new) - .push(approval_message.clone()); - } - } - } + adjust_required_routing_and_propagate( + ctx, + &mut self.blocks, + &self.topologies, + |block_entry| block_entry.session == session, + |required_routing, validator_index| if *required_routing == RequiredRouting::PendingTopology { + *required_routing = topology.required_routing_for(validator_index.clone()); } - } - - // Send messages in accumulated packets, assignments preceding approvals. 
- - for (peer, assignments_packet) in peer_assignments { - ctx.send_message(NetworkBridgeMessage::SendValidationMessage( - vec![peer], - protocol_v1::ValidationProtocol::ApprovalDistribution( - protocol_v1::ApprovalDistributionMessage::Assignments(assignments_packet), - ), - )) - .await; - } - - for (peer, approvals_packet) in peer_approvals { - ctx.send_message(NetworkBridgeMessage::SendValidationMessage( - vec![peer], - protocol_v1::ValidationProtocol::ApprovalDistribution( - protocol_v1::ApprovalDistributionMessage::Approvals(approvals_packet), - ), - )) - .await; - } + ).await; } async fn process_incoming_peer_message( @@ -1038,15 +946,10 @@ impl State { Some(candidate_entry) => { // set the approval state for validator_index to Assigned // unless the approval state is set already - candidate_entry.messages.entry(validator_index).or_insert_with(|| { - MessageState { - required_routing, - random_routing: RandomRouting { - target: RANDOM_CIRCULATION, - sent: 0, - }, - approval_state: ApprovalState::Assigned(assignment.cert.clone()), - } + candidate_entry.messages.entry(validator_index).or_insert_with(|| MessageState { + required_routing, + random_routing: RandomRouting { target: RANDOM_CIRCULATION, sent: 0 }, + approval_state: ApprovalState::Assigned(assignment.cert.clone()), }) }, None => { @@ -1093,12 +996,7 @@ impl State { route_random }; - let peers = entry - .known_by - .keys() - .filter(|p| peer_filter(p)) - .cloned() - .collect::>(); + let peers = entry.known_by.keys().filter(|p| peer_filter(p)).cloned().collect::>(); // Add the metadata of the assignment to the knowledge of each peer. 
for peer in peers.iter() { @@ -1336,7 +1234,7 @@ impl State { let message_subject = &message_subject; let peer_filter = move |peer, knowledge: &PeerKnowledge| { if Some(peer) == source_peer.as_ref() { - return false; + return false } // Here we're leaning on a few behaviors of assignment propagation: @@ -1434,12 +1332,15 @@ impl State { // Propagate the message to all peers in the required routing set OR // randomly sample peers. { - let topology_filter = topology.as_ref().map(|t| t.peer_filter(message_state.required_routing)); + let topology_filter = topology + .as_ref() + .map(|t| t.peer_filter(message_state.required_routing)); let random_routing = &mut message_state.random_routing; let rng = &mut *rng; let mut peer_filter = move |peer_id| { - let in_topology = topology_filter.as_ref().map_or(false, |f| f(peer_id)); + let in_topology = + topology_filter.as_ref().map_or(false, |f| f(peer_id)); in_topology || { let route_random = random_routing.sample(total_peers, rng); if route_random { @@ -1520,6 +1421,126 @@ impl State { } } +// This adjusts the required routing of messages in blocks that pass the block filter +// according to the modifier function given. +// +// Then, if the topology is known, this progates messages to all peers in the required +// routing set which are aware of the block. Peers which are unaware of the block +// will have the message sent when it enters their view in `unify_with_peer`. +// +// Note that the required routing of a message can be modified even if the +// topology is unknown yet. 
+async fn adjust_required_routing_and_propagate( + ctx: &mut (impl SubsystemContext + + overseer::SubsystemContext), + blocks: &mut HashMap, + topologies: &SessionTopologies, + block_filter: impl Fn(&BlockEntry) -> bool, + routing_modifier: impl Fn(&mut RequiredRouting, &ValidatorIndex), +) { + let mut peer_assignments = HashMap::new(); + let mut peer_approvals = HashMap::new(); + + // Iterate all blocks in the session, producing payloads + // for each connected peer. + for (block_hash, block_entry) in blocks { + if !block_filter(&block_entry) { + continue + } + + // Iterate all messages in all candidates. + for (candidate_index, validator, message_state) in block_entry + .candidates + .iter_mut() + .enumerate() + .flat_map(|(c_i, c)| c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v))) + { + let prev_routing = message_state.required_routing; + routing_modifier(&mut message_state.required_routing, validator); + + if message_state.required_routing.is_empty() || + message_state.required_routing == prev_routing + { + continue + } + + let topology = match topologies.get_topology(block_entry.session) { + Some(t) => t, + None => continue, + }; + + // Propagate the message to all peers in the required routing set. 
+ let peer_filter = topology.peer_filter(message_state.required_routing); + let message_subject = + MessageSubject(block_hash.clone(), candidate_index, validator.clone()); + + let assignment_message = ( + IndirectAssignmentCert { + block_hash: block_hash.clone(), + validator: validator.clone(), + cert: message_state.approval_state.assignment_cert().clone(), + }, + candidate_index, + ); + let approval_message = + message_state.approval_state.approval_signature().map(|signature| { + IndirectSignedApprovalVote { + block_hash: block_hash.clone(), + validator: validator.clone(), + candidate_index, + signature, + } + }); + + for (peer, peer_knowledge) in &mut block_entry.known_by { + if !peer_filter(peer) { + continue + } + + if !peer_knowledge.contains(&message_subject, MessageKind::Assignment) { + peer_knowledge.sent.insert(message_subject.clone(), MessageKind::Assignment); + peer_assignments + .entry(peer.clone()) + .or_insert_with(Vec::new) + .push(assignment_message.clone()); + } + + if let Some(approval_message) = approval_message.as_ref() { + if !peer_knowledge.contains(&message_subject, MessageKind::Approval) { + peer_knowledge.sent.insert(message_subject.clone(), MessageKind::Approval); + peer_approvals + .entry(peer.clone()) + .or_insert_with(Vec::new) + .push(approval_message.clone()); + } + } + } + } + } + + // Send messages in accumulated packets, assignments preceding approvals. 
+ + for (peer, assignments_packet) in peer_assignments { + ctx.send_message(NetworkBridgeMessage::SendValidationMessage( + vec![peer], + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(assignments_packet), + ), + )) + .await; + } + + for (peer, approvals_packet) in peer_approvals { + ctx.send_message(NetworkBridgeMessage::SendValidationMessage( + vec![peer], + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(approvals_packet), + ), + )) + .await; + } +} + /// Modify the reputation of a peer based on its behavior. async fn modify_reputation( ctx: &mut (impl SubsystemContext @@ -1557,8 +1578,12 @@ impl ApprovalDistribution { } /// Used for testing. - async fn run_inner(self, mut ctx: Context, state: &mut State, rng: &mut (impl CryptoRng + Rng)) - where + async fn run_inner( + self, + mut ctx: Context, + state: &mut State, + rng: &mut (impl CryptoRng + Rng), + ) where Context: SubsystemContext, Context: overseer::SubsystemContext, { From 38f1e8235a4eab8dff68089f44393ca06cc00bc5 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 15:59:58 -0500 Subject: [PATCH 35/72] some block-age logic --- node/network/approval-distribution/src/lib.rs | 69 ++++++++++++++++--- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index a1cf2aa7b39a..20972ca34cba 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -71,7 +71,7 @@ const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS /// How far unfinalized a block must be before validators broadcast /// their messages to all peers. -const AGGRESSIVE_PROPAGATION_THRESHOLD: usize = 10; +const AGGRESSIVE_PROPAGATION_THRESHOLD: BlockNumber = 10; /// The Approval Distribution subsystem. 
pub struct ApprovalDistribution { @@ -137,7 +137,7 @@ impl SessionTopology { RequiredRouting::GridX => (true, false), RequiredRouting::GridY => (false, true), RequiredRouting::GridXY => (true, true), - RequiredRouting::None | RequiredRouting::PendingTopology => (false, false), + RequiredRouting::None | RequiredRouting::PendingTopology(_) => (false, false), }; move |peer| { @@ -327,7 +327,9 @@ enum RequiredRouting { /// We don't know yet, because we're waiting for topology info /// (race condition between learning about the first blocks in a new session /// and getting the topology for that session) - PendingTopology, + /// + /// The `bool` here indicates whether this is a local message or not. + PendingTopology(bool), /// Propagate to all peers sharing either the X or Y dimension of the grid. GridXY, /// Propagate to all peers sharing the X dimension of the grid. @@ -339,10 +341,18 @@ enum RequiredRouting { } impl RequiredRouting { + fn for_local(block_age: BlockNumber) -> Self { + if block_age >= AGGRESSIVE_PROPAGATION_THRESHOLD { + RequiredRouting::GridXY // TODO [now]: ALL variant + } else { + RequiredRouting::GridXY + } + } + // Whether the required routing set is definitely empty. 
fn is_empty(self) -> bool { match self { - RequiredRouting::PendingTopology | RequiredRouting::None => true, + RequiredRouting::PendingTopology(_) | RequiredRouting::None => true, _ => false, } } @@ -594,13 +604,19 @@ impl State { self.topologies.insert_topology(session, topology); let topology = self.topologies.get_topology(session).expect("just inserted above; qed"); + let blocks_by_number = &self.blocks_by_number; adjust_required_routing_and_propagate( ctx, &mut self.blocks, &self.topologies, |block_entry| block_entry.session == session, - |required_routing, validator_index| if *required_routing == RequiredRouting::PendingTopology { - *required_routing = topology.required_routing_for(validator_index.clone()); + |block_number, required_routing, validator_index| { + if *required_routing == RequiredRouting::PendingTopology(true) { + let block_age = block_age(blocks_by_number, block_number); + *required_routing = RequiredRouting::for_local(block_age); + } else if *required_routing == RequiredRouting::PendingTopology(false) { + *required_routing = topology.required_routing_for(validator_index.clone()); + } } ).await; } @@ -935,9 +951,10 @@ impl State { let topology = self.topologies.get_topology(entry.session); let required_routing = if source == MessageSource::Local { - RequiredRouting::GridXY + let block_age = block_age(&self.blocks_by_number, entry.number); + topology.map_or(RequiredRouting::PendingTopology(true), |_| RequiredRouting::for_local(block_age)) } else { - topology.map_or(RequiredRouting::PendingTopology, |t| { + topology.map_or(RequiredRouting::PendingTopology(false), |t| { t.required_routing_for(validator_index) }) }; @@ -1421,6 +1438,38 @@ impl State { } } +// Get the age of the given block number relative to the highest stored. 
+fn block_age( + blocks_by_number: &BTreeMap>, + block_number: BlockNumber, +) -> BlockNumber { + match blocks_by_number.iter().rev().last() { + None => { + gum::warn!( + target: LOG_TARGET, + block_number, + "Asked for block age compared to an empty list", + ); + + 0 + } + Some((most_recent, _)) => { + if *most_recent < block_number { + gum::warn!( + target: LOG_TARGET, + most_recent, + block_number, + "Asked for block age for block newer than most recent", + ); + + 0 + } else { + most_recent - block_number + } + } + } +} + // This adjusts the required routing of messages in blocks that pass the block filter // according to the modifier function given. // @@ -1436,7 +1485,7 @@ async fn adjust_required_routing_and_propagate( blocks: &mut HashMap, topologies: &SessionTopologies, block_filter: impl Fn(&BlockEntry) -> bool, - routing_modifier: impl Fn(&mut RequiredRouting, &ValidatorIndex), + routing_modifier: impl Fn(BlockNumber, &mut RequiredRouting, &ValidatorIndex), ) { let mut peer_assignments = HashMap::new(); let mut peer_approvals = HashMap::new(); @@ -1456,7 +1505,7 @@ async fn adjust_required_routing_and_propagate( .flat_map(|(c_i, c)| c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v))) { let prev_routing = message_state.required_routing; - routing_modifier(&mut message_state.required_routing, validator); + routing_modifier(block_entry.number, &mut message_state.required_routing, validator); if message_state.required_routing.is_empty() || message_state.required_routing == prev_routing From d216a2c8e1cef9412cad76102d413a0d03cc99f1 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 16:16:40 -0500 Subject: [PATCH 36/72] aggressively propagate messages when finality is slow --- node/network/approval-distribution/src/lib.rs | 79 +++++++++++-------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 20972ca34cba..0ac46547fa8f 
100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -71,7 +71,7 @@ const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS /// How far unfinalized a block must be before validators broadcast /// their messages to all peers. -const AGGRESSIVE_PROPAGATION_THRESHOLD: BlockNumber = 10; +const AGGRESSIVE_PROPAGATION_THRESHOLD: BlockNumber = 20; /// The Approval Distribution subsystem. pub struct ApprovalDistribution { @@ -129,19 +129,13 @@ impl SessionTopology { // Get a filter function based on this topology and the required routing // which returns `true` for peers that are within the required routing set // and false otherwise. - fn peer_filter<'a>( - &'a self, - required_routing: RequiredRouting, - ) -> impl Fn(&PeerId) -> bool + 'a { - let (grid_x, grid_y) = match required_routing { - RequiredRouting::GridX => (true, false), - RequiredRouting::GridY => (false, true), - RequiredRouting::GridXY => (true, true), - RequiredRouting::None | RequiredRouting::PendingTopology(_) => (false, false), - }; - - move |peer| { - (grid_x && self.peers_x.contains(peer)) || (grid_y && self.peers_y.contains(peer)) + fn route_to_peer(&self, required_routing: RequiredRouting, peer: &PeerId) -> bool { + match required_routing { + RequiredRouting::All => true, + RequiredRouting::GridX => self.peers_x.contains(peer), + RequiredRouting::GridY => self.peers_y.contains(peer), + RequiredRouting::GridXY => self.peers_x.contains(peer) || self.peers_y.contains(peer), + RequiredRouting::None | RequiredRouting::PendingTopology(_) => false, } } } @@ -330,6 +324,8 @@ enum RequiredRouting { /// /// The `bool` here indicates whether this is a local message or not. PendingTopology(bool), + /// Propagate to all peers of any kind. + All, /// Propagate to all peers sharing either the X or Y dimension of the grid. GridXY, /// Propagate to all peers sharing the X dimension of the grid. 
@@ -343,7 +339,7 @@ enum RequiredRouting { impl RequiredRouting { fn for_local(block_age: BlockNumber) -> Self { if block_age >= AGGRESSIVE_PROPAGATION_THRESHOLD { - RequiredRouting::GridXY // TODO [now]: ALL variant + RequiredRouting::All } else { RequiredRouting::GridXY } @@ -592,6 +588,24 @@ impl State { } } } + + let max_age = self.blocks_by_number.iter().rev().last().map(|(n, _)| *n); + if let Some(max_age) = max_age { + // For any blocks that have just become old, we trigger sending of our local messages to all peers. + // Note that this can only happen if finality is slow. + adjust_required_routing_and_propagate( + ctx, + &mut self.blocks, + &self.topologies, + |block_entry| block_entry.number + AGGRESSIVE_PROPAGATION_THRESHOLD >= max_age, + |_, required_routing, _| { + if *required_routing == RequiredRouting::GridXY { + *required_routing = RequiredRouting::All + } + }, + ) + .await; + } } async fn handle_new_session_topology( @@ -617,8 +631,9 @@ impl State { } else if *required_routing == RequiredRouting::PendingTopology(false) { *required_routing = topology.required_routing_for(validator_index.clone()); } - } - ).await; + }, + ) + .await; } async fn process_incoming_peer_message( @@ -952,7 +967,9 @@ impl State { let required_routing = if source == MessageSource::Local { let block_age = block_age(&self.blocks_by_number, entry.number); - topology.map_or(RequiredRouting::PendingTopology(true), |_| RequiredRouting::for_local(block_age)) + topology.map_or(RequiredRouting::PendingTopology(true), |_| { + RequiredRouting::for_local(block_age) + }) } else { topology.map_or(RequiredRouting::PendingTopology(false), |t| { t.required_routing_for(validator_index) @@ -988,7 +1005,6 @@ impl State { // then messages will be sent when we get it. 
let assignments = vec![(assignment, claimed_candidate_index)]; - let topology_filter = topology.as_ref().map(|t| t.peer_filter(required_routing)); let n_peers_total = self.peer_data.len(); let source_peer = source.peer_id(); @@ -997,7 +1013,7 @@ impl State { return false } - if let Some(true) = topology_filter.as_ref().map(|f| f(peer)) { + if let Some(true) = topology.as_ref().map(|t| t.route_to_peer(required_routing, peer)) { return true } @@ -1245,7 +1261,6 @@ impl State { // to all peers required by the topology, with the exception of the source peer. let topology = self.topologies.get_topology(entry.session); - let topology_filter = topology.as_ref().map(|t| t.peer_filter(required_routing)); let source_peer = source.peer_id(); let message_subject = &message_subject; @@ -1263,7 +1278,7 @@ impl State { // the assignment to all aware peers in the required routing _except_ the original // source of the assignment. Hence the `in_topology_check`. // 3. Any randomly selected peers have been sent the assignment already. - let in_topology = topology_filter.as_ref().map_or(false, |f| f(peer)); + let in_topology = topology.map_or(false, |t| t.route_to_peer(required_routing, peer)); in_topology || knowledge.sent.contains(message_subject, MessageKind::Assignment) }; @@ -1349,15 +1364,13 @@ impl State { // Propagate the message to all peers in the required routing set OR // randomly sample peers. 
{ - let topology_filter = topology - .as_ref() - .map(|t| t.peer_filter(message_state.required_routing)); - let random_routing = &mut message_state.random_routing; + let required_routing = message_state.required_routing; let rng = &mut *rng; let mut peer_filter = move |peer_id| { - let in_topology = - topology_filter.as_ref().map_or(false, |f| f(peer_id)); + let in_topology = topology + .as_ref() + .map_or(false, |t| t.route_to_peer(required_routing, peer_id)); in_topology || { let route_random = random_routing.sample(total_peers, rng); if route_random { @@ -1452,8 +1465,8 @@ fn block_age( ); 0 - } - Some((most_recent, _)) => { + }, + Some((most_recent, _)) => if *most_recent < block_number { gum::warn!( target: LOG_TARGET, @@ -1465,8 +1478,7 @@ fn block_age( 0 } else { most_recent - block_number - } - } + }, } } @@ -1519,7 +1531,6 @@ async fn adjust_required_routing_and_propagate( }; // Propagate the message to all peers in the required routing set. - let peer_filter = topology.peer_filter(message_state.required_routing); let message_subject = MessageSubject(block_hash.clone(), candidate_index, validator.clone()); @@ -1542,7 +1553,7 @@ async fn adjust_required_routing_and_propagate( }); for (peer, peer_knowledge) in &mut block_entry.known_by { - if !peer_filter(peer) { + if !topology.route_to_peer(message_state.required_routing, peer) { continue } From 831192bd5df70beb7429d94deda44b1f889e344b Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 17:08:50 -0500 Subject: [PATCH 37/72] overhaul aggression system to have 3 levels --- node/network/approval-distribution/src/lib.rs | 193 ++++++++++-------- 1 file changed, 111 insertions(+), 82 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 0ac46547fa8f..bc5460de6152 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -69,9 +69,20 @@ const RANDOM_CIRCULATION: usize 
= 8; /// than others. const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS; -/// How far unfinalized a block must be before validators broadcast -/// their messages to all peers. -const AGGRESSIVE_PROPAGATION_THRESHOLD: BlockNumber = 20; +// A note on aggression thresholds: changes in propagation apply only to blocks which are the +// _direct descendants_ of the finalized block which are older than the given threshold, +// not to all blocks older than the threshold. Most likely, a few assignments struggle to +// be propagated in a single block and this holds up all of its descendants blocks. +// Accordingly, we only step on the gas for the block which is most obviously holding up finality. + +/// Aggression level 1: all validators send all their own messages to all peers. +const AGGRESSION_L1_THRESHOLD: BlockNumber = 10; + +/// Aggression level 2: L1 + all validators send all messages to all XY peers +const AGGRESSION_L2_THRESHOLD: BlockNumber = 25; + +/// Aggression level 3: last-ditch: all validators send all messages to all peers. +const AGGRESSION_L3_THRESHOLD: BlockNumber = 50; /// The Approval Distribution subsystem. 
pub struct ApprovalDistribution { @@ -114,7 +125,11 @@ struct SessionTopology { } impl SessionTopology { - fn required_routing_for(&self, validator_index: ValidatorIndex) -> RequiredRouting { + fn required_routing_for(&self, validator_index: ValidatorIndex, local: bool) -> RequiredRouting { + if local { + return RequiredRouting::GridXY; + } + let grid_x = self.validator_indices_x.contains(&validator_index); let grid_y = self.validator_indices_y.contains(&validator_index); @@ -135,7 +150,7 @@ impl SessionTopology { RequiredRouting::GridX => self.peers_x.contains(peer), RequiredRouting::GridY => self.peers_y.contains(peer), RequiredRouting::GridXY => self.peers_x.contains(peer) || self.peers_y.contains(peer), - RequiredRouting::None | RequiredRouting::PendingTopology(_) => false, + RequiredRouting::None | RequiredRouting::PendingTopology => false, } } } @@ -321,9 +336,7 @@ enum RequiredRouting { /// We don't know yet, because we're waiting for topology info /// (race condition between learning about the first blocks in a new session /// and getting the topology for that session) - /// - /// The `bool` here indicates whether this is a local message or not. - PendingTopology(bool), + PendingTopology, /// Propagate to all peers of any kind. All, /// Propagate to all peers sharing either the X or Y dimension of the grid. @@ -337,18 +350,10 @@ enum RequiredRouting { } impl RequiredRouting { - fn for_local(block_age: BlockNumber) -> Self { - if block_age >= AGGRESSIVE_PROPAGATION_THRESHOLD { - RequiredRouting::All - } else { - RequiredRouting::GridXY - } - } - // Whether the required routing set is definitely empty. 
fn is_empty(self) -> bool { match self { - RequiredRouting::PendingTopology(_) | RequiredRouting::None => true, + RequiredRouting::PendingTopology | RequiredRouting::None => true, _ => false, } } @@ -384,6 +389,7 @@ impl RandomRouting { #[derive(Debug)] struct MessageState { required_routing: RequiredRouting, + local: bool, random_routing: RandomRouting, approval_state: ApprovalState, } @@ -589,23 +595,7 @@ impl State { } } - let max_age = self.blocks_by_number.iter().rev().last().map(|(n, _)| *n); - if let Some(max_age) = max_age { - // For any blocks that have just become old, we trigger sending of our local messages to all peers. - // Note that this can only happen if finality is slow. - adjust_required_routing_and_propagate( - ctx, - &mut self.blocks, - &self.topologies, - |block_entry| block_entry.number + AGGRESSIVE_PROPAGATION_THRESHOLD >= max_age, - |_, required_routing, _| { - if *required_routing == RequiredRouting::GridXY { - *required_routing = RequiredRouting::All - } - }, - ) - .await; - } + self.enable_aggression(ctx, metrics).await; } async fn handle_new_session_topology( @@ -618,18 +608,14 @@ impl State { self.topologies.insert_topology(session, topology); let topology = self.topologies.get_topology(session).expect("just inserted above; qed"); - let blocks_by_number = &self.blocks_by_number; adjust_required_routing_and_propagate( ctx, &mut self.blocks, &self.topologies, |block_entry| block_entry.session == session, - |block_number, required_routing, validator_index| { - if *required_routing == RequiredRouting::PendingTopology(true) { - let block_age = block_age(blocks_by_number, block_number); - *required_routing = RequiredRouting::for_local(block_age); - } else if *required_routing == RequiredRouting::PendingTopology(false) { - *required_routing = topology.required_routing_for(validator_index.clone()); + |required_routing, local, validator_index| { + if *required_routing == RequiredRouting::PendingTopology { + *required_routing = 
topology.required_routing_for(*validator_index, local); } }, ) @@ -776,7 +762,13 @@ impl State { .await; } - fn handle_block_finalized(&mut self, finalized_number: BlockNumber) { + async fn handle_block_finalized( + &mut self, + ctx: &mut (impl SubsystemContext + + overseer::SubsystemContext), + metrics: &Metrics, + finalized_number: BlockNumber, + ) { // we want to prune every block up to (including) finalized_number // why +1 here? // split_off returns everything after the given key, including the key @@ -793,6 +785,10 @@ impl State { self.topologies.dec_session_refs(block_entry.session); } }); + + // If a block was finalized, this means we may need to move our aggression + // forward to the now oldest block(s). + self.enable_aggression(ctx, metrics).await; } async fn import_and_circulate_assignment( @@ -964,17 +960,11 @@ impl State { metrics.on_assignment_imported(); let topology = self.topologies.get_topology(entry.session); + let local = source == MessageSource::Local; - let required_routing = if source == MessageSource::Local { - let block_age = block_age(&self.blocks_by_number, entry.number); - topology.map_or(RequiredRouting::PendingTopology(true), |_| { - RequiredRouting::for_local(block_age) - }) - } else { - topology.map_or(RequiredRouting::PendingTopology(false), |t| { - t.required_routing_for(validator_index) - }) - }; + let required_routing = topology.map_or(RequiredRouting::PendingTopology, |t| { + t.required_routing_for(validator_index, local) + }); let message_state = match entry.candidates.get_mut(claimed_candidate_index as usize) { Some(candidate_entry) => { @@ -982,6 +972,7 @@ impl State { // unless the approval state is set already candidate_entry.messages.entry(validator_index).or_insert_with(|| MessageState { required_routing, + local, random_routing: RandomRouting { target: RANDOM_CIRCULATION, sent: 0 }, approval_state: ApprovalState::Assigned(assignment.cert.clone()), }) @@ -1209,6 +1200,7 @@ impl State { Some(MessageState { approval_state: 
ApprovalState::Assigned(cert), required_routing, + local, random_routing, }) => { candidate_entry.messages.insert( @@ -1219,6 +1211,7 @@ impl State { vote.signature.clone(), ), required_routing, + local, random_routing, }, ); @@ -1449,42 +1442,78 @@ impl State { .await; } } -} -// Get the age of the given block number relative to the highest stored. -fn block_age( - blocks_by_number: &BTreeMap>, - block_number: BlockNumber, -) -> BlockNumber { - match blocks_by_number.iter().rev().last() { - None => { - gum::warn!( - target: LOG_TARGET, - block_number, - "Asked for block age compared to an empty list", - ); + async fn enable_aggression( + &mut self, + ctx: &mut (impl SubsystemContext + + overseer::SubsystemContext), + metrics: &Metrics, + ) { + let min_age = self.blocks_by_number.iter().next().map(|(num, _)| num); + let max_age = self.blocks_by_number.iter().rev().next().map(|(num, _)| num); - 0 - }, - Some((most_recent, _)) => - if *most_recent < block_number { - gum::warn!( - target: LOG_TARGET, - most_recent, - block_number, - "Asked for block age for block newer than most recent", - ); + let (min_age, max_age) = match (min_age, max_age) { + (Some(min), Some(max)) => (min, max), + _ => return, // empty. + }; - 0 - } else { - most_recent - block_number - }, + let diff = max_age - min_age; + if diff < AGGRESSION_L1_THRESHOLD { return } + + adjust_required_routing_and_propagate( + ctx, + &mut self.blocks, + &self.topologies, + |block_entry| { + // Ramp up aggression only for the very oldest block(s). + // Approval voting can get stuck on a single block preventing + // its descendants from being finalized. Waste minimal bandwidth + // this way. Also, disputes might prevent finality - again, nothing + // to waste bandwidth on newer blocks for. + &block_entry.number == min_age + } + |required_routing, local, _| { + // It's a bit surprising not to have a topology at this age. 
+ if *required_routing == RequiredRouting::PendingTopology { + gum::debug!( + target: LOG_TARGET, + age = ?diff, + "Encountered old block pending gossip topology", + ); + return; + } + + if diff >= AGGRESSION_L3_THRESHOLD { + // last-ditch: everyone broadcasts everything to everyone. + // This is going to be very packet and bandwidth-intense, but + // it's literally the most we can do. + *required_routing = RequiredRouting::All; + } else if diff >= AGGRESSION_L2_THRESHOLD { + // Message originator sends to everyone. Everyone else sends to XY. + if local { + *required_routing = RequiredRouting::All; + } else { + *required_routing = RequiredRouting::GridXY; + } + } else if diff >= AGGRESSION_L1_THRESHOLD { + // Message originator sends to everyone. + if local { + *required_routing = RequiredRouting::All; + } + } else { + unreachable!("Difference between max and min checked to be at least aggression threshold above; qed"); + } + } + ).await; } } // This adjusts the required routing of messages in blocks that pass the block filter // according to the modifier function given. // +// The modifier accepts as inputs the current required-routing state, whether +// the message is locally originating, and the validator index of the message issuer. +// // Then, if the topology is known, this progates messages to all peers in the required // routing set which are aware of the block. Peers which are unaware of the block // will have the message sent when it enters their view in `unify_with_peer`. 
@@ -1497,7 +1526,7 @@ async fn adjust_required_routing_and_propagate( blocks: &mut HashMap, topologies: &SessionTopologies, block_filter: impl Fn(&BlockEntry) -> bool, - routing_modifier: impl Fn(BlockNumber, &mut RequiredRouting, &ValidatorIndex), + routing_modifier: impl Fn(&mut RequiredRouting, bool, &ValidatorIndex), ) { let mut peer_assignments = HashMap::new(); let mut peer_approvals = HashMap::new(); @@ -1517,7 +1546,7 @@ async fn adjust_required_routing_and_propagate( .flat_map(|(c_i, c)| c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v))) { let prev_routing = message_state.required_routing; - routing_modifier(block_entry.number, &mut message_state.required_routing, validator); + routing_modifier(&mut message_state.required_routing, message_state.local, validator); if message_state.required_routing.is_empty() || message_state.required_routing == prev_routing @@ -1668,7 +1697,7 @@ impl ApprovalDistribution { }, FromOverseer::Signal(OverseerSignal::BlockFinalized(_hash, number)) => { gum::trace!(target: LOG_TARGET, number = %number, "finalized signal"); - state.handle_block_finalized(number); + state.handle_block_finalized(&mut ctx, &self.metrics, number).await; }, FromOverseer::Signal(OverseerSignal::Conclude) => return, } From 93b51323aa3365bb476a4a0a76c445e97eb1b84d Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 25 Mar 2022 17:16:49 -0500 Subject: [PATCH 38/72] add aggression metrics --- node/network/approval-distribution/src/lib.rs | 39 +++++++++-------- .../approval-distribution/src/metrics.rs | 42 +++++++++++++++++++ 2 files changed, 64 insertions(+), 17 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index bc5460de6152..7b2eae429051 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -64,9 +64,9 @@ const BENEFIT_VALID_MESSAGE_FIRST: Rep = /// The number of peers to randomly propagate messages to. 
const RANDOM_CIRCULATION: usize = 8; /// The sample rate for randomly propagating messages. This -/// reduces leftward skew of the binomial distribution but also -/// introduces a bias towards peers who get a block earlier -/// than others. +/// reduces the left tail of the binomial distribution but also +/// introduces a bias towards peers who we sample before others +/// (i.e. those who get a block before others). const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS; // A note on aggression thresholds: changes in propagation apply only to blocks which are the @@ -1471,7 +1471,7 @@ impl State { // this way. Also, disputes might prevent finality - again, nothing // to waste bandwidth on newer blocks for. &block_entry.number == min_age - } + }, |required_routing, local, _| { // It's a bit surprising not to have a topology at this age. if *required_routing == RequiredRouting::PendingTopology { @@ -1483,25 +1483,30 @@ impl State { return; } + if diff >= AGGRESSION_L1_THRESHOLD { + // Message originator sends to everyone. + if local && *required_routing != RequiredRouting::All { + metrics.on_aggression_l1(); + *required_routing = RequiredRouting::All; + } + } + + if diff >= AGGRESSION_L2_THRESHOLD { + // Message originator sends to everyone. Everyone else sends to XY. + if !local && *required_routing != RequiredRouting::GridXY { + metrics.on_aggression_l2(); + *required_routing = RequiredRouting::GridXY; + } + } + if diff >= AGGRESSION_L3_THRESHOLD { // last-ditch: everyone broadcasts everything to everyone. // This is going to be very packet and bandwidth-intense, but // it's literally the most we can do. - *required_routing = RequiredRouting::All; - } else if diff >= AGGRESSION_L2_THRESHOLD { - // Message originator sends to everyone. Everyone else sends to XY. 
- if local { - *required_routing = RequiredRouting::All; - } else { - *required_routing = RequiredRouting::GridXY; - } - } else if diff >= AGGRESSION_L1_THRESHOLD { - // Message originator sends to everyone. - if local { + if *required_routing != RequiredRouting::All { + metrics.on_aggression_l3(); *required_routing = RequiredRouting::All; } - } else { - unreachable!("Difference between max and min checked to be at least aggression threshold above; qed"); } } ).await; diff --git a/node/network/approval-distribution/src/metrics.rs b/node/network/approval-distribution/src/metrics.rs index b96916a7f0e7..bc73b3c772e4 100644 --- a/node/network/approval-distribution/src/metrics.rs +++ b/node/network/approval-distribution/src/metrics.rs @@ -25,6 +25,9 @@ struct MetricsInner { assignments_imported_total: prometheus::Counter, approvals_imported_total: prometheus::Counter, unified_with_peer_total: prometheus::Counter, + aggression_l1_messages_total: prometheus::Counter, + aggression_l2_messages_total: prometheus::Counter, + aggression_l3_messages_total: prometheus::Counter, time_unify_with_peer: prometheus::Histogram, time_import_pending_now_known: prometheus::Histogram, @@ -69,6 +72,24 @@ impl Metrics { .as_ref() .map(|metrics| metrics.time_awaiting_approval_voting.start_timer()) } + + pub(crate) fn on_aggression_l1(&self) { + if let Some(metrics) = &self.0 { + metrics.aggression_l1_messages_total.inc(); + } + } + + pub(crate) fn on_aggression_l2(&self) { + if let Some(metrics) = &self.0 { + metrics.aggression_l2_messages_total.inc(); + } + } + + pub(crate) fn on_aggression_l3(&self) { + if let Some(metrics) = &self.0 { + metrics.aggression_l3_messages_total.inc(); + } + } } impl MetricsTrait for Metrics { @@ -95,6 +116,27 @@ impl MetricsTrait for Metrics { )?, registry, )?, + aggression_l1_messages_total: prometheus::register( + prometheus::Counter::new( + "polkadot_parachain_approval_disttribution_aggression_l1_messages_total", + "Number of messages in approval 
distribution for which aggression L1 has been triggered", + )?, + registry, + )?, + aggression_l2_messages_total: prometheus::register( + prometheus::Counter::new( + "polkadot_parachain_approval_disttribution_aggression_l2_messages_total", + "Number of messages in approval distribution for which aggression L2 has been triggered", + )?, + registry, + )?, + aggression_l3_messages_total: prometheus::register( + prometheus::Counter::new( + "polkadot_parachain_approval_disttribution_aggression_l3_messages_total", + "Number of messages in approval distribution for which aggression L3 has been triggered", + )?, + registry, + )?, time_unify_with_peer: prometheus::register( prometheus::Histogram::with_opts(prometheus::HistogramOpts::new( "polkadot_parachain_time_unify_with_peer", From ba86d16fbc50d0a2d41a9cac35df8ebd14442ccd Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 11:55:48 +0200 Subject: [PATCH 39/72] remove aggression L3 --- node/network/approval-distribution/src/lib.rs | 38 ++++++++----------- .../approval-distribution/src/metrics.rs | 14 ------- 2 files changed, 16 insertions(+), 36 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 7b2eae429051..4e4eba9f06df 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -81,9 +81,6 @@ const AGGRESSION_L1_THRESHOLD: BlockNumber = 10; /// Aggression level 2: L1 + all validators send all messages to all XY peers const AGGRESSION_L2_THRESHOLD: BlockNumber = 25; -/// Aggression level 3: last-ditch: all validators send all messages to all peers. -const AGGRESSION_L3_THRESHOLD: BlockNumber = 50; - /// The Approval Distribution subsystem. 
pub struct ApprovalDistribution { metrics: Metrics, @@ -125,9 +122,13 @@ struct SessionTopology { } impl SessionTopology { - fn required_routing_for(&self, validator_index: ValidatorIndex, local: bool) -> RequiredRouting { + fn required_routing_for( + &self, + validator_index: ValidatorIndex, + local: bool, + ) -> RequiredRouting { if local { - return RequiredRouting::GridXY; + return RequiredRouting::GridXY } let grid_x = self.validator_indices_x.contains(&validator_index); @@ -765,7 +766,7 @@ impl State { async fn handle_block_finalized( &mut self, ctx: &mut (impl SubsystemContext - + overseer::SubsystemContext), + + overseer::SubsystemContext), metrics: &Metrics, finalized_number: BlockNumber, ) { @@ -1446,8 +1447,8 @@ impl State { async fn enable_aggression( &mut self, ctx: &mut (impl SubsystemContext - + overseer::SubsystemContext), - metrics: &Metrics, + + overseer::SubsystemContext), + metrics: &Metrics, ) { let min_age = self.blocks_by_number.iter().next().map(|(num, _)| num); let max_age = self.blocks_by_number.iter().rev().next().map(|(num, _)| num); @@ -1458,7 +1459,9 @@ impl State { }; let diff = max_age - min_age; - if diff < AGGRESSION_L1_THRESHOLD { return } + if diff < AGGRESSION_L1_THRESHOLD { + return + } adjust_required_routing_and_propagate( ctx, @@ -1480,7 +1483,7 @@ impl State { age = ?diff, "Encountered old block pending gossip topology", ); - return; + return } if diff >= AGGRESSION_L1_THRESHOLD { @@ -1498,18 +1501,9 @@ impl State { *required_routing = RequiredRouting::GridXY; } } - - if diff >= AGGRESSION_L3_THRESHOLD { - // last-ditch: everyone broadcasts everything to everyone. - // This is going to be very packet and bandwidth-intense, but - // it's literally the most we can do. 
- if *required_routing != RequiredRouting::All { - metrics.on_aggression_l3(); - *required_routing = RequiredRouting::All; - } - } - } - ).await; + }, + ) + .await; } } diff --git a/node/network/approval-distribution/src/metrics.rs b/node/network/approval-distribution/src/metrics.rs index bc73b3c772e4..d7a361879e8e 100644 --- a/node/network/approval-distribution/src/metrics.rs +++ b/node/network/approval-distribution/src/metrics.rs @@ -27,7 +27,6 @@ struct MetricsInner { unified_with_peer_total: prometheus::Counter, aggression_l1_messages_total: prometheus::Counter, aggression_l2_messages_total: prometheus::Counter, - aggression_l3_messages_total: prometheus::Counter, time_unify_with_peer: prometheus::Histogram, time_import_pending_now_known: prometheus::Histogram, @@ -84,12 +83,6 @@ impl Metrics { metrics.aggression_l2_messages_total.inc(); } } - - pub(crate) fn on_aggression_l3(&self) { - if let Some(metrics) = &self.0 { - metrics.aggression_l3_messages_total.inc(); - } - } } impl MetricsTrait for Metrics { @@ -130,13 +123,6 @@ impl MetricsTrait for Metrics { )?, registry, )?, - aggression_l3_messages_total: prometheus::register( - prometheus::Counter::new( - "polkadot_parachain_approval_disttribution_aggression_l3_messages_total", - "Number of messages in approval distribution for which aggression L3 has been triggered", - )?, - registry, - )?, time_unify_with_peer: prometheus::register( prometheus::Histogram::with_opts(prometheus::HistogramOpts::new( "polkadot_parachain_time_unify_with_peer", From f4448ba299af9fe7d0bab63bf4337c682c6cf46a Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 11:57:54 +0200 Subject: [PATCH 40/72] reduce random circulation --- node/network/approval-distribution/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 4e4eba9f06df..0abad01bc287 100644 --- 
a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -62,7 +62,7 @@ const BENEFIT_VALID_MESSAGE_FIRST: Rep = Rep::BenefitMinorFirst("Valid message with new information"); /// The number of peers to randomly propagate messages to. -const RANDOM_CIRCULATION: usize = 8; +const RANDOM_CIRCULATION: usize = 4; /// The sample rate for randomly propagating messages. This /// reduces the left tail of the binomial distribution but also /// introduces a bias towards peers who we sample before others From e0bef50570c9de7f9e82727f6eb3d66addefa9a2 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 12:15:38 +0200 Subject: [PATCH 41/72] remove PeerData --- node/network/approval-distribution/src/lib.rs | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 0abad01bc287..8899facd9638 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -86,11 +86,6 @@ pub struct ApprovalDistribution { metrics: Metrics, } -#[derive(Default)] -struct PeerData { - view: View, -} - /// Contains recently finalized /// or those pruned due to finalization. #[derive(Default)] @@ -223,7 +218,7 @@ struct State { pending_known: HashMap>, /// Peer data is partially stored here, and partially inline within the [`BlockEntry`]s - peer_data: HashMap, + peer_views: HashMap, /// Topologies for various different sessions. 
topologies: SessionTopologies, @@ -436,11 +431,11 @@ impl State { NetworkBridgeEvent::PeerConnected(peer_id, role, _) => { // insert a blank view if none already present gum::trace!(target: LOG_TARGET, ?peer_id, ?role, "Peer connected"); - self.peer_data.entry(peer_id).or_default(); + self.peer_views.entry(peer_id).or_default(); }, NetworkBridgeEvent::PeerDisconnected(peer_id) => { gum::trace!(target: LOG_TARGET, ?peer_id, "Peer disconnected"); - self.peer_data.remove(&peer_id); + self.peer_views.remove(&peer_id); self.blocks.iter_mut().for_each(|(_hash, entry)| { entry.known_by.remove(&peer_id); }) @@ -523,16 +518,16 @@ impl State { ); { - for (peer_id, peer_data) in self.peer_data.iter() { - let intersection = peer_data.view.iter().filter(|h| new_hashes.contains(h)); + for (peer_id, view) in self.peer_views.iter() { + let intersection = view.iter().filter(|h| new_hashes.contains(h)); let view_intersection = - View::new(intersection.cloned(), peer_data.view.finalized_number); + View::new(intersection.cloned(), view.finalized_number); Self::unify_with_peer( ctx, metrics, &mut self.blocks, &self.topologies, - self.peer_data.len(), + self.peer_views.len(), peer_id.clone(), view_intersection, rng, @@ -727,9 +722,9 @@ impl State { gum::trace!(target: LOG_TARGET, ?view, "Peer view change"); let finalized_number = view.finalized_number; let old_view = self - .peer_data + .peer_views .get_mut(&peer_id) - .map(|d| std::mem::replace(&mut d.view, view.clone())); + .map(|d| std::mem::replace(d, view.clone())); let old_finalized_number = old_view.map(|v| v.finalized_number).unwrap_or(0); // we want to prune every block known_by peer up to (including) view.finalized_number @@ -755,7 +750,7 @@ impl State { metrics, &mut self.blocks, &self.topologies, - self.peer_data.len(), + self.peer_views.len(), peer_id.clone(), view, rng, @@ -997,7 +992,7 @@ impl State { // then messages will be sent when we get it. 
let assignments = vec![(assignment, claimed_candidate_index)]; - let n_peers_total = self.peer_data.len(); + let n_peers_total = self.peer_views.len(); let source_peer = source.peer_id(); let mut peer_filter = move |peer| { From bf334892e4b202a79043b80838a10ab74935bb52 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 12:29:20 +0200 Subject: [PATCH 42/72] get approval tests compiling --- .../approval-distribution/src/tests.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 93f5e5e2ca9f..666c46b5706e 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -25,6 +25,7 @@ use polkadot_node_subsystem::messages::{AllMessages, ApprovalCheckError}; use polkadot_node_subsystem_test_helpers as test_helpers; use polkadot_node_subsystem_util::TimeoutExt as _; use std::time::Duration; +use rand::SeedableRng; type VirtualOverseer = test_helpers::TestSubsystemContextHandle; @@ -46,7 +47,9 @@ fn test_harness>( let subsystem = ApprovalDistribution::new(Default::default()); { - let subsystem = subsystem.run_inner(context, &mut state); + let mut rng = rand_chacha::ChaCha12Rng::seed_from_u64(12345); + + let subsystem = subsystem.run_inner(context, &mut state, &mut rng); let test_fut = test_fn(virtual_overseer); @@ -201,6 +204,7 @@ fn try_import_the_same_assignment() { number: 2, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, msg).await; @@ -283,6 +287,7 @@ fn spam_attack_results_in_negative_reputation_change() { number: 2, candidates: vec![Default::default(); candidates_count], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); @@ -365,6 +370,7 @@ fn peer_sending_us_the_same_we_just_sent_them_is_ok() { 
number: 1, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, msg).await; @@ -442,6 +448,7 @@ fn import_approval_happy_path() { number: 1, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, msg).await; @@ -528,6 +535,7 @@ fn import_approval_bad() { number: 1, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, msg).await; @@ -605,6 +613,7 @@ fn update_our_view() { number: 1, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let meta_b = BlockApprovalMeta { hash: hash_b, @@ -612,6 +621,7 @@ fn update_our_view() { number: 2, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let meta_c = BlockApprovalMeta { hash: hash_c, @@ -619,6 +629,7 @@ fn update_our_view() { number: 3, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta_a, meta_b, meta_c]); @@ -678,6 +689,7 @@ fn update_peer_view() { number: 1, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let meta_b = BlockApprovalMeta { hash: hash_b, @@ -685,6 +697,7 @@ fn update_peer_view() { number: 2, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let meta_c = BlockApprovalMeta { hash: hash_c, @@ -692,6 +705,7 @@ fn update_peer_view() { number: 3, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta_a, meta_b, meta_c]); @@ -829,6 +843,7 @@ fn import_remotely_then_locally() { number: 1, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, 
msg).await; @@ -913,6 +928,7 @@ fn sends_assignments_even_when_state_is_approved() { number: 1, candidates: vec![Default::default(); 1], slot: 1.into(), + session: 1, }; let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, msg).await; @@ -999,6 +1015,7 @@ fn race_condition_in_local_vs_remote_view_update() { number: 2, candidates: vec![Default::default(); candidates_count], slot: 1.into(), + session: 1, }; // This will send a peer view that is ahead of our view From 3a3a4ccbf8df40540476cb685b26a677abe1f922 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 13:22:02 +0200 Subject: [PATCH 43/72] use btree_map in known_by to make deterministic --- node/network/approval-distribution/src/lib.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 8899facd9638..cf249bfa163e 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -39,7 +39,7 @@ use polkadot_primitives::v2::{ BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, ValidatorSignature, }; use rand::{CryptoRng, Rng, SeedableRng}; -use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque}; +use std::collections::{hash_map, btree_map, BTreeMap, HashMap, HashSet, VecDeque}; use self::metrics::Metrics; @@ -67,6 +67,8 @@ const RANDOM_CIRCULATION: usize = 4; /// reduces the left tail of the binomial distribution but also /// introduces a bias towards peers who we sample before others /// (i.e. those who get a block before others). +/// +/// Peers are sampled as `RANDOM_SAMPLE_RATE / N_PEERS`. 
const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS; // A note on aggression thresholds: changes in propagation apply only to blocks which are the @@ -292,7 +294,7 @@ impl PeerKnowledge { struct BlockEntry { /// Peers who we know are aware of this block and thus, the candidates within it. /// This maps to their knowledge of messages. - known_by: HashMap, + known_by: BTreeMap, /// The number of the block. number: BlockNumber, /// The parent hash of the block. @@ -491,7 +493,7 @@ impl State { candidates.resize_with(candidates_count, Default::default); entry.insert(BlockEntry { - known_by: HashMap::new(), + known_by: BTreeMap::new(), number: meta.number, parent_hash: meta.parent_hash.clone(), knowledge: Knowledge::default(), @@ -826,7 +828,7 @@ impl State { if let Some(peer_id) = source.peer_id() { // check if our knowledge of the peer already contains this assignment match entry.known_by.entry(peer_id.clone()) { - hash_map::Entry::Occupied(mut peer_knowledge) => { + btree_map::Entry::Occupied(mut peer_knowledge) => { let peer_knowledge = peer_knowledge.get_mut(); if peer_knowledge.contains(&message_subject, message_kind) { // wasn't included before @@ -842,7 +844,7 @@ impl State { return } }, - hash_map::Entry::Vacant(_) => { + btree_map::Entry::Vacant(_) => { gum::debug!( target: LOG_TARGET, ?peer_id, @@ -1088,7 +1090,7 @@ impl State { // check if our knowledge of the peer already contains this approval match entry.known_by.entry(peer_id.clone()) { - hash_map::Entry::Occupied(mut knowledge) => { + btree_map::Entry::Occupied(mut knowledge) => { let peer_knowledge = knowledge.get_mut(); if peer_knowledge.contains(&message_subject, message_kind) { if !peer_knowledge.received.insert(message_subject.clone(), message_kind) { @@ -1104,7 +1106,7 @@ impl State { return } }, - hash_map::Entry::Vacant(_) => { + btree_map::Entry::Vacant(_) => { gum::debug!( target: LOG_TARGET, ?peer_id, From a56e5ef55e5061b294e1fab5112b3fd6d8904750 Mon Sep 17 
00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 13:26:54 +0200 Subject: [PATCH 44/72] Revert "use btree_map in known_by to make deterministic" This reverts commit 3a3a4ccbf8df40540476cb685b26a677abe1f922. --- node/network/approval-distribution/src/lib.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index cf249bfa163e..8899facd9638 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -39,7 +39,7 @@ use polkadot_primitives::v2::{ BlockNumber, CandidateIndex, Hash, SessionIndex, ValidatorIndex, ValidatorSignature, }; use rand::{CryptoRng, Rng, SeedableRng}; -use std::collections::{hash_map, btree_map, BTreeMap, HashMap, HashSet, VecDeque}; +use std::collections::{hash_map, BTreeMap, HashMap, HashSet, VecDeque}; use self::metrics::Metrics; @@ -67,8 +67,6 @@ const RANDOM_CIRCULATION: usize = 4; /// reduces the left tail of the binomial distribution but also /// introduces a bias towards peers who we sample before others /// (i.e. those who get a block before others). -/// -/// Peers are sampled as `RANDOM_SAMPLE_RATE / N_PEERS`. const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS; // A note on aggression thresholds: changes in propagation apply only to blocks which are the @@ -294,7 +292,7 @@ impl PeerKnowledge { struct BlockEntry { /// Peers who we know are aware of this block and thus, the candidates within it. /// This maps to their knowledge of messages. - known_by: BTreeMap, + known_by: HashMap, /// The number of the block. number: BlockNumber, /// The parent hash of the block. 
@@ -493,7 +491,7 @@ impl State { candidates.resize_with(candidates_count, Default::default); entry.insert(BlockEntry { - known_by: BTreeMap::new(), + known_by: HashMap::new(), number: meta.number, parent_hash: meta.parent_hash.clone(), knowledge: Knowledge::default(), @@ -828,7 +826,7 @@ impl State { if let Some(peer_id) = source.peer_id() { // check if our knowledge of the peer already contains this assignment match entry.known_by.entry(peer_id.clone()) { - btree_map::Entry::Occupied(mut peer_knowledge) => { + hash_map::Entry::Occupied(mut peer_knowledge) => { let peer_knowledge = peer_knowledge.get_mut(); if peer_knowledge.contains(&message_subject, message_kind) { // wasn't included before @@ -844,7 +842,7 @@ impl State { return } }, - btree_map::Entry::Vacant(_) => { + hash_map::Entry::Vacant(_) => { gum::debug!( target: LOG_TARGET, ?peer_id, @@ -1090,7 +1088,7 @@ impl State { // check if our knowledge of the peer already contains this approval match entry.known_by.entry(peer_id.clone()) { - btree_map::Entry::Occupied(mut knowledge) => { + hash_map::Entry::Occupied(mut knowledge) => { let peer_knowledge = knowledge.get_mut(); if peer_knowledge.contains(&message_subject, message_kind) { if !peer_knowledge.received.insert(message_subject.clone(), message_kind) { @@ -1106,7 +1104,7 @@ impl State { return } }, - btree_map::Entry::Vacant(_) => { + hash_map::Entry::Vacant(_) => { gum::debug!( target: LOG_TARGET, ?peer_id, From 3ef1002749f803561b7bb97dfa48a26d1909e66d Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 13:27:59 +0200 Subject: [PATCH 45/72] test XY grid propagation --- Cargo.lock | 1 + node/network/approval-distribution/Cargo.toml | 1 + node/network/approval-distribution/src/lib.rs | 1 + .../approval-distribution/src/tests.rs | 174 +++++++++++++++++- 4 files changed, 176 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index d920c04a1c78..12094408eb8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6347,6 +6347,7 @@ 
dependencies = [ "rand_chacha 0.3.1", "rand_core 0.5.1", "schnorrkel", + "sp-authority-discovery", "sp-core", "tracing-gum", ] diff --git a/node/network/approval-distribution/Cargo.toml b/node/network/approval-distribution/Cargo.toml index 9aa5f0bfbeed..8271f0dd34fc 100644 --- a/node/network/approval-distribution/Cargo.toml +++ b/node/network/approval-distribution/Cargo.toml @@ -16,6 +16,7 @@ futures = "0.3.21" gum = { package = "tracing-gum", path = "../../gum" } [dev-dependencies] +sp-authority-discovery = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } polkadot-node-subsystem-util = { path = "../../subsystem-util" } diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 8899facd9638..55046d105fa6 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -996,6 +996,7 @@ impl State { let source_peer = source.peer_id(); let mut peer_filter = move |peer| { + println!("{:?} filter peer", peer); if Some(peer) == source_peer.as_ref() { return false } diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 666c46b5706e..095ed9d8cbd4 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -15,8 +15,11 @@ // along with Polkadot. If not, see . 
use super::*; +use sp_authority_discovery::AuthorityPair as AuthorityDiscoveryPair; +use sp_core::crypto::Pair as PairT; use assert_matches::assert_matches; use futures::{executor, future, Future}; +use polkadot_primitives::v2::AuthorityDiscoveryId; use polkadot_node_network_protocol::{our_view, view, ObservedRole}; use polkadot_node_primitives::approval::{ AssignmentCertKind, VRFOutput, VRFProof, RELAY_VRF_MODULO_CONTEXT, @@ -102,6 +105,62 @@ async fn overseer_recv(overseer: &mut VirtualOverseer) -> AllMessages { msg } +fn make_peers_and_authority_ids(n: usize) -> Vec<(PeerId, AuthorityDiscoveryId)> { + (0..n).map(|_| { + let peer_id = PeerId::random(); + let authority_id = AuthorityDiscoveryPair::generate().0.public(); + + (peer_id, authority_id) + }).collect() +} + +fn make_gossip_topology( + session: SessionIndex, + all_peers: &[(PeerId, AuthorityDiscoveryId)], + neighbors_x: &[usize], + neighbors_y: &[usize], +) -> network_bridge_event::NewGossipTopology { + let mut t = network_bridge_event::NewGossipTopology { + session, + our_neighbors_x: HashMap::new(), + our_neighbors_y: HashMap::new(), + }; + + for &i in neighbors_x { + t.our_neighbors_x.insert( + all_peers[i].1.clone(), + network_bridge_event::TopologyPeerInfo { + peer_ids: vec![all_peers[i].0.clone()], + validator_index: ValidatorIndex::from(i as u32), + } + ); + } + + for &i in neighbors_y { + t.our_neighbors_y.insert( + all_peers[i].1.clone(), + network_bridge_event::TopologyPeerInfo { + peer_ids: vec![all_peers[i].0.clone()], + validator_index: ValidatorIndex::from(i as u32), + } + ); + } + + t +} + +async fn setup_gossip_topology( + virtual_overseer: &mut VirtualOverseer, + gossip_topology: network_bridge_event::NewGossipTopology, +) { + overseer_send( + virtual_overseer, + ApprovalDistributionMessage::NetworkBridgeUpdateV1(NetworkBridgeEvent::NewGossipTopology( + gossip_topology, + )), + ).await; +} + async fn setup_peer_with_view( virtual_overseer: &mut VirtualOverseer, peer_id: &PeerId, @@ 
-1070,7 +1129,120 @@ fn race_condition_in_local_vs_remote_view_update() { }); } -// TODO [now]: test propagation of message from issuer - unshared dimension +// Tests that messages propagate to the unshared dimension. +#[test] +fn propagates_locally_generated_assignment_to_both_dimensions() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + // Connect all peers. + for (peer, _) in &peers { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + + // Set up a gossip topology. + setup_gossip_topology( + overseer, + make_gossip_topology( + 1, + &peers, + &[0, 10, 20, 30], + &[50, 51, 52, 53], + ), + ).await; + + let expected_indices = [ + // Both dimensions in the gossip topology + 0, 10, 20, 30, 50, 51, 52, 53, + ]; + + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. 
+ let cert = fake_assignment_cert(hash, validator_index); + let approval = IndirectSignedApprovalVote { + block_hash: hash, + candidate_index, + validator: validator_index, + signature: dummy_signature(), + }; + + overseer_send( + overseer, + ApprovalDistributionMessage::DistributeAssignment(cert.clone(), candidate_index), + ) + .await; + + overseer_send(overseer, ApprovalDistributionMessage::DistributeApproval(approval.clone())) + .await; + + let assignments = vec![(cert.clone(), candidate_index)]; + let approvals = vec![approval.clone()]; + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + for &i in &expected_indices { + assert!( + sent_peers.contains(&peers[i].0), + "Message not sent to expected peer {}", + i, + ); + } + assert_eq!(sent_assignments, assignments); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(sent_approvals) + ) + )) => { + for &i in &expected_indices { + assert!( + sent_peers.contains(&peers[i].0), + "Message not sent to expected peer {}", + i, + ); + } + assert_eq!(sent_approvals, approvals); + } + ); + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} + +// TODO [now]: Tests that messages propagate to the unshared dimension. 
+ // TODO [now]: test that messages are propagated to necessary peers after they connect From 4387333b4ca8a726866e93aea30d5fac958fefd1 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 13:40:15 +0200 Subject: [PATCH 46/72] remove stray println --- node/network/approval-distribution/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 55046d105fa6..8899facd9638 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -996,7 +996,6 @@ impl State { let source_peer = source.peer_id(); let mut peer_filter = move |peer| { - println!("{:?} filter peer", peer); if Some(peer) == source_peer.as_ref() { return false } From fc9256073982c3646722018bb84837f1b660d7ef Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 13:43:51 +0200 Subject: [PATCH 47/72] test unshared dimension propagation --- node/network/approval-distribution/src/lib.rs | 5 +- .../approval-distribution/src/tests.rs | 155 ++++++++++++++++-- 2 files changed, 148 insertions(+), 12 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 8899facd9638..e26e3bd0eb30 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -131,8 +131,8 @@ impl SessionTopology { match (grid_x, grid_y) { (false, false) => RequiredRouting::None, - (true, false) => RequiredRouting::GridX, - (false, true) => RequiredRouting::GridY, + (true, false) => RequiredRouting::GridY, // messages from X go to Y + (false, true) => RequiredRouting::GridX, // messages from Y go to X (true, true) => RequiredRouting::GridXY, // if the grid works as expected, this shouldn't happen. 
} } @@ -958,6 +958,7 @@ impl State { let topology = self.topologies.get_topology(entry.session); let local = source == MessageSource::Local; + let required_routing = topology.map_or(RequiredRouting::PendingTopology, |t| { t.required_routing_for(validator_index, local) }); diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 095ed9d8cbd4..e0de800b1a5d 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1129,7 +1129,7 @@ fn race_condition_in_local_vs_remote_view_update() { }); } -// Tests that messages propagate to the unshared dimension. +// Tests that local messages propagate to both dimensions. #[test] fn propagates_locally_generated_assignment_to_both_dimensions() { let parent_hash = Hash::repeat_byte(0xFF); @@ -1198,7 +1198,7 @@ fn propagates_locally_generated_assignment_to_both_dimensions() { let assignments = vec![(cert.clone(), candidate_index)]; let approvals = vec![approval.clone()]; - assert_matches!( + let assignment_sent_peers = assert_matches!( overseer_recv(overseer).await, AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( sent_peers, @@ -1206,6 +1206,7 @@ fn propagates_locally_generated_assignment_to_both_dimensions() { protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) ) )) => { + assert_eq!(sent_peers.len(), expected_indices.len() + 4); for &i in &expected_indices { assert!( sent_peers.contains(&peers[i].0), @@ -1214,6 +1215,7 @@ fn propagates_locally_generated_assignment_to_both_dimensions() { ); } assert_eq!(sent_assignments, assignments); + sent_peers } ); @@ -1225,13 +1227,8 @@ fn propagates_locally_generated_assignment_to_both_dimensions() { protocol_v1::ApprovalDistributionMessage::Approvals(sent_approvals) ) )) => { - for &i in &expected_indices { - assert!( - sent_peers.contains(&peers[i].0), - "Message not sent to expected peer {}", - i, - ); - } + // Random sampling is 
reused from the assignment. + assert_eq!(sent_peers, assignment_sent_peers); assert_eq!(sent_approvals, approvals); } ); @@ -1241,8 +1238,146 @@ fn propagates_locally_generated_assignment_to_both_dimensions() { }); } -// TODO [now]: Tests that messages propagate to the unshared dimension. +// Tests that messages propagate to the unshared dimension. +#[test] +fn propagates_assignments_along_unshared_dimension() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + // Connect all peers. + for (peer, _) in &peers { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + + // Set up a gossip topology. + setup_gossip_topology( + overseer, + make_gossip_topology( + 1, + &peers, + &[0, 10, 20, 30], + &[50, 51, 52, 53], + ), + ).await; + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + // Test messages from X direction go to Y peers + { + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + let assignments = vec![(cert.clone(), candidate_index)]; + + let msg = protocol_v1::ApprovalDistributionMessage::Assignments(assignments.clone()); + + // Issuer of the message is important, not the peer we receive from. + // 99 deliberately chosen because it's not in X or Y. 
+ send_message_from_peer(overseer, &peers[99].0, msg).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::ApprovalVoting(ApprovalVotingMessage::CheckAndImportAssignment( + _, + _, + tx, + )) => { + tx.send(AssignmentCheckResult::Accepted).unwrap(); + } + ); + expect_reputation_change(overseer, &peers[99].0, BENEFIT_VALID_MESSAGE_FIRST).await; + + let expected_y = [50, 51, 52, 53]; + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + for &i in &expected_y { + assert!( + sent_peers.contains(&peers[i].0), + "Message not sent to expected peer {}", + i, + ); + } + assert_eq!(sent_assignments, assignments); + } + ); + }; + + // Test messages from X direction go to Y peers + { + let validator_index = ValidatorIndex(50); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + let assignments = vec![(cert.clone(), candidate_index)]; + + let msg = protocol_v1::ApprovalDistributionMessage::Assignments(assignments.clone()); + + // Issuer of the message is important, not the peer we receive from. + // 99 deliberately chosen because it's not in X or Y. 
+ send_message_from_peer(overseer, &peers[99].0, msg).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::ApprovalVoting(ApprovalVotingMessage::CheckAndImportAssignment( + _, + _, + tx, + )) => { + tx.send(AssignmentCheckResult::Accepted).unwrap(); + } + ); + expect_reputation_change(overseer, &peers[99].0, BENEFIT_VALID_MESSAGE_FIRST).await; + + let expected_x = [0, 10, 20, 30]; + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + for &i in &expected_x { + assert!( + sent_peers.contains(&peers[i].0), + "Message not sent to expected peer {}", + i, + ); + } + assert_eq!(sent_assignments, assignments); + } + ); + }; + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} // TODO [now]: test that messages are propagated to necessary peers after they connect From 852205a703cb5a60e1f5d26d69dd348580afd993 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 13:44:33 +0200 Subject: [PATCH 48/72] add random gossip check --- node/network/approval-distribution/src/tests.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index e0de800b1a5d..511132a712c3 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1314,6 +1314,7 @@ fn propagates_assignments_along_unshared_dimension() { protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) ) )) => { + assert_eq!(sent_peers.len(), expected_y.len() + 4); for &i in &expected_y { assert!( sent_peers.contains(&peers[i].0), @@ -1362,6 +1363,7 @@ fn propagates_assignments_along_unshared_dimension() { 
protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) ) )) => { + assert_eq!(sent_peers.len(), expected_x.len() + 4); for &i in &expected_x { assert!( sent_peers.contains(&peers[i].0), From b88cd62310e520463fb23bb92e61e44291408ae5 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 13:53:09 +0200 Subject: [PATCH 49/72] test unify_with_peer better --- .../approval-distribution/src/tests.rs | 145 +++++++++++++++++- 1 file changed, 144 insertions(+), 1 deletion(-) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 511132a712c3..2237955a9fe1 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1381,7 +1381,150 @@ fn propagates_assignments_along_unshared_dimension() { }); } -// TODO [now]: test that messages are propagated to necessary peers after they connect +// tests that messages are propagated to necessary peers after they connect +#[test] +fn propagates_to_required_after_connect() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + let omitted = [0, 10, 50, 51]; + + // Connect all peers except omitted. + for (i, (peer, _)) in peers.iter().enumerate() { + if !omitted.contains(&i) { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + } + + // Set up a gossip topology. + setup_gossip_topology( + overseer, + make_gossip_topology( + 1, + &peers, + &[0, 10, 20, 30], + &[50, 51, 52, 53], + ), + ).await; + + let expected_indices = [ + // Both dimensions in the gossip topology, minus omitted. 
+ 20, 30, 52, 53, + ]; + + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + let approval = IndirectSignedApprovalVote { + block_hash: hash, + candidate_index, + validator: validator_index, + signature: dummy_signature(), + }; + + overseer_send( + overseer, + ApprovalDistributionMessage::DistributeAssignment(cert.clone(), candidate_index), + ) + .await; + + overseer_send(overseer, ApprovalDistributionMessage::DistributeApproval(approval.clone())) + .await; + + let assignments = vec![(cert.clone(), candidate_index)]; + let approvals = vec![approval.clone()]; + + let assignment_sent_peers = assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + assert_eq!(sent_peers.len(), expected_indices.len() + 4); + for &i in &expected_indices { + assert!( + sent_peers.contains(&peers[i].0), + "Message not sent to expected peer {}", + i, + ); + } + assert_eq!(sent_assignments, assignments); + sent_peers + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(sent_approvals) + ) + )) => { + // Random sampling is reused from the assignment. 
+ assert_eq!(sent_peers, assignment_sent_peers); + assert_eq!(sent_approvals, approvals); + } + ); + + for i in omitted.iter().copied() { + setup_peer_with_view(overseer, &peers[i].0, view![hash]).await; + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + assert_eq!(sent_peers.len(), 1); + assert_eq!(&sent_peers[0], &peers[i].0); + assert_eq!(sent_assignments, assignments); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(sent_approvals) + ) + )) => { + assert_eq!(sent_peers.len(), 1); + assert_eq!(&sent_peers[0], &peers[i].0); + assert_eq!(sent_approvals, approvals); + } + ); + } + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} // TODO [now]: test that first receipt of message broadcasts to random peers From f793a6797cd65237470c081b0cb8e9867dea7357 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 18:09:03 +0200 Subject: [PATCH 50/72] test sending after getting gossip topology --- .../approval-distribution/src/tests.rs | 145 +++++++++++++++++- 1 file changed, 143 insertions(+), 2 deletions(-) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 2237955a9fe1..e7eb9289b281 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1526,8 +1526,149 @@ fn propagates_to_required_after_connect() { }); } -// TODO [now]: test that first receipt of message broadcasts to random peers +// test that new gossip topology 
triggers send of messages. +#[test] +fn sends_to_more_peers_after_getting_topology() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + // Connect all peers except omitted. + for (peer, _) in &peers { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + let approval = IndirectSignedApprovalVote { + block_hash: hash, + candidate_index, + validator: validator_index, + signature: dummy_signature(), + }; + + overseer_send( + overseer, + ApprovalDistributionMessage::DistributeAssignment(cert.clone(), candidate_index), + ) + .await; + + overseer_send(overseer, ApprovalDistributionMessage::DistributeApproval(approval.clone())) + .await; + + let assignments = vec![(cert.clone(), candidate_index)]; + let approvals = vec![approval.clone()]; + + let assignment_sent_peers = assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + // Only sends to random peers. 
+ assert_eq!(sent_peers.len(), 4); + assert_eq!(sent_assignments, assignments); + sent_peers + } + ); -// TODO [now]: test that new gossip topology re-evaluates all messages + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(sent_approvals) + ) + )) => { + // Random sampling is reused from the assignment. + assert_eq!(sent_peers, assignment_sent_peers); + assert_eq!(sent_approvals, approvals); + } + ); + + // Set up a gossip topology. + setup_gossip_topology( + overseer, + make_gossip_topology( + 1, + &peers, + &[0, 10, 20, 30], + &[50, 51, 52, 53], + ), + ).await; + + let mut expected_indices_assignments = vec![0, 10, 20, 30, 50, 51, 52, 53]; + let mut expected_indices_approvals = vec![0, 10, 20, 30, 50, 51, 52, 53]; + + + for _ in 0..expected_indices_assignments.len() { + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + // Sends to all expected peers. + assert_eq!(sent_peers.len(), 1); + assert_eq!(sent_assignments, assignments); + + let pos = expected_indices_assignments.iter() + .position(|i| &peers[*i].0 == &sent_peers[0]) + .unwrap(); + expected_indices_assignments.remove(pos); + } + ); + } + + for _ in 0..expected_indices_approvals.len() { + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(sent_approvals) + ) + )) => { + // Sends to all expected peers. 
+ assert_eq!(sent_peers.len(), 1); + assert_eq!(sent_approvals, approvals); + + let pos = expected_indices_approvals.iter() + .position(|i| &peers[*i].0 == &sent_peers[0]) + .unwrap(); + + expected_indices_approvals.remove(pos); + } + ); + } + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} // TODO [now]: test that when a block takes a long time to be finalized, we broadcast more aggressively. From 99dfb1168d22628531204394f535c609c78645c9 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 18:28:58 +0200 Subject: [PATCH 51/72] test L1 aggression on originator --- .../approval-distribution/src/tests.rs | 159 +++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index e7eb9289b281..bca1278b41cd 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -19,7 +19,7 @@ use sp_authority_discovery::AuthorityPair as AuthorityDiscoveryPair; use sp_core::crypto::Pair as PairT; use assert_matches::assert_matches; use futures::{executor, future, Future}; -use polkadot_primitives::v2::AuthorityDiscoveryId; +use polkadot_primitives::v2::{AuthorityDiscoveryId, BlakeTwo256, HashT}; use polkadot_node_network_protocol::{our_view, view, ObservedRole}; use polkadot_node_primitives::approval::{ AssignmentCertKind, VRFOutput, VRFProof, RELAY_VRF_MODULO_CONTEXT, @@ -1671,4 +1671,159 @@ fn sends_to_more_peers_after_getting_topology() { }); } -// TODO [now]: test that when a block takes a long time to be finalized, we broadcast more aggressively. 
+// test aggression L1 +#[test] +fn originator_aggression_l1() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + // Connect all peers except omitted. + for (peer, _) in &peers { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + let approval = IndirectSignedApprovalVote { + block_hash: hash, + candidate_index, + validator: validator_index, + signature: dummy_signature(), + }; + + // Set up a gossip topology. 
+ setup_gossip_topology( + overseer, + make_gossip_topology( + 1, + &peers, + &[0, 10, 20, 30], + &[50, 51, 52, 53], + ), + ).await; + + overseer_send( + overseer, + ApprovalDistributionMessage::DistributeAssignment(cert.clone(), candidate_index), + ) + .await; + + overseer_send(overseer, ApprovalDistributionMessage::DistributeApproval(approval.clone())) + .await; + + let assignments = vec![(cert.clone(), candidate_index)]; + let approvals = vec![approval.clone()]; + + let prev_sent_indices = assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(_) + ) + )) => { + sent_peers.into_iter() + .filter_map(|sp| peers.iter().position(|p| &p.0 == &sp)) + .collect::>() + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + _, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(_) + ) + )) => { } + ); + + // Add blocks until aggression L1 is triggered. 
+ { + let mut parent_hash = hash; + for level in 0..AGGRESSION_L1_THRESHOLD { + let number = 1 + level + 1; // first block had number 1 + let hash = BlakeTwo256::hash_of(&(parent_hash, number)); + let meta = BlockApprovalMeta { + hash, + parent_hash, + number, + candidates: vec![], + slot: (level as u64).into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + parent_hash = hash; + } + } + + let unsent_indices = (0..peers.len()).filter(|i| !prev_sent_indices.contains(&i)).collect::>(); + + for _ in 0..unsent_indices.len() { + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + // Sends to all expected peers. + assert_eq!(sent_peers.len(), 1); + assert_eq!(sent_assignments, assignments); + + assert!(unsent_indices.iter() + .find(|i| &peers[**i].0 == &sent_peers[0]) + .is_some()); + } + ); + } + + for _ in 0..unsent_indices.len() { + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(sent_approvals) + ) + )) => { + // Sends to all expected peers. 
+ assert_eq!(sent_peers.len(), 1); + assert_eq!(sent_approvals, approvals); + + assert!(unsent_indices.iter() + .find(|i| &peers[**i].0 == &sent_peers[0]) + .is_some()); + } + ); + } + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} From 18534856979d6863aafbf22fa3c425c0286180a9 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 18:32:47 +0200 Subject: [PATCH 52/72] test L1 aggression for non-originators --- .../approval-distribution/src/tests.rs | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index bca1278b41cd..d48c84ef9bfe 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1827,3 +1827,107 @@ fn originator_aggression_l1() { virtual_overseer }); } + +// test aggression L1 +#[test] +fn non_originator_aggression_l1() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + // Connect all peers except omitted. + for (peer, _) in &peers { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + + // Set up a gossip topology. 
+ setup_gossip_topology( + overseer, + make_gossip_topology( + 1, + &peers, + &[0, 10, 20, 30], + &[50, 51, 52, 53], + ), + ).await; + + let assignments = vec![(cert.clone(), candidate_index)]; + let msg = protocol_v1::ApprovalDistributionMessage::Assignments(assignments.clone()); + + // Issuer of the message is important, not the peer we receive from. + // 99 deliberately chosen because it's not in X or Y. + send_message_from_peer(overseer, &peers[99].0, msg).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::ApprovalVoting(ApprovalVotingMessage::CheckAndImportAssignment( + _, + _, + tx, + )) => { + tx.send(AssignmentCheckResult::Accepted).unwrap(); + } + ); + + expect_reputation_change(overseer, &peers[99].0, BENEFIT_VALID_MESSAGE_FIRST).await; + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + _, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(_) + ) + )) => { } + ); + + // Add blocks until aggression L1 is triggered. 
+ { + let mut parent_hash = hash; + for level in 0..AGGRESSION_L1_THRESHOLD { + let number = 1 + level + 1; // first block had number 1 + let hash = BlakeTwo256::hash_of(&(parent_hash, number)); + let meta = BlockApprovalMeta { + hash, + parent_hash, + number, + candidates: vec![], + slot: (level as u64).into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + parent_hash = hash; + } + } + + // No-op on non-originator + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} From 4b9059b3e5b89981332cd82930e3466db7a8e1e8 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 18:39:06 +0200 Subject: [PATCH 53/72] test non-originator aggression L2 --- .../approval-distribution/src/tests.rs | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index d48c84ef9bfe..f23be9895408 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1931,3 +1931,161 @@ fn non_originator_aggression_l1() { virtual_overseer }); } + +// test aggression L2 on non-originator +#[test] +fn non_originator_aggression_l2() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + // Connect all peers except omitted. 
+ for (peer, _) in &peers { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + + // Set up a gossip topology. + setup_gossip_topology( + overseer, + make_gossip_topology( + 1, + &peers, + &[0, 10, 20, 30], + &[50, 51, 52, 53], + ), + ).await; + + let assignments = vec![(cert.clone(), candidate_index)]; + let msg = protocol_v1::ApprovalDistributionMessage::Assignments(assignments.clone()); + + // Issuer of the message is important, not the peer we receive from. + // 99 deliberately chosen because it's not in X or Y. + send_message_from_peer(overseer, &peers[99].0, msg).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::ApprovalVoting(ApprovalVotingMessage::CheckAndImportAssignment( + _, + _, + tx, + )) => { + tx.send(AssignmentCheckResult::Accepted).unwrap(); + } + ); + + expect_reputation_change(overseer, &peers[99].0, BENEFIT_VALID_MESSAGE_FIRST).await; + + let prev_sent_indices = assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(_) + ) + )) => { + sent_peers.into_iter() + .filter_map(|sp| peers.iter().position(|p| &p.0 == &sp)) + .collect::>() + } + ); + + // Add blocks until aggression L1 is triggered. 
+ let chain_head = { + let mut parent_hash = hash; + for level in 0..AGGRESSION_L1_THRESHOLD { + let number = 1 + level + 1; // first block had number 1 + let hash = BlakeTwo256::hash_of(&(parent_hash, number)); + let meta = BlockApprovalMeta { + hash, + parent_hash, + number, + candidates: vec![], + slot: (level as u64).into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + parent_hash = hash; + } + + parent_hash + }; + + // No-op on non-originator + + // Add blocks until aggression L2 is triggered. + { + let mut parent_hash = chain_head; + for level in 0..AGGRESSION_L2_THRESHOLD - AGGRESSION_L1_THRESHOLD { + let number = AGGRESSION_L1_THRESHOLD + level + 1 + 1; // first block had number 1 + let hash = BlakeTwo256::hash_of(&(parent_hash, number)); + let meta = BlockApprovalMeta { + hash, + parent_hash, + number, + candidates: vec![], + slot: (level as u64).into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + parent_hash = hash; + } + } + + // XY dimension - previously sent. + let unsent_indices = [0, 10, 20, 30, 50, 51, 52, 53] + .iter() + .cloned() + .filter(|i| !prev_sent_indices.contains(&i)).collect::>(); + + for _ in 0..unsent_indices.len() { + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + // Sends to all expected peers. 
+ assert_eq!(sent_peers.len(), 1); + assert_eq!(sent_assignments, assignments); + + assert!(unsent_indices.iter() + .find(|i| &peers[**i].0 == &sent_peers[0]) + .is_some()); + } + ); + } + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} From 481694c2a757c54c7cf6ca75ced7a00f154c31a2 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 18:43:26 +0200 Subject: [PATCH 54/72] fnt --- node/network/approval-distribution/src/lib.rs | 10 +- .../approval-distribution/src/tests.rs | 106 +++++++----------- 2 files changed, 44 insertions(+), 72 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index e26e3bd0eb30..034b1f1a04fb 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -520,8 +520,7 @@ impl State { { for (peer_id, view) in self.peer_views.iter() { let intersection = view.iter().filter(|h| new_hashes.contains(h)); - let view_intersection = - View::new(intersection.cloned(), view.finalized_number); + let view_intersection = View::new(intersection.cloned(), view.finalized_number); Self::unify_with_peer( ctx, metrics, @@ -721,10 +720,8 @@ impl State { ) { gum::trace!(target: LOG_TARGET, ?view, "Peer view change"); let finalized_number = view.finalized_number; - let old_view = self - .peer_views - .get_mut(&peer_id) - .map(|d| std::mem::replace(d, view.clone())); + let old_view = + self.peer_views.get_mut(&peer_id).map(|d| std::mem::replace(d, view.clone())); let old_finalized_number = old_view.map(|v| v.finalized_number).unwrap_or(0); // we want to prune every block known_by peer up to (including) view.finalized_number @@ -958,7 +955,6 @@ impl State { let topology = self.topologies.get_topology(entry.session); let local = source == MessageSource::Local; - let required_routing = topology.map_or(RequiredRouting::PendingTopology, |t| { 
t.required_routing_for(validator_index, local) }); diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index f23be9895408..3358304b062c 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -15,11 +15,8 @@ // along with Polkadot. If not, see . use super::*; -use sp_authority_discovery::AuthorityPair as AuthorityDiscoveryPair; -use sp_core::crypto::Pair as PairT; use assert_matches::assert_matches; use futures::{executor, future, Future}; -use polkadot_primitives::v2::{AuthorityDiscoveryId, BlakeTwo256, HashT}; use polkadot_node_network_protocol::{our_view, view, ObservedRole}; use polkadot_node_primitives::approval::{ AssignmentCertKind, VRFOutput, VRFProof, RELAY_VRF_MODULO_CONTEXT, @@ -27,8 +24,11 @@ use polkadot_node_primitives::approval::{ use polkadot_node_subsystem::messages::{AllMessages, ApprovalCheckError}; use polkadot_node_subsystem_test_helpers as test_helpers; use polkadot_node_subsystem_util::TimeoutExt as _; -use std::time::Duration; +use polkadot_primitives::v2::{AuthorityDiscoveryId, BlakeTwo256, HashT}; use rand::SeedableRng; +use sp_authority_discovery::AuthorityPair as AuthorityDiscoveryPair; +use sp_core::crypto::Pair as PairT; +use std::time::Duration; type VirtualOverseer = test_helpers::TestSubsystemContextHandle; @@ -106,12 +106,14 @@ async fn overseer_recv(overseer: &mut VirtualOverseer) -> AllMessages { } fn make_peers_and_authority_ids(n: usize) -> Vec<(PeerId, AuthorityDiscoveryId)> { - (0..n).map(|_| { - let peer_id = PeerId::random(); - let authority_id = AuthorityDiscoveryPair::generate().0.public(); - - (peer_id, authority_id) - }).collect() + (0..n) + .map(|_| { + let peer_id = PeerId::random(); + let authority_id = AuthorityDiscoveryPair::generate().0.public(); + + (peer_id, authority_id) + }) + .collect() } fn make_gossip_topology( @@ -132,7 +134,7 @@ fn make_gossip_topology( 
network_bridge_event::TopologyPeerInfo { peer_ids: vec![all_peers[i].0.clone()], validator_index: ValidatorIndex::from(i as u32), - } + }, ); } @@ -142,7 +144,7 @@ fn make_gossip_topology( network_bridge_event::TopologyPeerInfo { peer_ids: vec![all_peers[i].0.clone()], validator_index: ValidatorIndex::from(i as u32), - } + }, ); } @@ -158,7 +160,8 @@ async fn setup_gossip_topology( ApprovalDistributionMessage::NetworkBridgeUpdateV1(NetworkBridgeEvent::NewGossipTopology( gossip_topology, )), - ).await; + ) + .await; } async fn setup_peer_with_view( @@ -1148,13 +1151,9 @@ fn propagates_locally_generated_assignment_to_both_dimensions() { // Set up a gossip topology. setup_gossip_topology( overseer, - make_gossip_topology( - 1, - &peers, - &[0, 10, 20, 30], - &[50, 51, 52, 53], - ), - ).await; + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; let expected_indices = [ // Both dimensions in the gossip topology @@ -1257,13 +1256,9 @@ fn propagates_assignments_along_unshared_dimension() { // Set up a gossip topology. setup_gossip_topology( overseer, - make_gossip_topology( - 1, - &peers, - &[0, 10, 20, 30], - &[50, 51, 52, 53], - ), - ).await; + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; // new block `hash_a` with 1 candidates let meta = BlockApprovalMeta { @@ -1404,13 +1399,9 @@ fn propagates_to_required_after_connect() { // Set up a gossip topology. setup_gossip_topology( overseer, - make_gossip_topology( - 1, - &peers, - &[0, 10, 20, 30], - &[50, 51, 52, 53], - ), - ).await; + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; let expected_indices = [ // Both dimensions in the gossip topology, minus omitted. @@ -1611,18 +1602,13 @@ fn sends_to_more_peers_after_getting_topology() { // Set up a gossip topology. 
setup_gossip_topology( overseer, - make_gossip_topology( - 1, - &peers, - &[0, 10, 20, 30], - &[50, 51, 52, 53], - ), - ).await; + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; let mut expected_indices_assignments = vec![0, 10, 20, 30, 50, 51, 52, 53]; let mut expected_indices_approvals = vec![0, 10, 20, 30, 50, 51, 52, 53]; - for _ in 0..expected_indices_assignments.len() { assert_matches!( overseer_recv(overseer).await, @@ -1715,13 +1701,9 @@ fn originator_aggression_l1() { // Set up a gossip topology. setup_gossip_topology( overseer, - make_gossip_topology( - 1, - &peers, - &[0, 10, 20, 30], - &[50, 51, 52, 53], - ), - ).await; + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; overseer_send( overseer, @@ -1781,7 +1763,8 @@ fn originator_aggression_l1() { } } - let unsent_indices = (0..peers.len()).filter(|i| !prev_sent_indices.contains(&i)).collect::>(); + let unsent_indices = + (0..peers.len()).filter(|i| !prev_sent_indices.contains(&i)).collect::>(); for _ in 0..unsent_indices.len() { assert_matches!( @@ -1866,13 +1849,9 @@ fn non_originator_aggression_l1() { // Set up a gossip topology. setup_gossip_topology( overseer, - make_gossip_topology( - 1, - &peers, - &[0, 10, 20, 30], - &[50, 51, 52, 53], - ), - ).await; + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; let assignments = vec![(cert.clone(), candidate_index)]; let msg = protocol_v1::ApprovalDistributionMessage::Assignments(assignments.clone()); @@ -1970,13 +1949,9 @@ fn non_originator_aggression_l2() { // Set up a gossip topology. 
setup_gossip_topology( overseer, - make_gossip_topology( - 1, - &peers, - &[0, 10, 20, 30], - &[50, 51, 52, 53], - ), - ).await; + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; let assignments = vec![(cert.clone(), candidate_index)]; let msg = protocol_v1::ApprovalDistributionMessage::Assignments(assignments.clone()); @@ -2063,7 +2038,8 @@ fn non_originator_aggression_l2() { let unsent_indices = [0, 10, 20, 30, 50, 51, 52, 53] .iter() .cloned() - .filter(|i| !prev_sent_indices.contains(&i)).collect::>(); + .filter(|i| !prev_sent_indices.contains(&i)) + .collect::>(); for _ in 0..unsent_indices.len() { assert_matches!( From 32a0d1e866e716a0639f72fa7e95f4f0ea7afe78 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 18:46:30 +0200 Subject: [PATCH 55/72] ~spellcheck --- node/network/approval-distribution/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 034b1f1a04fb..ed351d9fc709 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -78,7 +78,7 @@ const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS /// Aggression level 1: all validators send all their own messages to all peers. const AGGRESSION_L1_THRESHOLD: BlockNumber = 10; -/// Aggression level 2: L1 + all validators send all messages to all XY peers +/// Aggression level 2: level 1 + all validators send all messages to all peers in the X and Y dimensions. const AGGRESSION_L2_THRESHOLD: BlockNumber = 25; /// The Approval Distribution subsystem. @@ -220,7 +220,7 @@ struct State { /// Peer data is partially stored here, and partially inline within the [`BlockEntry`]s peer_views: HashMap, - /// Topologies for various different sessions. + /// Keeps a topology for various different sessions. 
topologies: SessionTopologies, /// Tracks recently finalized blocks. @@ -341,7 +341,7 @@ enum RequiredRouting { GridX, /// Propagate to all peers sharing the Y dimension of the grid. GridY, - /// No required progation. + /// No required propagation. None, } From 031a17a012ca266f5efe85d7a826a3d638911882 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 30 Mar 2022 18:59:01 +0200 Subject: [PATCH 56/72] fix statement-distribution tests --- Cargo.lock | 1 + .../network/statement-distribution/Cargo.toml | 1 + .../statement-distribution/src/tests.rs | 31 ++++++++++++++++--- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12094408eb8e..ef45a9f11a6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7614,6 +7614,7 @@ dependencies = [ "sc-keystore", "sc-network", "sp-application-crypto", + "sp-authority-discovery", "sp-core", "sp-keyring", "sp-keystore", diff --git a/node/network/statement-distribution/Cargo.toml b/node/network/statement-distribution/Cargo.toml index 83513950a758..62c1fd27d229 100644 --- a/node/network/statement-distribution/Cargo.toml +++ b/node/network/statement-distribution/Cargo.toml @@ -24,6 +24,7 @@ fatality = "0.0.6" [dev-dependencies] polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" } assert_matches = "1.4.0" +sp-authority-discovery = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/statement-distribution/src/tests.rs b/node/network/statement-distribution/src/tests.rs index 9e91ac5ba650..c20aa3dccece 100644 --- a/node/network/statement-distribution/src/tests.rs +++ b/node/network/statement-distribution/src/tests.rs @@ -34,11 +34,12 @@ use 
polkadot_primitives_test_helpers::{ }; use polkadot_subsystem::{ jaeger, - messages::{RuntimeApiMessage, RuntimeApiRequest}, + messages::{network_bridge_event, RuntimeApiMessage, RuntimeApiRequest}, ActivatedLeaf, LeafStatus, }; use sc_keystore::LocalKeystore; use sp_application_crypto::{sr25519::Pair, AppKey, Pair as TraitPair}; +use sp_authority_discovery::AuthorityPair; use sp_keyring::Sr25519Keyring; use sp_keystore::{CryptoStore, SyncCryptoStore, SyncCryptoStorePtr}; use std::{iter::FromIterator as _, sync::Arc, time::Duration}; @@ -1964,12 +1965,34 @@ fn handle_multiple_seconded_statements() { // Explicitly add all `lucky` peers to the gossip peers to ensure that neither `peerA` not `peerB` // receive statements + let gossip_topology = { + let mut t = network_bridge_event::NewGossipTopology { + session: 1, + our_neighbors_x: HashMap::new(), + our_neighbors_y: HashMap::new(), + }; + + // This is relying on the fact that statement distribution + // just extracts the peer IDs from this struct and does nothing else + // with it. 
+ for (i, peer) in lucky_peers.iter().enumerate() { + let authority_id = AuthorityPair::generate().0.public(); + t.our_neighbors_x.insert( + authority_id, + network_bridge_event::TopologyPeerInfo { + peer_ids: vec![peer.clone()], + validator_index: (i as u32).into(), + }, + ); + } + + t + }; + handle .send(FromOverseer::Communication { msg: StatementDistributionMessage::NetworkBridgeUpdateV1( - NetworkBridgeEvent::NewGossipTopology( - lucky_peers.iter().cloned().collect::>(), - ), + NetworkBridgeEvent::NewGossipTopology(gossip_topology), ), }) .await; From c168e312b259f6de512f4e713119aae897c36b82 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Thu, 31 Mar 2022 13:59:39 +0200 Subject: [PATCH 57/72] fix flaky test --- node/network/approval-distribution/src/tests.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 3358304b062c..46d6c7b6ce30 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1570,6 +1570,7 @@ fn sends_to_more_peers_after_getting_topology() { let assignments = vec![(cert.clone(), candidate_index)]; let approvals = vec![approval.clone()]; + let mut expected_indices = vec![0, 10, 20, 30, 50, 51, 52, 53]; let assignment_sent_peers = assert_matches!( overseer_recv(overseer).await, AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( @@ -1580,6 +1581,13 @@ fn sends_to_more_peers_after_getting_topology() { )) => { // Only sends to random peers. assert_eq!(sent_peers.len(), 4); + for peer in &sent_peers { + let i = peers.iter().position(|p| peer == &p.0).unwrap(); + // Random gossip before topology can send to topology-targeted peers. + // Remove them from the expected indices so we don't expect + // them to get the messages again after the assignment. 
+ expected_indices.retain(|&i2| i2 != i); + } assert_eq!(sent_assignments, assignments); sent_peers } @@ -1606,8 +1614,8 @@ fn sends_to_more_peers_after_getting_topology() { ) .await; - let mut expected_indices_assignments = vec![0, 10, 20, 30, 50, 51, 52, 53]; - let mut expected_indices_approvals = vec![0, 10, 20, 30, 50, 51, 52, 53]; + let mut expected_indices_assignments = expected_indices.clone(); + let mut expected_indices_approvals = expected_indices.clone(); for _ in 0..expected_indices_assignments.len() { assert_matches!( From dac4eef125df8109b9f143d13c6f5a1cfdc12e57 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Thu, 31 Mar 2022 22:41:55 +0200 Subject: [PATCH 58/72] fix metrics typo --- node/network/approval-distribution/src/metrics.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/network/approval-distribution/src/metrics.rs b/node/network/approval-distribution/src/metrics.rs index d7a361879e8e..c0887b25f7f4 100644 --- a/node/network/approval-distribution/src/metrics.rs +++ b/node/network/approval-distribution/src/metrics.rs @@ -111,14 +111,14 @@ impl MetricsTrait for Metrics { )?, aggression_l1_messages_total: prometheus::register( prometheus::Counter::new( - "polkadot_parachain_approval_disttribution_aggression_l1_messages_total", + "polkadot_parachain_approval_distribution_aggression_l1_messages_total", "Number of messages in approval distribution for which aggression L1 has been triggered", )?, registry, )?, aggression_l2_messages_total: prometheus::register( prometheus::Counter::new( - "polkadot_parachain_approval_disttribution_aggression_l2_messages_total", + "polkadot_parachain_approval_distribution_aggression_l2_messages_total", "Number of messages in approval distribution for which aggression L2 has been triggered", )?, registry, From 8edad526c16d5577f83c225f65c14e91cb937ae1 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Thu, 31 Mar 2022 23:46:58 +0200 Subject: [PATCH 59/72] re-send periodically --- 
node/network/approval-distribution/src/lib.rs | 88 ++++++++++++------- .../approval-distribution/src/tests.rs | 29 ++++-- 2 files changed, 79 insertions(+), 38 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index ed351d9fc709..b893786ca6d3 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -69,18 +69,6 @@ const RANDOM_CIRCULATION: usize = 4; /// (i.e. those who get a block before others). const RANDOM_SAMPLE_RATE: usize = polkadot_node_subsystem_util::MIN_GOSSIP_PEERS; -// A note on aggression thresholds: changes in propagation apply only to blocks which are the -// _direct descendants_ of the finalized block which are older than the given threshold, -// not to all blocks older than the threshold. Most likely, a few assignments struggle to -// be propagated in a single block and this holds up all of its descendants blocks. -// Accordingly, we only step on the gas for the block which is most obviously holding up finality. - -/// Aggression level 1: all validators send all their own messages to all peers. -const AGGRESSION_L1_THRESHOLD: BlockNumber = 10; - -/// Aggression level 2: level 1 + all validators send all messages to all peers in the X and Y dimensions. -const AGGRESSION_L2_THRESHOLD: BlockNumber = 25; - /// The Approval Distribution subsystem. pub struct ApprovalDistribution { metrics: Metrics, @@ -117,17 +105,15 @@ struct SessionTopology { } impl SessionTopology { - fn required_routing_for( - &self, - validator_index: ValidatorIndex, - local: bool, - ) -> RequiredRouting { + // Given the originator of a message, indicates the part of the topology + // we're meant to sent the message to. 
+ fn required_routing_for(&self, originator: ValidatorIndex, local: bool) -> RequiredRouting { if local { return RequiredRouting::GridXY } - let grid_x = self.validator_indices_x.contains(&validator_index); - let grid_y = self.validator_indices_y.contains(&validator_index); + let grid_x = self.validator_indices_x.contains(&originator); + let grid_y = self.validator_indices_y.contains(&originator); match (grid_x, grid_y) { (false, false) => RequiredRouting::None, @@ -199,6 +185,32 @@ impl SessionTopologies { } } +// A note on aggression thresholds: changes in propagation apply only to blocks which are the +// _direct descendants_ of the finalized block which are older than the given threshold, +// not to all blocks older than the threshold. Most likely, a few assignments struggle to +// be propagated in a single block and this holds up all of its descendants blocks. +// Accordingly, we only step on the gas for the block which is most obviously holding up finality. +#[derive(Clone)] +struct AggressionConfig { + /// Aggression level 1: all validators send all their own messages to all peers. + l1_threshold: Option, + /// Aggression level 2: level 1 + all validators send all messages to all peers in the X and Y dimensions. + l2_threshold: Option, + /// How often to re-send messages to all targeted recipients. + /// This applies to all unfinalized blocks. + resend_unfinalized_period: Option, +} + +impl Default for AggressionConfig { + fn default() -> Self { + AggressionConfig { + l1_threshold: Some(10), + l2_threshold: Some(25), + resend_unfinalized_period: Some(5), + } + } +} + /// The [`State`] struct is responsible for tracking the overall state of the subsystem. /// /// It tracks metadata about our view of the unfinalized chain, @@ -225,6 +237,9 @@ struct State { /// Tracks recently finalized blocks. recent_outdated_blocks: RecentlyOutdated, + + /// Config for aggression. 
+ aggression_config: AggressionConfig, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -590,7 +605,8 @@ impl State { } } - self.enable_aggression(ctx, metrics).await; + // 'true' means trigger re-send of messages in old blocks. + self.enable_aggression(ctx, true, metrics).await; } async fn handle_new_session_topology( @@ -781,7 +797,8 @@ impl State { // If a block was finalized, this means we may need to move our aggression // forward to the now oldest block(s). - self.enable_aggression(ctx, metrics).await; + // 'false' means don't trigger re-send of messages in old blocks. + self.enable_aggression(ctx, false, metrics).await; } async fn import_and_circulate_assignment( @@ -1440,10 +1457,12 @@ impl State { &mut self, ctx: &mut (impl SubsystemContext + overseer::SubsystemContext), + do_resend: bool, metrics: &Metrics, ) { let min_age = self.blocks_by_number.iter().next().map(|(num, _)| num); let max_age = self.blocks_by_number.iter().rev().next().map(|(num, _)| num); + let config = self.aggression_config.clone(); let (min_age, max_age) = match (min_age, max_age) { (Some(min), Some(max)) => (min, max), @@ -1451,7 +1470,8 @@ impl State { }; let diff = max_age - min_age; - if diff < AGGRESSION_L1_THRESHOLD { + + if self.aggression_config.l1_threshold.map_or(true, |t| diff < t) { return } @@ -1460,6 +1480,17 @@ impl State { &mut self.blocks, &self.topologies, |block_entry| { + if do_resend && + config.resend_unfinalized_period.as_ref().map_or(false, |p| diff % p == 0) + { + // Retry sending to all peers. + for (_, knowledge) in block_entry.known_by.iter_mut() { + knowledge.sent = Default::default(); + } + + return true + } + // Ramp up aggression only for the very oldest block(s). // Approval voting can get stuck on a single block preventing // its descendants from being finalized. 
Waste minimal bandwidth @@ -1478,7 +1509,7 @@ impl State { return } - if diff >= AGGRESSION_L1_THRESHOLD { + if config.l1_threshold.as_ref().map_or(false, |t| &diff >= t) { // Message originator sends to everyone. if local && *required_routing != RequiredRouting::All { metrics.on_aggression_l1(); @@ -1486,7 +1517,7 @@ impl State { } } - if diff >= AGGRESSION_L2_THRESHOLD { + if config.l2_threshold.as_ref().map_or(false, |t| &diff >= t) { // Message originator sends to everyone. Everyone else sends to XY. if !local && *required_routing != RequiredRouting::GridXY { metrics.on_aggression_l2(); @@ -1516,7 +1547,7 @@ async fn adjust_required_routing_and_propagate( + overseer::SubsystemContext), blocks: &mut HashMap, topologies: &SessionTopologies, - block_filter: impl Fn(&BlockEntry) -> bool, + block_filter: impl Fn(&mut BlockEntry) -> bool, routing_modifier: impl Fn(&mut RequiredRouting, bool, &ValidatorIndex), ) { let mut peer_assignments = HashMap::new(); @@ -1525,7 +1556,7 @@ async fn adjust_required_routing_and_propagate( // Iterate all blocks in the session, producing payloads // for each connected peer. 
for (block_hash, block_entry) in blocks { - if !block_filter(&block_entry) { + if !block_filter(block_entry) { continue } @@ -1536,12 +1567,9 @@ async fn adjust_required_routing_and_propagate( .enumerate() .flat_map(|(c_i, c)| c.messages.iter_mut().map(move |(k, v)| (c_i as _, k, v))) { - let prev_routing = message_state.required_routing; routing_modifier(&mut message_state.required_routing, message_state.local, validator); - if message_state.required_routing.is_empty() || - message_state.required_routing == prev_routing - { + if message_state.required_routing.is_empty() { continue } diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 46d6c7b6ce30..964fc7751632 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -1673,7 +1673,11 @@ fn originator_aggression_l1() { let peers = make_peers_and_authority_ids(100); - let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let mut state = State::default(); + state.aggression_config.resend_unfinalized_period = None; + let aggression_l1_threshold = state.aggression_config.l1_threshold.clone().unwrap(); + + let _ = test_harness(state, |mut virtual_overseer| async move { let overseer = &mut virtual_overseer; // Connect all peers except omitted. @@ -1752,7 +1756,7 @@ fn originator_aggression_l1() { // Add blocks until aggression L1 is triggered. 
{ let mut parent_hash = hash; - for level in 0..AGGRESSION_L1_THRESHOLD { + for level in 0..aggression_l1_threshold { let number = 1 + level + 1; // first block had number 1 let hash = BlakeTwo256::hash_of(&(parent_hash, number)); let meta = BlockApprovalMeta { @@ -1827,7 +1831,11 @@ fn non_originator_aggression_l1() { let peers = make_peers_and_authority_ids(100); - let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let mut state = State::default(); + state.aggression_config.resend_unfinalized_period = None; + let aggression_l1_threshold = state.aggression_config.l1_threshold.clone().unwrap(); + + let _ = test_harness(state, |mut virtual_overseer| async move { let overseer = &mut virtual_overseer; // Connect all peers except omitted. @@ -1893,7 +1901,7 @@ fn non_originator_aggression_l1() { // Add blocks until aggression L1 is triggered. { let mut parent_hash = hash; - for level in 0..AGGRESSION_L1_THRESHOLD { + for level in 0..aggression_l1_threshold { let number = 1 + level + 1; // first block had number 1 let hash = BlakeTwo256::hash_of(&(parent_hash, number)); let meta = BlockApprovalMeta { @@ -1927,7 +1935,12 @@ fn non_originator_aggression_l2() { let peers = make_peers_and_authority_ids(100); - let _ = test_harness(State::default(), |mut virtual_overseer| async move { + let mut state = State::default(); + state.aggression_config.resend_unfinalized_period = None; + + let aggression_l1_threshold = state.aggression_config.l1_threshold.clone().unwrap(); + let aggression_l2_threshold = state.aggression_config.l2_threshold.clone().unwrap(); + let _ = test_harness(state, |mut virtual_overseer| async move { let overseer = &mut virtual_overseer; // Connect all peers except omitted. @@ -1997,7 +2010,7 @@ fn non_originator_aggression_l2() { // Add blocks until aggression L1 is triggered. 
let chain_head = { let mut parent_hash = hash; - for level in 0..AGGRESSION_L1_THRESHOLD { + for level in 0..aggression_l1_threshold { let number = 1 + level + 1; // first block had number 1 let hash = BlakeTwo256::hash_of(&(parent_hash, number)); let meta = BlockApprovalMeta { @@ -2023,8 +2036,8 @@ fn non_originator_aggression_l2() { // Add blocks until aggression L2 is triggered. { let mut parent_hash = chain_head; - for level in 0..AGGRESSION_L2_THRESHOLD - AGGRESSION_L1_THRESHOLD { - let number = AGGRESSION_L1_THRESHOLD + level + 1 + 1; // first block had number 1 + for level in 0..aggression_l2_threshold - aggression_l1_threshold { + let number = aggression_l1_threshold + level + 1 + 1; // first block had number 1 let hash = BlakeTwo256::hash_of(&(parent_hash, number)); let meta = BlockApprovalMeta { hash, From 3d8f73a5090450cca601b7639a6c97b0f5690e1a Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 1 Apr 2022 00:04:23 +0200 Subject: [PATCH 60/72] test resending --- node/network/approval-distribution/src/lib.rs | 22 ++- .../approval-distribution/src/tests.rs | 141 ++++++++++++++++++ 2 files changed, 160 insertions(+), 3 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index b893786ca6d3..7248c14972f3 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -201,6 +201,18 @@ struct AggressionConfig { resend_unfinalized_period: Option, } +impl AggressionConfig { + fn is_age_relevant(&self, block_age: BlockNumber) -> bool { + if let Some(t) = self.l1_threshold { + block_age >= t + } else if let Some(t) = self.resend_unfinalized_period { + block_age > 0 && block_age % t == 0 + } else { + false + } + } +} + impl Default for AggressionConfig { fn default() -> Self { AggressionConfig { @@ -1470,8 +1482,7 @@ impl State { }; let diff = max_age - min_age; - - if self.aggression_config.l1_threshold.map_or(true, |t| diff < t) { + if 
!self.aggression_config.is_age_relevant(diff) { return } @@ -1480,8 +1491,13 @@ impl State { &mut self.blocks, &self.topologies, |block_entry| { + let block_age = max_age - block_entry.number; + if do_resend && - config.resend_unfinalized_period.as_ref().map_or(false, |p| diff % p == 0) + config + .resend_unfinalized_period + .as_ref() + .map_or(false, |p| block_age > 0 && block_age % p == 0) { // Retry sending to all peers. for (_, knowledge) in block_entry.known_by.iter_mut() { diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 964fc7751632..39d4b61a6a03 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -2086,3 +2086,144 @@ fn non_originator_aggression_l2() { virtual_overseer }); } + +// Tests that messages propagate to the unshared dimension. +#[test] +fn resends_messages_periodically() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(100); + + let mut state = State::default(); + state.aggression_config.l1_threshold = None; + state.aggression_config.l2_threshold = None; + state.aggression_config.resend_unfinalized_period = Some(2); + let _ = test_harness(state, |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + + // Connect all peers. + for (peer, _) in &peers { + setup_peer_with_view(overseer, peer, view![hash]).await; + } + + // Set up a gossip topology. 
+ setup_gossip_topology( + overseer, + make_gossip_topology(1, &peers, &[0, 10, 20, 30], &[50, 51, 52, 53]), + ) + .await; + + // new block `hash_a` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + let validator_index = ValidatorIndex(0); + let candidate_index = 0u32; + + // import an assignment and approval locally. + let cert = fake_assignment_cert(hash, validator_index); + let assignments = vec![(cert.clone(), candidate_index)]; + + { + let msg = protocol_v1::ApprovalDistributionMessage::Assignments(assignments.clone()); + + // Issuer of the message is important, not the peer we receive from. + // 99 deliberately chosen because it's not in X or Y. + send_message_from_peer(overseer, &peers[99].0, msg).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::ApprovalVoting(ApprovalVotingMessage::CheckAndImportAssignment( + _, + _, + tx, + )) => { + tx.send(AssignmentCheckResult::Accepted).unwrap(); + } + ); + expect_reputation_change(overseer, &peers[99].0, BENEFIT_VALID_MESSAGE_FIRST).await; + + let expected_y = [50, 51, 52, 53]; + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + assert_eq!(sent_peers.len(), expected_y.len() + 4); + for &i in &expected_y { + assert!( + sent_peers.contains(&peers[i].0), + "Message not sent to expected peer {}", + i, + ); + } + assert_eq!(sent_assignments, assignments); + } + ); + }; + + let mut number = 1; + for _ in 0..10 { + // Add blocks until resend is done. 
+ { + let mut parent_hash = hash; + for level in 0..2 { + number = number + 1; + let hash = BlakeTwo256::hash_of(&(parent_hash, number)); + let meta = BlockApprovalMeta { + hash, + parent_hash, + number, + candidates: vec![], + slot: (level as u64).into(), + session: 1, + }; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + parent_hash = hash; + } + } + + let mut expected_y = vec![50, 51, 52, 53]; + + // Expect messages sent only to topology peers, one by one. + for _ in 0..expected_y.len() { + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage( + sent_peers, + protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Assignments(sent_assignments) + ) + )) => { + assert_eq!(sent_peers.len(), 1); + let expected_pos = expected_y.iter() + .position(|&i| &peers[i].0 == &sent_peers[0]) + .unwrap(); + + expected_y.remove(expected_pos); + assert_eq!(sent_assignments, assignments); + } + ); + } + } + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }); +} From 28ff301bd024881d5c697f40762be3d0bc546c51 Mon Sep 17 00:00:00 2001 From: asynchronous rob Date: Fri, 1 Apr 2022 09:44:58 -0500 Subject: [PATCH 61/72] typo Co-authored-by: Bernhard Schuster --- node/network/approval-distribution/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 7248c14972f3..cdd7fcc16b13 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -106,7 +106,7 @@ struct SessionTopology { impl SessionTopology { // Given the originator of a message, indicates the part of the topology - // we're meant to sent the message to. + // we're meant to send the message to. 
fn required_routing_for(&self, originator: ValidatorIndex, local: bool) -> RequiredRouting { if local { return RequiredRouting::GridXY From 649e56d23dcc7d3515373f94829307cb578ad9ed Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 1 Apr 2022 22:04:31 +0200 Subject: [PATCH 62/72] add more metrics about apd messages --- node/network/approval-distribution/src/lib.rs | 70 ++++++- .../approval-distribution/src/metrics.rs | 176 ++++++++++++++++++ 2 files changed, 241 insertions(+), 5 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index cdd7fcc16b13..a9b11527d172 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -469,7 +469,7 @@ impl State { }, NetworkBridgeEvent::NewGossipTopology(topology) => { let session = topology.session; - self.handle_new_session_topology(ctx, session, SessionTopology::from(topology)) + self.handle_new_session_topology(ctx, metrics, session, SessionTopology::from(topology)) .await; }, NetworkBridgeEvent::PeerViewChange(peer_id, view) => { @@ -625,13 +625,14 @@ impl State { &mut self, ctx: &mut (impl SubsystemContext + overseer::SubsystemContext), + metrics: &Metrics, session: SessionIndex, topology: SessionTopology, ) { self.topologies.insert_topology(session, topology); let topology = self.topologies.get_topology(session).expect("just inserted above; qed"); - adjust_required_routing_and_propagate( + let new_session_topology_stats = adjust_required_routing_and_propagate( ctx, &mut self.blocks, &self.topologies, @@ -643,6 +644,8 @@ impl State { }, ) .await; + + metrics.note_new_topology_stats(new_session_topology_stats); } async fn process_incoming_peer_message( @@ -1052,6 +1055,8 @@ impl State { } } + let mut stats = SentMessagesStats::default(); + if !peers.is_empty() { gum::trace!( target: LOG_TARGET, @@ -1062,6 +1067,8 @@ impl State { "Sending an assignment to peers", ); + stats.assignments += 
peers.len(); + stats.assignment_packets += peers.len(); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( peers, protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1070,6 +1077,8 @@ impl State { )) .await; } + + metrics.note_basic_circulation_stats(stats); } async fn import_and_circulate_approval( @@ -1313,6 +1322,8 @@ impl State { } } + let mut stats = SentMessagesStats::default(); + if !peers.is_empty() { let approvals = vec![vote]; gum::trace!( @@ -1324,6 +1335,8 @@ impl State { "Sending an approval to peers", ); + stats.assignments += peers.len(); + stats.assignment_packets += peers.len(); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( peers, protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1332,6 +1345,8 @@ impl State { )) .await; } + + metrics.note_basic_circulation_stats(stats); } async fn unify_with_peer( @@ -1444,7 +1459,10 @@ impl State { } } + let mut stats = SentMessagesStats::default(); + if !assignments_to_send.is_empty() { + stats.note_assignments_packet(assignments_to_send.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer_id.clone()], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1455,6 +1473,7 @@ impl State { } if !approvals_to_send.is_empty() { + stats.note_approvals_packet(approvals_to_send.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer_id.clone()], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1463,6 +1482,8 @@ impl State { )) .await; } + + metrics.note_unify_with_peer_stats(stats); } async fn enable_aggression( @@ -1486,7 +1507,7 @@ impl State { return } - adjust_required_routing_and_propagate( + let resend_stats = adjust_required_routing_and_propagate( ctx, &mut self.blocks, &self.topologies, @@ -1504,9 +1525,19 @@ impl State { knowledge.sent = Default::default(); } - return true + true + } else { + false } + }, + |_, _, _| { } + ).await; + let aggression_stats = adjust_required_routing_and_propagate( + ctx, + 
&mut self.blocks, + &self.topologies, + |block_entry| { // Ramp up aggression only for the very oldest block(s). // Approval voting can get stuck on a single block preventing // its descendants from being finalized. Waste minimal bandwidth @@ -1543,6 +1574,29 @@ impl State { }, ) .await; + + metrics.note_resend_stats(resend_stats); + metrics.note_aggression_stats(aggression_stats); + } +} + +#[derive(Default)] +struct SentMessagesStats { + assignments: usize, + approvals: usize, + assignment_packets: usize, + approval_packets: usize, +} + +impl SentMessagesStats { + fn note_assignments_packet(&mut self, assignments: usize) { + self.assignment_packets += 1; + self.assignments += assignments; + } + + fn note_approvals_packet(&mut self, approvals: usize) { + self.approval_packets += 1; + self.approvals += approvals; } } @@ -1565,7 +1619,9 @@ async fn adjust_required_routing_and_propagate( topologies: &SessionTopologies, block_filter: impl Fn(&mut BlockEntry) -> bool, routing_modifier: impl Fn(&mut RequiredRouting, bool, &ValidatorIndex), -) { +) -> SentMessagesStats { + let mut stats = SentMessagesStats::default(); + let mut peer_assignments = HashMap::new(); let mut peer_approvals = HashMap::new(); @@ -1645,6 +1701,7 @@ async fn adjust_required_routing_and_propagate( // Send messages in accumulated packets, assignments preceding approvals. 
for (peer, assignments_packet) in peer_assignments { + stats.note_assignments_packet(assignments_packet.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1655,6 +1712,7 @@ async fn adjust_required_routing_and_propagate( } for (peer, approvals_packet) in peer_approvals { + stats.note_approvals_packet(approvals_packet.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1663,6 +1721,8 @@ async fn adjust_required_routing_and_propagate( )) .await; } + + stats } /// Modify the reputation of a peer based on its behavior. diff --git a/node/network/approval-distribution/src/metrics.rs b/node/network/approval-distribution/src/metrics.rs index c0887b25f7f4..e41f4a850cf9 100644 --- a/node/network/approval-distribution/src/metrics.rs +++ b/node/network/approval-distribution/src/metrics.rs @@ -15,6 +15,7 @@ // along with Polkadot. If not, see . use polkadot_node_subsystem_util::metrics::{prometheus, Metrics as MetricsTrait}; +use super::SentMessagesStats; /// Approval Distribution metrics. #[derive(Default, Clone)] @@ -31,6 +32,19 @@ struct MetricsInner { time_unify_with_peer: prometheus::Histogram, time_import_pending_now_known: prometheus::Histogram, time_awaiting_approval_voting: prometheus::Histogram, + + // TODO [now]: these metrics are (for the most part) temporary for figuring + // out why there are so many messages. 
+ basic_circulation_messages_total: prometheus::CounterVec, + basic_circulation_packets_total: prometheus::CounterVec, + unify_with_peer_messages_total: prometheus::CounterVec, + unify_with_peer_packets_total: prometheus::CounterVec, + resend_messages_total: prometheus::CounterVec, + resend_packets_total: prometheus::CounterVec, + aggression_messages_total: prometheus::CounterVec, + aggression_packets_total: prometheus::CounterVec, + new_topology_messages_total: prometheus::CounterVec, + new_topology_packets_total: prometheus::CounterVec, } impl Metrics { @@ -83,6 +97,68 @@ impl Metrics { metrics.aggression_l2_messages_total.inc(); } } + + pub(crate) fn note_basic_circulation_stats(&self, stats: SentMessagesStats) { + if let Some(metrics) = &self.0 { + note_sent_message_stats( + stats, + &metrics.basic_circulation_messages_total, + &metrics.basic_circulation_packets_total, + ) + } + } + + pub(crate) fn note_unify_with_peer_stats(&self, stats: SentMessagesStats) { + if let Some(metrics) = &self.0 { + note_sent_message_stats( + stats, + &metrics.unify_with_peer_messages_total, + &metrics.unify_with_peer_packets_total, + ) + } + } + + pub(crate) fn note_resend_stats(&self, stats: SentMessagesStats) { + if let Some(metrics) = &self.0 { + note_sent_message_stats( + stats, + &metrics.resend_messages_total, + &metrics.resend_packets_total, + ) + } + } + + pub(crate) fn note_aggression_stats(&self, stats: SentMessagesStats) { + if let Some(metrics) = &self.0 { + note_sent_message_stats( + stats, + &metrics.aggression_messages_total, + &metrics.aggression_packets_total, + ) + } + } + + pub(crate) fn note_new_topology_stats(&self, stats: SentMessagesStats) { + if let Some(metrics) = &self.0 { + note_sent_message_stats( + stats, + &metrics.new_topology_messages_total, + &metrics.new_topology_packets_total, + ) + } + } +} + +fn note_sent_message_stats( + stats: SentMessagesStats, + message_counters: &prometheus::CounterVec, + packet_counters: &prometheus::CounterVec, +) { + 
message_counters.with_label_values(&["assignments"]).inc_by(stats.assignments as u64); + message_counters.with_label_values(&["approvals"]).inc_by(stats.approvals as u64); + + packet_counters.with_label_values(&["assignments"]).inc_by(stats.assignment_packets as u64); + packet_counters.with_label_values(&["approvals"]).inc_by(stats.approval_packets as u64); } impl MetricsTrait for Metrics { @@ -144,6 +220,106 @@ impl MetricsTrait for Metrics { ))?, registry, )?, + basic_circulation_messages_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_basic_circulation_messages_total", + "Number of assignments and approvals sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + basic_circulation_packets_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_basic_circulation_packets_total", + "Number of packets sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + unify_with_peer_messages_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_unify_with_peer_messages_total", + "Number of assignments and approvals sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + unify_with_peer_packets_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_unify_with_peer_packets_total", + "Number of packets sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + resend_messages_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_resend_messages_total", + "Number of assignments and approvals sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + resend_packets_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_resend_packets_total", + 
"Number of packets sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + aggression_messages_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_aggression_messages_total", + "Number of assignments and approvals sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + aggression_packets_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_aggression_packets_total", + "Number of packets sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + new_topology_messages_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_new_topology_messages_total", + "Number of assignments and approvals sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, + new_topology_packets_total: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_approval_new_topology_packets_total", + "Number of packets sent by basic circulation", + ), + &["kind"] + )?, + registry, + )?, }; Ok(Metrics(Some(metrics))) } From 1522b79772d18450cbed154e9f597fd4705365a4 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 10 Apr 2022 22:17:07 +0200 Subject: [PATCH 63/72] add back unify_with_peer logs --- node/network/approval-distribution/src/lib.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index a9b11527d172..ed5cda95f4aa 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -1462,6 +1462,13 @@ impl State { let mut stats = SentMessagesStats::default(); if !assignments_to_send.is_empty() { + gum::trace!( + target: LOG_TARGET, + ?peer_id, + num = assignments_to_send.len(), + "Sending assignments to unified peer", + ); + 
stats.note_assignments_packet(assignments_to_send.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer_id.clone()], @@ -1473,6 +1480,14 @@ impl State { } if !approvals_to_send.is_empty() { + gum::trace!( + target: LOG_TARGET, + ?peer_id, + num = approvals_to_send.len(), + "Sending approvals to unified peer", + ); + + stats.note_approvals_packet(approvals_to_send.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer_id.clone()], From 9fc79800367f2690a9f4af42c75abc971d7b7984 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 10 Apr 2022 22:19:28 +0200 Subject: [PATCH 64/72] make Resend an enum --- node/network/approval-distribution/src/lib.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index ed5cda95f4aa..d306c9bf7aa7 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -223,6 +223,12 @@ impl Default for AggressionConfig { } } +#[derive(PartialEq)] +enum Resend { + Yes, + No, +} + /// The [`State`] struct is responsible for tracking the overall state of the subsystem. /// /// It tracks metadata about our view of the unfinalized chain, @@ -617,8 +623,7 @@ impl State { } } - // 'true' means trigger re-send of messages in old blocks. - self.enable_aggression(ctx, true, metrics).await; + self.enable_aggression(ctx, Resend::Yes, metrics).await; } async fn handle_new_session_topology( @@ -812,8 +817,7 @@ impl State { // If a block was finalized, this means we may need to move our aggression // forward to the now oldest block(s). - // 'false' means don't trigger re-send of messages in old blocks. 
- self.enable_aggression(ctx, false, metrics).await; + self.enable_aggression(ctx, Resend::No, metrics).await; } async fn import_and_circulate_assignment( @@ -1505,7 +1509,7 @@ impl State { &mut self, ctx: &mut (impl SubsystemContext + overseer::SubsystemContext), - do_resend: bool, + resend: Resend, metrics: &Metrics, ) { let min_age = self.blocks_by_number.iter().next().map(|(num, _)| num); @@ -1529,7 +1533,7 @@ impl State { |block_entry| { let block_age = max_age - block_entry.number; - if do_resend && + if resend == Resend::Yes && config .resend_unfinalized_period .as_ref() From 14c3ea7e04bfbe00d378bd527b0442e963bfafb2 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 10 Apr 2022 22:19:51 +0200 Subject: [PATCH 65/72] be more explicit when resending --- node/network/approval-distribution/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index d306c9bf7aa7..30bc7d1be5c3 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -1541,7 +1541,7 @@ impl State { { // Retry sending to all peers. 
for (_, knowledge) in block_entry.known_by.iter_mut() { - knowledge.sent = Default::default(); + knowledge.sent = HashMap::new(); } true From d0dc36d50724ca81e955436f1a90a9407a0768e5 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 10 Apr 2022 22:21:01 +0200 Subject: [PATCH 66/72] fmt --- node/network/approval-distribution/src/lib.rs | 15 ++++++++++----- .../approval-distribution/src/metrics.rs | 18 +++++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 30bc7d1be5c3..7b8b7a57563e 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -475,8 +475,13 @@ impl State { }, NetworkBridgeEvent::NewGossipTopology(topology) => { let session = topology.session; - self.handle_new_session_topology(ctx, metrics, session, SessionTopology::from(topology)) - .await; + self.handle_new_session_topology( + ctx, + metrics, + session, + SessionTopology::from(topology), + ) + .await; }, NetworkBridgeEvent::PeerViewChange(peer_id, view) => { self.handle_peer_view_change(ctx, metrics, peer_id, view, rng).await; @@ -1491,7 +1496,6 @@ impl State { "Sending approvals to unified peer", ); - stats.note_approvals_packet(approvals_to_send.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer_id.clone()], @@ -1549,8 +1553,9 @@ impl State { false } }, - |_, _, _| { } - ).await; + |_, _, _| {}, + ) + .await; let aggression_stats = adjust_required_routing_and_propagate( ctx, diff --git a/node/network/approval-distribution/src/metrics.rs b/node/network/approval-distribution/src/metrics.rs index e41f4a850cf9..835315a959d0 100644 --- a/node/network/approval-distribution/src/metrics.rs +++ b/node/network/approval-distribution/src/metrics.rs @@ -14,8 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use polkadot_node_subsystem_util::metrics::{prometheus, Metrics as MetricsTrait}; use super::SentMessagesStats; +use polkadot_node_subsystem_util::metrics::{prometheus, Metrics as MetricsTrait}; /// Approval Distribution metrics. #[derive(Default, Clone)] @@ -154,11 +154,19 @@ fn note_sent_message_stats( message_counters: &prometheus::CounterVec, packet_counters: &prometheus::CounterVec, ) { - message_counters.with_label_values(&["assignments"]).inc_by(stats.assignments as u64); - message_counters.with_label_values(&["approvals"]).inc_by(stats.approvals as u64); + message_counters + .with_label_values(&["assignments"]) + .inc_by(stats.assignments as u64); + message_counters + .with_label_values(&["approvals"]) + .inc_by(stats.approvals as u64); - packet_counters.with_label_values(&["assignments"]).inc_by(stats.assignment_packets as u64); - packet_counters.with_label_values(&["approvals"]).inc_by(stats.approval_packets as u64); + packet_counters + .with_label_values(&["assignments"]) + .inc_by(stats.assignment_packets as u64); + packet_counters + .with_label_values(&["approvals"]) + .inc_by(stats.approval_packets as u64); } impl MetricsTrait for Metrics { From d406a2ccedf8f79b99d78929b2c9d513cb29626c Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Sun, 10 Apr 2022 22:33:38 +0200 Subject: [PATCH 67/72] fix error --- node/network/approval-distribution/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 7b8b7a57563e..b9033d5f035b 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -1545,7 +1545,7 @@ impl State { { // Retry sending to all peers. 
for (_, knowledge) in block_entry.known_by.iter_mut() { - knowledge.sent = HashMap::new(); + knowledge.sent = Knowledge::default(); } true From 43a4123d761aab4fd102863eb3861fe63e4f3506 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 13 Apr 2022 01:39:29 +0200 Subject: [PATCH 68/72] add a TODO for refactoring --- node/network/gossip-support/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index cabd5c056b1d..d8ba6ce7c89d 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -515,6 +515,8 @@ where let random_seed = { let (tx, rx) = oneshot::channel(); + // TODO https://github.com/paritytech/polkadot/issues/5316: + // get the random seed from the `SessionInfo` instead. ctx.send_message(RuntimeApiMessage::Request( relay_parent, RuntimeApiRequest::CurrentBabeEpoch(tx), From 383e7a0d1d80f3ad4660981f7562e3a77e8fb16e Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 19 Apr 2022 10:32:38 -0500 Subject: [PATCH 69/72] remove debug metrics --- node/network/approval-distribution/src/lib.rs | 59 +----- .../approval-distribution/src/metrics.rs | 184 ------------------ 2 files changed, 4 insertions(+), 239 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index b9033d5f035b..956b7828915b 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -477,7 +477,6 @@ impl State { let session = topology.session; self.handle_new_session_topology( ctx, - metrics, session, SessionTopology::from(topology), ) @@ -635,14 +634,13 @@ impl State { &mut self, ctx: &mut (impl SubsystemContext + overseer::SubsystemContext), - metrics: &Metrics, session: SessionIndex, topology: SessionTopology, ) { self.topologies.insert_topology(session, topology); let topology = self.topologies.get_topology(session).expect("just 
inserted above; qed"); - let new_session_topology_stats = adjust_required_routing_and_propagate( + adjust_required_routing_and_propagate( ctx, &mut self.blocks, &self.topologies, @@ -654,8 +652,6 @@ impl State { }, ) .await; - - metrics.note_new_topology_stats(new_session_topology_stats); } async fn process_incoming_peer_message( @@ -1064,8 +1060,6 @@ impl State { } } - let mut stats = SentMessagesStats::default(); - if !peers.is_empty() { gum::trace!( target: LOG_TARGET, @@ -1076,8 +1070,6 @@ impl State { "Sending an assignment to peers", ); - stats.assignments += peers.len(); - stats.assignment_packets += peers.len(); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( peers, protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1086,8 +1078,6 @@ impl State { )) .await; } - - metrics.note_basic_circulation_stats(stats); } async fn import_and_circulate_approval( @@ -1331,8 +1321,6 @@ impl State { } } - let mut stats = SentMessagesStats::default(); - if !peers.is_empty() { let approvals = vec![vote]; gum::trace!( @@ -1344,8 +1332,6 @@ impl State { "Sending an approval to peers", ); - stats.assignments += peers.len(); - stats.assignment_packets += peers.len(); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( peers, protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1354,8 +1340,6 @@ impl State { )) .await; } - - metrics.note_basic_circulation_stats(stats); } async fn unify_with_peer( @@ -1468,8 +1452,6 @@ impl State { } } - let mut stats = SentMessagesStats::default(); - if !assignments_to_send.is_empty() { gum::trace!( target: LOG_TARGET, @@ -1478,7 +1460,6 @@ impl State { "Sending assignments to unified peer", ); - stats.note_assignments_packet(assignments_to_send.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer_id.clone()], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1496,7 +1477,6 @@ impl State { "Sending approvals to unified peer", ); - 
stats.note_approvals_packet(approvals_to_send.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer_id.clone()], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1505,8 +1485,6 @@ impl State { )) .await; } - - metrics.note_unify_with_peer_stats(stats); } async fn enable_aggression( @@ -1530,7 +1508,7 @@ impl State { return } - let resend_stats = adjust_required_routing_and_propagate( + adjust_required_routing_and_propagate( ctx, &mut self.blocks, &self.topologies, @@ -1557,7 +1535,7 @@ impl State { ) .await; - let aggression_stats = adjust_required_routing_and_propagate( + adjust_required_routing_and_propagate( ctx, &mut self.blocks, &self.topologies, @@ -1598,29 +1576,6 @@ impl State { }, ) .await; - - metrics.note_resend_stats(resend_stats); - metrics.note_aggression_stats(aggression_stats); - } -} - -#[derive(Default)] -struct SentMessagesStats { - assignments: usize, - approvals: usize, - assignment_packets: usize, - approval_packets: usize, -} - -impl SentMessagesStats { - fn note_assignments_packet(&mut self, assignments: usize) { - self.assignment_packets += 1; - self.assignments += assignments; - } - - fn note_approvals_packet(&mut self, approvals: usize) { - self.approval_packets += 1; - self.approvals += approvals; } } @@ -1643,9 +1598,7 @@ async fn adjust_required_routing_and_propagate( topologies: &SessionTopologies, block_filter: impl Fn(&mut BlockEntry) -> bool, routing_modifier: impl Fn(&mut RequiredRouting, bool, &ValidatorIndex), -) -> SentMessagesStats { - let mut stats = SentMessagesStats::default(); - +) { let mut peer_assignments = HashMap::new(); let mut peer_approvals = HashMap::new(); @@ -1725,7 +1678,6 @@ async fn adjust_required_routing_and_propagate( // Send messages in accumulated packets, assignments preceding approvals. 
for (peer, assignments_packet) in peer_assignments { - stats.note_assignments_packet(assignments_packet.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1736,7 +1688,6 @@ async fn adjust_required_routing_and_propagate( } for (peer, approvals_packet) in peer_approvals { - stats.note_approvals_packet(approvals_packet.len()); ctx.send_message(NetworkBridgeMessage::SendValidationMessage( vec![peer], protocol_v1::ValidationProtocol::ApprovalDistribution( @@ -1745,8 +1696,6 @@ async fn adjust_required_routing_and_propagate( )) .await; } - - stats } /// Modify the reputation of a peer based on its behavior. diff --git a/node/network/approval-distribution/src/metrics.rs b/node/network/approval-distribution/src/metrics.rs index 835315a959d0..c0887b25f7f4 100644 --- a/node/network/approval-distribution/src/metrics.rs +++ b/node/network/approval-distribution/src/metrics.rs @@ -14,7 +14,6 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use super::SentMessagesStats; use polkadot_node_subsystem_util::metrics::{prometheus, Metrics as MetricsTrait}; /// Approval Distribution metrics. @@ -32,19 +31,6 @@ struct MetricsInner { time_unify_with_peer: prometheus::Histogram, time_import_pending_now_known: prometheus::Histogram, time_awaiting_approval_voting: prometheus::Histogram, - - // TODO [now]: these metrics are (for the most part) temporary for figuring - // out why there are so many messages. 
- basic_circulation_messages_total: prometheus::CounterVec, - basic_circulation_packets_total: prometheus::CounterVec, - unify_with_peer_messages_total: prometheus::CounterVec, - unify_with_peer_packets_total: prometheus::CounterVec, - resend_messages_total: prometheus::CounterVec, - resend_packets_total: prometheus::CounterVec, - aggression_messages_total: prometheus::CounterVec, - aggression_packets_total: prometheus::CounterVec, - new_topology_messages_total: prometheus::CounterVec, - new_topology_packets_total: prometheus::CounterVec, } impl Metrics { @@ -97,76 +83,6 @@ impl Metrics { metrics.aggression_l2_messages_total.inc(); } } - - pub(crate) fn note_basic_circulation_stats(&self, stats: SentMessagesStats) { - if let Some(metrics) = &self.0 { - note_sent_message_stats( - stats, - &metrics.basic_circulation_messages_total, - &metrics.basic_circulation_packets_total, - ) - } - } - - pub(crate) fn note_unify_with_peer_stats(&self, stats: SentMessagesStats) { - if let Some(metrics) = &self.0 { - note_sent_message_stats( - stats, - &metrics.unify_with_peer_messages_total, - &metrics.unify_with_peer_packets_total, - ) - } - } - - pub(crate) fn note_resend_stats(&self, stats: SentMessagesStats) { - if let Some(metrics) = &self.0 { - note_sent_message_stats( - stats, - &metrics.resend_messages_total, - &metrics.resend_packets_total, - ) - } - } - - pub(crate) fn note_aggression_stats(&self, stats: SentMessagesStats) { - if let Some(metrics) = &self.0 { - note_sent_message_stats( - stats, - &metrics.aggression_messages_total, - &metrics.aggression_packets_total, - ) - } - } - - pub(crate) fn note_new_topology_stats(&self, stats: SentMessagesStats) { - if let Some(metrics) = &self.0 { - note_sent_message_stats( - stats, - &metrics.new_topology_messages_total, - &metrics.new_topology_packets_total, - ) - } - } -} - -fn note_sent_message_stats( - stats: SentMessagesStats, - message_counters: &prometheus::CounterVec, - packet_counters: &prometheus::CounterVec, -) { - 
message_counters - .with_label_values(&["assignments"]) - .inc_by(stats.assignments as u64); - message_counters - .with_label_values(&["approvals"]) - .inc_by(stats.approvals as u64); - - packet_counters - .with_label_values(&["assignments"]) - .inc_by(stats.assignment_packets as u64); - packet_counters - .with_label_values(&["approvals"]) - .inc_by(stats.approval_packets as u64); } impl MetricsTrait for Metrics { @@ -228,106 +144,6 @@ impl MetricsTrait for Metrics { ))?, registry, )?, - basic_circulation_messages_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_basic_circulation_messages_total", - "Number of assignments and approvals sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - basic_circulation_packets_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_basic_circulation_packets_total", - "Number of packets sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - unify_with_peer_messages_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_unify_with_peer_messages_total", - "Number of assignments and approvals sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - unify_with_peer_packets_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_unify_with_peer_packets_total", - "Number of packets sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - resend_messages_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_resend_messages_total", - "Number of assignments and approvals sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - resend_packets_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - 
"polkadot_parachain_approval_resend_packets_total", - "Number of packets sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - aggression_messages_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_aggression_messages_total", - "Number of assignments and approvals sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - aggression_packets_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_aggression_packets_total", - "Number of packets sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - new_topology_messages_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_new_topology_messages_total", - "Number of assignments and approvals sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, - new_topology_packets_total: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_approval_new_topology_packets_total", - "Number of packets sent by basic circulation", - ), - &["kind"] - )?, - registry, - )?, }; Ok(Metrics(Some(metrics))) } From c8782c6c7c736ba67f95afabbe97fb85d9c31539 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 19 Apr 2022 10:44:53 -0500 Subject: [PATCH 70/72] add some guide stuff --- .../src/node/approval/approval-distribution.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/roadmap/implementers-guide/src/node/approval/approval-distribution.md b/roadmap/implementers-guide/src/node/approval/approval-distribution.md index c8d8f60ae699..9afc53c7e777 100644 --- a/roadmap/implementers-guide/src/node/approval/approval-distribution.md +++ b/roadmap/implementers-guide/src/node/approval/approval-distribution.md @@ -22,6 +22,16 @@ For assignments, what we need to be checking is whether we are aware of the (blo However, awareness on its own of a 
(block, candidate) pair would imply that even ancient candidates all the way back to the genesis are relevant. We are actually not interested in anything before finality. +We gossip assignments along a grid topology produced by the [Gossip Support Subsystem](../utility/gossip-support.md) and also to a few random peers. The first time we accept an assignment or approval that originates from a validator peer in a shared dimension of the grid, regardless of the source we received it from, we propagate the message to validator peers in the unshared dimension as well as to a few random peers. + +But, in case these mechanisms don't work on their own, we need to trade bandwidth for protocol liveness by introducing aggression. + +Aggression has 3 levels: +* Aggression Level 0: The basic behaviors described above. +* Aggression Level 1: The originator of a message sends to all peers. Other peers follow the rules above. +* Aggression Level 2: All peers send all messages to all their row and column neighbors. This means that each validator will, on average, receive each message approximately 2*sqrt(n) times. + +These aggression levels are chosen based on how long a block has taken to finalize: assignments and approvals related to the unfinalized block will be propagated with more aggression. In particular, it's only the earliest unfinalized blocks that aggression should be applied to, because descendants may be unfinalized only by virtue of being descendants.
## Protocol From a8b7bf8e5efd97033efa7ddfc5a5d5c9603172ed Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 19 Apr 2022 10:47:22 -0500 Subject: [PATCH 71/72] fmt --- node/network/approval-distribution/src/lib.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 956b7828915b..1b54ce21bd56 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -475,12 +475,8 @@ impl State { }, NetworkBridgeEvent::NewGossipTopology(topology) => { let session = topology.session; - self.handle_new_session_topology( - ctx, - session, - SessionTopology::from(topology), - ) - .await; + self.handle_new_session_topology(ctx, session, SessionTopology::from(topology)) + .await; }, NetworkBridgeEvent::PeerViewChange(peer_id, view) => { self.handle_peer_view_change(ctx, metrics, peer_id, view, rng).await; From 2f790ef89106b08d5527037b0737cb2987298d0a Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 19 Apr 2022 11:47:47 -0500 Subject: [PATCH 72/72] update runtime API in test-runtime --- runtime/test-runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/test-runtime/src/lib.rs b/runtime/test-runtime/src/lib.rs index 045844c9d4ae..e9ceaf5cc713 100644 --- a/runtime/test-runtime/src/lib.rs +++ b/runtime/test-runtime/src/lib.rs @@ -796,7 +796,7 @@ sp_api::impl_runtime_apis! { impl authority_discovery_primitives::AuthorityDiscoveryApi for Runtime { fn authorities() -> Vec { - AuthorityDiscovery::authorities() + runtime_impl::relevant_authority_ids::() } }