
Commit f975a0c

alexggh authored and github-actions[bot] committed
bitfield_distribution: fix subsystem clogged at beginning of a session (#9094)
`handle_peer_view_change` is called on `NewGossipTopology` with the peer's existing view, to cover the case where the topology arrives late. In that case, however, the view still contains old blocks from the previous session, and because the X/Y grid neighbours change with the new topology, the subsystem ends up sending a flood of messages for blocks from before the session change. Fix this by sending messages only for relay parents that belong to the same session as the current topology.

---------

Signed-off-by: Alexandru Gheorghe <[email protected]>
Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent ff3f3a0 · commit f975a0c
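The idea behind the fix can be sketched in isolation. The helper below is not the subsystem's actual code: `gate_by_session`, the simplified `Hash`/`JobData` types, and the `main` driver are illustrative stand-ins for `state.per_relay_parent` and `job_data.signing_context.session_index` from the diffs further down.

use std::collections::HashMap;

type SessionIndex = u32;
type Hash = u64; // simplified stand-in for a real block hash

struct JobData {
    // Stand-in for the real `job_data.signing_context.session_index`.
    session_index: SessionIndex,
}

/// Keep only the newly-added relay parents whose per-relay-parent state
/// belongs to the same session as the current topology.
fn gate_by_session(
    added: Vec<Hash>,
    per_relay_parent: &HashMap<Hash, JobData>,
    current_session_index: SessionIndex,
) -> Vec<Hash> {
    added
        .into_iter()
        .filter(|hash| {
            per_relay_parent
                .get(hash)
                // A session mismatch becomes `None`, so the stale relay
                // parent is dropped instead of triggering message sends.
                .filter(|job| job.session_index == current_session_index)
                .is_some()
        })
        .collect()
}

fn main() {
    let mut per_relay_parent = HashMap::new();
    per_relay_parent.insert(1, JobData { session_index: 41 }); // previous session
    per_relay_parent.insert(2, JobData { session_index: 42 }); // current session

    // Only the relay parent from the current session (42) survives.
    assert_eq!(gate_by_session(vec![1, 2], &per_relay_parent, 42), vec![2]);
}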

File tree

3 files changed: +24 −1 lines changed

- polkadot/node/network/bitfield-distribution/src/lib.rs
- polkadot/node/network/protocol/src/grid_topology.rs
- prdoc/pr_9094.prdoc

polkadot/node/network/bitfield-distribution/src/lib.rs

Lines changed: 7 additions & 1 deletion
@@ -789,9 +789,11 @@ async fn handle_peer_view_change<Context>(
 	};
 
 	let added = peer_data.view.replace_difference(view).cloned().collect::<Vec<_>>();
+	let current_session_index = state.topologies.get_current_session_index();
 
 	let topology = state.topologies.get_current_topology().local_grid_neighbors();
 	let is_gossip_peer = topology.route_to_peer(RequiredRouting::GridXY, &origin);
+
 	let lucky = is_gossip_peer ||
 		util::gen_ratio_rng(
 			util::MIN_GOSSIP_PEERS.saturating_sub(topology.len()),
@@ -809,7 +811,11 @@ async fn handle_peer_view_change<Context>(
 	let delta_set: Vec<(ValidatorId, BitfieldGossipMessage)> = added
 		.into_iter()
 		.filter_map(|new_relay_parent_interest| {
-			if let Some(job_data) = state.per_relay_parent.get(&new_relay_parent_interest) {
+			if let Some(job_data) = state
+				.per_relay_parent
+				.get(&new_relay_parent_interest)
+				.filter(|job_data| job_data.signing_context.session_index == current_session_index)
+			{
 				// Send all jointly known messages for a validator (given the current relay parent)
 				// to the peer `origin`...
 				let one_per_validator = job_data.one_per_validator.clone();
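The change leans on `Option::filter` from the standard library: the wrapped value survives only when the predicate returns true, so a relay parent recorded under an older session makes the whole `if let Some(job_data)` arm fall through. A self-contained illustration:

fn main() {
    // `Option::filter` keeps the wrapped value only when the predicate holds.
    assert_eq!(Some(5).filter(|n| n % 2 == 1), Some(5)); // predicate true: kept
    assert_eq!(Some(4).filter(|n| n % 2 == 1), None);    // predicate false: dropped
}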

polkadot/node/network/protocol/src/grid_topology.rs

Lines changed: 5 additions & 0 deletions
@@ -496,6 +496,11 @@ impl SessionBoundGridTopologyStorage {
 		&self.current_topology.entry
 	}
 
+	/// Returns the current session index.
+	pub fn get_current_session_index(&self) -> SessionIndex {
+		self.current_topology.session_index
+	}
+
 	/// Access the current grid topology mutably. Dangerous and intended
 	/// to be used in tests.
 	pub fn get_current_topology_mut(&mut self) -> &mut SessionGridTopologyEntry {
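The new getter is a plain field read: the storage already keeps the session index next to the current topology entry. A minimal model of that pairing, assuming only the two fields visible in this diff (the wrapper type name `CurrentTopology` here is hypothetical, not the crate's real type):

type SessionIndex = u32;

struct SessionGridTopologyEntry; // stand-in for the real entry type

// Hypothetical pairing type; the real storage keeps `entry` and
// `session_index` together, as the field accesses above show.
struct CurrentTopology {
    entry: SessionGridTopologyEntry,
    session_index: SessionIndex,
}

struct SessionBoundGridTopologyStorage {
    current_topology: CurrentTopology,
}

impl SessionBoundGridTopologyStorage {
    /// Returns the current session index.
    fn get_current_session_index(&self) -> SessionIndex {
        self.current_topology.session_index
    }
}

fn main() {
    let storage = SessionBoundGridTopologyStorage {
        current_topology: CurrentTopology {
            entry: SessionGridTopologyEntry,
            session_index: 42,
        },
    };
    assert_eq!(storage.get_current_session_index(), 42);
}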

prdoc/pr_9094.prdoc

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+title: 'bitfield_distribution: fix subsystem clogged at beginning of a session'
+doc:
+- audience: Node Dev
+  description: |-
+    `handle_peer_view_change` is called on `NewGossipTopology` with the peer's existing view, to cover the case where the topology arrives late. In that case, however, the view still contains old blocks from the previous session, and because the X/Y grid neighbours change with the new topology, the subsystem ends up sending a flood of messages for blocks from before the session change.
+
+    Fix this by sending messages only for relay parents that belong to the same session as the current topology.
+crates:
+- name: polkadot-availability-bitfield-distribution
+  bump: patch
+- name: polkadot-node-network-protocol
+  bump: minor
