From cfe043883f52d95bfd204c47d63c4088457a8c31 Mon Sep 17 00:00:00 2001 From: Alexandru Vasile Date: Wed, 1 Apr 2026 12:23:25 +0000 Subject: [PATCH 1/4] collator-protocol: Enhance visibility into failed to fetch collations Signed-off-by: Alexandru Vasile --- .../src/validator_side/mod.rs | 121 ++++++++++++++++-- 1 file changed, 113 insertions(+), 8 deletions(-) diff --git a/polkadot/node/network/collator-protocol/src/validator_side/mod.rs b/polkadot/node/network/collator-protocol/src/validator_side/mod.rs index e320b6a50aba2..d4b42d985ec18 100644 --- a/polkadot/node/network/collator-protocol/src/validator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/validator_side/mod.rs @@ -359,6 +359,12 @@ impl PeerData { implicit_view, leaf_claim_queues.keys(), ) { + gum::debug!( + target: LOG_TARGET, + ?on_scheduling_parent, + "Peer advertised a collation for a relay parent that is out of our view", + ); + return Err(InsertAdvertisementError::OutOfOurView); } @@ -368,6 +374,13 @@ impl PeerData { .get(&on_scheduling_parent) .map_or(false, |candidates| candidates.contains(&candidate_hash)) { + gum::debug!( + target: LOG_TARGET, + ?on_scheduling_parent, + ?candidate_hash, + "Peer has already advertised this collation", + ); + return Err(InsertAdvertisementError::Duplicate); } @@ -383,9 +396,27 @@ impl PeerData { .unwrap_or(0); if candidates.len() > max_ads { + gum::debug!( + target: LOG_TARGET, + ?on_scheduling_parent, + ?candidate_hash, + current_ads = candidates.len(), + max_ads, + "Peer has advertised too many collations for this scheduling parent", + ); + return Err(InsertAdvertisementError::PeerLimitReached); } + gum::debug!( + target: LOG_TARGET, + ?on_scheduling_parent, + ?candidate_hash, + current_ads = candidates.len(), + max_ads, + "Tracking new advertisement from peer", + ); + candidates.insert(candidate_hash); } else { if self.version != CollationVersion::V1 { @@ -1653,22 +1684,58 @@ where Sender: CollatorProtocolSenderTrait, { // Basic peer and protocol validation - let peer_data = state.peer_data.get_mut(&peer_id).ok_or(AdvertisementError::UnknownPeer)?; + let peer_data = state.peer_data.get_mut(&peer_id).ok_or_else(|| { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?prospective_candidate, + "Received advertisement from unknown peer", + ); + + AdvertisementError::UnknownPeer + })?; // V1 protocol requires relay_parent to be an active leaf (no async backing support) if peer_data.version == CollationVersion::V1 && !state.leaf_claim_queues.contains_key(&scheduling_parent) { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?prospective_candidate, + "V1 advertisement must use an active leaf as relay parent", + ); + return Err(AdvertisementError::ProtocolMisuse); } // Ensure peer has declared as a collator - peer_data.collating_para().ok_or(AdvertisementError::UndeclaredCollator)?; + peer_data.collating_para().ok_or_else(|| { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?prospective_candidate, + "Peer has not declared as collator", + ); - let per_scheduling_parent = state - .per_scheduling_parent - .get(&scheduling_parent) - .ok_or(AdvertisementError::SchedulingParentUnknown)?; + AdvertisementError::UndeclaredCollator + })?; + + let per_scheduling_parent = + state.per_scheduling_parent.get(&scheduling_parent).ok_or_else(|| { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?prospective_candidate, + "Scheduling parent is unknown", + ); + + AdvertisementError::SchedulingParentUnknown + })?; // Always insert advertisements that pass all the checks for spam protection. let candidate_hash = prospective_candidate.map(|(hash, ..)| hash); @@ -1680,7 +1747,18 @@ where &per_scheduling_parent, &state.leaf_claim_queues, ) - .map_err(AdvertisementError::Invalid)?; + .map_err(|error| { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?prospective_candidate, + ?error, + "Failed to insert advertisement", + ); + + AdvertisementError::Invalid(error) + })?; if hold_off_asset_hub_collation_if_needed( state, @@ -1691,6 +1769,14 @@ where scheduling_parent, // For V1/V2, the relay parent is the same as the scheduling parent None, // V1/V2 don't have advertised descriptor version ) { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?prospective_candidate, + "Collation held off, not processing advertisement further", + ); + return Ok(()); } @@ -1802,7 +1888,16 @@ where // Check if there's a free slot accounting for obsolete positions and capacity. // This happens AFTER hold-off logic (for AssetHub) has run, so held-off advertisements // can be queued even when capacity is temporarily full. - is_slot_available(&scheduling_parent, para_id, state)?; + is_slot_available(&scheduling_parent, para_id, state).inspect_err(|error| { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?para_id, + ?error, + "Slot is not available", + ); + })?; if let Some((candidate_hash, parent_head_data_hash)) = prospective_candidate { // Check if backing subsystem allows to second this candidate. @@ -1813,6 +1908,16 @@ where .await; if !can_second { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?scheduling_parent, + ?para_id, + ?candidate_hash, + ?parent_head_data_hash, + "Backing subsystem does not allow seconding this candidate", + ); + return Err(AdvertisementError::BlockedByBacking); } } From 0046806ff1097977a56704eb12aa6d20ddab48be Mon Sep 17 00:00:00 2001 From: "cmd[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 3 Apr 2026 08:38:37 +0000 Subject: [PATCH 2/4] Update from github-actions[bot] running command 'prdoc --audience node_dev --bump patch' --- prdoc/pr_11629.prdoc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 prdoc/pr_11629.prdoc diff --git a/prdoc/pr_11629.prdoc b/prdoc/pr_11629.prdoc new file mode 100644 index 0000000000000..56a752b6c86b0 --- /dev/null +++ b/prdoc/pr_11629.prdoc @@ -0,0 +1,20 @@ +title: 'collator-protocol: Enhance visibility into failed to fetch collations' +doc: +- audience: Node Dev + description: |- + This PR enhances the observability of our logs when validators refuse to fetch advertised collations. + + The validator keeps track of advertised collations from collators. + Then, the validator checks if it has sufficient slots (`is_slot_available`) and if the collation can be seconded (`can_second`). + If either of the mentioned functions fails, the validator silently ignores the collation. + + This PR aims to surface this behavior and help investigate further issues. + + This PR is already deployed on validators and has surfaced: + - https://github.com/paritytech/polkadot-sdk/issues/11625 + + Part of: + - https://github.com/paritytech/polkadot-sdk/issues/11377 +crates: +- name: polkadot-collator-protocol + bump: patch From 5470b561236a09934e1183a9054e418fe507bb80 Mon Sep 17 00:00:00 2001 From: Alexandru Vasile Date: Mon, 6 Apr 2026 15:36:29 +0000 Subject: [PATCH 3/4] Apply feedback Signed-off-by: Alexandru Vasile --- .../src/validator_side/mod.rs | 62 ++----------------- 1 file changed, 6 insertions(+), 56 deletions(-) diff --git a/polkadot/node/network/collator-protocol/src/validator_side/mod.rs b/polkadot/node/network/collator-protocol/src/validator_side/mod.rs index d4b42d985ec18..0dee964146290 100644 --- a/polkadot/node/network/collator-protocol/src/validator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/validator_side/mod.rs @@ -359,12 +359,6 @@ impl PeerData { implicit_view, leaf_claim_queues.keys(), ) { - gum::debug!( - target: LOG_TARGET, - ?on_scheduling_parent, - "Peer advertised a collation for a relay parent that is out of our view", - ); - return Err(InsertAdvertisementError::OutOfOurView); } @@ -374,13 +368,6 @@ impl PeerData { .get(&on_scheduling_parent) .map_or(false, |candidates| candidates.contains(&candidate_hash)) { - gum::debug!( - target: LOG_TARGET, - ?on_scheduling_parent, - ?candidate_hash, - "Peer has already advertised this collation", - ); - return Err(InsertAdvertisementError::Duplicate); } @@ -396,15 +383,6 @@ impl PeerData { .unwrap_or(0); if candidates.len() > max_ads { - gum::debug!( - target: LOG_TARGET, - ?on_scheduling_parent, - ?candidate_hash, - current_ads = candidates.len(), - max_ads, - "Peer has advertised too many collations for this scheduling parent", - ); - return Err(InsertAdvertisementError::PeerLimitReached); } @@ -1684,17 +1662,7 @@ where Sender: CollatorProtocolSenderTrait, { // Basic peer and protocol validation - let peer_data = state.peer_data.get_mut(&peer_id).ok_or_else(|| { - gum::debug!( - target: LOG_TARGET, - ?peer_id, - ?scheduling_parent, - ?prospective_candidate, - "Received advertisement from unknown peer", - ); - - AdvertisementError::UnknownPeer - })?; + let peer_data = state.peer_data.get_mut(&peer_id).ok_or(AdvertisementError::UnknownPeer)?; // V1 protocol requires relay_parent to be an active leaf (no async backing support) if peer_data.version == CollationVersion::V1 && @@ -1712,30 +1680,12 @@ where } // Ensure peer has declared as a collator - peer_data.collating_para().ok_or_else(|| { - gum::debug!( - target: LOG_TARGET, - ?peer_id, - ?scheduling_parent, - ?prospective_candidate, - "Peer has not declared as collator", - ); - - AdvertisementError::UndeclaredCollator - })?; - - let per_scheduling_parent = - state.per_scheduling_parent.get(&scheduling_parent).ok_or_else(|| { - gum::debug!( - target: LOG_TARGET, - ?peer_id, - ?scheduling_parent, - ?prospective_candidate, - "Scheduling parent is unknown", - ); + peer_data.collating_para().ok_or(AdvertisementError::UndeclaredCollator)?; - AdvertisementError::SchedulingParentUnknown - })?; + let per_scheduling_parent = state + .per_scheduling_parent + .get(&scheduling_parent) + .ok_or(AdvertisementError::SchedulingParentUnknown)?; // Always insert advertisements that pass all the checks for spam protection. let candidate_hash = prospective_candidate.map(|(hash, ..)| hash); From 2cf7c22e92dc663bceaec410f9b281448f640487 Mon Sep 17 00:00:00 2001 From: Alexandru Vasile <60601340+lexnv@users.noreply.github.com> Date: Tue, 7 Apr 2026 14:46:35 +0300 Subject: [PATCH 4/4] Update polkadot/node/network/collator-protocol/src/validator_side/mod.rs Co-authored-by: Tsvetomir Dimitrov --- .../node/network/collator-protocol/src/validator_side/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/network/collator-protocol/src/validator_side/mod.rs b/polkadot/node/network/collator-protocol/src/validator_side/mod.rs index 0dee964146290..a205998e7592a 100644 --- a/polkadot/node/network/collator-protocol/src/validator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/validator_side/mod.rs @@ -1724,7 +1724,7 @@ where ?peer_id, ?scheduling_parent, ?prospective_candidate, - "Collation held off, not processing advertisement further", + "Collation held off, not processing advertisement further for now", ); return Ok(());