Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions polkadot/node/network/collator-protocol/src/validator_side/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,15 @@ impl PeerData {
return Err(InsertAdvertisementError::PeerLimitReached);
}

gum::debug!(
target: LOG_TARGET,
?on_scheduling_parent,
?candidate_hash,
current_ads = candidates.len(),
max_ads,
"Tracking new advertisement from peer",
);

candidates.insert(candidate_hash);
} else {
if self.version != CollationVersion::V1 {
Expand Down Expand Up @@ -1659,6 +1668,14 @@ where
if peer_data.version == CollationVersion::V1 &&
!state.leaf_claim_queues.contains_key(&scheduling_parent)
{
gum::debug!(
target: LOG_TARGET,
?peer_id,
?scheduling_parent,
?prospective_candidate,
"V1 advertisement must use an active leaf as relay parent",
);

return Err(AdvertisementError::ProtocolMisuse);
}

Expand All @@ -1680,7 +1697,18 @@ where
&per_scheduling_parent,
&state.leaf_claim_queues,
)
.map_err(AdvertisementError::Invalid)?;
.map_err(|error| {
gum::debug!(
target: LOG_TARGET,
?peer_id,
?scheduling_parent,
?prospective_candidate,
?error,
"Failed to insert advertisement",
);

AdvertisementError::Invalid(error)
})?;

if hold_off_asset_hub_collation_if_needed(
state,
Expand All @@ -1691,6 +1719,14 @@ where
scheduling_parent, // For V1/V2, the relay parent is the same as the scheduling parent
None, // V1/V2 don't have advertised descriptor version
) {
gum::debug!(
target: LOG_TARGET,
?peer_id,
?scheduling_parent,
?prospective_candidate,
"Collation held off, not processing advertisement further for now",
);

return Ok(());
}

Expand Down Expand Up @@ -1802,7 +1838,16 @@ where
// Check if there's a free slot accounting for obsolete positions and capacity.
// This happens AFTER hold-off logic (for AssetHub) has run, so held-off advertisements
// can be queued even when capacity is temporarily full.
is_slot_available(&scheduling_parent, para_id, state)?;
is_slot_available(&scheduling_parent, para_id, state).inspect_err(|error| {
gum::debug!(
target: LOG_TARGET,
?peer_id,
?scheduling_parent,
?para_id,
?error,
"Slot is not available",
);
})?;

if let Some((candidate_hash, parent_head_data_hash)) = prospective_candidate {
// Check if backing subsystem allows to second this candidate.
Expand All @@ -1813,6 +1858,16 @@ where
.await;

if !can_second {
gum::debug!(
target: LOG_TARGET,
?peer_id,
?scheduling_parent,
?para_id,
?candidate_hash,
?parent_head_data_hash,
"Backing subsystem does not allow seconding this candidate",
);

return Err(AdvertisementError::BlockedByBacking);
}
}
Expand Down
20 changes: 20 additions & 0 deletions prdoc/pr_11629.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
title: 'collator-protocol: Enhance visibility into failed to fetch collations'
doc:
- audience: Node Dev
description: |-
This PR enhances the observability of our logs when validators refuse to fetch advertised collations.

The validator keeps track of advertised collations from collators.
Then, the validator checks if it has sufficient slots (`is_slot_available`) and if the collation can be seconded (`can_second`).
If either of the mentioned functions fails, the validator silently ignores the collation.

This PR aims to surface this behavior and help investigate further issues.

This PR is already deployed on validators and has surfaced:
- https://github.com/paritytech/polkadot-sdk/issues/11625

Part of:
- https://github.com/paritytech/polkadot-sdk/issues/11377
crates:
- name: polkadot-collator-protocol
bump: patch
Loading