Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
490b627
Penalize if invalid EL block
pawanjay176 Aug 13, 2025
836f9c6
Priorotize status v2
pawanjay176 Aug 13, 2025
156449c
Increase columns_by_root quota
pawanjay176 Aug 14, 2025
6bd8944
Reduce backfill buffer size
pawanjay176 Aug 15, 2025
9455153
Without retries
pawanjay176 Aug 18, 2025
5337e46
Add a function to retry column requests that could not be made
pawanjay176 Aug 19, 2025
ca9cfd5
Small fixes
pawanjay176 Aug 19, 2025
68cce37
Try to avoid chains failing for rpc errors
pawanjay176 Aug 20, 2025
6da924b
Fix bug in initialization code
pawanjay176 Aug 20, 2025
1a0df30
Also penalize all batch peers for availability check errors
pawanjay176 Aug 20, 2025
17c4e34
Avoid root requests for backfill sync
pawanjay176 Aug 20, 2025
fdce537
Implement responsible peer tracking
pawanjay176 Aug 21, 2025
4540195
Request columns from global peer pool
pawanjay176 Aug 14, 2025
521778b
Random logs
pawanjay176 Aug 21, 2025
da27441
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Aug 21, 2025
52762b9
Handle 0 blobs per epoch case
pawanjay176 Aug 22, 2025
7c214f5
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Aug 25, 2025
90d319f
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Aug 26, 2025
27d0b36
Remove debug statements
pawanjay176 Aug 26, 2025
a97cf88
Add docs
pawanjay176 Aug 27, 2025
05adb71
Fix bug with partial column responses before all column requests sent
pawanjay176 Aug 27, 2025
b4bc7fe
Remove more debug logs
pawanjay176 Aug 27, 2025
8386bd9
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Aug 28, 2025
7331323
AwaitingValidation state only needs block peer
pawanjay176 Aug 28, 2025
da1aaba
Revise error tolerance
pawanjay176 Aug 28, 2025
8e1337d
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Aug 29, 2025
19b0a5c
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Aug 29, 2025
b07bc6d
Force requests if batch buffer is full under certain conditions
pawanjay176 Aug 29, 2025
4f60e86
Add logs to debug stuck range sync
pawanjay176 Aug 31, 2025
7a6d0d9
Force processing_target request
pawanjay176 Sep 1, 2025
8458df6
Attempt sending awaitingDownload batches when restarting sync
pawanjay176 Sep 1, 2025
29c2f83
Cleanup SyncingChain
pawanjay176 Sep 2, 2025
7e91eeb
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Sep 5, 2025
e0d8f04
Tests compile
pawanjay176 Sep 5, 2025
6a2a33d
Fix some issues from review
pawanjay176 Sep 5, 2025
e259ecd
More renamings
pawanjay176 Sep 5, 2025
4f62a9c
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Sep 5, 2025
04398ad
Fix some more issues from review
pawanjay176 Sep 8, 2025
bf09d57
Fix some issues from lion's review
pawanjay176 Sep 11, 2025
cffbd34
Reduce code duplication
pawanjay176 Sep 12, 2025
08bba3f
fmt
pawanjay176 Sep 12, 2025
9db4c30
Fix small bug
pawanjay176 Sep 16, 2025
baee27a
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Sep 16, 2025
e3aed89
Remove retry test that we do not use anymore
pawanjay176 Sep 17, 2025
b3b3756
Fix tests
pawanjay176 Sep 17, 2025
2f35c36
Add some metrics
pawanjay176 Sep 17, 2025
4a59d35
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Sep 17, 2025
27195ca
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Sep 24, 2025
aa6a1bc
Create a custom penalize_sync_peer method for clarity
pawanjay176 Sep 24, 2025
4b0b655
Fix nits
pawanjay176 Sep 24, 2025
7650032
Rename DataColumnsFromRange
dapplion Sep 25, 2025
7488755
De-duplicate data columns by root request type
dapplion Sep 25, 2025
c2aa4ae
Revert type change in UnexpectedRequestId
dapplion Sep 25, 2025
cf46d10
Fix issues from review
pawanjay176 Sep 25, 2025
d99df0a
Only send data coumn subnet discovery requests after peerdas is sched…
jimmygchen Sep 25, 2025
3f8998f
Only mark block lookups as pending if block is importing from gossip …
dapplion Sep 25, 2025
421e954
Revert "Revert type change in UnexpectedRequestId"
pawanjay176 Sep 25, 2025
826a06e
Fix variant name
pawanjay176 Sep 25, 2025
c491856
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Sep 25, 2025
15df3d2
Merge branch 'unstable' into blocks-then-columns
pawanjay176 Oct 2, 2025
5c562c6
Fix some more issues
pawanjay176 Oct 6, 2025
9b2de09
Rethink peer scoring
pawanjay176 Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion beacon_node/beacon_chain/src/block_verification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ pub enum ExecutionPayloadError {
}

impl ExecutionPayloadError {
pub fn penalize_peer(&self) -> bool {
pub fn penalize_gossip_peer(&self) -> bool {
// This match statement should never have a default case so that we are
// always forced to consider here whether or not to penalize a peer when
// we add a new error condition.
Expand Down Expand Up @@ -447,6 +447,39 @@ impl ExecutionPayloadError {
ExecutionPayloadError::UnverifiedNonOptimisticCandidate => false,
}
}

pub fn penalize_sync_peer(&self) -> bool {
// This match statement should never have a default case so that we are
// always forced to consider here whether or not to penalize a peer when
// we add a new error condition.
match self {
// The peer has nothing to do with this error, do not penalize them.
ExecutionPayloadError::NoExecutionConnection => false,
// The peer has nothing to do with this error, do not penalize them.
ExecutionPayloadError::RequestFailed(_) => false,
// For the sync case, we do not want a peer to keep sending us blocks that our
// execution engine considers invalid.
//
// Also, we ask peers for blocks over sync/rpc only when they indicate
// that they have fully validated a given block (using their status message).
//
// Hence, we should penalize for this error in the sync case.
ExecutionPayloadError::RejectedByExecutionEngine { .. } => true,
// There is no reason for an honest peer to propagate a block with an invalid
// payload time stamp.
ExecutionPayloadError::InvalidPayloadTimestamp { .. } => true,
// We do not want to receive these blocks over rpc even though the gossip
// case is still allowed.
ExecutionPayloadError::InvalidTerminalPoWBlock { .. } => true,
// We should penalize RPC blocks, since even an optimistic node shouldn't
// verify this block.
ExecutionPayloadError::InvalidActivationEpoch { .. } => true,
// As per `Self::InvalidActivationEpoch`.
ExecutionPayloadError::InvalidTerminalBlockHash { .. } => true,
// Do not penalize the peer since it's not their fault that *we're* optimistic.
ExecutionPayloadError::UnverifiedNonOptimisticCandidate => false,
}
}
}

impl From<execution_layer::Error> for ExecutionPayloadError {
Expand Down
30 changes: 30 additions & 0 deletions beacon_node/lighthouse_network/src/peer_manager/peerdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,36 @@ impl<E: EthSpec> PeerDB<E> {
.map(|(peer_id, _)| peer_id)
}

/// Returns an iterator over all connected peers that are assigned to custody the given
/// subnet and that, according to their status messages, are synced (or advanced) and have
/// the end slot of the given epoch.
pub fn good_custody_subnet_peer_range_sync(
&self,
subnet: DataColumnSubnetId,
epoch: Epoch,
) -> impl Iterator<Item = &PeerId> {
self.peers
.iter()
.filter(move |(_, info)| {
// The custody_subnets hashset can be populated via enr or metadata
let is_custody_subnet_peer = info.is_assigned_to_custody_subnet(&subnet);

info.is_connected()
&& is_custody_subnet_peer
&& match info.sync_status() {
SyncStatus::Synced { info } => {
info.has_slot(epoch.end_slot(E::slots_per_epoch()))
}
SyncStatus::Advanced { info } => {
info.has_slot(epoch.end_slot(E::slots_per_epoch()))
}
SyncStatus::IrrelevantPeer
| SyncStatus::Behind { .. }
| SyncStatus::Unknown => false,
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be worth extracting the logic duplicated in has_good_custody_range_sync_peer into a function on PeerInfo, so we avoid the danger of these two functions going out of sync. Maybe we can even just put it in has_slot().

            info.is_connected()
                && match info.sync_status() {
                    SyncStatus::Synced { info } | SyncStatus::Advanced { info } => {
                        info.has_slot(epoch.end_slot(E::slots_per_epoch()))
                    }
                    SyncStatus::IrrelevantPeer
                    | SyncStatus::Behind { .. }
                    | SyncStatus::Unknown => false,
                }

I think we may also be able to request backfill batches from a peer that is Behind if it has the slot. We'll have to add a check against head_slot to the function, though.

})
.map(|(peer_id, _)| peer_id)
}

/// Checks if there is at least one good peer for each specified custody subnet for the given epoch.
/// A "good" peer is one that is both connected and synced (or advanced) for the specified epoch.
pub fn has_good_custody_range_sync_peer(
Expand Down
12 changes: 12 additions & 0 deletions beacon_node/lighthouse_network/src/rpc/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,18 @@ impl<E: EthSpec> DataColumnsByRootRequest<E> {
Ok(Self { data_column_ids })
}

pub fn from_single_block(block_root: Hash256, indices: Vec<u64>) -> Result<Self, &'static str> {
let columns = VariableList::new(indices)
.map_err(|_| "Number of indices exceeds total number of columns")?;
DataColumnsByRootRequest::new(
vec![DataColumnsByRootIdentifier {
block_root,
columns,
}],
1,
)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unused and can be removed?


pub fn max_requested(&self) -> usize {
self.data_column_ids.iter().map(|id| id.columns.len()).sum()
}
Expand Down
42 changes: 42 additions & 0 deletions beacon_node/lighthouse_network/src/service/api_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ use types::{

pub type Id = u32;

/// The kind of a range request, without any of the ids that identify a specific request.
///
/// See [`RangeRequestId::batch_type`] for how a request id maps to its kind.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
pub enum RangeRequestType {
/// Kind reported for [`RangeRequestId::RangeSync`] requests.
ForwardSync,
/// Kind reported for [`RangeRequestId::BackfillSync`] requests.
BackfillSync,
}

#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
pub struct SingleLookupReqId {
pub lookup_id: Id,
Expand Down Expand Up @@ -38,6 +44,7 @@ pub enum SyncRequestId {
pub struct DataColumnsByRootRequestId {
pub id: Id,
pub requester: DataColumnsByRootRequester,
pub peer: PeerId,
}

#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
Expand All @@ -46,6 +53,20 @@ pub struct BlocksByRangeRequestId {
pub id: Id,
/// The Id of the overall By Range request for block components.
pub parent_request_id: ComponentsByRangeRequestId,
/// The peer that we made this request to
pub peer_id: PeerId,
}

impl BlocksByRangeRequestId {
pub fn batch_id(&self) -> Epoch {
match self.parent_request_id.requester {
RangeRequestId::BackfillSync { batch_id } => batch_id,
RangeRequestId::RangeSync {
chain_id: _,
batch_id,
} => batch_id,
}
}
}

#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
Expand Down Expand Up @@ -86,12 +107,31 @@ pub enum RangeRequestId {
RangeSync { chain_id: Id, batch_id: Epoch },
BackfillSync { batch_id: Epoch },
}
impl RangeRequestId {
    /// Returns the batch id carried by this request id, regardless of variant.
    pub fn batch_id(&self) -> Epoch {
        match *self {
            Self::RangeSync { batch_id, .. } | Self::BackfillSync { batch_id } => batch_id,
        }
    }

    /// Maps this request id to its [`RangeRequestType`], dropping the ids it carries.
    pub fn batch_type(&self) -> RangeRequestType {
        match self {
            Self::RangeSync { .. } => RangeRequestType::ForwardSync,
            Self::BackfillSync { .. } => RangeRequestType::BackfillSync,
        }
    }
}

// TODO(das) refactor in a separate PR. We might be able to remove this and replace
// [`DataColumnsByRootRequestId`] with a [`SingleLookupReqId`].
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
pub enum DataColumnsByRootRequester {
Custody(CustodyId),
RangeSync { parent: ComponentsByRangeRequestId },
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TODO comment above is no longer relevant here

}

#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
Expand Down Expand Up @@ -222,6 +262,7 @@ impl Display for DataColumnsByRootRequester {
/// Writes the requester as `Custody/{id}` or `Range/{parent}`, using the inner value's
/// own `Display` output.
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Custody(id) => write!(f, "Custody/{id}"),
Self::RangeSync { parent } => write!(f, "Range/{parent}"),
}
}
}
Expand Down Expand Up @@ -255,6 +296,7 @@ mod tests {
lookup_id: 101,
}),
}),
peer: PeerId::random(),
};
assert_eq!(format!("{id}"), "123/Custody/121/Lookup/101");
}
Expand Down
6 changes: 6 additions & 0 deletions beacon_node/network/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,12 @@ pub static SYNC_ACTIVE_NETWORK_REQUESTS: LazyLock<Result<IntGaugeVec>> = LazyLoc
&["type"],
)
});
/// Gauge for the current count of pending columns-by-root requests that are waiting for
/// peers. Created lazily on first access.
pub static SYNC_PENDING_ROOT_RANGE_REQUESTS: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
try_create_int_gauge(
"sync_pending_root_range_requests",
"Current count of pending columns by root requests waiting for peers",
)
});
pub static SYNC_UNKNOWN_NETWORK_REQUESTS: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
try_create_int_counter_vec(
"sync_unknwon_network_request",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1330,7 +1330,9 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
return None;
}
Err(ref e @ BlockError::ExecutionPayloadError(ref epe)) if !epe.penalize_peer() => {
Err(ref e @ BlockError::ExecutionPayloadError(ref epe))
if !epe.penalize_gossip_peer() =>
{
debug!(error = %e, "Could not verify block for gossip. Ignoring the block");
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
return None;
Expand Down Expand Up @@ -1562,7 +1564,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"Block with unknown parent attempted to be processed"
);
}
Err(e @ BlockError::ExecutionPayloadError(epe)) if !epe.penalize_peer() => {
Err(e @ BlockError::ExecutionPayloadError(epe)) if !epe.penalize_gossip_peer() => {
debug!(
error = %e,
"Failed to verify execution payload"
Expand Down
Loading
Loading