From 490b627074fbd60ac27ace62b38089ac61d4f764 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 13 Aug 2025 16:36:56 -0700 Subject: [PATCH 01/49] Penalize if invalid EL block --- .../lighthouse_network/src/service/mod.rs | 2 +- .../src/network_beacon_processor/rpc_methods.rs | 2 +- .../network_beacon_processor/sync_methods.rs | 17 +++++++++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/beacon_node/lighthouse_network/src/service/mod.rs b/beacon_node/lighthouse_network/src/service/mod.rs index 54c01ae16a5..bf233a3ec31 100644 --- a/beacon_node/lighthouse_network/src/service/mod.rs +++ b/beacon_node/lighthouse_network/src/service/mod.rs @@ -1911,7 +1911,7 @@ impl Network { } }, }; - debug!(our_addr = %local_addr, from = %send_back_addr, error = error_repr, "Failed incoming connection"); + tracing::trace!(our_addr = %local_addr, from = %send_back_addr, error = error_repr, "Failed incoming connection"); None } SwarmEvent::OutgoingConnectionError { diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 117377c9245..e38fa6f842c 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -212,7 +212,7 @@ impl NetworkBeaconProcessor { send_block_count += 1; } Ok(None) => { - debug!( + tracing::trace!( %peer_id, request_root = ?root, "Peer requested unknown block" diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 306a184627e..9967f9c5e2b 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -10,7 +10,8 @@ use beacon_chain::data_availability_checker::AvailabilityCheckError; use beacon_chain::data_availability_checker::MaybeAvailableBlock; use beacon_chain::{ AvailabilityProcessingStatus, BeaconChainTypes, BlockError, ChainSegmentResult, - HistoricalBlockError, NotifyExecutionLayer, validator_monitor::get_slot_delay_ms, + ExecutionPayloadError, HistoricalBlockError, NotifyExecutionLayer, + validator_monitor::get_slot_delay_ms, }; use beacon_processor::{ AsyncFn, BlockingFn, DuplicateCache, @@ -774,7 +775,19 @@ impl NetworkBeaconProcessor { }) } ref err @ BlockError::ExecutionPayloadError(ref epe) => { - if !epe.penalize_peer() { + if matches!(epe, ExecutionPayloadError::RejectedByExecutionEngine { .. }) { + debug!( + error = ?err, + "Invalid execution payload rejected by EE" + ); + Err(ChainSegmentFailed { + message: format!( + "Peer sent a block containing invalid execution payload. Reason: {:?}", + err + ), + peer_action: Some(PeerAction::LowToleranceError), + }) + } else if !epe.penalize_peer() { // These errors indicate an issue with the EL and not the `ChainSegment`. 
// Pause the syncing while the EL recovers debug!( From 836f9c6c1979a9f5bb4f5efd7674e4e74a314094 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 13 Aug 2025 16:37:12 -0700 Subject: [PATCH 02/49] Priorotize status v2 --- .../src/peer_manager/mod.rs | 50 ++++++++++++++++++- .../lighthouse_network/src/rpc/protocol.rs | 2 +- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 13367a3e997..d0f3fd1c7c3 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -727,7 +727,7 @@ impl PeerManager { } } else { // we have no meta-data for this peer, update - debug!(%peer_id, new_seq_no = meta_data.seq_number(), "Obtained peer's metadata"); + debug!(%peer_id, new_seq_no = meta_data.seq_number(), cgc=?meta_data.custody_group_count().ok(), "Obtained peer's metadata"); } let known_custody_group_count = peer_info @@ -743,7 +743,7 @@ impl PeerManager { if let Some(custody_group_count) = custody_group_count_opt { match self.compute_peer_custody_groups(peer_id, custody_group_count) { Ok(custody_groups) => { - let custody_subnets = custody_groups + let custody_subnets: HashSet = custody_groups .into_iter() .flat_map(|custody_index| { self.subnets_by_custody_group @@ -759,6 +759,13 @@ impl PeerManager { }) }) .collect(); + let cgc = if custody_subnets.len() == 128 { + "supernode".to_string() + } else { + format!("{:?}", custody_subnets) + }; + + debug!(cgc, ?peer_id, "Peer custodied subnets"); peer_info.set_custody_subnets(custody_subnets); updated_cgc = Some(custody_group_count) != known_custody_group_count; @@ -947,6 +954,42 @@ impl PeerManager { } } + /// Run discovery query for additional custody peers if we fall below `TARGET_PEERS`. + fn maintain_custody_peers(&mut self) { + let subnets_to_discover: Vec = self + .network_globals + .sampling_subnets() + .iter() + .filter_map(|custody_subnet| { + if self + .network_globals + .peers + .read() + .good_range_sync_custody_subnet_peers(*custody_subnet) + .count() + < 2 + { + Some(SubnetDiscovery { + subnet: Subnet::DataColumn(*custody_subnet), + min_ttl: None, + }) + } else { + None + } + }) + .collect(); + + // request the subnet query from discovery + if !subnets_to_discover.is_empty() { + debug!( + subnets = ?subnets_to_discover.iter().map(|s| s.subnet).collect::>(), + "Making subnet queries for maintaining custody peers" + ); + self.events + .push(PeerManagerEvent::DiscoverSubnetPeers(subnets_to_discover)); + } + } + fn maintain_trusted_peers(&mut self) { let trusted_peers = self.trusted_peers.clone(); for trusted_peer in trusted_peers { @@ -1269,6 +1312,9 @@ impl PeerManager { // Update peer score metrics; self.update_peer_score_metrics(); + // Maintain minimum count for custody peers. + self.maintain_custody_peers(); + // Maintain minimum count for sync committee peers. 
self.maintain_sync_committee_peers(); diff --git a/beacon_node/lighthouse_network/src/rpc/protocol.rs b/beacon_node/lighthouse_network/src/rpc/protocol.rs index 388dbe63ef0..52dae3af351 100644 --- a/beacon_node/lighthouse_network/src/rpc/protocol.rs +++ b/beacon_node/lighthouse_network/src/rpc/protocol.rs @@ -825,8 +825,8 @@ impl RequestType { match self { // add more protocols when versions/encodings are supported RequestType::Status(_) => vec![ - ProtocolId::new(SupportedProtocol::StatusV1, Encoding::SSZSnappy), ProtocolId::new(SupportedProtocol::StatusV2, Encoding::SSZSnappy), + ProtocolId::new(SupportedProtocol::StatusV1, Encoding::SSZSnappy), ], RequestType::Goodbye(_) => vec![ProtocolId::new( SupportedProtocol::GoodbyeV1, From 156449ca265a66b44d65f1862e53de4ed228e2c9 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 14 Aug 2025 09:10:28 -0700 Subject: [PATCH 03/49] Increase columns_by_root quota --- beacon_node/lighthouse_network/src/rpc/config.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beacon_node/lighthouse_network/src/rpc/config.rs b/beacon_node/lighthouse_network/src/rpc/config.rs index ef9347a1197..cdfb06284e3 100644 --- a/beacon_node/lighthouse_network/src/rpc/config.rs +++ b/beacon_node/lighthouse_network/src/rpc/config.rs @@ -120,10 +120,10 @@ impl RateLimiterConfig { // Range sync load balances when requesting blocks, and each batch is 32 blocks. pub const DEFAULT_DATA_COLUMNS_BY_RANGE_QUOTA: Quota = Quota::n_every(NonZeroU64::new(5120).unwrap(), 10); - // 512 columns per request from spec. This should be plenty as peers are unlikely to send all - // sampling requests to a single peer. + // 128 columns * 32 blocks in an epoch. Many clients try to request an entire batch by + // root instead of by range, so we should account for honest behaviour. pub const DEFAULT_DATA_COLUMNS_BY_ROOT_QUOTA: Quota = - Quota::n_every(NonZeroU64::new(512).unwrap(), 10); + Quota::n_every(NonZeroU64::new(4096).unwrap(), 10); pub const DEFAULT_LIGHT_CLIENT_BOOTSTRAP_QUOTA: Quota = Quota::one_every(10); pub const DEFAULT_LIGHT_CLIENT_OPTIMISTIC_UPDATE_QUOTA: Quota = Quota::one_every(10); pub const DEFAULT_LIGHT_CLIENT_FINALITY_UPDATE_QUOTA: Quota = Quota::one_every(10); From 6bd8944cef7dfeb6850742a85c8de5c82d659e27 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Fri, 15 Aug 2025 14:23:48 -0700 Subject: [PATCH 04/49] Reduce backfill buffer size --- beacon_node/network/src/sync/backfill_sync/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index ae9ac2e7705..dc70b08aec5 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -40,7 +40,7 @@ use types::{ColumnIndex, Epoch, EthSpec}; pub const BACKFILL_EPOCHS_PER_BATCH: u64 = 1; /// The maximum number of batches to queue before requesting more. -const BACKFILL_BATCH_BUFFER_SIZE: u8 = 20; +const BACKFILL_BATCH_BUFFER_SIZE: u8 = 3; /// The number of times to retry a batch before it is considered failed. 
const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 10; From 9455153542874d18ed1011a8f8b9037777ba79de Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Mon, 18 Aug 2025 14:48:44 -0700 Subject: [PATCH 05/49] Without retries --- .../src/peer_manager/peerdb.rs | 28 ++ .../src/service/api_types.rs | 15 ++ .../src/sync/block_sidecar_coupling.rs | 142 ++++++++++ beacon_node/network/src/sync/manager.rs | 25 +- .../network/src/sync/network_context.rs | 250 +++++++++++++++++- .../src/sync/network_context/requests.rs | 3 +- .../requests/data_columns_by_root.rs | 95 ++++++- .../network/src/sync/range_sync/chain.rs | 2 +- 8 files changed, 545 insertions(+), 15 deletions(-) diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs index 430ad2f6dae..1fa11357dc0 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs @@ -323,6 +323,34 @@ impl PeerDB { .map(|(peer_id, _)| peer_id) } + pub fn good_custody_subnet_peer_range_sync( + &self, + subnet: DataColumnSubnetId, + epoch: Epoch, + ) -> impl Iterator { + self.peers + .iter() + .filter(move |(_, info)| { + // The custody_subnets hashset can be populated via enr or metadata + let is_custody_subnet_peer = info.is_assigned_to_custody_subnet(&subnet); + + info.is_connected() + && is_custody_subnet_peer + && match info.sync_status() { + SyncStatus::Synced { info } => { + info.has_slot(epoch.end_slot(E::slots_per_epoch())) + } + SyncStatus::Advanced { info } => { + info.has_slot(epoch.end_slot(E::slots_per_epoch())) + } + SyncStatus::IrrelevantPeer + | SyncStatus::Behind { .. } + | SyncStatus::Unknown => false, + } + }) + .map(|(peer_id, _)| peer_id) + } + /// Returns an iterator of all peers that are supposed to be custodying /// the given subnet id. 
pub fn good_range_sync_custody_subnet_peers( diff --git a/beacon_node/lighthouse_network/src/service/api_types.rs b/beacon_node/lighthouse_network/src/service/api_types.rs index 0f5fd99c279..41119029904 100644 --- a/beacon_node/lighthouse_network/src/service/api_types.rs +++ b/beacon_node/lighthouse_network/src/service/api_types.rs @@ -38,6 +38,7 @@ pub enum SyncRequestId { pub struct DataColumnsByRootRequestId { pub id: Id, pub requester: DataColumnsByRootRequester, + pub peer: PeerId, } #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] @@ -48,6 +49,18 @@ pub struct BlocksByRangeRequestId { pub parent_request_id: ComponentsByRangeRequestId, } +impl BlocksByRangeRequestId { + pub fn batch_id(&self) -> Epoch { + match self.parent_request_id.requester { + RangeRequestId::BackfillSync { batch_id } => batch_id, + RangeRequestId::RangeSync { + chain_id: _, + batch_id, + } => batch_id, + } + } +} + #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] pub struct BlobsByRangeRequestId { /// Id to identify this attempt at a blobs_by_range request for `parent_request_id` @@ -92,6 +105,7 @@ pub enum RangeRequestId { #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] pub enum DataColumnsByRootRequester { Custody(CustodyId), + RangeSync { parent: ComponentsByRangeRequestId }, } #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] @@ -222,6 +236,7 @@ impl Display for DataColumnsByRootRequester { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::Custody(id) => write!(f, "Custody/{id}"), + Self::RangeSync { parent } => write!(f, "Range/{parent}"), } } } diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 605da3b4bda..b807c2a0b21 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -5,6 +5,7 @@ use lighthouse_network::{ PeerAction, PeerId, service::api_types::{ BlobsByRangeRequestId, BlocksByRangeRequestId, DataColumnsByRangeRequestId, + DataColumnsByRootRequestId, }, }; use std::{collections::HashMap, sync::Arc}; @@ -51,6 +52,17 @@ enum RangeBlockDataRequest { expected_custody_columns: Vec, attempt: usize, }, + DataColumnsFromRoot { + requests: HashMap< + DataColumnsByRootRequestId, + ByRangeRequest>, + >, + init: bool, + /// The column indices corresponding to the request + column_peers: HashMap>, + expected_custody_columns: Vec, + attempt: usize, + }, } #[derive(Debug)] @@ -81,6 +93,7 @@ impl RangeBlockComponentsRequest { Vec<(DataColumnsByRangeRequestId, Vec)>, Vec, )>, + data_columns_from_root: bool, ) -> Self { let block_data_request = if let Some(blobs_req_id) = blobs_req_id { RangeBlockDataRequest::Blobs(ByRangeRequest::Active(blobs_req_id)) @@ -95,6 +108,14 @@ impl RangeBlockComponentsRequest { expected_custody_columns, attempt: 0, } + } else if data_columns_from_root { + RangeBlockDataRequest::DataColumnsFromRoot { + requests: HashMap::new(), + init: false, + attempt: 0, + column_peers: HashMap::new(), + expected_custody_columns: Vec::new(), + } } else { RangeBlockDataRequest::NoData }; @@ -128,6 +149,35 @@ impl RangeBlockComponentsRequest { } } + /// `column_requests`: each element represents a request id and the columns requested under that request. 
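+    /// Returns an error if this coupling request was not initialized to expect data columns by root.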
+ pub fn insert_column_request_after_block_request( + &mut self, + column_requests: Vec<(DataColumnsByRootRequestId, Vec)>, + custody_columns: &[ColumnIndex], + ) -> Result<(), String> { + match &mut self.block_data_request { + RangeBlockDataRequest::DataColumnsFromRoot { + init, + requests, + attempt: _, + column_peers, + expected_custody_columns, + } => { + *init = true; + for (request, peers) in column_requests { + requests.insert(request, ByRangeRequest::Active(request)); + column_peers.insert(request, peers); + } + for column in custody_columns { + expected_custody_columns.push(*column); + } + + Ok(()) + } + _ => Err("Invalid initialization".to_string()), + } + } + /// Adds received blocks to the request. /// /// Returns an error if the request ID doesn't match the expected blocks request. @@ -150,6 +200,9 @@ impl RangeBlockComponentsRequest { ) -> Result<(), String> { match &mut self.block_data_request { RangeBlockDataRequest::NoData => Err("received blobs but expected no data".to_owned()), + RangeBlockDataRequest::DataColumnsFromRoot { .. } => { + Err("received blobs but expected no data columns by root".to_owned()) + } RangeBlockDataRequest::Blobs(req) => req.finish(req_id, blobs), RangeBlockDataRequest::DataColumns { .. } => { Err("received blobs but expected data columns".to_owned()) @@ -173,6 +226,9 @@ impl RangeBlockComponentsRequest { RangeBlockDataRequest::Blobs(_) => { Err("received data columns but expected blobs".to_owned()) } + RangeBlockDataRequest::DataColumnsFromRoot { .. } => { + Err("received data columns by root but expected range".to_owned()) + } RangeBlockDataRequest::DataColumns { requests, .. } => { let req = requests .get_mut(&req_id) @@ -182,6 +238,34 @@ impl RangeBlockComponentsRequest { } } + /// Adds received custody columns to the request. + /// + /// Returns an error if this request expects blobs instead of data columns, + /// or if the request ID is unknown. + pub fn add_custody_columns_by_root( + &mut self, + req_id: DataColumnsByRootRequestId, + columns: Vec>>, + ) -> Result<(), String> { + match &mut self.block_data_request { + RangeBlockDataRequest::NoData => { + Err("received data columns but expected no data".to_owned()) + } + RangeBlockDataRequest::Blobs(_) => { + Err("received data columns but expected blobs".to_owned()) + } + RangeBlockDataRequest::DataColumns { .. } => { + Err("received data columns by range but expected root".to_owned()) + } + RangeBlockDataRequest::DataColumnsFromRoot { requests, .. } => { + let req = requests + .get_mut(&req_id) + .ok_or(format!("unknown data columns by range req_id {req_id}"))?; + req.finish(req_id, columns) + } + } + } + /// Attempts to construct RPC blocks from all received components. /// /// Returns `None` if not all expected requests have completed. @@ -210,6 +294,64 @@ impl RangeBlockComponentsRequest { spec, )) } + RangeBlockDataRequest::DataColumnsFromRoot { + init, + attempt, + column_peers, + expected_custody_columns, + requests, + } => { + if !*init { + return None; + } + + let mut data_columns = vec![]; + let mut column_to_peer_id: HashMap = HashMap::new(); + for req in requests.values() { + let Some(data) = req.to_finished() else { + return None; + }; + data_columns.extend(data.clone()) + } + + // An "attempt" is complete here after we have received a response for all the + // requests we made. i.e. `req.to_finished()` returns Some for all requests. + *attempt += 1; + + // Note: this assumes that only 1 peer is responsible for a column + // with a batch. 
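+                // If two requests ever covered the same column, the later `insert` below would simply overwrite the earlier peer.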
+ for (id, columns) in column_peers { + for column in columns { + column_to_peer_id.insert(*column, id.peer); + } + } + + let resp = Self::responses_with_custody_columns( + blocks.to_vec(), + data_columns, + column_to_peer_id, + expected_custody_columns, + *attempt, + spec, + ); + + if let Err(CouplingError::DataColumnPeerFailure { + error: _, + faulty_peers, + action: _, + exceeded_retries: _, + }) = &resp + { + for (_, peer) in faulty_peers.iter() { + // find the req id associated with the peer and + // delete it from the entries as we are going to make + // a separate attempt for those components. + requests.retain(|&k, _| k.peer != *peer); + } + } + + Some(resp) + } RangeBlockDataRequest::DataColumns { requests, expected_custody_columns, diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 448e784ab6d..cc1a6a51d89 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -1116,12 +1116,12 @@ impl SyncManager { peer_id: PeerId, data_column: RpcEvent>>, ) { - if let Some(resp) = - self.network - .on_data_columns_by_root_response(req_id, peer_id, data_column) - { - match req_id.requester { - DataColumnsByRootRequester::Custody(custody_id) => { + match req_id.requester { + DataColumnsByRootRequester::Custody(custody_id) => { + if let Some(resp) = + self.network + .on_data_columns_by_root_response(req_id, peer_id, data_column) + { if let Some(result) = self .network .on_custody_by_root_response(custody_id, req_id, peer_id, resp) @@ -1130,6 +1130,19 @@ impl SyncManager { } } } + DataColumnsByRootRequester::RangeSync { parent } => { + if let Some(resp) = self.network.on_data_columns_by_root_range_response( + req_id, + peer_id, + data_column, + ) { + self.on_range_components_response( + parent, + peer_id, + RangeBlockComponent::CustodyColumnsFromRoot(req_id, resp), + ); + } + } } } diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 76e5ed3f5d9..0e9ce0676f2 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -15,12 +15,16 @@ use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use crate::sync::block_lookups::SingleLookupId; use crate::sync::block_sidecar_coupling::CouplingError; -use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest; +use crate::sync::network_context::requests::{ + BlobsByRootSingleBlockRequest, DataColumnsByRootRangeRequestItems, +}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState}; use custody::CustodyRequestResult; use fnv::FnvHashMap; -use lighthouse_network::rpc::methods::{BlobsByRangeRequest, DataColumnsByRangeRequest}; +use lighthouse_network::rpc::methods::{ + BlobsByRangeRequest, DataColumnsByRangeRequest, DataColumnsByRootRequest, +}; use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError, RequestType}; pub use lighthouse_network::service::api_types::RangeRequestId; use lighthouse_network::service::api_types::{ @@ -33,7 +37,8 @@ use parking_lot::RwLock; pub use requests::LookupVerifyError; use requests::{ ActiveRequests, BlobsByRangeRequestItems, BlobsByRootRequestItems, BlocksByRangeRequestItems, - BlocksByRootRequestItems, DataColumnsByRangeRequestItems, DataColumnsByRootRequestItems, + BlocksByRootRequestItems, DataColumnsByRangeRequestItems, DataColumnsByRootBatchBlockRequest, + 
DataColumnsByRootRequestItems, }; #[cfg(test)] use slot_clock::SlotClock; @@ -48,8 +53,8 @@ use tokio::sync::mpsc; use tracing::{debug, error, warn}; use types::blob_sidecar::FixedBlobSidecarList; use types::{ - BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, ForkContext, - Hash256, SignedBeaconBlock, Slot, + BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, DataColumnSubnetId, + EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot, }; pub mod custody; @@ -198,7 +203,8 @@ pub struct SyncNetworkContext { /// A mapping of active DataColumnsByRange requests data_columns_by_range_requests: ActiveRequests>, - + data_columns_by_root_range_requests: + ActiveRequests>, /// Mapping of active custody column requests for a block root custody_by_root_requests: FnvHashMap>, @@ -232,6 +238,10 @@ pub enum RangeBlockComponent { DataColumnsByRangeRequestId, RpcResponseResult>>>, ), + CustodyColumnsFromRoot( + DataColumnsByRootRequestId, + RpcResponseResult>>>, + ), } #[cfg(test)] @@ -277,6 +287,7 @@ impl SyncNetworkContext { blocks_by_root_requests: ActiveRequests::new("blocks_by_root"), blobs_by_root_requests: ActiveRequests::new("blobs_by_root"), data_columns_by_root_requests: ActiveRequests::new("data_columns_by_root"), + data_columns_by_root_range_requests: ActiveRequests::new("data_columns_by_root_range"), blocks_by_range_requests: ActiveRequests::new("blocks_by_range"), blobs_by_range_requests: ActiveRequests::new("blobs_by_range"), data_columns_by_range_requests: ActiveRequests::new("data_columns_by_range"), @@ -307,6 +318,7 @@ impl SyncNetworkContext { blocks_by_range_requests, blobs_by_range_requests, data_columns_by_range_requests, + data_columns_by_root_range_requests, // custody_by_root_requests is a meta request of data_columns_by_root_requests custody_by_root_requests: _, // components_by_range_requests is a meta request of various _by_range requests @@ -342,12 +354,18 @@ impl SyncNetworkContext { .into_iter() .map(|req_id| SyncRequestId::DataColumnsByRange(*req_id)); + let data_column_by_root_range_ids = data_columns_by_root_range_requests + .active_requests_of_peer(peer_id) + .into_iter() + .map(|req_id| SyncRequestId::DataColumnsByRoot(*req_id)); + blocks_by_root_ids .chain(blobs_by_root_ids) .chain(data_column_by_root_ids) .chain(blocks_by_range_ids) .chain(blobs_by_range_ids) .chain(data_column_by_range_ids) + .chain(data_column_by_root_range_ids) .collect() } @@ -404,6 +422,7 @@ impl SyncNetworkContext { blocks_by_range_requests, blobs_by_range_requests, data_columns_by_range_requests, + data_columns_by_root_range_requests, // custody_by_root_requests is a meta request of data_columns_by_root_requests custody_by_root_requests: _, // components_by_range_requests is a meta request of various _by_range requests @@ -425,6 +444,7 @@ impl SyncNetworkContext { .chain(blocks_by_range_requests.iter_request_peers()) .chain(blobs_by_range_requests.iter_request_peers()) .chain(data_columns_by_range_requests.iter_request_peers()) + .chain(data_columns_by_root_range_requests.iter_request_peers()) { *active_request_count_by_peer.entry(peer_id).or_default() += 1; } @@ -605,6 +625,73 @@ impl SyncNetworkContext { self.chain.sampling_columns_for_epoch(epoch).to_vec(), ) }), + false, + ); + self.components_by_range_requests.insert(id, info); + + Ok(id.id) + } + + /// A blocks by range request sent by the range sync algorithm + pub fn block_components_by_range_request_without_components( + &mut self, + batch_type: ByRangeRequestType, + request: 
BlocksByRangeRequest, + requester: RangeRequestId, + peers: &HashSet, + peers_to_deprioritize: &HashSet, + ) -> Result { + let active_request_count_by_peer = self.active_request_count_by_peer(); + + let Some(block_peer) = peers + .iter() + .map(|peer| { + ( + // If contains -> 1 (order after), not contains -> 0 (order first) + peers_to_deprioritize.contains(peer), + // Prefer peers with less overall requests + active_request_count_by_peer.get(peer).copied().unwrap_or(0), + // Random factor to break ties, otherwise the PeerID breaks ties + rand::random::(), + peer, + ) + }) + .min() + .map(|(_, _, _, peer)| *peer) + else { + // Backfill and forward sync handle this condition gracefully. + // - Backfill sync: will pause waiting for more peers to join + // - Forward sync: can never happen as the chain is dropped when removing the last peer. + return Err(RpcRequestSendError::NoPeer(NoPeerError::BlockPeer)); + }; + + // Create the overall components_by_range request ID before its individual components + let id = ComponentsByRangeRequestId { + id: self.next_id(), + requester, + }; + + let blocks_req_id = self.send_blocks_by_range_request(block_peer, request.clone(), id)?; + + let blobs_req_id = if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) { + Some(self.send_blobs_by_range_request( + block_peer, + BlobsByRangeRequest { + start_slot: *request.start_slot(), + count: *request.count(), + }, + id, + )?) + } else { + None + }; + + let data_columns_by_root = matches!(batch_type, ByRangeRequestType::BlocksAndColumns); + let info = RangeBlockComponentsRequest::new( + blocks_req_id, + blobs_req_id, + None, + data_columns_by_root, ); self.components_by_range_requests.insert(id, info); @@ -706,6 +793,17 @@ impl SyncNetworkContext { }) }) } + RangeBlockComponent::CustodyColumnsFromRoot(req_id, resp) => { + resp.and_then(|(custody_columns, _)| { + request + .add_custody_columns_by_root(req_id, custody_columns) + .map_err(|e| { + RpcResponseError::BlockComponentCouplingError( + CouplingError::InternalError(e), + ) + }) + }) + } } } { entry.remove(); @@ -943,6 +1041,7 @@ impl SyncNetworkContext { let id = DataColumnsByRootRequestId { id: self.next_id(), requester, + peer: peer_id, }; self.send_network_msg(NetworkMessage::SendRequest { @@ -1180,6 +1279,48 @@ impl SyncNetworkContext { Ok((id, requested_columns)) } + fn send_data_columns_by_root_range_requests( + &mut self, + peer_id: PeerId, + request: DataColumnsByRootBatchBlockRequest, + requester: DataColumnsByRootRequester, + ) -> Result { + let id = DataColumnsByRootRequestId { + id: self.next_id(), + requester, + peer: peer_id, + }; + + self.send_network_msg(NetworkMessage::SendRequest { + peer_id, + request: RequestType::DataColumnsByRoot( + request + .clone() + .try_into_request(self.fork_context.current_fork_name(), &self.chain.spec) + .expect("should work"), + ), + app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRoot(id)), + }) + .map_err(|_| RpcRequestSendError::InternalError("network send error".to_owned()))?; + + debug!( + method = "DataColumnsByRoot", + ?request, + peer = %peer_id, + %id, + "Sync RPC request sent" + ); + + self.data_columns_by_root_range_requests.insert( + id, + peer_id, + // true = we are only requesting if we know there are blobs. 
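+            // (block roots are only collected for blocks whose bodies carry blob KZG commitments)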
+ true, + DataColumnsByRootRangeRequestItems::new(request), + ); + Ok(id) + } + pub fn is_execution_engine_online(&self) -> bool { self.execution_engine_state == EngineState::Online } @@ -1369,6 +1510,19 @@ impl SyncNetworkContext { self.on_rpc_response_result(id, "DataColumnsByRoot", resp, peer_id, |_| 1) } + #[allow(clippy::type_complexity)] + pub(crate) fn on_data_columns_by_root_range_response( + &mut self, + id: DataColumnsByRootRequestId, + peer_id: PeerId, + rpc_event: RpcEvent>>, + ) -> Option>>>> { + let resp = self + .data_columns_by_root_range_requests + .on_response(id, rpc_event); + self.on_rpc_response_result(id, "DataColumnsByRootRange", resp, peer_id, |_| 1) + } + #[allow(clippy::type_complexity)] pub(crate) fn on_blocks_by_range_response( &mut self, @@ -1377,6 +1531,89 @@ impl SyncNetworkContext { rpc_event: RpcEvent>>, ) -> Option>>>> { let resp = self.blocks_by_range_requests.on_response(id, rpc_event); + match &resp { + // todo(pawan): send the data column request as soon as you get each chunk to spread out requests + Some(Ok((blocks, _))) => { + // We have blocks here, check if they need data columns and request them + let mut block_roots = Vec::new(); + let batch_epoch = id.batch_id(); + if !self.chain.spec.is_peer_das_enabled_for_epoch(batch_epoch) { + return self + .on_rpc_response_result(id, "BlocksByRange", resp, peer_id, |b| b.len()); + } + for block in blocks.iter() { + // Request columns only if the blob_kzg_commitments is non-empty + if let Ok(commitments) = block.message().body().blob_kzg_commitments() { + if !commitments.is_empty() { + block_roots.push(block.canonical_root()); + } + } + } + // Generate the data column by root requests + let mut peer_to_columns: HashMap> = HashMap::new(); + let mut no_peers_for_column: Vec = Vec::new(); + for column in self.chain.sampling_columns_for_epoch(batch_epoch).iter() { + let data_column = DataColumnSubnetId::new(*column); + if let Some(custody_peer) = self + .network_globals() + .peers + .read() + .good_custody_subnet_peer_range_sync(data_column, batch_epoch) + .next() + { + peer_to_columns + .entry(*custody_peer) + .or_default() + .push(*column); + } else { + debug!( + ?data_column, + ?id, + "Not enough column peers for batch, need to retry" + ); + no_peers_for_column.push(*column); + continue; + } + } + + // todo(pawan): no_peers_for_column nned to be requested once peers + // become available + let mut data_column_requests = Vec::new(); + for (peer, indices) in peer_to_columns.into_iter() { + let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { + block_roots: block_roots.clone(), + indices: indices.clone(), + }; + + let requester = DataColumnsByRootRequester::RangeSync { + parent: id.parent_request_id, + }; + + data_column_requests.push(( + self.send_data_columns_by_root_range_requests( + peer, + data_columns_by_root_request, + requester, + ) + .expect("should be able to send request"), + indices, + )); + } + + if let Some(req) = self + .components_by_range_requests + .get_mut(&id.parent_request_id) + { + req.insert_column_request_after_block_request( + data_column_requests, + self.chain.sampling_columns_for_epoch(batch_epoch), + ) + .expect("should be in the right state"); + } + } + None => {} + Some(Err(_)) => {} + } self.on_rpc_response_result(id, "BlocksByRange", resp, peer_id, |b| b.len()) } @@ -1431,6 +1668,7 @@ impl SyncNetworkContext { ); } } + if let Some(Err(RpcResponseError::VerifyError(e))) = &resp { self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); } diff --git 
a/beacon_node/network/src/sync/network_context/requests.rs b/beacon_node/network/src/sync/network_context/requests.rs index f42595fb690..cea636157c6 100644 --- a/beacon_node/network/src/sync/network_context/requests.rs +++ b/beacon_node/network/src/sync/network_context/requests.rs @@ -12,7 +12,8 @@ pub use blocks_by_range::BlocksByRangeRequestItems; pub use blocks_by_root::{BlocksByRootRequestItems, BlocksByRootSingleRequest}; pub use data_columns_by_range::DataColumnsByRangeRequestItems; pub use data_columns_by_root::{ - DataColumnsByRootRequestItems, DataColumnsByRootSingleBlockRequest, + DataColumnsByRootBatchBlockRequest, DataColumnsByRootRequestItems, + DataColumnsByRootSingleBlockRequest, DataColumnsByRootRangeRequestItems }; use crate::metrics; diff --git a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs index 09d7f4b3b77..fc5d4fa5e9f 100644 --- a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs +++ b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs @@ -1,5 +1,5 @@ use lighthouse_network::rpc::methods::DataColumnsByRootRequest; -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; use types::{ ChainSpec, DataColumnSidecar, DataColumnsByRootIdentifier, EthSpec, ForkName, Hash256, RuntimeVariableList, @@ -7,6 +7,38 @@ use types::{ use super::{ActiveRequestItems, LookupVerifyError}; +#[derive(Debug, Clone)] +pub struct DataColumnsByRootBatchBlockRequest { + pub block_roots: Vec, + pub indices: Vec, +} + +impl DataColumnsByRootBatchBlockRequest { + pub fn try_into_request( + self, + fork_name: ForkName, + spec: &ChainSpec, + ) -> Result { + let number_of_columns = spec.number_of_columns as usize; + let columns = RuntimeVariableList::new(self.indices, number_of_columns) + .map_err(|_| "Number of indices exceeds total number of columns")?; + let ids: Vec<_> = self + .block_roots + .into_iter() + .map(|block_root| DataColumnsByRootIdentifier { + block_root, + columns: columns.clone(), + }) + .collect(); + tracing::debug!(?ids, "Length ids"); + assert!(ids.len() <= 32); + Ok(DataColumnsByRootRequest::new( + ids, + spec.max_request_blocks(fork_name), + )) + } +} + #[derive(Debug, Clone)] pub struct DataColumnsByRootSingleBlockRequest { pub block_root: Hash256, @@ -79,3 +111,64 @@ impl ActiveRequestItems for DataColumnsByRootRequestItems { std::mem::take(&mut self.items) } } + +pub struct DataColumnsByRootRangeRequestItems { + request: DataColumnsByRootBatchBlockRequest, + items: HashMap>>>, +} + +impl DataColumnsByRootRangeRequestItems { + pub fn new(request: DataColumnsByRootBatchBlockRequest) -> Self { + Self { + request, + items: HashMap::new(), + } + } +} + +impl ActiveRequestItems for DataColumnsByRootRangeRequestItems { + type Item = Arc>; + + /// Appends a chunk to this multi-item request. If all expected chunks are received, this + /// method returns `Some`, resolving the request before the stream terminator. 
+ /// The active request SHOULD be dropped after `add_response` returns an error + fn add(&mut self, data_column: Self::Item) -> Result { + let block_root = data_column.block_root(); + if !self.request.block_roots.contains(&block_root) { + return Err(LookupVerifyError::UnrequestedBlockRoot(block_root)); + } + if !data_column.verify_inclusion_proof() { + return Err(LookupVerifyError::InvalidInclusionProof); + } + if !self.request.indices.contains(&data_column.index) { + return Err(LookupVerifyError::UnrequestedIndex(data_column.index)); + } + if self + .items + .values() + .flatten() + .any(|d| d.index == data_column.index) + { + return Err(LookupVerifyError::DuplicatedData( + data_column.slot(), + data_column.index, + )); + } + + self.items.entry(block_root).or_default().push(data_column); + + Ok(self + .items + .values() + .map(|columns| columns.len()) + .sum::() + >= self.request.indices.len() * self.request.block_roots.len()) + } + + fn consume(&mut self) -> Vec { + std::mem::take(&mut self.items) + .into_values() + .flatten() + .collect() + } +} diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index cdbb9f25883..0656ecf9cc9 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -924,7 +924,7 @@ impl SyncingChain { .cloned() .collect::>(); - match network.block_components_by_range_request( + match network.block_components_by_range_request_without_components( batch_type, request, RangeRequestId::RangeSync { From 5337e4602e0bb46243e8c94ffa72f2babf4f9e4d Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Mon, 18 Aug 2025 22:59:38 -0700 Subject: [PATCH 06/49] Add a function to retry column requests that could not be made --- .../src/service/api_types.rs | 11 ++ beacon_node/network/src/sync/manager.rs | 3 + .../network/src/sync/network_context.rs | 103 +++++++++++++++++- .../requests/data_columns_by_root.rs | 8 +- 4 files changed, 119 insertions(+), 6 deletions(-) diff --git a/beacon_node/lighthouse_network/src/service/api_types.rs b/beacon_node/lighthouse_network/src/service/api_types.rs index 41119029904..77d29133709 100644 --- a/beacon_node/lighthouse_network/src/service/api_types.rs +++ b/beacon_node/lighthouse_network/src/service/api_types.rs @@ -99,6 +99,17 @@ pub enum RangeRequestId { RangeSync { chain_id: Id, batch_id: Epoch }, BackfillSync { batch_id: Epoch }, } +impl RangeRequestId { + pub fn batch_id(&self) -> Epoch { + match &self { + RangeRequestId::BackfillSync { batch_id } => *batch_id, + RangeRequestId::RangeSync { + chain_id: _, + batch_id, + } => *batch_id, + } + } +} // TODO(das) refactor in a separate PR. We might be able to remove this and replace // [`DataColumnsByRootRequestId`] with a [`SingleLookupReqId`]. diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index cc1a6a51d89..9c5e116efeb 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -403,6 +403,9 @@ impl SyncManager { for (id, result) in self.network.continue_custody_by_root_requests() { self.on_custody_by_root_result(id, result); } + + // Try to make range requests that we failed to make because of lack of peers. + self.network.retry_pending_requests(); } /// Trigger range sync for a set of peers that claim to have imported a head unknown to us. 
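// Editorial sketch, not part of the patch: simplified, hypothetical types illustrating the
// per-batch flow added in the previous commit and wired up to the retry driver here. After a
// BlocksByRange response, the roots of blocks that carry blob KZG commitments are grouped by
// one good custody peer per sampling column; one batched DataColumnsByRoot request goes out
// per peer, and columns with no available peer are stashed in `requests_to_retry` for the
// periodic retry.
use std::collections::HashMap;

type PeerId = u64;
type ColumnIndex = u64;

/// Assumed stand-in for "find a good custody-subnet peer for this column, if any".
fn custody_peer_for(column: ColumnIndex) -> Option<PeerId> {
    (column % 2 == 0).then_some(100 + column)
}

/// Group sampling columns by the peer that will serve them; columns without a peer are
/// returned separately so the caller can retry them later.
fn group_columns_by_peer(
    sampling_columns: &[ColumnIndex],
) -> (HashMap<PeerId, Vec<ColumnIndex>>, Vec<ColumnIndex>) {
    let mut peer_to_columns: HashMap<PeerId, Vec<ColumnIndex>> = HashMap::new();
    let mut no_peer = Vec::new();
    for &column in sampling_columns {
        match custody_peer_for(column) {
            Some(peer) => peer_to_columns.entry(peer).or_default().push(column),
            None => no_peer.push(column),
        }
    }
    (peer_to_columns, no_peer)
}

fn main() {
    let (by_peer, missing) = group_columns_by_peer(&[0, 1, 2, 3]);
    assert_eq!(by_peer.len(), 2); // columns 0 and 2 each found a custody peer
    assert_eq!(missing, vec![1, 3]); // columns 1 and 3 must be retried once peers appear
}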
diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 0e9ce0676f2..750bd8c4adb 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -22,9 +22,7 @@ use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState}; use custody::CustodyRequestResult; use fnv::FnvHashMap; -use lighthouse_network::rpc::methods::{ - BlobsByRangeRequest, DataColumnsByRangeRequest, DataColumnsByRootRequest, -}; +use lighthouse_network::rpc::methods::{BlobsByRangeRequest, DataColumnsByRangeRequest}; use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError, RequestType}; pub use lighthouse_network::service::api_types::RangeRequestId; use lighthouse_network::service::api_types::{ @@ -212,6 +210,14 @@ pub struct SyncNetworkContext { components_by_range_requests: FnvHashMap>, + // todo(pawan): make this a bounded queue, make the types better, add better docs + // A hashmap with the key being the parent request and the value being the data column by root + // requests that we have to retry because of one of the following reasons: + // 1. The root requests couldn't be made after the parent blocks request because there were no + // column peers available + // 2. The root request errored (either peer sent an RPC error or an empty response) + requests_to_retry: HashMap, + /// Whether the ee is online. If it's not, we don't allow access to the /// `beacon_processor_send`. execution_engine_state: EngineState, @@ -293,6 +299,7 @@ impl SyncNetworkContext { data_columns_by_range_requests: ActiveRequests::new("data_columns_by_range"), custody_by_root_requests: <_>::default(), components_by_range_requests: FnvHashMap::default(), + requests_to_retry: Default::default(), network_beacon_processor, chain, fork_context, @@ -323,6 +330,7 @@ impl SyncNetworkContext { custody_by_root_requests: _, // components_by_range_requests is a meta request of various _by_range requests components_by_range_requests: _, + requests_to_retry: _, execution_engine_state: _, network_beacon_processor: _, chain: _, @@ -429,6 +437,7 @@ impl SyncNetworkContext { components_by_range_requests: _, execution_engine_state: _, network_beacon_processor: _, + requests_to_retry: _, chain: _, fork_context: _, // Don't use a fallback match. We want to be sure that all requests are considered when @@ -522,6 +531,84 @@ impl SyncNetworkContext { Ok(()) } + /// Try to make all the requests that we failed to make earlier because of lack of peers + /// in the required subnets. + /// + /// This function must be manually invoked at regular intervals. 
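+    /// (In this series it is driven from the sync manager: see the `retry_pending_requests` call added
+    /// to `manager.rs` in this patch, right after custody-by-root requests are continued.)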
+ pub fn retry_pending_requests(&mut self) -> Result<(), String> { + let active_requests = self.active_request_count_by_peer(); + + // Collect entries to process and remove from requests_to_retry + let entries_to_process: Vec<_> = self.requests_to_retry.drain().collect(); + let mut entries_to_keep = Vec::new(); + + for (parent_request, requests) in entries_to_process { + let mut data_column_requests = Vec::new(); + let requester = DataColumnsByRootRequester::RangeSync { + parent: parent_request.clone(), + }; + let custody_indices = requests.indices.iter().cloned().collect(); + let synced_peers = self + .network_globals() + .peers + .read() + .synced_peers_for_epoch(parent_request.requester.batch_id(), None) + .cloned() + .collect(); + + match self.select_columns_by_range_peers_to_request( + &custody_indices, + &synced_peers, + active_requests.clone(), + &HashSet::new(), + ) { + Ok(peer_to_columns) => { + for (peer, indices) in peer_to_columns.into_iter() { + let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { + block_roots: requests.block_roots.clone(), + indices: indices.clone(), + }; + + data_column_requests.push(( + self.send_data_columns_by_root_range_requests( + peer, + data_columns_by_root_request, + requester, + ) + .expect("should be able to send request"), + indices, + )); + } + // we have sent out requests to peers, register these requests with the coupling service. + if let Some(req) = self.components_by_range_requests.get_mut(&parent_request) { + req.insert_column_request_after_block_request( + data_column_requests, + self.chain + .sampling_columns_for_epoch(parent_request.requester.batch_id()), + ) + .expect("should be in the right state"); + } + debug!(?requests, "Successfully retried requests"); + // Successfully processed, don't keep this entry + } + Err(err) => { + debug!( + ?err, + ?parent_request, + "Failed to retry request, no peers in subnets", + ); + // Failed to process, keep this entry for next retry + entries_to_keep.push((parent_request, requests)); + } + } + } + + // Re-insert entries that still need to be retried + self.requests_to_retry.extend(entries_to_keep); + + Ok(()) + } + /// A blocks by range request sent by the range sync algorithm pub fn block_components_by_range_request( &mut self, @@ -1568,7 +1655,7 @@ impl SyncNetworkContext { } else { debug!( ?data_column, - ?id, + block_request_id=?id, "Not enough column peers for batch, need to retry" ); no_peers_for_column.push(*column); @@ -1600,6 +1687,14 @@ impl SyncNetworkContext { )); } + let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { + block_roots: block_roots.clone(), + indices: no_peers_for_column, + }; + + self.requests_to_retry + .insert(id.parent_request_id, data_columns_by_root_request); + if let Some(req) = self .components_by_range_requests .get_mut(&id.parent_request_id) diff --git a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs index fc5d4fa5e9f..c8cf1847c25 100644 --- a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs +++ b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs @@ -30,7 +30,6 @@ impl DataColumnsByRootBatchBlockRequest { columns: columns.clone(), }) .collect(); - tracing::debug!(?ids, "Length ids"); assert!(ids.len() <= 32); Ok(DataColumnsByRootRequest::new( ids, @@ -147,8 +146,13 @@ impl ActiveRequestItems for DataColumnsByRootRangeRequestItems { .items .values() .flatten() 
- .any(|d| d.index == data_column.index) + .any(|d| d.index == data_column.index && d.block_root() == block_root) { + tracing::debug!( + ?data_column, + existing_items=?self.items, + "Duplicated data", + ); return Err(LookupVerifyError::DuplicatedData( data_column.slot(), data_column.index, From ca9cfd5d739bc4ea12013941dfeb0e5ac91490ca Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Tue, 19 Aug 2025 12:17:59 -0700 Subject: [PATCH 07/49] Small fixes --- .../src/sync/block_sidecar_coupling.rs | 9 ++++--- beacon_node/network/src/sync/manager.rs | 2 +- .../network/src/sync/network_context.rs | 26 ++++++++++++------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index b807c2a0b21..d064ad8ccd1 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -163,14 +163,17 @@ impl RangeBlockComponentsRequest { column_peers, expected_custody_columns, } => { - *init = true; for (request, peers) in column_requests { requests.insert(request, ByRangeRequest::Active(request)); column_peers.insert(request, peers); } - for column in custody_columns { - expected_custody_columns.push(*column); + // expected custody columns should be populated only once during initialization + if !*init { + for column in custody_columns { + expected_custody_columns.push(*column); + } } + *init = true; Ok(()) } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 9c5e116efeb..f9019026524 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -405,7 +405,7 @@ impl SyncManager { } // Try to make range requests that we failed to make because of lack of peers. - self.network.retry_pending_requests(); + let _ = self.network.retry_pending_requests(); } /// Trigger range sync for a set of peers that claim to have imported a head unknown to us. 
diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 750bd8c4adb..5cc4236394d 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -755,7 +755,7 @@ impl SyncNetworkContext { // Create the overall components_by_range request ID before its individual components let id = ComponentsByRangeRequestId { id: self.next_id(), - requester, + requester: requester.clone(), }; let blocks_req_id = self.send_blocks_by_range_request(block_peer, request.clone(), id)?; @@ -774,6 +774,8 @@ impl SyncNetworkContext { }; let data_columns_by_root = matches!(batch_type, ByRangeRequestType::BlocksAndColumns); + + debug!(?requester, data_columns_by_root, "Batch type"); let info = RangeBlockComponentsRequest::new( blocks_req_id, blobs_req_id, @@ -1607,7 +1609,7 @@ impl SyncNetworkContext { let resp = self .data_columns_by_root_range_requests .on_response(id, rpc_event); - self.on_rpc_response_result(id, "DataColumnsByRootRange", resp, peer_id, |_| 1) + self.on_rpc_response_result(id, "DataColumnsByRootRange", resp, peer_id, |b| b.len()) } #[allow(clippy::type_complexity)] @@ -1624,7 +1626,10 @@ impl SyncNetworkContext { // We have blocks here, check if they need data columns and request them let mut block_roots = Vec::new(); let batch_epoch = id.batch_id(); - if !self.chain.spec.is_peer_das_enabled_for_epoch(batch_epoch) { + if !matches!( + self.batch_type(batch_epoch), + ByRangeRequestType::BlocksAndColumns + ) { return self .on_rpc_response_result(id, "BlocksByRange", resp, peer_id, |b| b.len()); } @@ -1659,7 +1664,6 @@ impl SyncNetworkContext { "Not enough column peers for batch, need to retry" ); no_peers_for_column.push(*column); - continue; } } @@ -1687,13 +1691,15 @@ impl SyncNetworkContext { )); } - let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { - block_roots: block_roots.clone(), - indices: no_peers_for_column, - }; + if !no_peers_for_column.is_empty() { + let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { + block_roots: block_roots.clone(), + indices: no_peers_for_column, + }; - self.requests_to_retry - .insert(id.parent_request_id, data_columns_by_root_request); + self.requests_to_retry + .insert(id.parent_request_id, data_columns_by_root_request); + } if let Some(req) = self .components_by_range_requests From 68cce376ecfcb735431952a9dbd4b34d004192bd Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 20 Aug 2025 14:14:11 -0700 Subject: [PATCH 08/49] Try to avoid chains failing for rpc errors --- .../network/src/sync/range_sync/chain.rs | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 0656ecf9cc9..2da3cfdb699 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -327,7 +327,8 @@ impl SyncingChain { return Ok(KeepChain); } BatchState::Poisoned => unreachable!("Poisoned batch"), - BatchState::Processing(_) | BatchState::AwaitingDownload | BatchState::Failed => { + BatchState::AwaitingDownload => return Ok(KeepChain), + BatchState::Processing(_) | BatchState::Failed => { // these are all inconsistent states: // - Processing -> `self.current_processing_batch` is None // - Failed -> non recoverable batch. 
For an optimistic batch, it should @@ -361,7 +362,8 @@ impl SyncingChain { // Batch is not ready, nothing to process } BatchState::Poisoned => unreachable!("Poisoned batch"), - BatchState::Failed | BatchState::AwaitingDownload | BatchState::Processing(_) => { + BatchState::AwaitingDownload => return Ok(KeepChain), + BatchState::Failed | BatchState::Processing(_) => { // these are all inconsistent states: // - Failed -> non recoverable batch. Chain should have been removed // - AwaitingDownload -> A recoverable failed batch should have been @@ -559,7 +561,7 @@ impl SyncingChain { batch.processing_completed(BatchProcessingResult::NonFaultyFailure)?; // Simply re-download the batch. - self.send_batch(network, batch_id) + self.attempt_send_awaiting_download_batches(network, "non-faulty-failure") } } } @@ -729,7 +731,6 @@ impl SyncingChain { } // this is our robust `processing_target`. All previous batches must be awaiting // validation - let mut redownload_queue = Vec::new(); for (id, batch) in self.batches.range_mut(..batch_id) { if let BatchOperationOutcome::Failed { blacklist } = batch.validation_failed()? { @@ -739,18 +740,14 @@ impl SyncingChain { failing_batch: *id, }); } - redownload_queue.push(*id); } // no batch maxed out it process attempts, so now the chain's volatile progress must be // reset self.processing_target = self.start_epoch; - for id in redownload_queue { - self.send_batch(network, id)?; - } // finally, re-request the failed batch. - self.send_batch(network, batch_id) + self.attempt_send_awaiting_download_batches(network, "handle_invalid_batch") } pub fn stop_syncing(&mut self) { @@ -891,7 +888,7 @@ impl SyncingChain { failing_batch: batch_id, }); } - self.send_batch(network, batch_id) + self.attempt_send_awaiting_download_batches(network, "injecting error") } else { debug!( batch_epoch = %batch_id, @@ -905,6 +902,24 @@ impl SyncingChain { } } + pub fn attempt_send_awaiting_download_batches( + &mut self, + network: &mut SyncNetworkContext, + src: &str, + ) -> ProcessingResult { + debug!(?src, "In attempt_send_awaiting download batches"); + // Check all batches in AwaitingDownload state and see if they can be sent + for (batch_id, batch) in self.batches.iter() { + if matches!(batch.state(), BatchState::AwaitingDownload) { + debug!(?src, ?batch_id, "Sending batch"); + if self.good_peers_on_sampling_subnets(*batch_id, network) { + return self.send_batch(network, *batch_id); + } + } + } + Ok(KeepChain) + } + /// Requests the batch assigned to the given id from a given peer. 
pub fn send_batch( &mut self, From 6da924b1288977ad0def3dfa26228175d7c52433 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 20 Aug 2025 14:14:26 -0700 Subject: [PATCH 09/49] Fix bug in initialization code --- beacon_node/network/src/sync/block_sidecar_coupling.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index d064ad8ccd1..fc9c4567ae3 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -155,6 +155,10 @@ impl RangeBlockComponentsRequest { column_requests: Vec<(DataColumnsByRootRequestId, Vec)>, custody_columns: &[ColumnIndex], ) -> Result<(), String> { + // Nothing to insert, do not initialize + if column_requests.is_empty() { + return Ok(()); + } match &mut self.block_data_request { RangeBlockDataRequest::DataColumnsFromRoot { init, From 1a0df3042cfc8f996c3f3fad2c68d95cbdc8eaf6 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 20 Aug 2025 14:14:48 -0700 Subject: [PATCH 10/49] Also penalize all batch peers for availability check errors --- .../network_beacon_processor/sync_methods.rs | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 9967f9c5e2b..f1840cde242 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -6,8 +6,10 @@ use crate::sync::{ manager::{BlockProcessType, SyncMessage}, }; use beacon_chain::block_verification_types::{AsBlock, RpcBlock}; -use beacon_chain::data_availability_checker::AvailabilityCheckError; use beacon_chain::data_availability_checker::MaybeAvailableBlock; +use beacon_chain::data_availability_checker::{ + AvailabilityCheckError, AvailabilityCheckErrorCategory, +}; use beacon_chain::{ AvailabilityProcessingStatus, BeaconChainTypes, BlockError, ChainSegmentResult, ExecutionPayloadError, HistoricalBlockError, NotifyExecutionLayer, @@ -836,6 +838,27 @@ impl NetworkBeaconProcessor { peer_action: Some(PeerAction::Fatal), }) } + BlockError::AvailabilityCheck(err) => { + if matches!(err.category(), AvailabilityCheckErrorCategory::Malicious) { + debug!( + msg = "peer sent invalid block", + outcome = ?err, + "Invalid block received" + ); + + Err(ChainSegmentFailed { + message: format!("Peer sent invalid block. Reason: {:?}", err), + // Do not penalize peers for internal errors. + peer_action: Some(PeerAction::MidToleranceError), + }) + } else { + Err(ChainSegmentFailed { + message: format!("Peer sent invalid block. Reason: {:?}", err), + // Do not penalize peers for internal errors. 
+ peer_action: None, + }) + } + } other => { debug!( msg = "peer sent invalid block", From 17c4e348f53466cadd435c15e155fb78464b5fe8 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 20 Aug 2025 14:18:15 -0700 Subject: [PATCH 11/49] Avoid root requests for backfill sync --- beacon_node/network/src/sync/network_context.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 5cc4236394d..b4c145d5144 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -1623,6 +1623,14 @@ impl SyncNetworkContext { match &resp { // todo(pawan): send the data column request as soon as you get each chunk to spread out requests Some(Ok((blocks, _))) => { + // Return early if this is a backfill batch, backfill batches are handled by range requests instead of root + if matches!( + id.parent_request_id.requester, + RangeRequestId::BackfillSync { .. } + ) { + return self + .on_rpc_response_result(id, "BlocksByRange", resp, peer_id, |b| b.len()); + } // We have blocks here, check if they need data columns and request them let mut block_roots = Vec::new(); let batch_epoch = id.batch_id(); From fdce537747507b218ae3c0738ad2f3f7f9153028 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 20 Aug 2025 18:37:44 -0700 Subject: [PATCH 12/49] Implement responsible peer tracking --- .../src/service/api_types.rs | 2 + .../network_beacon_processor/sync_methods.rs | 84 +++++++++++++------ .../network/src/sync/backfill_sync/mod.rs | 45 +++++++--- .../src/sync/block_sidecar_coupling.rs | 24 +++++- beacon_node/network/src/sync/manager.rs | 20 +++-- .../network/src/sync/network_context.rs | 16 +++- .../network/src/sync/range_sync/batch.rs | 84 +++++++++++++------ .../network/src/sync/range_sync/chain.rs | 56 +++++++++---- .../network/src/sync/range_sync/mod.rs | 2 +- .../network/src/sync/range_sync/range.rs | 9 +- 10 files changed, 251 insertions(+), 91 deletions(-) diff --git a/beacon_node/lighthouse_network/src/service/api_types.rs b/beacon_node/lighthouse_network/src/service/api_types.rs index 77d29133709..65a6cf61c5d 100644 --- a/beacon_node/lighthouse_network/src/service/api_types.rs +++ b/beacon_node/lighthouse_network/src/service/api_types.rs @@ -47,6 +47,8 @@ pub struct BlocksByRangeRequestId { pub id: Id, /// The Id of the overall By Range request for block components. 
pub parent_request_id: ComponentsByRangeRequestId, + /// The peer that we made this request to + pub peer_id: PeerId, } impl BlocksByRangeRequestId { diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index f1840cde242..e722e3cf7eb 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -1,15 +1,14 @@ use crate::metrics::{self, register_process_result_metrics}; use crate::network_beacon_processor::{FUTURE_SLOT_TOLERANCE, NetworkBeaconProcessor}; use crate::sync::BatchProcessResult; +use crate::sync::manager::FaultyComponent; use crate::sync::{ ChainId, manager::{BlockProcessType, SyncMessage}, }; use beacon_chain::block_verification_types::{AsBlock, RpcBlock}; +use beacon_chain::data_availability_checker::AvailabilityCheckError; use beacon_chain::data_availability_checker::MaybeAvailableBlock; -use beacon_chain::data_availability_checker::{ - AvailabilityCheckError, AvailabilityCheckErrorCategory, -}; use beacon_chain::{ AvailabilityProcessingStatus, BeaconChainTypes, BlockError, ChainSegmentResult, ExecutionPayloadError, HistoricalBlockError, NotifyExecutionLayer, @@ -44,6 +43,8 @@ struct ChainSegmentFailed { message: String, /// Used to penalize peers. peer_action: Option, + /// Used to identify the faulty component + faulty_component: Option, } impl NetworkBeaconProcessor { @@ -471,6 +472,7 @@ impl NetworkBeaconProcessor { Some(penalty) => BatchProcessResult::FaultyFailure { imported_blocks, penalty, + faulty_component: e.faulty_component, }, None => BatchProcessResult::NonFaultyFailure, } @@ -523,6 +525,7 @@ impl NetworkBeaconProcessor { Some(penalty) => BatchProcessResult::FaultyFailure { imported_blocks: 0, penalty, + faulty_component: e.faulty_component, }, None => BatchProcessResult::NonFaultyFailure, } @@ -595,15 +598,18 @@ impl NetworkBeaconProcessor { Err(ChainSegmentFailed { peer_action: None, message: "Failed to check block availability".into(), + faulty_component: None, }), ); } + e => { return ( 0, Err(ChainSegmentFailed { peer_action: Some(PeerAction::LowToleranceError), message: format!("Failed to check block availability : {:?}", e), + faulty_component: None, // Todo(pawan): replicate behaviour in forward sync once its proven }), ); } @@ -620,6 +626,7 @@ impl NetworkBeaconProcessor { (total_blocks - available_blocks.len()), total_blocks ), + faulty_component: Some(FaultyComponent::Blocks), }), ); } @@ -635,7 +642,7 @@ impl NetworkBeaconProcessor { metrics::inc_counter( &metrics::BEACON_PROCESSOR_BACKFILL_CHAIN_SEGMENT_FAILED_TOTAL, ); - let peer_action = match &e { + let (peer_action, faulty_component) = match &e { HistoricalBlockError::MismatchedBlockRoot { block_root, expected_block_root, @@ -647,7 +654,10 @@ impl NetworkBeaconProcessor { "Backfill batch processing error" ); // The peer is faulty if they send blocks with bad roots. - Some(PeerAction::LowToleranceError) + ( + Some(PeerAction::LowToleranceError), + Some(FaultyComponent::Blocks), + ) } HistoricalBlockError::InvalidSignature | HistoricalBlockError::SignatureSet(_) => { @@ -656,7 +666,10 @@ impl NetworkBeaconProcessor { "Backfill batch processing error" ); // The peer is faulty if they bad signatures. 
- Some(PeerAction::LowToleranceError) + ( + Some(PeerAction::LowToleranceError), + Some(FaultyComponent::Blocks), + ) } HistoricalBlockError::ValidatorPubkeyCacheTimeout => { warn!( @@ -664,7 +677,7 @@ impl NetworkBeaconProcessor { "Backfill batch processing error" ); // This is an internal error, do not penalize the peer. - None + (None, None) } HistoricalBlockError::IndexOutOfBounds => { error!( @@ -672,12 +685,12 @@ impl NetworkBeaconProcessor { "Backfill batch OOB error" ); // This should never occur, don't penalize the peer. - None + (None, None) } HistoricalBlockError::StoreError(e) => { warn!(error = ?e, "Backfill batch processing error"); // This is an internal error, don't penalize the peer. - None + (None, None) } // // Do not use a fallback match, handle all errors explicitly }; @@ -688,6 +701,7 @@ impl NetworkBeaconProcessor { message: format!("{:?}", err_str), // This is an internal error, don't penalize the peer. peer_action, + faulty_component, }), ) } @@ -702,7 +716,8 @@ impl NetworkBeaconProcessor { Err(ChainSegmentFailed { message: format!("Block has an unknown parent: {}", parent_root), // Peers are faulty if they send non-sequential blocks. - peer_action: Some(PeerAction::LowToleranceError), + peer_action: Some(PeerAction::LowToleranceError), // todo(pawan): revise this + faulty_component: Some(FaultyComponent::Blocks), }) } BlockError::DuplicateFullyImported(_) @@ -741,6 +756,7 @@ impl NetworkBeaconProcessor { ), // Peers are faulty if they send blocks from the future. peer_action: Some(PeerAction::LowToleranceError), + faulty_component: Some(FaultyComponent::Blocks), }) } BlockError::WouldRevertFinalizedSlot { .. } => { @@ -757,6 +773,7 @@ impl NetworkBeaconProcessor { block_parent_root ), peer_action: Some(PeerAction::Fatal), + faulty_component: Some(FaultyComponent::Blocks), }) } BlockError::GenesisBlock => { @@ -774,6 +791,7 @@ impl NetworkBeaconProcessor { message: format!("Internal error whilst processing block: {:?}", e), // Do not penalize peers for internal errors. peer_action: None, + faulty_component: None, }) } ref err @ BlockError::ExecutionPayloadError(ref epe) => { @@ -788,6 +806,7 @@ impl NetworkBeaconProcessor { err ), peer_action: Some(PeerAction::LowToleranceError), + faulty_component: Some(FaultyComponent::Blocks), // todo(pawan): recheck this }) } else if !epe.penalize_peer() { // These errors indicate an issue with the EL and not the `ChainSegment`. @@ -801,6 +820,7 @@ impl NetworkBeaconProcessor { message: format!("Execution layer offline. Reason: {:?}", err), // Do not penalize peers for internal errors. peer_action: None, + faulty_component: None, }) } else { debug!( @@ -813,6 +833,7 @@ impl NetworkBeaconProcessor { err ), peer_action: Some(PeerAction::LowToleranceError), + faulty_component: Some(FaultyComponent::Blocks), }) } } @@ -828,6 +849,7 @@ impl NetworkBeaconProcessor { // of a faulty EL it will usually require manual intervention to fix anyway, so // it's not too bad if we drop most of our peers. peer_action: Some(PeerAction::LowToleranceError), + faulty_component: Some(FaultyComponent::Blocks), }) } // Penalise peers for sending us banned blocks. 
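The hunks above thread a `faulty_component` through every `ChainSegmentFailed`, alongside the existing `peer_action`. The point, as wired up in the range sync and backfill changes later in this patch, is that a failed batch can be attributed to the peer or peers that actually served the faulty component, rather than blaming whichever peer supplied the blocks. Below is a rough, self-contained sketch of that attribution step: `FaultyComponent` and `ResponsiblePeers` mirror the types added in this patch, while `peers_to_penalize` is an illustrative helper (not a function in the tree) and `PeerId`/`ColumnIndex` are plain stand-ins for the real `libp2p`/`types` aliases.

use std::collections::HashMap;

// Stand-ins for `libp2p::PeerId` and `types::ColumnIndex`.
type PeerId = u64;
type ColumnIndex = u64;

// Mirrors the enum added to `sync/manager.rs` in this patch.
enum FaultyComponent {
    Blocks,
    Blobs,
    Columns(Vec<ColumnIndex>),
}

// Mirrors the struct added to `range_sync/batch.rs` in this patch.
struct ResponsiblePeers {
    block_blob: PeerId,
    data_columns: HashMap<PeerId, Vec<ColumnIndex>>,
}

// Illustrative helper: pick which peers to report for a faulty batch.
fn peers_to_penalize(responsible: &ResponsiblePeers, faulty: &FaultyComponent) -> Vec<PeerId> {
    match faulty {
        // Blocks and blobs for a batch come from a single peer.
        FaultyComponent::Blocks | FaultyComponent::Blobs => vec![responsible.block_blob],
        // Columns may be spread across several peers; only report the peers
        // that served one of the faulty column indices.
        FaultyComponent::Columns(faulty_columns) => responsible
            .data_columns
            .iter()
            .filter(|(_, columns)| faulty_columns.iter().any(|c| columns.contains(c)))
            .map(|(peer, _)| *peer)
            .collect(),
    }
}

fn main() {
    let responsible = ResponsiblePeers {
        block_blob: 1,
        data_columns: HashMap::from([(2, vec![0, 1]), (3, vec![2, 3])]),
    };
    // A batch that fails its availability check on column 3 only reports peer 3.
    assert_eq!(
        peers_to_penalize(&responsible, &FaultyComponent::Columns(vec![3])),
        vec![3]
    );
}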
@@ -836,27 +858,40 @@ impl NetworkBeaconProcessor { Err(ChainSegmentFailed { message: format!("Banned block: {block_root:?}"), peer_action: Some(PeerAction::Fatal), + faulty_component: Some(FaultyComponent::Blocks), }) } - BlockError::AvailabilityCheck(err) => { - if matches!(err.category(), AvailabilityCheckErrorCategory::Malicious) { - debug!( - msg = "peer sent invalid block", - outcome = ?err, - "Invalid block received" - ); - - Err(ChainSegmentFailed { + ref err @ BlockError::AvailabilityCheck(ref e) => { + match &e { + AvailabilityCheckError::InvalidBlobs(_) + | AvailabilityCheckError::BlobIndexInvalid(_) => Err(ChainSegmentFailed { + message: format!("Peer sent invalid block. Reason: {:?}", err), + // Do not penalize peers for internal errors. + peer_action: Some(PeerAction::LowToleranceError), + faulty_component: Some(FaultyComponent::Blobs), + }), + AvailabilityCheckError::InvalidColumn(columns) => Err(ChainSegmentFailed { message: format!("Peer sent invalid block. Reason: {:?}", err), // Do not penalize peers for internal errors. peer_action: Some(PeerAction::MidToleranceError), - }) - } else { - Err(ChainSegmentFailed { + faulty_component: Some(FaultyComponent::Columns( + columns.iter().map(|v| v.0).collect(), + )), + }), + AvailabilityCheckError::DataColumnIndexInvalid(column) => { + Err(ChainSegmentFailed { + message: format!("Peer sent invalid block. Reason: {:?}", err), + // Do not penalize peers for internal errors. + peer_action: Some(PeerAction::MidToleranceError), + faulty_component: Some(FaultyComponent::Columns(vec![*column])), + }) + } + _ => Err(ChainSegmentFailed { message: format!("Peer sent invalid block. Reason: {:?}", err), // Do not penalize peers for internal errors. - peer_action: None, - }) + peer_action: Some(PeerAction::MidToleranceError), + faulty_component: None, + }), } } other => { @@ -870,6 +905,7 @@ impl NetworkBeaconProcessor { message: format!("Peer sent invalid block. Reason: {:?}", other), // Do not penalize peers for internal errors. 
peer_action: None, + faulty_component: None, }) } } diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index dc70b08aec5..ac47310b3f0 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -10,12 +10,13 @@ use crate::network_beacon_processor::ChainSegmentProcessId; use crate::sync::block_sidecar_coupling::CouplingError; -use crate::sync::manager::BatchProcessResult; +use crate::sync::manager::{BatchProcessResult, FaultyComponent}; use crate::sync::network_context::{ RangeRequestId, RpcRequestSendError, RpcResponseError, SyncNetworkContext, }; use crate::sync::range_sync::{ BatchConfig, BatchId, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState, + ResponsiblePeers, }; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes}; @@ -380,9 +381,9 @@ impl BackFillSync { &mut self, network: &mut SyncNetworkContext, batch_id: BatchId, - peer_id: &PeerId, request_id: Id, blocks: Vec>, + responsible_peers: ResponsiblePeers, ) -> Result { // check if we have this batch let Some(batch) = self.batches.get_mut(&batch_id) else { @@ -401,7 +402,7 @@ impl BackFillSync { return Ok(ProcessResult::Successful); } - match batch.download_completed(blocks, *peer_id) { + match batch.download_completed(blocks, responsible_peers) { Ok(received) => { let awaiting_batches = self.processing_target.saturating_sub(batch_id) / BACKFILL_EPOCHS_PER_BATCH; @@ -557,7 +558,7 @@ impl BackFillSync { } }; - let Some(peer) = batch.processing_peer() else { + let Some(responsible_peers) = batch.processing_peers() else { self.fail_sync(BackFillError::BatchInvalidState( batch_id, String::from("Peer does not exist"), @@ -569,8 +570,8 @@ impl BackFillSync { ?result, %batch, batch_epoch = %batch_id, - %peer, - client = %network.client_type(peer), + ?responsible_peers, + // client = %network.client_type(peer), "Backfill batch processed" ); @@ -613,7 +614,31 @@ impl BackFillSync { BatchProcessResult::FaultyFailure { imported_blocks, penalty, + faulty_component, } => { + let Some(responsible_peers) = batch.responsible_peers() else { + crit!("Shouldn't happen"); + return self + .fail_sync(BackFillError::BatchProcessingFailed(batch_id)) + .map(|_| ProcessResult::Successful); + }; + // Penalize the peer appropriately. + match faulty_component { + Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { + network.report_peer(responsible_peers.block_blob, *penalty, "faulty_batch"); + } + // todo(pawan): clean this up + Some(FaultyComponent::Columns(faulty_columns)) => { + for (peer, columns) in responsible_peers.data_columns.iter() { + for faulty_column in faulty_columns { + if columns.contains(faulty_column) { + network.report_peer(*peer, *penalty, "faulty_batch"); + } + } + } + } + None => {} + } match batch.processing_completed(BatchProcessingResult::FaultyFailure) { Err(e) => { // Batch was in the wrong state @@ -687,7 +712,7 @@ impl BackFillSync { // Batch is not ready, nothing to process } BatchState::Poisoned => unreachable!("Poisoned batch"), - BatchState::Failed | BatchState::AwaitingDownload | BatchState::Processing(_) => { + BatchState::Failed | BatchState::AwaitingDownload | BatchState::Processing(..) => { // these are all inconsistent states: // - Failed -> non recoverable batch. 
Chain should have been removed // - AwaitingDownload -> A recoverable failed batch should have been @@ -698,7 +723,7 @@ impl BackFillSync { )))?; return Ok(ProcessResult::Successful); } - BatchState::AwaitingValidation(_) => { + BatchState::AwaitingValidation(_, _) => { // TODO: I don't think this state is possible, log a CRIT just in case. // If this is not observed, add it to the failed state branch above. crit!( @@ -748,7 +773,7 @@ impl BackFillSync { // only for batches awaiting validation can we be sure the last attempt is // right, and thus, that any different attempt is wrong match batch.state() { - BatchState::AwaitingValidation(processed_attempt) => { + BatchState::AwaitingValidation(processed_attempt, _) => { for attempt in batch.attempts() { // The validated batch has been re-processed if attempt.hash != processed_attempt.hash { @@ -794,7 +819,7 @@ impl BackFillSync { crit!("batch indicates inconsistent chain state while advancing chain") } BatchState::AwaitingProcessing(..) => {} - BatchState::Processing(_) => { + BatchState::Processing(..) => { debug!(batch = %id, %batch, "Advancing chain while processing a batch"); if let Some(processing_id) = self.current_processing_batch && id >= processing_id diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index fc9c4567ae3..9e72090561d 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -1,3 +1,5 @@ +use crate::sync::network_context::MAX_COLUMN_RETRIES; +use crate::sync::range_sync::ResponsiblePeers; use beacon_chain::{ block_verification_types::RpcBlock, data_column_verification::CustodyDataColumn, get_block_root, }; @@ -14,8 +16,6 @@ use types::{ Hash256, RuntimeVariableList, SignedBeaconBlock, }; -use crate::sync::network_context::MAX_COLUMN_RETRIES; - /// Accumulates and couples beacon blocks with their associated data (blobs or data columns) /// from range sync network responses. /// @@ -30,6 +30,7 @@ use crate::sync::network_context::MAX_COLUMN_RETRIES; pub struct RangeBlockComponentsRequest { /// Blocks we have received awaiting for their corresponding sidecar. blocks_request: ByRangeRequest>>>, + block_peer: PeerId, /// Sidecars we have received awaiting for their corresponding block. block_data_request: RangeBlockDataRequest, } @@ -95,6 +96,7 @@ impl RangeBlockComponentsRequest { )>, data_columns_from_root: bool, ) -> Self { + let block_peer = blocks_req_id.peer_id; let block_data_request = if let Some(blobs_req_id) = blobs_req_id { RangeBlockDataRequest::Blobs(ByRangeRequest::Active(blobs_req_id)) } else if let Some((requests, expected_custody_columns)) = data_columns { @@ -122,10 +124,28 @@ impl RangeBlockComponentsRequest { Self { blocks_request: ByRangeRequest::Active(blocks_req_id), + block_peer, block_data_request, } } + pub fn responsible_peers(&self) -> ResponsiblePeers { + ResponsiblePeers { + block_blob: self.block_peer, + data_columns: match &self.block_data_request { + RangeBlockDataRequest::NoData | RangeBlockDataRequest::Blobs(_) => HashMap::new(), + RangeBlockDataRequest::DataColumns { column_peers, .. } => column_peers + .iter() + .map(|(k, v)| (k.peer, v.clone())) + .collect(), + RangeBlockDataRequest::DataColumnsFromRoot { column_peers, .. } => column_peers + .iter() + .map(|(k, v)| (k.peer, v.clone())) + .collect(), + }, + } + } + /// Modifies `self` by inserting a new `DataColumnsByRangeRequestId` for a formerly failed /// request for some columns. 
pub fn reinsert_failed_column_requests( diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index f9019026524..3bda91ad1c5 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -70,7 +70,8 @@ use std::time::Duration; use tokio::sync::mpsc; use tracing::{debug, error, info, trace}; use types::{ - BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot, + BlobSidecar, ColumnIndex, DataColumnSidecar, EthSpec, ForkContext, Hash256, SignedBeaconBlock, + Slot, }; /// The number of slots ahead of us that is allowed before requesting a long-range (batch) Sync @@ -205,10 +206,19 @@ pub enum BatchProcessResult { FaultyFailure { imported_blocks: usize, penalty: PeerAction, + faulty_component: Option, }, NonFaultyFailure, } +/// Identifies the specific component that was faulty if the batch was a faulty failure. +#[derive(Debug)] +pub enum FaultyComponent { + Blocks, + Blobs, + Columns(Vec), +} + /// The primary object for handling and driving all the current syncing logic. It maintains the /// current state of the syncing process, the number of useful peers, downloaded blocks and /// controls the logic behind both the long-range (batch) sync and the on-going potential parent @@ -1218,7 +1228,7 @@ impl SyncManager { peer_id: PeerId, range_block_component: RangeBlockComponent, ) { - if let Some(resp) = self + if let Some((resp, responsible_peers)) = self .network .range_block_component_response(range_request_id, range_block_component) { @@ -1228,7 +1238,7 @@ impl SyncManager { RangeRequestId::RangeSync { chain_id, batch_id } => { self.range_sync.blocks_by_range_response( &mut self.network, - peer_id, + responsible_peers, chain_id, batch_id, range_request_id.id, @@ -1240,9 +1250,9 @@ impl SyncManager { match self.backfill_sync.on_block_response( &mut self.network, batch_id, - &peer_id, range_request_id.id, blocks, + responsible_peers, ) { Ok(ProcessResult::SyncCompleted) => self.update_sync_state(), Ok(ProcessResult::Successful) => {} @@ -1259,7 +1269,7 @@ impl SyncManager { RangeRequestId::RangeSync { chain_id, batch_id } => { self.range_sync.inject_error( &mut self.network, - peer_id, + responsible_peers, batch_id, chain_id, range_request_id.id, diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index b4c145d5144..c50769ddf6d 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -18,6 +18,7 @@ use crate::sync::block_sidecar_coupling::CouplingError; use crate::sync::network_context::requests::{ BlobsByRootSingleBlockRequest, DataColumnsByRootRangeRequestItems, }; +use crate::sync::range_sync::ResponsiblePeers; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState}; use custody::CustodyRequestResult; @@ -848,7 +849,10 @@ impl SyncNetworkContext { &mut self, id: ComponentsByRangeRequestId, range_block_component: RangeBlockComponent, - ) -> Option>, RpcResponseError>> { + ) -> Option<( + Result>, RpcResponseError>, + ResponsiblePeers, + )> { let Entry::Occupied(mut entry) = self.components_by_range_requests.entry(id) else { metrics::inc_counter_vec(&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS, &["range_blocks"]); return None; @@ -895,12 +899,14 @@ impl SyncNetworkContext { } } } { + let responsible_peers = entry.get().responsible_peers(); entry.remove(); - return Some(Err(e)); + return 
Some((Err(e), responsible_peers)); } let range_req = entry.get_mut(); if let Some(blocks_result) = range_req.responses(&self.chain.spec) { + let responsible_peers = range_req.responsible_peers(); if let Err(CouplingError::DataColumnPeerFailure { action: _, error, @@ -923,7 +929,10 @@ impl SyncNetworkContext { entry.remove(); } // If the request is finished, dequeue everything - Some(blocks_result.map_err(RpcResponseError::BlockComponentCouplingError)) + Some(( + blocks_result.map_err(RpcResponseError::BlockComponentCouplingError), + responsible_peers, + )) } else { None } @@ -1256,6 +1265,7 @@ impl SyncNetworkContext { let id = BlocksByRangeRequestId { id: self.next_id(), parent_request_id, + peer_id, }; self.network_send .send(NetworkMessage::SendRequest { diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 1f516139969..14dd07ae31b 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -2,13 +2,13 @@ use beacon_chain::block_verification_types::RpcBlock; use lighthouse_network::PeerId; use lighthouse_network::rpc::methods::BlocksByRangeRequest; use lighthouse_network::service::api_types::Id; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::fmt; use std::hash::{Hash, Hasher}; use std::ops::Sub; use std::time::{Duration, Instant}; use strum::Display; -use types::{Epoch, EthSpec, Slot}; +use types::{ColumnIndex, Epoch, EthSpec, Slot}; /// The number of times to retry a batch before it is considered failed. const MAX_BATCH_DOWNLOAD_ATTEMPTS: u8 = 5; @@ -127,6 +127,15 @@ impl fmt::Display for BatchInfo { } } +/// The peers that we got responses for this batch from. +/// +/// This is used for penalizing in case of invalid batches. +#[derive(Debug, Clone)] +pub struct ResponsiblePeers { + pub block_blob: PeerId, + pub data_columns: HashMap>, +} + #[derive(Display)] /// Current state of a batch pub enum BatchState { @@ -135,15 +144,15 @@ pub enum BatchState { /// The batch is being downloaded. Downloading(Id), /// The batch has been completely downloaded and is ready for processing. - AwaitingProcessing(PeerId, Vec>, Instant), + AwaitingProcessing(ResponsiblePeers, Vec>, Instant), /// The batch is being processed. - Processing(Attempt), + Processing(Attempt, ResponsiblePeers), // todo(pawan): attempt contains the peer, remove that /// The batch was successfully processed and is waiting to be validated. /// /// It is not sufficient to process a batch successfully to consider it correct. This is /// because batches could be erroneously empty, or incomplete. Therefore, a batch is considered /// valid, only if the next sequential batch imports at least a block. - AwaitingValidation(Attempt), + AwaitingValidation(Attempt, ResponsiblePeers), /// Intermediate state for inner state handling. Poisoned, /// The batch has maxed out the allowed attempts for either downloading or processing. It @@ -213,13 +222,15 @@ impl BatchInfo { false } - /// Returns the peer that is currently responsible for progressing the state of the batch. - pub fn processing_peer(&self) -> Option<&PeerId> { + /// Returns the peers that are currently responsible for progressing the state of the batch. + pub fn processing_peers(&self) -> Option<&ResponsiblePeers> { match &self.state { BatchState::AwaitingDownload | BatchState::Failed | BatchState::Downloading(..) => None, - BatchState::AwaitingProcessing(peer_id, _, _) - | BatchState::Processing(Attempt { peer_id, .. 
}) - | BatchState::AwaitingValidation(Attempt { peer_id, .. }) => Some(peer_id), + BatchState::AwaitingProcessing(responsible_peers, _, _) + | BatchState::Processing(Attempt { .. }, responsible_peers) + | BatchState::AwaitingValidation(Attempt { .. }, responsible_peers) => { + Some(responsible_peers) + } BatchState::Poisoned => unreachable!("Poisoned batch"), } } @@ -276,12 +287,13 @@ impl BatchInfo { pub fn download_completed( &mut self, blocks: Vec>, - peer: PeerId, + responsible_peers: ResponsiblePeers, ) -> Result { match self.state.poison() { BatchState::Downloading(_) => { let received = blocks.len(); - self.state = BatchState::AwaitingProcessing(peer, blocks, Instant::now()); + self.state = + BatchState::AwaitingProcessing(responsible_peers, blocks, Instant::now()); Ok(received) } BatchState::Poisoned => unreachable!("Poisoned batch"), @@ -350,8 +362,11 @@ impl BatchInfo { pub fn start_processing(&mut self) -> Result<(Vec>, Duration), WrongState> { match self.state.poison() { - BatchState::AwaitingProcessing(peer, blocks, start_instant) => { - self.state = BatchState::Processing(Attempt::new::(peer, &blocks)); + BatchState::AwaitingProcessing(responsible_peers, blocks, start_instant) => { + self.state = BatchState::Processing( + Attempt::new::(responsible_peers.block_blob, &blocks), + responsible_peers, + ); Ok((blocks, start_instant.elapsed())) } BatchState::Poisoned => unreachable!("Poisoned batch"), @@ -365,14 +380,28 @@ impl BatchInfo { } } + pub fn responsible_peers(&self) -> Option<&ResponsiblePeers> { + match &self.state { + BatchState::AwaitingDownload + | BatchState::Failed + | BatchState::Poisoned + | BatchState::Downloading(_) => None, + BatchState::AwaitingProcessing(r, _, _) + | BatchState::AwaitingValidation(_, r) + | BatchState::Processing(_, r) => Some(r), + } + } + pub fn processing_completed( &mut self, processing_result: BatchProcessingResult, ) -> Result { match self.state.poison() { - BatchState::Processing(attempt) => { + BatchState::Processing(attempt, responsible_peers) => { self.state = match processing_result { - BatchProcessingResult::Success => BatchState::AwaitingValidation(attempt), + BatchProcessingResult::Success => { + BatchState::AwaitingValidation(attempt, responsible_peers) + } BatchProcessingResult::FaultyFailure => { // register the failed attempt self.failed_processing_attempts.push(attempt); @@ -408,7 +437,7 @@ impl BatchInfo { #[must_use = "Batch may have failed"] pub fn validation_failed(&mut self) -> Result { match self.state.poison() { - BatchState::AwaitingValidation(attempt) => { + BatchState::AwaitingValidation(attempt, responsible_peers) => { self.failed_processing_attempts.push(attempt); // check if the batch can be downloaded again @@ -459,16 +488,21 @@ impl Attempt { impl std::fmt::Debug for BatchState { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - BatchState::Processing(Attempt { peer_id, hash: _ }) => { - write!(f, "Processing({})", peer_id) + BatchState::Processing(Attempt { peer_id, hash: _ }, responsible_peers) => { + write!(f, "Processing({}) {:?}", peer_id, responsible_peers) } - BatchState::AwaitingValidation(Attempt { peer_id, hash: _ }) => { - write!(f, "AwaitingValidation({})", peer_id) + BatchState::AwaitingValidation(Attempt { peer_id, hash: _ }, responsible_peers) => { + write!(f, "AwaitingValidation({}) {:?}", peer_id, responsible_peers) } BatchState::AwaitingDownload => f.write_str("AwaitingDownload"), BatchState::Failed => f.write_str("Failed"), - 
BatchState::AwaitingProcessing(peer, blocks, _) => { - write!(f, "AwaitingProcessing({}, {} blocks)", peer, blocks.len()) + BatchState::AwaitingProcessing(responsible_peers, blocks, _) => { + write!( + f, + "AwaitingProcessing({:?}, {:?} blocks)", + responsible_peers, + blocks.len() + ) } BatchState::Downloading(request_id) => { write!(f, "Downloading({})", request_id) @@ -484,8 +518,8 @@ impl BatchState { fn visualize(&self) -> char { match self { BatchState::Downloading(..) => 'D', - BatchState::Processing(_) => 'P', - BatchState::AwaitingValidation(_) => 'v', + BatchState::Processing(_, _) => 'P', + BatchState::AwaitingValidation(_, _) => 'v', BatchState::AwaitingDownload => 'd', BatchState::Failed => 'F', BatchState::AwaitingProcessing(..) => 'p', diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 2da3cfdb699..bd176a0708c 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -3,7 +3,9 @@ use super::batch::{BatchInfo, BatchProcessingResult, BatchState}; use crate::metrics; use crate::network_beacon_processor::ChainSegmentProcessId; use crate::sync::block_sidecar_coupling::CouplingError; +use crate::sync::manager::FaultyComponent; use crate::sync::network_context::{RangeRequestId, RpcRequestSendError, RpcResponseError}; +use crate::sync::range_sync::batch::ResponsiblePeers; use crate::sync::{BatchOperationOutcome, BatchProcessResult, network_context::SyncNetworkContext}; use beacon_chain::BeaconChainTypes; use beacon_chain::block_verification_types::RpcBlock; @@ -209,9 +211,9 @@ impl SyncingChain { &mut self, network: &mut SyncNetworkContext, batch_id: BatchId, - peer_id: &PeerId, request_id: Id, blocks: Vec>, + responsible_peers: ResponsiblePeers, ) -> ProcessingResult { // check if we have this batch let batch = match self.batches.get_mut(&batch_id) { @@ -238,7 +240,7 @@ impl SyncingChain { // Remove the request from the peer's active batches // TODO(das): should use peer group here https://github.com/sigp/lighthouse/issues/6258 - let received = batch.download_completed(blocks, *peer_id)?; + let received = batch.download_completed(blocks, responsible_peers)?; let awaiting_batches = batch_id .saturating_sub(self.optimistic_start.unwrap_or(self.processing_target)) / EPOCHS_PER_BATCH; @@ -328,7 +330,7 @@ impl SyncingChain { } BatchState::Poisoned => unreachable!("Poisoned batch"), BatchState::AwaitingDownload => return Ok(KeepChain), - BatchState::Processing(_) | BatchState::Failed => { + BatchState::Processing(_, _) | BatchState::Failed => { // these are all inconsistent states: // - Processing -> `self.current_processing_batch` is None // - Failed -> non recoverable batch. For an optimistic batch, it should @@ -340,7 +342,7 @@ impl SyncingChain { state ))); } - BatchState::AwaitingValidation(_) => { + BatchState::AwaitingValidation(_, _) => { // If an optimistic start is given to the chain after the corresponding // batch has been requested and processed we can land here. We drop the // optimistic candidate since we can't conclude whether the batch included @@ -363,7 +365,7 @@ impl SyncingChain { } BatchState::Poisoned => unreachable!("Poisoned batch"), BatchState::AwaitingDownload => return Ok(KeepChain), - BatchState::Failed | BatchState::Processing(_) => { + BatchState::Failed | BatchState::Processing(_, _) => { // these are all inconsistent states: // - Failed -> non recoverable batch. 
Chain should have been removed // - AwaitingDownload -> A recoverable failed batch should have been @@ -374,7 +376,7 @@ impl SyncingChain { state ))); } - BatchState::AwaitingValidation(_) => { + BatchState::AwaitingValidation(_, _) => { // we can land here if an empty optimistic batch succeeds processing and is // inside the download buffer (between `self.processing_target` and // `self.to_be_downloaded`). In this case, eventually the chain advances to the @@ -441,7 +443,7 @@ impl SyncingChain { } }; - let peer = batch.processing_peer().cloned().ok_or_else(|| { + let peers = batch.processing_peers().cloned().ok_or_else(|| { RemoveChain::WrongBatchState(format!( "Processing target is in wrong state: {:?}", batch.state(), @@ -452,7 +454,7 @@ impl SyncingChain { debug!( result = ?result, batch_epoch = %batch_id, - client = %network.client_type(&peer), + ?peers, batch_state = ?batch_state, ?batch, "Batch processing result" @@ -516,9 +518,29 @@ impl SyncingChain { BatchProcessResult::FaultyFailure { imported_blocks, penalty, + faulty_component, } => { + let Some(responsible_peers) = batch.responsible_peers() else { + crit!("Shouldn't happen"); + return Ok(KeepChain); + }; // Penalize the peer appropriately. - network.report_peer(peer, *penalty, "faulty_batch"); + match faulty_component { + Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { + network.report_peer(responsible_peers.block_blob, *penalty, "faulty_batch"); + } + // todo(pawan): clean this up + Some(FaultyComponent::Columns(faulty_columns)) => { + for (peer, columns) in responsible_peers.data_columns.iter() { + for faulty_column in faulty_columns { + if columns.contains(faulty_column) { + network.report_peer(*peer, *penalty, "faulty_batch"); + } + } + } + } + None => {} + } // Check if this batch is allowed to continue match batch.processing_completed(BatchProcessingResult::FaultyFailure)? { @@ -621,11 +643,11 @@ impl SyncingChain { // only for batches awaiting validation can we be sure the last attempt is // right, and thus, that any different attempt is wrong match batch.state() { - BatchState::AwaitingValidation(processed_attempt) => { + BatchState::AwaitingValidation(processed_attempt, responsible_peers) => { for attempt in batch.attempts() { // The validated batch has been re-processed if attempt.hash != processed_attempt.hash { - // The re-downloaded version was different + // The re-downloaded version had a different block peer if processed_attempt.peer_id != attempt.peer_id { // A different peer sent the correct batch, the previous peer did not // We negatively score the original peer. @@ -665,7 +687,7 @@ impl SyncingChain { crit!("batch indicates inconsistent chain state while advancing chain") } BatchState::AwaitingProcessing(..) 
=> {} - BatchState::Processing(_) => { + BatchState::Processing(_, _) => { debug!(batch = %id, %batch, "Advancing chain while processing a batch"); if let Some(processing_id) = self.current_processing_batch && id <= processing_id @@ -812,9 +834,9 @@ impl SyncingChain { &mut self, network: &mut SyncNetworkContext, batch_id: BatchId, - peer_id: &PeerId, request_id: Id, err: RpcResponseError, + responsible_peers: ResponsiblePeers, ) -> ProcessingResult { let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { @@ -865,7 +887,7 @@ impl SyncingChain { debug!( batch_epoch = %batch_id, batch_state = ?batch.state(), - %peer_id, + ?responsible_peers, %request_id, ?batch_state, "Batch not expecting block" @@ -876,12 +898,12 @@ impl SyncingChain { batch_epoch = %batch_id, batch_state = ?batch.state(), error = ?err, - %peer_id, + ?responsible_peers, %request_id, "Batch download error" ); if let BatchOperationOutcome::Failed { blacklist } = - batch.download_failed(Some(*peer_id))? + batch.download_failed(Some(responsible_peers.block_blob))? { return Err(RemoveChain::ChainFailed { blacklist, @@ -892,7 +914,7 @@ impl SyncingChain { } else { debug!( batch_epoch = %batch_id, - %peer_id, + ?responsible_peers, %request_id, batch_state, "Batch not found" diff --git a/beacon_node/network/src/sync/range_sync/mod.rs b/beacon_node/network/src/sync/range_sync/mod.rs index 8f881fba90f..04b622cb42f 100644 --- a/beacon_node/network/src/sync/range_sync/mod.rs +++ b/beacon_node/network/src/sync/range_sync/mod.rs @@ -9,7 +9,7 @@ mod sync_type; pub use batch::{ BatchConfig, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState, - ByRangeRequestType, + ByRangeRequestType, ResponsiblePeers, }; pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH}; #[cfg(test)] diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index 465edd3697f..cd523d3e193 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -44,6 +44,7 @@ use super::chain_collection::{ChainCollection, SyncChainStatus}; use super::sync_type::RangeSyncType; use crate::metrics; use crate::status::ToStatusMessage; +use crate::sync::range_sync::ResponsiblePeers; use crate::sync::BatchProcessResult; use crate::sync::network_context::{RpcResponseError, SyncNetworkContext}; use beacon_chain::block_verification_types::RpcBlock; @@ -203,7 +204,7 @@ where pub fn blocks_by_range_response( &mut self, network: &mut SyncNetworkContext, - peer_id: PeerId, + responsible_peers: ResponsiblePeers, chain_id: ChainId, batch_id: BatchId, request_id: Id, @@ -211,7 +212,7 @@ where ) { // check if this chunk removes the chain match self.chains.call_by_id(chain_id, |chain| { - chain.on_block_response(network, batch_id, &peer_id, request_id, blocks) + chain.on_block_response(network, batch_id, request_id, blocks, responsible_peers) }) { Ok((removed_chain, sync_type)) => { if let Some((removed_chain, remove_reason)) = removed_chain { @@ -295,7 +296,7 @@ where pub fn inject_error( &mut self, network: &mut SyncNetworkContext, - peer_id: PeerId, + responsible_peers: ResponsiblePeers, batch_id: BatchId, chain_id: ChainId, request_id: Id, @@ -303,7 +304,7 @@ where ) { // check that this request is pending match self.chains.call_by_id(chain_id, |chain| { - chain.inject_error(network, batch_id, &peer_id, request_id, err) + chain.inject_error(network, batch_id, request_id, err, responsible_peers) }) { Ok((removed_chain, 
sync_type)) => { if let Some((removed_chain, remove_reason)) = removed_chain { From 45401958713d9ef75da09096664426757c37dfd3 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 13 Aug 2025 17:33:47 -0700 Subject: [PATCH 13/49] Request columns from global peer pool --- .../src/peer_manager/peerdb.rs | 2 ++ .../network/src/sync/range_sync/chain.rs | 20 +++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs index 1fa11357dc0..4d191645d63 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs @@ -323,6 +323,8 @@ impl PeerDB { .map(|(peer_id, _)| peer_id) } + /// Returns an iterator of all good gossipsub peers that are supposed to be custodying + /// the given subnet id. pub fn good_custody_subnet_peer_range_sync( &self, subnet: DataColumnSubnetId, diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index bd176a0708c..fe9305d83ad 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -957,7 +957,7 @@ impl SyncingChain { .network_globals() .peers .read() - .synced_peers_for_epoch(batch_id, Some(&self.peers)) + .synced_peers_for_epoch(batch_id, None) .cloned() .collect::>(); @@ -1034,7 +1034,7 @@ impl SyncingChain { .network_globals() .peers .read() - .synced_peers_for_epoch(batch_id, Some(&self.peers)) + .synced_peers_for_epoch(batch_id, None) .cloned() .collect::>(); @@ -1129,21 +1129,21 @@ impl SyncingChain { ) -> bool { if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) { // Require peers on all sampling column subnets before sending batches - network + let peers_on_all_custody_subnets = network .network_globals() .sampling_subnets() .iter() .all(|subnet_id| { - let peer_db = network.network_globals().peers.read(); - let peer_count = self + let peer_count = network + .network_globals() .peers - .iter() - .filter(|peer| { - peer_db.is_good_range_sync_custody_subnet_peer(*subnet_id, peer) - }) + .read() + .good_custody_subnet_peer_range_sync(*subnet_id, epoch) .count(); + peer_count > 0 - }) + }); + peers_on_all_custody_subnets } else { true } From 521778b0a5749c21e144f26410b77ba8930f2d00 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 21 Aug 2025 07:03:58 -0700 Subject: [PATCH 14/49] Random logs --- .../src/rpc/self_limiter.rs | 4 ++-- .../network_beacon_processor/rpc_methods.rs | 24 +++++++++---------- .../network/src/sync/range_sync/chain.rs | 1 + 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/beacon_node/lighthouse_network/src/rpc/self_limiter.rs b/beacon_node/lighthouse_network/src/rpc/self_limiter.rs index 90e2db91357..6b1f759c795 100644 --- a/beacon_node/lighthouse_network/src/rpc/self_limiter.rs +++ b/beacon_node/lighthouse_network/src/rpc/self_limiter.rs @@ -90,7 +90,7 @@ impl SelfRateLimiter { let protocol = req.versioned_protocol().protocol(); // First check that there are not already other requests waiting to be sent. 
if let Some(queued_requests) = self.delayed_requests.get_mut(&(peer_id, protocol)) { - debug!(%peer_id, protocol = %req.protocol(), "Self rate limiting since there are already other requests waiting to be sent"); + tracing::trace!(%peer_id, protocol = %req.protocol(), "Self rate limiting since there are already other requests waiting to be sent"); queued_requests.push_back(QueuedRequest { req, request_id, @@ -134,7 +134,7 @@ impl SelfRateLimiter { && let Some(count) = active_request.get(&req.protocol()) && *count >= MAX_CONCURRENT_REQUESTS { - debug!( + tracing::trace!( %peer_id, protocol = %req.protocol(), "Self rate limiting due to the number of concurrent requests" diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index e38fa6f842c..64e863be6c9 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -393,7 +393,7 @@ impl NetworkBeaconProcessor { } } - debug!( + tracing::trace!( %peer_id, request = ?request.data_column_ids, returned = send_data_column_count, @@ -430,7 +430,7 @@ impl NetworkBeaconProcessor { inbound_request_id: InboundRequestId, req: LightClientUpdatesByRangeRequest, ) -> Result<(), (RpcErrorResponse, &'static str)> { - debug!( + tracing::trace!( %peer_id, count = req.count, start_period = req.start_period, @@ -473,7 +473,7 @@ impl NetworkBeaconProcessor { let lc_updates_sent = lc_updates.len(); if lc_updates_sent < req.count as usize { - debug!( + tracing::trace!( peer = %peer_id, info = "Failed to return all requested light client updates. The peer may have requested data ahead of whats currently available", start_period = req.start_period, @@ -482,7 +482,7 @@ impl NetworkBeaconProcessor { "LightClientUpdatesByRange outgoing response processed" ); } else { - debug!( + tracing::trace!( peer = %peer_id, start_period = req.start_period, requested = req.count, @@ -603,7 +603,7 @@ impl NetworkBeaconProcessor { let req_start_slot = *req.start_slot(); let req_count = *req.count(); - debug!( + tracing::trace!( %peer_id, count = req_count, start_slot = %req_start_slot, @@ -636,7 +636,7 @@ impl NetworkBeaconProcessor { let log_results = |peer_id, blocks_sent| { if blocks_sent < (req_count as usize) { - debug!( + tracing::trace!( %peer_id, msg = "Failed to return all requested blocks", start_slot = %req_start_slot, @@ -646,7 +646,7 @@ impl NetworkBeaconProcessor { "BlocksByRange outgoing response processed" ); } else { - debug!( + tracing::trace!( %peer_id, start_slot = %req_start_slot, %current_slot, @@ -790,7 +790,7 @@ impl NetworkBeaconProcessor { elapsed, ); - debug!( + tracing::trace!( req_type, start_slot = %req_start_slot, req_count, @@ -885,7 +885,7 @@ impl NetworkBeaconProcessor { inbound_request_id: InboundRequestId, req: BlobsByRangeRequest, ) -> Result<(), (RpcErrorResponse, &'static str)> { - debug!( + tracing::trace!( ?peer_id, count = req.count, start_slot = req.start_slot, @@ -938,7 +938,7 @@ impl NetworkBeaconProcessor { .unwrap_or_else(|_| self.chain.slot_clock.genesis_slot()); let log_results = |peer_id, req: BlobsByRangeRequest, blobs_sent| { - debug!( + tracing::trace!( %peer_id, start_slot = req.start_slot, %current_slot, @@ -1013,7 +1013,7 @@ impl NetworkBeaconProcessor { inbound_request_id: InboundRequestId, req: DataColumnsByRangeRequest, ) -> Result<(), (RpcErrorResponse, &'static str)> { - debug!( + tracing::trace!( %peer_id, count = req.count, start_slot = 
req.start_slot, @@ -1112,7 +1112,7 @@ impl NetworkBeaconProcessor { .slot() .unwrap_or_else(|_| self.chain.slot_clock.genesis_slot()); - debug!( + tracing::trace!( %peer_id, start_slot = req.start_slot, %current_slot, diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index fe9305d83ad..7fbb8317c20 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -717,6 +717,7 @@ impl SyncingChain { previous_start = %old_start, new_start = %self.start_epoch, processing_target = %self.processing_target, + id=%self.id, "Chain advanced" ); } From 52762b91e12ef050a9e7445179fcfdcae72ec590 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Fri, 22 Aug 2025 11:52:56 -0700 Subject: [PATCH 15/49] Handle 0 blobs per epoch case --- .../src/sync/block_sidecar_coupling.rs | 10 + .../network/src/sync/network_context.rs | 234 ++++++++++-------- .../requests/data_columns_by_root.rs | 2 +- .../network/src/sync/range_sync/chain.rs | 46 +++- 4 files changed, 183 insertions(+), 109 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index e92bb0686ad..e5bb84813a3 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -210,6 +210,16 @@ impl RangeBlockComponentsRequest { } } + pub fn no_columns_for_batch(&mut self) -> Result<(), String> { + match self.block_data_request { + RangeBlockDataRequest::DataColumnsFromRoot { .. } => { + self.block_data_request = RangeBlockDataRequest::NoData; + Ok(()) + } + _ => Err("Invalid state: expected DataColumnsFromRoot".to_owned()), + } + } + /// Adds received blocks to the request. /// /// Returns an error if the request ID doesn't match the expected blocks request. diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 749adaf64d0..d590f9998e3 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -103,6 +103,7 @@ pub enum RpcResponseError { VerifyError(LookupVerifyError), CustodyRequestError(#[allow(dead_code)] CustodyRequestError), BlockComponentCouplingError(CouplingError), + InternalError(String), } #[derive(Debug, PartialEq, Eq)] @@ -1715,9 +1716,143 @@ impl SyncNetworkContext { let resp = self .data_columns_by_root_range_requests .on_response(id, rpc_event); + // This error implies we asked the peer for a specific root and it did not give it to us + // if let Some(Err(RpcResponseError::VerifyError( + // LookupVerifyError::NotEnoughResponsesReturned { .. }, + // ))) = resp + // { + + // } self.on_rpc_response_result(id, "DataColumnsByRootRange", resp, peer_id, |b| b.len()) } + fn request_columns_on_successful_blocks( + &mut self, + id: BlocksByRangeRequestId, + blocks: &Vec>>, + ) -> Result<(), RpcResponseError> { + let batch_epoch = id.batch_id(); + // Return early if no columns are required for this epoch + if !matches!( + self.batch_type(batch_epoch), + ByRangeRequestType::BlocksAndColumns + ) { + return Ok(()); + } + // Return early if this is a backfill batch, backfill batches are handled by range requests instead of root + if matches!( + id.parent_request_id.requester, + RangeRequestId::BackfillSync { .. 
} + ) { + return Ok(()); + } + // todo(pawan): send the data column request as soon as you get each chunk to spread out requests + debug!(count = blocks.len(), "Received blocks from byrange query"); + // We have blocks here, check if they need data columns and request them + let mut block_roots = Vec::new(); + + for block in blocks.iter() { + // Request columns only if the blob_kzg_commitments is non-empty + if let Ok(commitments) = block.message().body().blob_kzg_commitments() { + if !commitments.is_empty() { + block_roots.push(block.canonical_root()); + } + } + } + if block_roots.is_empty() { + // No blobs for the entire epoch, let the coupling logic know not to expect anything + // and return early + if let Some(req) = self + .components_by_range_requests + .get_mut(&id.parent_request_id) + { + if let Err(e) = req.no_columns_for_batch() { + debug!(?e, "Created range request in inconsistent state"); + return Err(RpcResponseError::InternalError(e)); + } + return Ok(()); + } else { + return Err(RpcResponseError::InternalError( + "Request sent without creating an entry".to_string(), + )); + } + } + // Generate the data column by root requests + let mut peer_to_columns: HashMap> = HashMap::new(); + let mut no_peers_for_column: Vec = Vec::new(); + for column in self.chain.sampling_columns_for_epoch(batch_epoch).iter() { + let data_column = DataColumnSubnetId::new(*column); + if let Some(custody_peer) = self + .network_globals() + .peers + .read() + .good_custody_subnet_peer_range_sync(data_column, batch_epoch) + .next() + { + peer_to_columns + .entry(*custody_peer) + .or_default() + .push(*column); + } else { + debug!( + ?data_column, + block_request_id=?id, + "Not enough column peers for batch, need to retry" + ); + no_peers_for_column.push(*column); + } + } + + let mut data_column_requests = Vec::new(); + for (peer, indices) in peer_to_columns.into_iter() { + let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { + block_roots: block_roots.clone(), + indices: indices.clone(), + }; + + let requester = DataColumnsByRootRequester::RangeSync { + parent: id.parent_request_id, + }; + + data_column_requests.push(( + self.send_data_columns_by_root_range_requests( + peer, + data_columns_by_root_request, + requester, + Span::none(), + ) + .expect("should be able to send request"), + indices, + )); + } + + if !no_peers_for_column.is_empty() { + let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { + block_roots: block_roots.clone(), + indices: no_peers_for_column, + }; + + self.requests_to_retry + .insert(id.parent_request_id, data_columns_by_root_request); + } + + if let Some(req) = self + .components_by_range_requests + .get_mut(&id.parent_request_id) + { + req.insert_column_request_after_block_request( + data_column_requests, + self.chain.sampling_columns_for_epoch(batch_epoch), + ) + .expect("should be in the right state"); + } else { + return Err(RpcResponseError::InternalError( + "Request sent without creating an entry".to_string(), + )); + } + Ok(()) + } + #[allow(clippy::type_complexity)] pub(crate) fn on_blocks_by_range_response( &mut self, @@ -1727,104 +1862,9 @@ impl SyncNetworkContext { ) -> Option>>>> { let resp = self.blocks_by_range_requests.on_response(id, rpc_event); match &resp { - // todo(pawan): send the data column request as soon as you get each chunk to spread out requests Some(Ok((blocks, _))) => { - // Return early if this is a backfill batch, backfill batches are handled by range requests instead of root - if matches!( - 
id.parent_request_id.requester, - RangeRequestId::BackfillSync { .. } - ) { - return self - .on_rpc_response_result(id, "BlocksByRange", resp, peer_id, |b| b.len()); - } - // We have blocks here, check if they need data columns and request them - let mut block_roots = Vec::new(); - let batch_epoch = id.batch_id(); - if !matches!( - self.batch_type(batch_epoch), - ByRangeRequestType::BlocksAndColumns - ) { - return self - .on_rpc_response_result(id, "BlocksByRange", resp, peer_id, |b| b.len()); - } - for block in blocks.iter() { - // Request columns only if the blob_kzg_commitments is non-empty - if let Ok(commitments) = block.message().body().blob_kzg_commitments() { - if !commitments.is_empty() { - block_roots.push(block.canonical_root()); - } - } - } - // Generate the data column by root requests - let mut peer_to_columns: HashMap> = HashMap::new(); - let mut no_peers_for_column: Vec = Vec::new(); - for column in self.chain.sampling_columns_for_epoch(batch_epoch).iter() { - let data_column = DataColumnSubnetId::new(*column); - if let Some(custody_peer) = self - .network_globals() - .peers - .read() - .good_custody_subnet_peer_range_sync(data_column, batch_epoch) - .next() - { - peer_to_columns - .entry(*custody_peer) - .or_default() - .push(*column); - } else { - debug!( - ?data_column, - block_request_id=?id, - "Not enough column peers for batch, need to retry" - ); - no_peers_for_column.push(*column); - } - } - - // todo(pawan): no_peers_for_column nned to be requested once peers - // become available - let mut data_column_requests = Vec::new(); - for (peer, indices) in peer_to_columns.into_iter() { - let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { - block_roots: block_roots.clone(), - indices: indices.clone(), - }; - - let requester = DataColumnsByRootRequester::RangeSync { - parent: id.parent_request_id, - }; - - data_column_requests.push(( - self.send_data_columns_by_root_range_requests( - peer, - data_columns_by_root_request, - requester, - Span::none(), - ) - .expect("should be able to send request"), - indices, - )); - } - - if !no_peers_for_column.is_empty() { - let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { - block_roots: block_roots.clone(), - indices: no_peers_for_column, - }; - - self.requests_to_retry - .insert(id.parent_request_id, data_columns_by_root_request); - } - - if let Some(req) = self - .components_by_range_requests - .get_mut(&id.parent_request_id) - { - req.insert_column_request_after_block_request( - data_column_requests, - self.chain.sampling_columns_for_epoch(batch_epoch), - ) - .expect("should be in the right state"); + if let Err(e) = self.request_columns_on_successful_blocks(id, blocks) { + return Some(Err(e)); } } None => {} diff --git a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs index 642b88c9d9d..17faee4fd9b 100644 --- a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs +++ b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs @@ -146,7 +146,7 @@ impl ActiveRequestItems for DataColumnsByRootRangeRequestItems { .flatten() .any(|d| d.index == data_column.index && d.block_root() == block_root) { - tracing::debug!( + tracing::trace!( ?data_column, existing_items=?self.items, "Duplicated data", diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 2d0362f2676..b955c0b0ab3 100644 
--- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -829,6 +829,7 @@ impl SyncingChain { let optimistic_epoch = align(optimistic_start_epoch); // advance the chain to the new validating epoch + debug!("Advancing chain"); self.advance_chain(network, validating_epoch); if self.optimistic_start.is_none() && optimistic_epoch > self.processing_target @@ -841,6 +842,7 @@ impl SyncingChain { self.state = ChainSyncingState::Syncing; // begin requesting blocks from the peer pool, until all peers are exhausted. + debug!("Requesting batches from inside start syncing"); self.request_batches(network)?; // start processing batches if needed @@ -965,13 +967,18 @@ impl SyncingChain { src: &str, ) -> ProcessingResult { debug!(?src, "In attempt_send_awaiting download batches"); - // Check all batches in AwaitingDownload state and see if they can be sent - for (batch_id, batch) in self.batches.iter() { - if matches!(batch.state(), BatchState::AwaitingDownload) { - debug!(?src, ?batch_id, "Sending batch"); - if self.good_peers_on_sampling_subnets(*batch_id, network) { - return self.send_batch(network, *batch_id); - } + // Collect all batches in AwaitingDownload state and see if they can be sent + let awaiting_downloads: Vec<_> = self + .batches + .iter() + .filter(|(_, batch)| matches!(batch.state(), BatchState::AwaitingDownload)) + .map(|(batch_id, _)| batch_id) + .copied() + .collect(); + for batch_id in awaiting_downloads { + debug!(?src, ?batch_id, "Sending batch"); + if self.good_peers_on_sampling_subnets(batch_id, network) { + self.send_batch(network, batch_id)?; } } Ok(KeepChain) @@ -1127,12 +1134,14 @@ impl SyncingChain { if !matches!(self.state, ChainSyncingState::Syncing) { return Ok(KeepChain); } + debug!("In request batches"); // find the next pending batch and request it from the peer // check if we have the batch for our optimistic start. If not, request it first. // We wait for this batch before requesting any other batches. if let Some(epoch) = self.optimistic_start { + debug!("In request batches optimistic start"); if !self.good_peers_on_sampling_subnets(epoch, network) { debug!("Waiting for peers to be available on sampling column subnets"); return Ok(KeepChain); @@ -1143,10 +1152,15 @@ impl SyncingChain { let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH, batch_type); entry.insert(optimistic_batch); self.send_batch(network, epoch)?; + } else { + debug!(batch=?self.batches.get(&epoch), "Optimistic batch info"); + self.attempt_send_awaiting_download_batches(network, "optimisitc"); } return Ok(KeepChain); } + debug!("In request batches checking if can send batch"); + // find the next pending batch and request it from the peer // Note: for this function to not infinite loop we must: // - If `include_next_batch` returns Some we MUST increase the count of batches that are @@ -1193,6 +1207,8 @@ impl SyncingChain { /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { + debug!("In include_next_batch"); + // don't request batches beyond the target head slot if self .to_be_downloaded @@ -1211,13 +1227,20 @@ impl SyncingChain { BatchState::Downloading(..) | BatchState::AwaitingProcessing(..) 
) }; - if self + let in_buffer_batches: Vec<_> = self .batches .iter() .filter(|&(_epoch, batch)| in_buffer(batch)) - .count() - > BATCH_BUFFER_SIZE as usize - { + .map(|(epoch, _)| epoch) + .collect(); + + if in_buffer_batches.len() > BATCH_BUFFER_SIZE as usize { + debug!( + ?in_buffer_batches, + ?self.processing_target, + ?self.to_be_downloaded, "Too many batches already" + ); + return None; } @@ -1230,6 +1253,7 @@ impl SyncingChain { return None; } + debug!(?self.to_be_downloaded, "Trying to check next batch id"); // If no batch needs a retry, attempt to send the batch of the next epoch to download let next_batch_id = self.to_be_downloaded; // this batch could have been included already being an optimistic batch From 27d0b3666d1b65674357f3377d478e29fd90de47 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Tue, 26 Aug 2025 16:06:27 -0700 Subject: [PATCH 16/49] Remove debug statements --- .../src/peer_manager/mod.rs | 50 +------------------ .../src/rpc/self_limiter.rs | 4 +- .../lighthouse_network/src/service/mod.rs | 2 +- .../network_beacon_processor/rpc_methods.rs | 26 +++++----- 4 files changed, 18 insertions(+), 64 deletions(-) diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 0f0249eed10..93515ed5f6b 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -729,7 +729,7 @@ impl PeerManager { } } else { // we have no meta-data for this peer, update - debug!(%peer_id, new_seq_no = meta_data.seq_number(), cgc=?meta_data.custody_group_count().ok(), "Obtained peer's metadata"); + debug!(%peer_id, new_seq_no = meta_data.seq_number(), "Obtained peer's metadata"); } let known_custody_group_count = peer_info @@ -745,7 +745,7 @@ impl PeerManager { if let Some(custody_group_count) = custody_group_count_opt { match self.compute_peer_custody_groups(peer_id, custody_group_count) { Ok(custody_groups) => { - let custody_subnets: HashSet = custody_groups + let custody_subnets = custody_groups .into_iter() .flat_map(|custody_index| { self.subnets_by_custody_group @@ -761,13 +761,6 @@ impl PeerManager { }) }) .collect(); - let cgc = if custody_subnets.len() == 128 { - "supernode".to_string() - } else { - format!("{:?}", custody_subnets) - }; - - debug!(cgc, ?peer_id, "Peer custodied subnets"); peer_info.set_custody_subnets(custody_subnets); updated_cgc = Some(custody_group_count) != known_custody_group_count; @@ -956,42 +949,6 @@ impl PeerManager { } } - /// Run discovery query for additional custody peers if we fall below `TARGET_PEERS`. 
- fn maintain_custody_peers(&mut self) { - let subnets_to_discover: Vec = self - .network_globals - .sampling_subnets() - .iter() - .filter_map(|custody_subnet| { - if self - .network_globals - .peers - .read() - .good_range_sync_custody_subnet_peers(*custody_subnet) - .count() - < 2 - { - Some(SubnetDiscovery { - subnet: Subnet::DataColumn(*custody_subnet), - min_ttl: None, - }) - } else { - None - } - }) - .collect(); - - // request the subnet query from discovery - if !subnets_to_discover.is_empty() { - debug!( - subnets = ?subnets_to_discover.iter().map(|s| s.subnet).collect::>(), - "Making subnet queries for maintaining custody peers" - ); - self.events - .push(PeerManagerEvent::DiscoverSubnetPeers(subnets_to_discover)); - } - } - fn maintain_trusted_peers(&mut self) { let trusted_peers = self.trusted_peers.clone(); for trusted_peer in trusted_peers { @@ -1314,9 +1271,6 @@ impl PeerManager { // Update peer score metrics; self.update_peer_score_metrics(); - // Maintain minimum count for custody peers. - self.maintain_custody_peers(); - // Maintain minimum count for sync committee peers. self.maintain_sync_committee_peers(); diff --git a/beacon_node/lighthouse_network/src/rpc/self_limiter.rs b/beacon_node/lighthouse_network/src/rpc/self_limiter.rs index 6b1f759c795..90e2db91357 100644 --- a/beacon_node/lighthouse_network/src/rpc/self_limiter.rs +++ b/beacon_node/lighthouse_network/src/rpc/self_limiter.rs @@ -90,7 +90,7 @@ impl SelfRateLimiter { let protocol = req.versioned_protocol().protocol(); // First check that there are not already other requests waiting to be sent. if let Some(queued_requests) = self.delayed_requests.get_mut(&(peer_id, protocol)) { - tracing::trace!(%peer_id, protocol = %req.protocol(), "Self rate limiting since there are already other requests waiting to be sent"); + debug!(%peer_id, protocol = %req.protocol(), "Self rate limiting since there are already other requests waiting to be sent"); queued_requests.push_back(QueuedRequest { req, request_id, @@ -134,7 +134,7 @@ impl SelfRateLimiter { && let Some(count) = active_request.get(&req.protocol()) && *count >= MAX_CONCURRENT_REQUESTS { - tracing::trace!( + debug!( %peer_id, protocol = %req.protocol(), "Self rate limiting due to the number of concurrent requests" diff --git a/beacon_node/lighthouse_network/src/service/mod.rs b/beacon_node/lighthouse_network/src/service/mod.rs index efac129724b..eebc2f02009 100644 --- a/beacon_node/lighthouse_network/src/service/mod.rs +++ b/beacon_node/lighthouse_network/src/service/mod.rs @@ -1909,7 +1909,7 @@ impl Network { } }, }; - tracing::trace!(our_addr = %local_addr, from = %send_back_addr, error = error_repr, "Failed incoming connection"); + debug!(our_addr = %local_addr, from = %send_back_addr, error = error_repr, "Failed incoming connection"); None } SwarmEvent::OutgoingConnectionError { diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 8c0acb255c0..85e4f046410 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -228,7 +228,7 @@ impl NetworkBeaconProcessor { send_block_count += 1; } Ok(None) => { - tracing::trace!( + debug!( %peer_id, request_root = ?root, "Peer requested unknown block" @@ -449,7 +449,7 @@ impl NetworkBeaconProcessor { } } - tracing::trace!( + debug!( %peer_id, request = ?request.data_column_ids, returned = send_data_column_count, @@ -495,7 +495,7 @@ impl 
NetworkBeaconProcessor { inbound_request_id: InboundRequestId, req: LightClientUpdatesByRangeRequest, ) -> Result<(), (RpcErrorResponse, &'static str)> { - tracing::trace!( + debug!( %peer_id, count = req.count, start_period = req.start_period, @@ -538,7 +538,7 @@ impl NetworkBeaconProcessor { let lc_updates_sent = lc_updates.len(); if lc_updates_sent < req.count as usize { - tracing::trace!( + debug!( peer = %peer_id, info = "Failed to return all requested light client updates. The peer may have requested data ahead of whats currently available", start_period = req.start_period, @@ -547,7 +547,7 @@ impl NetworkBeaconProcessor { "LightClientUpdatesByRange outgoing response processed" ); } else { - tracing::trace!( + debug!( peer = %peer_id, start_period = req.start_period, requested = req.count, @@ -704,7 +704,7 @@ impl NetworkBeaconProcessor { let req_start_slot = *req.start_slot(); let req_count = *req.count(); - tracing::trace!( + debug!( %peer_id, count = req_count, start_slot = %req_start_slot, @@ -737,7 +737,7 @@ impl NetworkBeaconProcessor { let log_results = |peer_id, blocks_sent| { if blocks_sent < (req_count as usize) { - tracing::trace!( + debug!( %peer_id, msg = "Failed to return all requested blocks", start_slot = %req_start_slot, @@ -747,7 +747,7 @@ impl NetworkBeaconProcessor { "BlocksByRange outgoing response processed" ); } else { - tracing::trace!( + debug!( %peer_id, start_slot = %req_start_slot, %current_slot, @@ -891,7 +891,7 @@ impl NetworkBeaconProcessor { elapsed, ); - tracing::trace!( + debug!( req_type, start_slot = %req_start_slot, req_count, @@ -995,7 +995,7 @@ impl NetworkBeaconProcessor { inbound_request_id: InboundRequestId, req: BlobsByRangeRequest, ) -> Result<(), (RpcErrorResponse, &'static str)> { - tracing::trace!( + debug!( ?peer_id, count = req.count, start_slot = req.start_slot, @@ -1048,7 +1048,7 @@ impl NetworkBeaconProcessor { .unwrap_or_else(|_| self.chain.slot_clock.genesis_slot()); let log_results = |peer_id, req: BlobsByRangeRequest, blobs_sent| { - tracing::trace!( + debug!( %peer_id, start_slot = req.start_slot, %current_slot, @@ -1137,7 +1137,7 @@ impl NetworkBeaconProcessor { inbound_request_id: InboundRequestId, req: DataColumnsByRangeRequest, ) -> Result<(), (RpcErrorResponse, &'static str)> { - tracing::trace!( + debug!( %peer_id, count = req.count, start_slot = req.start_slot, @@ -1242,7 +1242,7 @@ impl NetworkBeaconProcessor { .slot() .unwrap_or_else(|_| self.chain.slot_clock.genesis_slot()); - tracing::trace!( + debug!( %peer_id, start_slot = req.start_slot, %current_slot, From a97cf880f80f529d1a4b726aad8631add28acb45 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Tue, 26 Aug 2025 18:53:51 -0700 Subject: [PATCH 17/49] Add docs --- .../src/sync/block_sidecar_coupling.rs | 55 +++++++--- .../network/src/sync/network_context.rs | 102 +++++++++++++----- 2 files changed, 112 insertions(+), 45 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index e5bb84813a3..9db4c5d5e07 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -56,11 +56,18 @@ enum RangeBlockDataRequest { expected_custody_columns: Vec, attempt: usize, }, + /// These are data columns fetched by root instead of by range like the previous variant. 
+ /// + /// Note: this variant starts out in an uninitialized state because we typically make + /// the column requests by root only **after** we have fetched the corresponding blocks. + /// We can initialize this variant only after the columns requests have been made. DataColumnsFromRoot { requests: HashMap< DataColumnsByRootRequestId, ByRangeRequest>, >, + // Indicates if this variant has been initialized by sending columns by root requests. + // We only start expecting columns once this is set to true. init: bool, /// The column indices corresponding to the request column_peers: HashMap>, @@ -89,6 +96,8 @@ impl RangeBlockComponentsRequest { /// * `blocks_req_id` - Request ID for the blocks /// * `blobs_req_id` - Optional request ID for blobs (pre-Fulu fork) /// * `data_columns` - Optional tuple of (request_id->column_indices pairs, expected_custody_columns) for Fulu fork + /// * `request_columns_by_root` - Creates an uninitialized `RangeBlockDataRequest::DataColumnsFromRoot` variant if this is true. + /// Note: this is only relevant is `data_columns == None`. #[allow(clippy::type_complexity)] pub fn new( blocks_req_id: BlocksByRangeRequestId, @@ -97,7 +106,7 @@ impl RangeBlockComponentsRequest { Vec<(DataColumnsByRangeRequestId, Vec)>, Vec, )>, - data_columns_from_root: bool, + request_columns_by_root: bool, request_span: Span, ) -> Self { let block_peer = blocks_req_id.peer_id; @@ -114,7 +123,7 @@ impl RangeBlockComponentsRequest { expected_custody_columns, attempt: 0, } - } else if data_columns_from_root { + } else if request_columns_by_root { RangeBlockDataRequest::DataColumnsFromRoot { requests: HashMap::new(), init: false, @@ -134,6 +143,7 @@ impl RangeBlockComponentsRequest { } } + /// Returns the peers that we requested the blocks, blobs and columns for this component. pub fn responsible_peers(&self) -> ResponsiblePeers { ResponsiblePeers { block_blob: self.block_peer, @@ -174,8 +184,8 @@ impl RangeBlockComponentsRequest { } } - /// `column_requests`: each element represents a request id and the columns requested under that request. - pub fn insert_column_request_after_block_request( + /// Initialize the entries for this component after the column requests have been sent. + pub fn initialize_data_columns_from_root_component( &mut self, column_requests: Vec<(DataColumnsByRootRequestId, Vec)>, custody_columns: &[ColumnIndex], @@ -210,6 +220,13 @@ impl RangeBlockComponentsRequest { } } + /// This modifies the internal variant to `NoData`. + /// + /// Once we make the block request for a batch and get responses, it is possible + /// that the entire batch contained no blobs based on the values of `expected_kzg_commitments`. + /// + /// At this point, we do not need to make any requests and the blocks correspond to all the + /// available data for this batch. Hence, we indicate here that this component requires no data. pub fn no_columns_for_batch(&mut self) -> Result<(), String> { match self.block_data_request { RangeBlockDataRequest::DataColumnsFromRoot { .. 
} => { @@ -336,17 +353,13 @@ impl RangeBlockComponentsRequest { spec, )) } - RangeBlockDataRequest::DataColumnsFromRoot { - init, - attempt, - column_peers, - expected_custody_columns, + + RangeBlockDataRequest::DataColumns { requests, + expected_custody_columns, + column_peers, + attempt, } => { - if !*init { - return None; - } - let mut data_columns = vec![]; let mut column_to_peer_id: HashMap = HashMap::new(); for req in requests.values() { @@ -393,12 +406,20 @@ impl RangeBlockComponentsRequest { Some(resp) } - RangeBlockDataRequest::DataColumns { - requests, - expected_custody_columns, - column_peers, + // Reuse same logic that we use for coupling data columns for now. + // todo(pawan): we should never get a coupling error here, so simplify this + // variant's handling. + RangeBlockDataRequest::DataColumnsFromRoot { + init, attempt, + column_peers, + expected_custody_columns, + requests, } => { + if !*init { + return None; + } + let mut data_columns = vec![]; let mut column_to_peer_id: HashMap = HashMap::new(); for req in requests.values() { diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index d590f9998e3..c99008ebf18 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -559,9 +559,10 @@ impl SyncNetworkContext { } /// Try to make all the requests that we failed to make earlier because of lack of peers - /// in the required subnets. + /// in the required columns. /// - /// This function must be manually invoked at regular intervals. + /// This function must be manually invoked at regular intervals or when a new peer + /// gets added. pub fn retry_pending_requests(&mut self) -> Result<(), String> { let active_requests = self.active_request_count_by_peer(); @@ -603,21 +604,27 @@ impl SyncNetworkContext { requester, Span::none(), ) - .expect("should be able to send request"), + .map_err(|e| { + format!("Failed to send data columns by root request {:?}", e) + }), indices, - )); + )?); } // we have sent out requests to peers, register these requests with the coupling service. if let Some(req) = self.components_by_range_requests.get_mut(&parent_request) { - req.insert_column_request_after_block_request( + req.initialize_data_columns_from_root_component( data_column_requests, self.chain .sampling_columns_for_epoch(parent_request.requester.batch_id()), ) - .expect("should be in the right state"); + .map_err(|e| { + format!( + "Inconsistent state when inserting columns by root request {:?}", + e + ) + })?; } debug!(?requests, "Successfully retried requests"); - // Successfully processed, don't keep this entry } Err(err) => { debug!( @@ -625,7 +632,7 @@ impl SyncNetworkContext { ?parent_request, "Failed to retry request, no peers in subnets", ); - // Failed to process, keep this entry for next retry + // Still no peers, keep this entry for next retry entries_to_keep.push((parent_request, requests)); } } @@ -769,6 +776,7 @@ impl SyncNetworkContext { self.chain.sampling_columns_for_epoch(epoch).to_vec(), ) }), + // We are requesting data columns by range here false, range_request_span, ); @@ -778,6 +786,9 @@ impl SyncNetworkContext { } /// A blocks by range request sent by the range sync algorithm + /// + /// This function is used when we want to request data columns by root instead of range. + /// Pre-fulu, it works similar to `Self::block_components_by_range_request`. 
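The `DataColumnsFromRoot` coupling above deliberately bails out (`if !*init { return None; }`) until the by-root column requests have actually been issued; a later patch in this series renames that flag to `all_requests_made` and only sets it once the requests cover every expected custody column. A minimal, self-contained model of that two-phase lifecycle, using stand-in types (`u64` request ids and column indices) rather than the real Lighthouse ones:

```rust
// Reduced model of the "columns fetched by root" coupling gate described above.
// All names and types here are simplified stand-ins, not the real Lighthouse API.
use std::collections::{HashMap, HashSet};

type RequestId = u64;
type ColumnIndex = u64;

#[derive(Default)]
struct ColumnsFromRoot {
    // Column indices requested under each by-root request id.
    requests: HashMap<RequestId, Vec<ColumnIndex>>,
    // Set once the outstanding requests cover every expected custody column.
    all_requests_made: bool,
    expected_custody_columns: HashSet<ColumnIndex>,
}

impl ColumnsFromRoot {
    fn new(expected_custody_columns: HashSet<ColumnIndex>) -> Self {
        Self {
            expected_custody_columns,
            ..Default::default()
        }
    }

    // Called once the block response is in and by-root column requests go out;
    // may be called several times if some columns had no serving peer at first.
    fn register_requests(&mut self, sent: Vec<(RequestId, Vec<ColumnIndex>)>) {
        for (id, columns) in sent {
            self.requests.insert(id, columns);
        }
        let requested: HashSet<ColumnIndex> =
            self.requests.values().flatten().copied().collect();
        self.all_requests_made = requested == self.expected_custody_columns;
    }

    // Coupling blocks with columns is only attempted once every expected
    // custody column has a request in flight.
    fn ready_to_couple(&self) -> bool {
        self.all_requests_made
    }
}

fn main() {
    let mut state = ColumnsFromRoot::new(HashSet::from([1, 5, 9]));
    state.register_requests(vec![(0, vec![1, 5])]);
    assert!(!state.ready_to_couple()); // column 9 still has no request out
    state.register_requests(vec![(1, vec![9])]);
    assert!(state.ready_to_couple());
}
```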
pub fn block_components_by_range_request_without_components( &mut self, batch_type: ByRangeRequestType, @@ -854,14 +865,13 @@ impl SyncNetworkContext { None }; - let data_columns_by_root = matches!(batch_type, ByRangeRequestType::BlocksAndColumns); - debug!(?requester, data_columns_by_root, "Batch type"); let info = RangeBlockComponentsRequest::new( blocks_req_id, blobs_req_id, None, - data_columns_by_root, + // request data columns by root only if this batch requires requesting columns + matches!(batch_type, ByRangeRequestType::BlocksAndColumns), range_request_span, ); self.components_by_range_requests.insert(id, info); @@ -1473,6 +1483,7 @@ impl SyncNetworkContext { Ok((id, requested_columns)) } + /// Send `DataColumnsByRoot` requests for progressing range sync. fn send_data_columns_by_root_range_requests( &mut self, peer_id: PeerId, @@ -1492,7 +1503,7 @@ impl SyncNetworkContext { request .clone() .try_into_request(self.fork_context.current_fork_name(), &self.chain.spec) - .expect("should work"), + .map_err(|e| RpcRequestSendError::InternalError(e.to_string()))?, ), app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRoot(id)), }) @@ -1716,16 +1727,27 @@ impl SyncNetworkContext { let resp = self .data_columns_by_root_range_requests .on_response(id, rpc_event); - // This error implies we asked the peer for a specific root and it did not give it to us - // if let Some(Err(RpcResponseError::VerifyError( - // LookupVerifyError::NotEnoughResponsesReturned { .. }, - // ))) = resp - // { - - // } self.on_rpc_response_result(id, "DataColumnsByRootRange", resp, peer_id, |b| b.len()) } + /// Requests data columns for the given blocks by root. + /// + /// We request by root because it is much easier to reason about + /// and handle for failure cases when we ask for the same roots that + /// we are trying to sync the blocks for. + /// + /// This is specially relevant in periods of non-finality when there are multiple + /// head chains to sync. + /// + /// This function piggybacks on the existing parent block request and inserts the + /// column requests made into `self.components_by_range_requests` such that when + /// the column requests complete, we return the coupled batch to range sync to progress. + /// + /// If there are no peers to serve the column requests, we add them to a queue for retrying + /// the requests once more peers become available. + /// + /// Note: we do not use the by root syncing mechanism for backfill since there is only + /// one canonical chain to sync. 
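A compact, self-contained sketch of the by-root flow this comment describes, with stand-in types in place of `SignedBeaconBlock`, peer ids and the sync network context: only blocks that actually carry blob KZG commitments contribute roots, custody columns are grouped by a peer able to serve them, columns with no current peer are parked for a later retry, and a batch with no commitments at all is flagged as needing no data.

```rust
use std::collections::HashMap;

type Root = [u8; 32];
type ColumnIndex = u64;
type PeerId = u8; // stand-in for the real libp2p PeerId

struct Block {
    root: Root,
    blob_commitment_count: usize,
}

enum ColumnPlan {
    // No block in the batch has blob commitments: nothing to fetch, the blocks
    // alone are the complete batch.
    NoData,
    // Per-peer by-root requests, plus columns that currently have no peer and
    // must be queued for retry.
    Requests {
        roots: Vec<Root>,
        by_peer: HashMap<PeerId, Vec<ColumnIndex>>,
        waiting_for_peers: Vec<ColumnIndex>,
    },
}

fn plan_column_requests(
    blocks: &[Block],
    custody_columns: &[ColumnIndex],
    peer_for_column: impl Fn(ColumnIndex) -> Option<PeerId>,
) -> ColumnPlan {
    let roots: Vec<Root> = blocks
        .iter()
        .filter(|block| block.blob_commitment_count > 0)
        .map(|block| block.root)
        .collect();
    if roots.is_empty() {
        return ColumnPlan::NoData;
    }
    let mut by_peer: HashMap<PeerId, Vec<ColumnIndex>> = HashMap::new();
    let mut waiting_for_peers = Vec::new();
    for &column in custody_columns {
        match peer_for_column(column) {
            Some(peer) => by_peer.entry(peer).or_default().push(column),
            None => waiting_for_peers.push(column),
        }
    }
    ColumnPlan::Requests {
        roots,
        by_peer,
        waiting_for_peers,
    }
}

fn main() {
    let blocks = [
        Block { root: [0; 32], blob_commitment_count: 0 },
        Block { root: [1; 32], blob_commitment_count: 2 },
    ];
    // Pretend only columns 0 and 1 currently have a serving peer.
    let plan = plan_column_requests(&blocks, &[0, 1, 7], |column| (column < 2).then_some(42));
    if let ColumnPlan::Requests { waiting_for_peers, .. } = plan {
        assert_eq!(waiting_for_peers, vec![7]); // column 7 is retried later
    }
}
```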
fn request_columns_on_successful_blocks( &mut self, id: BlocksByRangeRequestId, @@ -1746,11 +1768,11 @@ impl SyncNetworkContext { ) { return Ok(()); } - // todo(pawan): send the data column request as soon as you get each chunk to spread out requests debug!(count = blocks.len(), "Received blocks from byrange query"); - // We have blocks here, check if they need data columns and request them + let mut block_roots = Vec::new(); + // We have blocks here, check if they need data columns and request them for block in blocks.iter() { // Request columns only if the blob_kzg_commitments is non-empty if let Ok(commitments) = block.message().body().blob_kzg_commitments() { @@ -1759,9 +1781,10 @@ impl SyncNetworkContext { } } } + + // No blobs for the entire epoch, let the coupling logic know not to expect anything + // and return early if block_roots.is_empty() { - // No blobs for the entire epoch, let the coupling logic know not to expect anything - // and return early if let Some(req) = self .components_by_range_requests .get_mut(&id.parent_request_id) @@ -1773,7 +1796,7 @@ impl SyncNetworkContext { return Ok(()); } else { return Err(RpcResponseError::InternalError( - "Request sent without creating an entry".to_string(), + "Block request sent without creating a components_by_range entry".to_string(), )); } } @@ -1803,6 +1826,7 @@ impl SyncNetworkContext { } } + // Send the requests for all columns that we have peers for let mut data_column_requests = Vec::new(); for (peer, indices) in peer_to_columns.into_iter() { let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { @@ -1821,11 +1845,17 @@ impl SyncNetworkContext { requester, Span::none(), ) - .expect("should be able to send request"), + .map_err(|e| { + RpcResponseError::InternalError(format!( + "Failed to send data columns by root request {:?}", + e + )) + }), indices, - )); + )?); } + // There are columns for which we have no peers, queue them up for retry later if !no_peers_for_column.is_empty() { let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { block_roots: block_roots.clone(), @@ -1836,15 +1866,21 @@ impl SyncNetworkContext { .insert(id.parent_request_id, data_columns_by_root_request); } + // Insert the requests into the existing block parent request if let Some(req) = self .components_by_range_requests .get_mut(&id.parent_request_id) { - req.insert_column_request_after_block_request( + req.initialize_data_columns_from_root_component( data_column_requests, self.chain.sampling_columns_for_epoch(batch_epoch), ) - .expect("should be in the right state"); + .map_err(|e| { + format!( + "Inconsistent state when inserting columns by root request {:?}", + e + ) + })?; } else { return Err(RpcResponseError::InternalError( "Request sent without creating an entry".to_string(), @@ -1863,6 +1899,16 @@ impl SyncNetworkContext { let resp = self.blocks_by_range_requests.on_response(id, rpc_event); match &resp { Some(Ok((blocks, _))) => { + // On receving a successful response for a blocks by range request, + // request the corresponding data columns for this batch by root (if required). + // + // We request the columns by root instead of by range to avoid peers responding + // with the columns corresponding to their view of the canonical chain + // instead of the chain that we are trying to sync. Requesting by root allows + // us to be more specific and reduces the number of failure cases we have to handle. 
+ // + // This is specially relevant when we are syncing at times when there are a lot of + // head chains in a non-finality scenario. if let Err(e) = self.request_columns_on_successful_blocks(id, blocks) { return Some(Err(e)); } From 05adb7195d7c1eb320ac2cd532cdd4818536c8d2 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 27 Aug 2025 14:26:28 -0700 Subject: [PATCH 18/49] Fix bug with partial column responses before all column requests sent --- .../src/sync/block_sidecar_coupling.rs | 44 ++++++++------- .../network/src/sync/network_context.rs | 53 ++++++++++--------- 2 files changed, 54 insertions(+), 43 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 9db4c5d5e07..59c0ebc81d2 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -10,7 +10,10 @@ use lighthouse_network::{ DataColumnsByRootRequestId, }, }; -use std::{collections::HashMap, sync::Arc}; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; use tracing::Span; use types::{ BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, @@ -66,12 +69,11 @@ enum RangeBlockDataRequest { DataColumnsByRootRequestId, ByRangeRequest>, >, - // Indicates if this variant has been initialized by sending columns by root requests. - // We only start expecting columns once this is set to true. - init: bool, + // Indicates if we have made column requests for each of the `expected_custody_columns` or not + all_requests_made: bool, /// The column indices corresponding to the request column_peers: HashMap>, - expected_custody_columns: Vec, + expected_custody_columns: HashSet, attempt: usize, }, } @@ -106,7 +108,7 @@ impl RangeBlockComponentsRequest { Vec<(DataColumnsByRangeRequestId, Vec)>, Vec, )>, - request_columns_by_root: bool, + data_columns_by_root: Option>, request_span: Span, ) -> Self { let block_peer = blocks_req_id.peer_id; @@ -123,13 +125,13 @@ impl RangeBlockComponentsRequest { expected_custody_columns, attempt: 0, } - } else if request_columns_by_root { + } else if let Some(expected_custody_columns) = data_columns_by_root { RangeBlockDataRequest::DataColumnsFromRoot { requests: HashMap::new(), - init: false, + all_requests_made: false, attempt: 0, column_peers: HashMap::new(), - expected_custody_columns: Vec::new(), + expected_custody_columns, } } else { RangeBlockDataRequest::NoData @@ -188,7 +190,6 @@ impl RangeBlockComponentsRequest { pub fn initialize_data_columns_from_root_component( &mut self, column_requests: Vec<(DataColumnsByRootRequestId, Vec)>, - custody_columns: &[ColumnIndex], ) -> Result<(), String> { // Nothing to insert, do not initialize if column_requests.is_empty() { @@ -196,9 +197,9 @@ impl RangeBlockComponentsRequest { } match &mut self.block_data_request { RangeBlockDataRequest::DataColumnsFromRoot { - init, requests, attempt: _, + all_requests_made, column_peers, expected_custody_columns, } => { @@ -206,13 +207,14 @@ impl RangeBlockComponentsRequest { requests.insert(request, ByRangeRequest::Active(request)); column_peers.insert(request, peers); } - // expected custody columns should be populated only once during initialization - if !*init { - for column in custody_columns { - expected_custody_columns.push(*column); + + if !*all_requests_made { + let mut all_columns_requested = HashSet::new(); + for columns in column_peers.values() { + all_columns_requested.extend(columns.iter()); } + 
*all_requests_made = all_columns_requested == *expected_custody_columns; } - *init = true; Ok(()) } @@ -410,13 +412,15 @@ impl RangeBlockComponentsRequest { // todo(pawan): we should never get a coupling error here, so simplify this // variant's handling. RangeBlockDataRequest::DataColumnsFromRoot { - init, + all_requests_made, attempt, column_peers, expected_custody_columns, requests, } => { - if !*init { + // Do not couple until requests covering all required columns + // have been made + if !*all_requests_made { return None; } @@ -441,11 +445,13 @@ impl RangeBlockComponentsRequest { } } + let expected_custody_columns: Vec<_> = + expected_custody_columns.iter().copied().collect(); let resp = Self::responses_with_custody_columns( blocks.to_vec(), data_columns, column_to_peer_id, - expected_custody_columns, + &expected_custody_columns, *attempt, ); diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index c99008ebf18..c6bd3ce2f57 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -606,16 +606,14 @@ impl SyncNetworkContext { ) .map_err(|e| { format!("Failed to send data columns by root request {:?}", e) - }), + })?, indices, - )?); + )); } // we have sent out requests to peers, register these requests with the coupling service. if let Some(req) = self.components_by_range_requests.get_mut(&parent_request) { req.initialize_data_columns_from_root_component( data_column_requests, - self.chain - .sampling_columns_for_epoch(parent_request.requester.batch_id()), ) .map_err(|e| { format!( @@ -777,7 +775,7 @@ impl SyncNetworkContext { ) }), // We are requesting data columns by range here - false, + None, range_request_span, ); self.components_by_range_requests.insert(id, info); @@ -865,13 +863,19 @@ impl SyncNetworkContext { None }; - + let epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch()); let info = RangeBlockComponentsRequest::new( blocks_req_id, blobs_req_id, None, // request data columns by root only if this batch requires requesting columns - matches!(batch_type, ByRangeRequestType::BlocksAndColumns), + if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) { + Some(HashSet::from_iter( + self.chain.sampling_columns_for_epoch(epoch).iter().copied(), + )) + } else { + None + }, range_request_span, ); self.components_by_range_requests.insert(id, info); @@ -1817,11 +1821,6 @@ impl SyncNetworkContext { .or_default() .push(*column); } else { - debug!( - ?data_column, - block_request_id=?id, - "Not enough column peers for batch, need to retry" - ); no_peers_for_column.push(*column); } } @@ -1850,13 +1849,18 @@ impl SyncNetworkContext { "Failed to send data columns by root request {:?}", e )) - }), + })?, indices, - )?); + )); } // There are columns for which we have no peers, queue them up for retry later if !no_peers_for_column.is_empty() { + debug!( + block_request_id=?id, + ?no_peers_for_column, + "Not enough column peers for batch, will retry request" + ); let data_columns_by_root_request = DataColumnsByRootBatchBlockRequest { block_roots: block_roots.clone(), indices: no_peers_for_column, @@ -1871,16 +1875,13 @@ impl SyncNetworkContext { .components_by_range_requests .get_mut(&id.parent_request_id) { - req.initialize_data_columns_from_root_component( - data_column_requests, - self.chain.sampling_columns_for_epoch(batch_epoch), - ) - .map_err(|e| { - format!( - "Inconsistent state when inserting columns by root request {:?}", - e - ) - 
})?; + req.initialize_data_columns_from_root_component(data_column_requests) + .map_err(|e| { + RpcResponseError::InternalError(format!( + "Inconsistent state when inserting columns by root request {:?}", + e + )) + })?; } else { return Err(RpcResponseError::InternalError( "Request sent without creating an entry".to_string(), @@ -1910,6 +1911,10 @@ impl SyncNetworkContext { // This is specially relevant when we are syncing at times when there are a lot of // head chains in a non-finality scenario. if let Err(e) = self.request_columns_on_successful_blocks(id, blocks) { + debug!( + ?e, + "Error requesting columns on succesful blocks by range request" + ); return Some(Err(e)); } } From b4bc7fed69f6cff410624b68e73c89d33d0b0f89 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 27 Aug 2025 14:30:46 -0700 Subject: [PATCH 19/49] Remove more debug logs --- .../network/src/sync/range_sync/chain.rs | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index b955c0b0ab3..a31a24f7d08 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -966,7 +966,6 @@ impl SyncingChain { network: &mut SyncNetworkContext, src: &str, ) -> ProcessingResult { - debug!(?src, "In attempt_send_awaiting download batches"); // Collect all batches in AwaitingDownload state and see if they can be sent let awaiting_downloads: Vec<_> = self .batches @@ -976,7 +975,6 @@ impl SyncingChain { .copied() .collect(); for batch_id in awaiting_downloads { - debug!(?src, ?batch_id, "Sending batch"); if self.good_peers_on_sampling_subnets(batch_id, network) { self.send_batch(network, batch_id)?; } @@ -991,7 +989,6 @@ impl SyncingChain { batch_id: BatchId, ) -> ProcessingResult { let _guard = self.span.clone().entered(); - debug!(batch_epoch = %batch_id, "Requesting batch"); let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { let (request, batch_type) = batch.to_blocks_by_range_request(); @@ -1121,7 +1118,6 @@ impl SyncingChain { network: &mut SyncNetworkContext, ) -> Result { let _guard = self.span.clone().entered(); - debug!("Resuming chain"); // Request more batches if needed. self.request_batches(network)?; // If there is any batch ready for processing, send it. @@ -1134,14 +1130,11 @@ impl SyncingChain { if !matches!(self.state, ChainSyncingState::Syncing) { return Ok(KeepChain); } - debug!("In request batches"); - // find the next pending batch and request it from the peer // check if we have the batch for our optimistic start. If not, request it first. // We wait for this batch before requesting any other batches. 
if let Some(epoch) = self.optimistic_start { - debug!("In request batches optimistic start"); if !self.good_peers_on_sampling_subnets(epoch, network) { debug!("Waiting for peers to be available on sampling column subnets"); return Ok(KeepChain); @@ -1153,14 +1146,11 @@ impl SyncingChain { entry.insert(optimistic_batch); self.send_batch(network, epoch)?; } else { - debug!(batch=?self.batches.get(&epoch), "Optimistic batch info"); - self.attempt_send_awaiting_download_batches(network, "optimisitc"); + self.attempt_send_awaiting_download_batches(network, "optimistic"); } return Ok(KeepChain); } - debug!("In request batches checking if can send batch"); - // find the next pending batch and request it from the peer // Note: for this function to not infinite loop we must: // - If `include_next_batch` returns Some we MUST increase the count of batches that are @@ -1207,8 +1197,6 @@ impl SyncingChain { /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { - debug!("In include_next_batch"); - // don't request batches beyond the target head slot if self .to_be_downloaded @@ -1235,12 +1223,6 @@ impl SyncingChain { .collect(); if in_buffer_batches.len() > BATCH_BUFFER_SIZE as usize { - debug!( - ?in_buffer_batches, - ?self.processing_target, - ?self.to_be_downloaded, "Too many batches already" - ); - return None; } @@ -1253,7 +1235,6 @@ impl SyncingChain { return None; } - debug!(?self.to_be_downloaded, "Trying to check next batch id"); // If no batch needs a retry, attempt to send the batch of the next epoch to download let next_batch_id = self.to_be_downloaded; // this batch could have been included already being an optimistic batch From 73313239c7f9278c6fce5b9e5bdb311603699e4d Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 27 Aug 2025 17:30:46 -0700 Subject: [PATCH 20/49] AwaitingValidation state only needs block peer --- .../network/src/sync/backfill_sync/mod.rs | 4 +-- .../network/src/sync/network_context.rs | 2 +- .../src/sync/network_context/requests.rs | 4 +-- .../requests/data_columns_by_root.rs | 5 +-- .../network/src/sync/range_sync/batch.rs | 34 +++++++++---------- .../network/src/sync/range_sync/chain.rs | 13 +++---- .../network/src/sync/range_sync/range.rs | 4 +-- 7 files changed, 31 insertions(+), 35 deletions(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index ac47310b3f0..f8572a6eb0e 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -723,7 +723,7 @@ impl BackFillSync { )))?; return Ok(ProcessResult::Successful); } - BatchState::AwaitingValidation(_, _) => { + BatchState::AwaitingValidation(_) => { // TODO: I don't think this state is possible, log a CRIT just in case. // If this is not observed, add it to the failed state branch above. 
crit!( @@ -773,7 +773,7 @@ impl BackFillSync { // only for batches awaiting validation can we be sure the last attempt is // right, and thus, that any different attempt is wrong match batch.state() { - BatchState::AwaitingValidation(processed_attempt, _) => { + BatchState::AwaitingValidation(processed_attempt) => { for attempt in batch.attempts() { // The validated batch has been re-processed if attempt.hash != processed_attempt.hash { diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 19c2beaf8ef..139bf54109c 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -103,7 +103,7 @@ pub enum RpcResponseError { VerifyError(LookupVerifyError), CustodyRequestError(#[allow(dead_code)] CustodyRequestError), BlockComponentCouplingError(CouplingError), - InternalError(String), + InternalError(#[allow(dead_code)] String), } #[derive(Debug, PartialEq, Eq)] diff --git a/beacon_node/network/src/sync/network_context/requests.rs b/beacon_node/network/src/sync/network_context/requests.rs index 18cd00bfda3..950fc3db312 100644 --- a/beacon_node/network/src/sync/network_context/requests.rs +++ b/beacon_node/network/src/sync/network_context/requests.rs @@ -13,8 +13,8 @@ pub use blocks_by_range::BlocksByRangeRequestItems; pub use blocks_by_root::{BlocksByRootRequestItems, BlocksByRootSingleRequest}; pub use data_columns_by_range::DataColumnsByRangeRequestItems; pub use data_columns_by_root::{ - DataColumnsByRootBatchBlockRequest, DataColumnsByRootRequestItems, - DataColumnsByRootSingleBlockRequest, DataColumnsByRootRangeRequestItems + DataColumnsByRootBatchBlockRequest, DataColumnsByRootRangeRequestItems, + DataColumnsByRootRequestItems, DataColumnsByRootSingleBlockRequest, }; use crate::metrics; diff --git a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs index 879c1036a67..22a91e23792 100644 --- a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs +++ b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs @@ -30,10 +30,7 @@ impl DataColumnsByRootBatchBlockRequest { }) .collect(); assert!(ids.len() <= 32); - Ok(DataColumnsByRootRequest::new( - ids, - spec.max_request_blocks(fork_name), - )) + DataColumnsByRootRequest::new(ids, spec.max_request_blocks(fork_name)) } } diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 14dd07ae31b..fb7689ed392 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -152,7 +152,7 @@ pub enum BatchState { /// It is not sufficient to process a batch successfully to consider it correct. This is /// because batches could be erroneously empty, or incomplete. Therefore, a batch is considered /// valid, only if the next sequential batch imports at least a block. - AwaitingValidation(Attempt, ResponsiblePeers), + AwaitingValidation(Attempt), /// Intermediate state for inner state handling. Poisoned, /// The batch has maxed out the allowed attempts for either downloading or processing. It @@ -225,12 +225,12 @@ impl BatchInfo { /// Returns the peers that are currently responsible for progressing the state of the batch. 
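The `AwaitingValidation` change in this patch can be reduced to a small model (types simplified; the real `BatchState` also tracks download and processing data): once processing succeeds, only the `Attempt` survives, and the attempt records the peer the blocks were requested from, so a later validation failure has exactly that one peer to hold accountable.

```rust
// Reduced model of the state change in this patch; all types are stand-ins.
type PeerId = u8;

#[derive(Debug, PartialEq)]
struct Attempt {
    // The peer the blocks were requested from.
    peer_id: PeerId,
    // Hash of the downloaded blocks, used to tell attempts apart.
    hash: u64,
}

#[derive(Debug)]
enum BatchState {
    AwaitingDownload,
    Downloading(PeerId),
    Processing(Attempt),
    // Only the block attempt is kept: blob/column peers are no longer tracked
    // once the batch has been processed.
    AwaitingValidation(Attempt),
    Failed,
}

impl BatchState {
    // On validation failure, the peer that served the blocks for the validated
    // attempt is the one held accountable before the batch is re-downloaded.
    fn validation_failed(self) -> (Option<PeerId>, BatchState) {
        match self {
            BatchState::AwaitingValidation(attempt) => {
                (Some(attempt.peer_id), BatchState::AwaitingDownload)
            }
            other => (None, other),
        }
    }
}

fn main() {
    let state = BatchState::AwaitingValidation(Attempt { peer_id: 7, hash: 0xdead });
    let (accountable, next) = state.validation_failed();
    assert_eq!(accountable, Some(7));
    println!("next state: {next:?}");
}
```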
pub fn processing_peers(&self) -> Option<&ResponsiblePeers> { match &self.state { - BatchState::AwaitingDownload | BatchState::Failed | BatchState::Downloading(..) => None, + BatchState::AwaitingDownload + | BatchState::Failed + | BatchState::Downloading(..) + | BatchState::AwaitingValidation(..) => None, BatchState::AwaitingProcessing(responsible_peers, _, _) - | BatchState::Processing(Attempt { .. }, responsible_peers) - | BatchState::AwaitingValidation(Attempt { .. }, responsible_peers) => { - Some(responsible_peers) - } + | BatchState::Processing(Attempt { .. }, responsible_peers) => Some(responsible_peers), BatchState::Poisoned => unreachable!("Poisoned batch"), } } @@ -385,10 +385,9 @@ impl BatchInfo { BatchState::AwaitingDownload | BatchState::Failed | BatchState::Poisoned - | BatchState::Downloading(_) => None, - BatchState::AwaitingProcessing(r, _, _) - | BatchState::AwaitingValidation(_, r) - | BatchState::Processing(_, r) => Some(r), + | BatchState::Downloading(_) + | BatchState::AwaitingValidation(_) => None, + BatchState::AwaitingProcessing(r, _, _) | BatchState::Processing(_, r) => Some(r), } } @@ -397,11 +396,9 @@ impl BatchInfo { processing_result: BatchProcessingResult, ) -> Result { match self.state.poison() { - BatchState::Processing(attempt, responsible_peers) => { + BatchState::Processing(attempt, _responsible_peers) => { self.state = match processing_result { - BatchProcessingResult::Success => { - BatchState::AwaitingValidation(attempt, responsible_peers) - } + BatchProcessingResult::Success => BatchState::AwaitingValidation(attempt), BatchProcessingResult::FaultyFailure => { // register the failed attempt self.failed_processing_attempts.push(attempt); @@ -437,7 +434,7 @@ impl BatchInfo { #[must_use = "Batch may have failed"] pub fn validation_failed(&mut self) -> Result { match self.state.poison() { - BatchState::AwaitingValidation(attempt, responsible_peers) => { + BatchState::AwaitingValidation(attempt) => { self.failed_processing_attempts.push(attempt); // check if the batch can be downloaded again @@ -473,6 +470,7 @@ impl BatchInfo { #[derive(PartialEq, Debug)] pub struct Attempt { /// The peer that made the attempt. + /// This peer is effectively the peer that we requested the blocks from. pub peer_id: PeerId, /// The hash of the blocks of the attempt. pub hash: u64, @@ -491,8 +489,8 @@ impl std::fmt::Debug for BatchState { BatchState::Processing(Attempt { peer_id, hash: _ }, responsible_peers) => { write!(f, "Processing({}) {:?}", peer_id, responsible_peers) } - BatchState::AwaitingValidation(Attempt { peer_id, hash: _ }, responsible_peers) => { - write!(f, "AwaitingValidation({}) {:?}", peer_id, responsible_peers) + BatchState::AwaitingValidation(Attempt { peer_id, hash: _ }) => { + write!(f, "AwaitingValidation({})", peer_id) } BatchState::AwaitingDownload => f.write_str("AwaitingDownload"), BatchState::Failed => f.write_str("Failed"), @@ -519,7 +517,7 @@ impl BatchState { match self { BatchState::Downloading(..) => 'D', BatchState::Processing(_, _) => 'P', - BatchState::AwaitingValidation(_, _) => 'v', + BatchState::AwaitingValidation(_) => 'v', BatchState::AwaitingDownload => 'd', BatchState::Failed => 'F', BatchState::AwaitingProcessing(..) 
=> 'p', diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index a31a24f7d08..390a81b1cbf 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -365,7 +365,7 @@ impl SyncingChain { state ))); } - BatchState::AwaitingValidation(_, _) => { + BatchState::AwaitingValidation(_) => { // If an optimistic start is given to the chain after the corresponding // batch has been requested and processed we can land here. We drop the // optimistic candidate since we can't conclude whether the batch included @@ -399,7 +399,7 @@ impl SyncingChain { state ))); } - BatchState::AwaitingValidation(_, _) => { + BatchState::AwaitingValidation(_) => { // we can land here if an empty optimistic batch succeeds processing and is // inside the download buffer (between `self.processing_target` and // `self.to_be_downloaded`). In this case, eventually the chain advances to the @@ -667,7 +667,7 @@ impl SyncingChain { // only for batches awaiting validation can we be sure the last attempt is // right, and thus, that any different attempt is wrong match batch.state() { - BatchState::AwaitingValidation(processed_attempt, responsible_peers) => { + BatchState::AwaitingValidation(processed_attempt) => { for attempt in batch.attempts() { // The validated batch has been re-processed if attempt.hash != processed_attempt.hash { @@ -793,7 +793,7 @@ impl SyncingChain { // reset self.processing_target = self.start_epoch; - // finally, re-request the failed batch. + // finally, re-request the failed batch and all other batches in `AwaitingDownload` state. self.attempt_send_awaiting_download_batches(network, "handle_invalid_batch") } @@ -961,10 +961,11 @@ impl SyncingChain { } } + /// Attempts to send all batches that are in `AwaitingDownload` state. 
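Whether a batch stuck in `AwaitingDownload` can actually be sent comes down to `good_peers_on_sampling_subnets`. A hedged sketch of that gate, with a plain map standing in for the peer DB (the real lookup also checks, for range sync, that the peer's status message covers the epoch): a batch is only sendable if every sampling subnet has at least one good custody peer.

```rust
use std::collections::{HashMap, HashSet};

type SubnetId = u64;
type PeerId = u8;

// `good_peers_per_subnet` stands in for the peer-DB query used by the real code.
fn batch_is_sendable(
    sampling_subnets: &HashSet<SubnetId>,
    good_peers_per_subnet: &HashMap<SubnetId, Vec<PeerId>>,
) -> bool {
    sampling_subnets.iter().all(|subnet| {
        good_peers_per_subnet
            .get(subnet)
            .is_some_and(|peers| !peers.is_empty())
    })
}

fn main() {
    let subnets = HashSet::from([3, 17]);
    let mut peers = HashMap::new();
    peers.insert(3, vec![1]);
    // Subnet 17 has no good custody peer yet, so the batch stays in AwaitingDownload.
    assert!(!batch_is_sendable(&subnets, &peers));
    peers.insert(17, vec![2]);
    assert!(batch_is_sendable(&subnets, &peers));
}
```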
pub fn attempt_send_awaiting_download_batches( &mut self, network: &mut SyncNetworkContext, - src: &str, + _src: &str, ) -> ProcessingResult { // Collect all batches in AwaitingDownload state and see if they can be sent let awaiting_downloads: Vec<_> = self @@ -1146,7 +1147,7 @@ impl SyncingChain { entry.insert(optimistic_batch); self.send_batch(network, epoch)?; } else { - self.attempt_send_awaiting_download_batches(network, "optimistic"); + self.attempt_send_awaiting_download_batches(network, "optimistic")?; } return Ok(KeepChain); } diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index cd523d3e193..703164d6874 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -44,9 +44,9 @@ use super::chain_collection::{ChainCollection, SyncChainStatus}; use super::sync_type::RangeSyncType; use crate::metrics; use crate::status::ToStatusMessage; -use crate::sync::range_sync::ResponsiblePeers; use crate::sync::BatchProcessResult; use crate::sync::network_context::{RpcResponseError, SyncNetworkContext}; +use crate::sync::range_sync::ResponsiblePeers; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes}; use lighthouse_network::rpc::GoodbyeReason; @@ -212,7 +212,7 @@ where ) { // check if this chunk removes the chain match self.chains.call_by_id(chain_id, |chain| { - chain.on_block_response(network, batch_id, request_id, blocks, responsible_peers) + chain.on_block_response(network, batch_id, request_id, blocks, responsible_peers) }) { Ok((removed_chain, sync_type)) => { if let Some((removed_chain, remove_reason)) = removed_chain { From da1aabab7383f82efe2338be51710883a4e9d8bc Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 27 Aug 2025 17:34:27 -0700 Subject: [PATCH 21/49] Revise error tolerance --- .../src/network_beacon_processor/sync_methods.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 820cf3ab75f..322cfdc23e4 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -887,31 +887,31 @@ impl NetworkBeaconProcessor { match &e { AvailabilityCheckError::InvalidBlobs(_) | AvailabilityCheckError::BlobIndexInvalid(_) => Err(ChainSegmentFailed { - message: format!("Peer sent invalid block. Reason: {:?}", err), + message: format!("Peer sent invalid blobs. Reason: {:?}", err), // Do not penalize peers for internal errors. peer_action: Some(PeerAction::LowToleranceError), faulty_component: Some(FaultyComponent::Blobs), }), AvailabilityCheckError::InvalidColumn(columns) => Err(ChainSegmentFailed { - message: format!("Peer sent invalid block. Reason: {:?}", err), + message: format!("Peer sent invalid columns. Reason: {:?}", err), // Do not penalize peers for internal errors. - peer_action: Some(PeerAction::MidToleranceError), + peer_action: Some(PeerAction::LowToleranceError), faulty_component: Some(FaultyComponent::Columns( columns.iter().map(|v| v.0).collect(), )), }), AvailabilityCheckError::DataColumnIndexInvalid(column) => { Err(ChainSegmentFailed { - message: format!("Peer sent invalid block. Reason: {:?}", err), + message: format!("Peer sent invalid columns. Reason: {:?}", err), // Do not penalize peers for internal errors. 
- peer_action: Some(PeerAction::MidToleranceError), + peer_action: Some(PeerAction::LowToleranceError), faulty_component: Some(FaultyComponent::Columns(vec![*column])), }) } _ => Err(ChainSegmentFailed { message: format!("Peer sent invalid block. Reason: {:?}", err), // Do not penalize peers for internal errors. - peer_action: Some(PeerAction::MidToleranceError), + peer_action: None, faulty_component: None, }), } From b07bc6d4e4e97b02be85cc939d8990be9156cd28 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Fri, 29 Aug 2025 16:18:53 -0700 Subject: [PATCH 22/49] Force requests if batch buffer is full under certain conditions --- .../network/src/sync/range_sync/chain.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 390a81b1cbf..12cfef28564 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -1220,11 +1220,25 @@ impl SyncingChain { .batches .iter() .filter(|&(_epoch, batch)| in_buffer(batch)) - .map(|(epoch, _)| epoch) .collect(); if in_buffer_batches.len() > BATCH_BUFFER_SIZE as usize { - return None; + // Force the request to avoid stalling the chain if the batch to be downloaded is less + // than all batches sitting inside the buffer awaiting downloaded/processing. + let should_force_request = in_buffer_batches + .iter() + .all(|(epoch, _)| **epoch > self.to_be_downloaded); + debug!( + ?in_buffer_batches, + ?self.to_be_downloaded, + ?self.processing_target, + ?self.optimistic_start, + should_force_request, + "Batch buffer full, not able to make new requests" + ); + if !should_force_request { + return None; + } } // don't send batch requests until we have peers on sampling subnets From 4f60e86dc70b27b125f880eec76c93dd458fdef4 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Sun, 31 Aug 2025 14:54:44 -0700 Subject: [PATCH 23/49] Add logs to debug stuck range sync --- beacon_node/network/src/sync/range_sync/chain.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 12cfef28564..88a7e78baa6 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -829,8 +829,9 @@ impl SyncingChain { let optimistic_epoch = align(optimistic_start_epoch); // advance the chain to the new validating epoch - debug!("Advancing chain"); + debug!(?self.to_be_downloaded, ?self.processing_target,"Advancing chain"); self.advance_chain(network, validating_epoch); + debug!(?self.to_be_downloaded, ?self.processing_target,"Advanced chain"); if self.optimistic_start.is_none() && optimistic_epoch > self.processing_target && !self.attempted_optimistic_starts.contains(&optimistic_epoch) @@ -967,6 +968,7 @@ impl SyncingChain { network: &mut SyncNetworkContext, _src: &str, ) -> ProcessingResult { + debug!(?self.processing_target,?self.to_be_downloaded,"In attempt"); // Collect all batches in AwaitingDownload state and see if they can be sent let awaiting_downloads: Vec<_> = self .batches @@ -989,6 +991,7 @@ impl SyncingChain { network: &mut SyncNetworkContext, batch_id: BatchId, ) -> ProcessingResult { + debug!(?self.processing_target,?self.to_be_downloaded,"In send_batch"); let _guard = self.span.clone().entered(); let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { @@ 
-1054,6 +1057,8 @@ impl SyncingChain { } }, } + } else { + debug!(?self.to_be_downloaded, ?self.processing_target,"Did not get batch"); } Ok(KeepChain) @@ -1131,22 +1136,26 @@ impl SyncingChain { if !matches!(self.state, ChainSyncingState::Syncing) { return Ok(KeepChain); } + debug!(?self.to_be_downloaded, ?self.processing_target,"Requesting batches"); // find the next pending batch and request it from the peer // check if we have the batch for our optimistic start. If not, request it first. // We wait for this batch before requesting any other batches. if let Some(epoch) = self.optimistic_start { + debug!(?self.to_be_downloaded, ?self.processing_target,"Optimistic start in request_batches"); if !self.good_peers_on_sampling_subnets(epoch, network) { debug!("Waiting for peers to be available on sampling column subnets"); return Ok(KeepChain); } if let Entry::Vacant(entry) = self.batches.entry(epoch) { + debug!(?self.to_be_downloaded, ?self.processing_target,"Vacant entry in request_batches"); let batch_type = network.batch_type(epoch); let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH, batch_type); entry.insert(optimistic_batch); self.send_batch(network, epoch)?; } else { + debug!(?self.to_be_downloaded, ?self.processing_target,"Not vacant in request_batches"); self.attempt_send_awaiting_download_batches(network, "optimistic")?; } return Ok(KeepChain); @@ -1159,6 +1168,7 @@ impl SyncingChain { // that function. while let Some(batch_id) = self.include_next_batch(network) { // send the batch + debug!(?self.to_be_downloaded, ?self.processing_target,"Got a batch to send"); self.send_batch(network, batch_id)?; } @@ -1198,6 +1208,7 @@ impl SyncingChain { /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. 
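A reduced sketch of the buffer check that `include_next_batch` applies (the constant and the batch map below are stand-ins): only batches currently downloading or awaiting processing count toward the buffer, and no new epoch is included while that buffer is full or the target has been passed.

```rust
use std::collections::BTreeMap;

type Epoch = u64;

#[derive(Debug)]
enum BatchState {
    AwaitingDownload,
    Downloading,
    AwaitingProcessing,
    Processing,
}

// Stand-in for the real buffer limit.
const BATCH_BUFFER_SIZE: usize = 5;

// Returns the next epoch to request, or `None` if too many batches are already
// in flight or awaiting processing, or the target epoch has been passed.
fn include_next_batch(
    batches: &BTreeMap<Epoch, BatchState>,
    to_be_downloaded: Epoch,
    target_epoch: Epoch,
) -> Option<Epoch> {
    if to_be_downloaded > target_epoch {
        return None;
    }
    let in_buffer = batches
        .values()
        .filter(|state| matches!(state, BatchState::Downloading | BatchState::AwaitingProcessing))
        .count();
    if in_buffer > BATCH_BUFFER_SIZE {
        return None;
    }
    Some(to_be_downloaded)
}

fn main() {
    let mut batches = BTreeMap::new();
    for epoch in 0..3u64 {
        batches.insert(epoch, BatchState::Downloading);
    }
    assert_eq!(include_next_batch(&batches, 3, 100), Some(3));
}
```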
fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { + debug!(?self.to_be_downloaded, ?self.processing_target,"In include next batch"); // don't request batches beyond the target head slot if self .to_be_downloaded From 7a6d0d9215431ef8e30e8f42b52fa4ba11ddbcfb Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Sun, 31 Aug 2025 19:07:39 -0700 Subject: [PATCH 24/49] Force processing_target request --- .../network/src/sync/range_sync/chain.rs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 88a7e78baa6..67684a7bf52 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -1161,6 +1161,28 @@ impl SyncingChain { return Ok(KeepChain); } + // Try to force requesting the `processing_batch` to progress sync + if !self.batches.contains_key(&self.processing_target) { + debug!(?self.to_be_downloaded, ?self.processing_target,"Processing start in request_batches"); + if !self.good_peers_on_sampling_subnets(self.processing_target, network) { + debug!("Waiting for peers to be available on sampling column subnets"); + return Ok(KeepChain); + } + + if let Entry::Vacant(entry) = self.batches.entry(self.processing_target) { + debug!(?self.to_be_downloaded, ?self.processing_target,"Vacant entry in request_batches for processing"); + let batch_type = network.batch_type(self.processing_target); + let processing_batch = + BatchInfo::new(&self.processing_target, EPOCHS_PER_BATCH, batch_type); + entry.insert(processing_batch); + self.send_batch(network, self.processing_target)?; + } else { + debug!(?self.to_be_downloaded, ?self.processing_target,"Not vacant in request_batches processing"); + self.attempt_send_awaiting_download_batches(network, "optimistic")?; + } + return Ok(KeepChain); + } + // find the next pending batch and request it from the peer // Note: for this function to not infinite loop we must: // - If `include_next_batch` returns Some we MUST increase the count of batches that are From 8458df67526c38e04355ffe501151cdd1b5c2835 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Mon, 1 Sep 2025 12:53:01 -0700 Subject: [PATCH 25/49] Attempt sending awaitingDownload batches when restarting sync --- beacon_node/network/src/sync/range_sync/chain.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 67684a7bf52..91943049502 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -832,6 +832,7 @@ impl SyncingChain { debug!(?self.to_be_downloaded, ?self.processing_target,"Advancing chain"); self.advance_chain(network, validating_epoch); debug!(?self.to_be_downloaded, ?self.processing_target,"Advanced chain"); + self.attempt_send_awaiting_download_batches(network, "start_syncing")?; if self.optimistic_start.is_none() && optimistic_epoch > self.processing_target && !self.attempted_optimistic_starts.contains(&optimistic_epoch) @@ -968,7 +969,6 @@ impl SyncingChain { network: &mut SyncNetworkContext, _src: &str, ) -> ProcessingResult { - debug!(?self.processing_target,?self.to_be_downloaded,"In attempt"); // Collect all batches in AwaitingDownload state and see if they can be sent let awaiting_downloads: Vec<_> = self .batches @@ -977,6 +977,8 @@ impl SyncingChain { .map(|(batch_id, _)| batch_id) .copied() 
.collect(); + debug!(?self.processing_target,?self.to_be_downloaded,_src, ?awaiting_downloads, "In attempt"); + for batch_id in awaiting_downloads { if self.good_peers_on_sampling_subnets(batch_id, network) { self.send_batch(network, batch_id)?; From 29c2f83bee3d68ae32d678ff17bad8a191d62a78 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Tue, 2 Sep 2025 13:29:19 -0700 Subject: [PATCH 26/49] Cleanup SyncingChain --- .../network/src/sync/range_sync/chain.rs | 115 +++++++++--------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 91943049502..821dcf9538e 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -352,6 +352,8 @@ impl SyncingChain { return Ok(KeepChain); } BatchState::Poisoned => unreachable!("Poisoned batch"), + // Batches can be in `AwaitingDownload` state if there weren't good data column subnet + // peers to send the request to. BatchState::AwaitingDownload => return Ok(KeepChain), BatchState::Processing(_, _) | BatchState::Failed => { // these are all inconsistent states: @@ -387,6 +389,8 @@ impl SyncingChain { // Batch is not ready, nothing to process } BatchState::Poisoned => unreachable!("Poisoned batch"), + // Batches can be in `AwaitingDownload` state if there weren't good data column subnet + // peers to send the request to. BatchState::AwaitingDownload => return Ok(KeepChain), BatchState::Failed | BatchState::Processing(_, _) => { // these are all inconsistent states: @@ -545,15 +549,20 @@ impl SyncingChain { faulty_component, } => { let Some(responsible_peers) = batch.responsible_peers() else { - crit!("Shouldn't happen"); - return Ok(KeepChain); + crit!( + current_state = ?batch.state(), + "Inconsistent state, batch must have been in processing state" + ); + return Err(RemoveChain::ChainFailed { + blacklist: false, + failing_batch: batch_id, + }); }; // Penalize the peer appropriately. match faulty_component { Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { network.report_peer(responsible_peers.block_blob, *penalty, "faulty_batch"); } - // todo(pawan): clean this up Some(FaultyComponent::Columns(faulty_columns)) => { for (peer, columns) in responsible_peers.data_columns.iter() { for faulty_column in faulty_columns { @@ -606,7 +615,7 @@ impl SyncingChain { BatchProcessResult::NonFaultyFailure => { batch.processing_completed(BatchProcessingResult::NonFaultyFailure)?; - // Simply re-download the batch. + // Simply re-download all batches in `AwaitingDownload` state. self.attempt_send_awaiting_download_batches(network, "non-faulty-failure") } } @@ -829,9 +838,9 @@ impl SyncingChain { let optimistic_epoch = align(optimistic_start_epoch); // advance the chain to the new validating epoch - debug!(?self.to_be_downloaded, ?self.processing_target,"Advancing chain"); self.advance_chain(network, validating_epoch); - debug!(?self.to_be_downloaded, ?self.processing_target,"Advanced chain"); + // attempt to download any batches stuck in the `AwaitingDownload` state because of + // a lack of peers earlier self.attempt_send_awaiting_download_batches(network, "start_syncing")?; if self.optimistic_start.is_none() && optimistic_epoch > self.processing_target @@ -844,7 +853,6 @@ impl SyncingChain { self.state = ChainSyncingState::Syncing; // begin requesting blocks from the peer pool, until all peers are exhausted. 
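The `faulty_component` match above routes the penalty to whichever peer actually served the bad data: block or blob faults fall on the single block/blob peer, column faults only on the peers that served a faulty column. A reduced sketch of that routing, with simplified types in place of the real `ResponsiblePeers` and penalty reporting:

```rust
use std::collections::HashMap;

type PeerId = u8;
type ColumnIndex = u64;

// Which peers served which parts of the batch.
struct ResponsiblePeers {
    block_blob: PeerId,
    data_columns: HashMap<PeerId, Vec<ColumnIndex>>,
}

enum FaultyComponent {
    Blocks,
    Blobs,
    Columns(Vec<ColumnIndex>),
}

// Returns the peers to report for a failed batch: the block/blob peer for block
// or blob faults, and only the peers that served a faulty column otherwise.
fn peers_to_report(peers: &ResponsiblePeers, faulty: &FaultyComponent) -> Vec<PeerId> {
    match faulty {
        FaultyComponent::Blocks | FaultyComponent::Blobs => vec![peers.block_blob],
        FaultyComponent::Columns(faulty_columns) => peers
            .data_columns
            .iter()
            .filter(|(_, served)| served.iter().any(|c| faulty_columns.contains(c)))
            .map(|(peer, _)| *peer)
            .collect(),
    }
}

fn main() {
    let peers = ResponsiblePeers {
        block_blob: 1,
        data_columns: HashMap::from([(2, vec![0, 4]), (3, vec![8])]),
    };
    // Only peer 2 served column 4, so only peer 2 is reported.
    assert_eq!(
        peers_to_report(&peers, &FaultyComponent::Columns(vec![4])),
        vec![2]
    );
}
```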
- debug!("Requesting batches from inside start syncing"); self.request_batches(network)?; // start processing batches if needed @@ -964,10 +972,13 @@ impl SyncingChain { } /// Attempts to send all batches that are in `AwaitingDownload` state. + /// + /// Batches might get stuck in `AwaitingDownload` post peerdas because of lack of peers + /// in required subnets. We need to progress them if peers are available at a later point. pub fn attempt_send_awaiting_download_batches( &mut self, network: &mut SyncNetworkContext, - _src: &str, + src: &str, ) -> ProcessingResult { // Collect all batches in AwaitingDownload state and see if they can be sent let awaiting_downloads: Vec<_> = self @@ -977,11 +988,19 @@ impl SyncingChain { .map(|(batch_id, _)| batch_id) .copied() .collect(); - debug!(?self.processing_target,?self.to_be_downloaded,_src, ?awaiting_downloads, "In attempt"); + debug!( + ?awaiting_downloads, + src, "Attempting to send batches awaiting downlaod" + ); for batch_id in awaiting_downloads { if self.good_peers_on_sampling_subnets(batch_id, network) { self.send_batch(network, batch_id)?; + } else { + debug!( + src = "attempt_send_awaiting_download_batches", + "Waiting for peers to be available on sampling column subnets" + ); } } Ok(KeepChain) @@ -993,7 +1012,6 @@ impl SyncingChain { network: &mut SyncNetworkContext, batch_id: BatchId, ) -> ProcessingResult { - debug!(?self.processing_target,?self.to_be_downloaded,"In send_batch"); let _guard = self.span.clone().entered(); let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { @@ -1138,62 +1156,60 @@ impl SyncingChain { if !matches!(self.state, ChainSyncingState::Syncing) { return Ok(KeepChain); } - debug!(?self.to_be_downloaded, ?self.processing_target,"Requesting batches"); // find the next pending batch and request it from the peer // check if we have the batch for our optimistic start. If not, request it first. // We wait for this batch before requesting any other batches. if let Some(epoch) = self.optimistic_start { - debug!(?self.to_be_downloaded, ?self.processing_target,"Optimistic start in request_batches"); if !self.good_peers_on_sampling_subnets(epoch, network) { - debug!("Waiting for peers to be available on sampling column subnets"); + debug!( + src = "request_batches_optimistic", + "Waiting for peers to be available on sampling column subnets" + ); return Ok(KeepChain); } if let Entry::Vacant(entry) = self.batches.entry(epoch) { - debug!(?self.to_be_downloaded, ?self.processing_target,"Vacant entry in request_batches"); let batch_type = network.batch_type(epoch); let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH, batch_type); entry.insert(optimistic_batch); self.send_batch(network, epoch)?; } else { - debug!(?self.to_be_downloaded, ?self.processing_target,"Not vacant in request_batches"); - self.attempt_send_awaiting_download_batches(network, "optimistic")?; + self.attempt_send_awaiting_download_batches(network, "request_batches_optimistic")?; } return Ok(KeepChain); } - // Try to force requesting the `processing_batch` to progress sync + // find the next pending batch and request it from the peer + // Note: for this function to not infinite loop we must: + // - If `include_next_batch` returns Some we MUST increase the count of batches that are + // accounted in the `BACKFILL_BATCH_BUFFER_SIZE` limit in the `matches!` statement of + // that function. 
+ while let Some(batch_id) = self.include_next_batch(network) { + // send the batch + self.send_batch(network, batch_id)?; + } + + // Force requesting the `processing_batch` to progress sync if required if !self.batches.contains_key(&self.processing_target) { - debug!(?self.to_be_downloaded, ?self.processing_target,"Processing start in request_batches"); + debug!(?self.processing_target,"Forcing requesting processing_target to progress sync"); if !self.good_peers_on_sampling_subnets(self.processing_target, network) { - debug!("Waiting for peers to be available on sampling column subnets"); + debug!( + src = "request_batches_processing", + "Waiting for peers to be available on sampling column subnets" + ); return Ok(KeepChain); } if let Entry::Vacant(entry) = self.batches.entry(self.processing_target) { - debug!(?self.to_be_downloaded, ?self.processing_target,"Vacant entry in request_batches for processing"); let batch_type = network.batch_type(self.processing_target); let processing_batch = BatchInfo::new(&self.processing_target, EPOCHS_PER_BATCH, batch_type); entry.insert(processing_batch); self.send_batch(network, self.processing_target)?; } else { - debug!(?self.to_be_downloaded, ?self.processing_target,"Not vacant in request_batches processing"); - self.attempt_send_awaiting_download_batches(network, "optimistic")?; + self.attempt_send_awaiting_download_batches(network, "request_batches_processing")?; } - return Ok(KeepChain); - } - - // find the next pending batch and request it from the peer - // Note: for this function to not infinite loop we must: - // - If `include_next_batch` returns Some we MUST increase the count of batches that are - // accounted in the `BACKFILL_BATCH_BUFFER_SIZE` limit in the `matches!` statement of - // that function. - while let Some(batch_id) = self.include_next_batch(network) { - // send the batch - debug!(?self.to_be_downloaded, ?self.processing_target,"Got a batch to send"); - self.send_batch(network, batch_id)?; } // No more batches, simply stop @@ -1232,7 +1248,6 @@ impl SyncingChain { /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { - debug!(?self.to_be_downloaded, ?self.processing_target,"In include next batch"); // don't request batches beyond the target head slot if self .to_be_downloaded @@ -1251,29 +1266,14 @@ impl SyncingChain { BatchState::Downloading(..) | BatchState::AwaitingProcessing(..) ) }; - let in_buffer_batches: Vec<_> = self + if self .batches .iter() .filter(|&(_epoch, batch)| in_buffer(batch)) - .collect(); - - if in_buffer_batches.len() > BATCH_BUFFER_SIZE as usize { - // Force the request to avoid stalling the chain if the batch to be downloaded is less - // than all batches sitting inside the buffer awaiting downloaded/processing. - let should_force_request = in_buffer_batches - .iter() - .all(|(epoch, _)| **epoch > self.to_be_downloaded); - debug!( - ?in_buffer_batches, - ?self.to_be_downloaded, - ?self.processing_target, - ?self.optimistic_start, - should_force_request, - "Batch buffer full, not able to make new requests" - ); - if !should_force_request { - return None; - } + .count() + > BATCH_BUFFER_SIZE as usize + { + return None; } // don't send batch requests until we have peers on sampling subnets @@ -1281,7 +1281,10 @@ impl SyncingChain { // block and data column requests are currently coupled. 
This can be removed once we find a // way to decouple the requests and do retries individually, see issue #6258. if !self.good_peers_on_sampling_subnets(self.to_be_downloaded, network) { - debug!("Waiting for peers to be available on custody column subnets"); + debug!( + src = "include_next_batch", + "Waiting for peers to be available on custody column subnets" + ); return None; } From e0d8f047ec13ca55c81be2b48d6b5f14bb53d6d1 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 4 Sep 2025 18:09:07 -0700 Subject: [PATCH 27/49] Tests compile --- .../lighthouse_network/src/peer_manager/peerdb.rs | 2 +- .../lighthouse_network/src/service/api_types.rs | 1 + .../network/src/sync/block_sidecar_coupling.rs | 11 +++++++++-- beacon_node/network/src/sync/manager.rs | 8 +++----- beacon_node/network/src/sync/network_context.rs | 15 ++++++++------- .../network/src/sync/network_context/custody.rs | 14 +++++++------- beacon_node/network/src/sync/range_sync/chain.rs | 5 ++--- 7 files changed, 31 insertions(+), 25 deletions(-) diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs index 17c070c3d70..3e5b637220b 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs @@ -328,7 +328,7 @@ impl PeerDB { } /// Returns an iterator of all good gossipsub peers that are supposed to be custodying - /// the given subnet id. + /// the given subnet id and have the epoch according to their status messages. pub fn good_custody_subnet_peer_range_sync( &self, subnet: DataColumnSubnetId, diff --git a/beacon_node/lighthouse_network/src/service/api_types.rs b/beacon_node/lighthouse_network/src/service/api_types.rs index 65a6cf61c5d..645ab69ce50 100644 --- a/beacon_node/lighthouse_network/src/service/api_types.rs +++ b/beacon_node/lighthouse_network/src/service/api_types.rs @@ -283,6 +283,7 @@ mod tests { lookup_id: 101, }), }), + peer: PeerId::random(), }; assert_eq!(format!("{id}"), "123/Custody/121/Lookup/101"); } diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 59c0ebc81d2..6c453547093 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -99,7 +99,7 @@ impl RangeBlockComponentsRequest { /// * `blobs_req_id` - Optional request ID for blobs (pre-Fulu fork) /// * `data_columns` - Optional tuple of (request_id->column_indices pairs, expected_custody_columns) for Fulu fork /// * `request_columns_by_root` - Creates an uninitialized `RangeBlockDataRequest::DataColumnsFromRoot` variant if this is true. - /// Note: this is only relevant is `data_columns == None`. + /// Note: this is only relevant is `data_columns == None`. 
#[allow(clippy::type_complexity)] pub fn new( blocks_req_id: BlocksByRangeRequestId, @@ -698,6 +698,7 @@ mod tests { BlocksByRangeRequestId { id: 1, parent_request_id, + peer_id: PeerId::random(), } } @@ -738,7 +739,7 @@ mod tests { let blocks_req_id = blocks_id(components_id()); let mut info = - RangeBlockComponentsRequest::::new(blocks_req_id, None, None, Span::none()); + RangeBlockComponentsRequest::::new(blocks_req_id, None, None, None, Span::none()); // Send blocks and complete terminate response info.add_blocks(blocks_req_id, blocks).unwrap(); @@ -772,6 +773,7 @@ mod tests { blocks_req_id, Some(blobs_req_id), None, + None, Span::none(), ); @@ -813,6 +815,7 @@ mod tests { blocks_req_id, None, Some((columns_req_id.clone(), expects_custody_columns.clone())), + None, Span::none(), ); // Send blocks and complete terminate response @@ -873,6 +876,7 @@ mod tests { blocks_req_id, None, Some((columns_req_id.clone(), expects_custody_columns.clone())), + None, Span::none(), ); @@ -953,6 +957,7 @@ mod tests { blocks_req_id, None, Some((columns_req_id.clone(), expected_custody_columns.clone())), + None, Span::none(), ); @@ -1033,6 +1038,7 @@ mod tests { blocks_req_id, None, Some((columns_req_id.clone(), expected_custody_columns.clone())), + None, Span::none(), ); @@ -1115,6 +1121,7 @@ mod tests { blocks_req_id, None, Some((columns_req_id.clone(), expected_custody_columns.clone())), + None, Span::none(), ); diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 3bda91ad1c5..2b376402a19 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -1134,13 +1134,11 @@ impl SyncManager { if let Some(resp) = self.network .on_data_columns_by_root_response(req_id, peer_id, data_column) - { - if let Some(result) = self + && let Some(result) = self .network .on_custody_by_root_response(custody_id, req_id, peer_id, resp) - { - self.on_custody_by_root_result(custody_id.requester, result); - } + { + self.on_custody_by_root_result(custody_id.requester, result); } } DataColumnsByRootRequester::RangeSync { parent } => { diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 139bf54109c..187a81e9b1f 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -573,7 +573,7 @@ impl SyncNetworkContext { for (parent_request, requests) in entries_to_process { let mut data_column_requests = Vec::new(); let requester = DataColumnsByRootRequester::RangeSync { - parent: parent_request.clone(), + parent: parent_request, }; let custody_indices = requests.indices.iter().cloned().collect(); let synced_peers = self @@ -829,7 +829,7 @@ impl SyncNetworkContext { // Create the overall components_by_range request ID before its individual components let id = ComponentsByRangeRequestId { id: self.next_id(), - requester: requester.clone(), + requester, }; let blocks_req_id = self.send_blocks_by_range_request( @@ -940,6 +940,7 @@ impl SyncNetworkContext { /// Received a blocks by range or blobs by range response for a request that couples blocks ' /// and blobs. 
+ #[allow(clippy::type_complexity)] pub fn range_block_component_response( &mut self, id: ComponentsByRangeRequestId, @@ -1766,7 +1767,7 @@ impl SyncNetworkContext { fn request_columns_on_successful_blocks( &mut self, id: BlocksByRangeRequestId, - blocks: &Vec>>, + blocks: &[Arc>], ) -> Result<(), RpcResponseError> { let batch_epoch = id.batch_id(); // Return early if no columns are required for this epoch @@ -1790,10 +1791,10 @@ impl SyncNetworkContext { // We have blocks here, check if they need data columns and request them for block in blocks.iter() { // Request columns only if the blob_kzg_commitments is non-empty - if let Ok(commitments) = block.message().body().blob_kzg_commitments() { - if !commitments.is_empty() { - block_roots.push(block.canonical_root()); - } + if let Ok(commitments) = block.message().body().blob_kzg_commitments() + && !commitments.is_empty() + { + block_roots.push(block.canonical_root()); } } diff --git a/beacon_node/network/src/sync/network_context/custody.rs b/beacon_node/network/src/sync/network_context/custody.rs index d973e83cea7..337fde619ac 100644 --- a/beacon_node/network/src/sync/network_context/custody.rs +++ b/beacon_node/network/src/sync/network_context/custody.rs @@ -5,7 +5,7 @@ use beacon_chain::BeaconChainTypes; use beacon_chain::validator_monitor::timestamp_now; use fnv::FnvHashMap; use lighthouse_network::PeerId; -use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester}; +use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester, Id}; use lighthouse_tracing::SPAN_OUTGOING_CUSTODY_REQUEST; use lru_cache::LRUTimeCache; use parking_lot::RwLock; @@ -50,8 +50,8 @@ pub enum Error { /// There should only exist a single request at a time. Having multiple requests is a bug and /// can result in undefined state, so it's treated as a hard error and the lookup is dropped. 
UnexpectedRequestId { - expected_req_id: DataColumnsByRootRequestId, - req_id: DataColumnsByRootRequestId, + expected_req_id: Id, + req_id: Id, }, } @@ -401,8 +401,8 @@ impl ColumnRequest { Status::Downloading(expected_req_id) => { if req_id != *expected_req_id { return Err(Error::UnexpectedRequestId { - expected_req_id: *expected_req_id, - req_id, + expected_req_id: expected_req_id.id, + req_id: req_id.id, }); } self.status = Status::NotStarted(Instant::now()); @@ -434,8 +434,8 @@ impl ColumnRequest { Status::Downloading(expected_req_id) => { if req_id != *expected_req_id { return Err(Error::UnexpectedRequestId { - expected_req_id: *expected_req_id, - req_id, + expected_req_id: expected_req_id.id, + req_id: req_id.id, }); } self.status = Status::Downloaded(peer_id, data_column, seen_timestamp); diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 821dcf9538e..e762bfb55f0 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -1225,7 +1225,7 @@ impl SyncingChain { ) -> bool { if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) { // Require peers on all sampling column subnets before sending batches - let peers_on_all_custody_subnets = network + network .network_globals() .sampling_subnets() .iter() @@ -1238,8 +1238,7 @@ impl SyncingChain { .count(); peer_count > 0 - }); - peers_on_all_custody_subnets + }) } else { true } From 6a2a33d459a54ffa94c9d0e09be5c04e767167dc Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Fri, 5 Sep 2025 12:48:05 -0700 Subject: [PATCH 28/49] Fix some issues from review --- .../network/src/sync/backfill_sync/mod.rs | 2 +- .../src/sync/block_sidecar_coupling.rs | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index 8c93b824244..dc27892092f 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -617,7 +617,7 @@ impl BackFillSync { faulty_component, } => { let Some(responsible_peers) = batch.responsible_peers() else { - crit!("Shouldn't happen"); + error!(?batch_id, "Responsible peers not found for a failed batch"); return self .fail_sync(BackFillError::BatchProcessingFailed(batch_id)) .map(|_| ProcessResult::Successful); diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 6c453547093..c155609e81e 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -72,7 +72,7 @@ enum RangeBlockDataRequest { // Indicates if we have made column requests for each of the `expected_custody_columns` or not all_requests_made: bool, /// The column indices corresponding to the request - column_peers: HashMap>, + request_to_column_indices: HashMap>, expected_custody_columns: HashSet, attempt: usize, }, @@ -130,7 +130,7 @@ impl RangeBlockComponentsRequest { requests: HashMap::new(), all_requests_made: false, attempt: 0, - column_peers: HashMap::new(), + request_to_column_indices: HashMap::new(), expected_custody_columns, } } else { @@ -155,7 +155,10 @@ impl RangeBlockComponentsRequest { .iter() .map(|(k, v)| (k.peer, v.clone())) .collect(), - RangeBlockDataRequest::DataColumnsFromRoot { column_peers, .. 
} => column_peers + RangeBlockDataRequest::DataColumnsFromRoot { + request_to_column_indices: column_peers, + .. + } => column_peers .iter() .map(|(k, v)| (k.peer, v.clone())) .collect(), @@ -200,17 +203,17 @@ impl RangeBlockComponentsRequest { requests, attempt: _, all_requests_made, - column_peers, + request_to_column_indices, expected_custody_columns, } => { - for (request, peers) in column_requests { + for (request, indices) in column_requests { requests.insert(request, ByRangeRequest::Active(request)); - column_peers.insert(request, peers); + request_to_column_indices.insert(request, indices); } if !*all_requests_made { let mut all_columns_requested = HashSet::new(); - for columns in column_peers.values() { + for columns in request_to_column_indices.values() { all_columns_requested.extend(columns.iter()); } *all_requests_made = all_columns_requested == *expected_custody_columns; @@ -414,7 +417,7 @@ impl RangeBlockComponentsRequest { RangeBlockDataRequest::DataColumnsFromRoot { all_requests_made, attempt, - column_peers, + request_to_column_indices, expected_custody_columns, requests, } => { @@ -439,7 +442,7 @@ impl RangeBlockComponentsRequest { // Note: this assumes that only 1 peer is responsible for a column // with a batch. - for (id, columns) in column_peers { + for (id, columns) in request_to_column_indices.iter() { for column in columns { column_to_peer_id.insert(*column, id.peer); } @@ -467,6 +470,7 @@ impl RangeBlockComponentsRequest { // delete it from the entries as we are going to make // a separate attempt for those components. requests.retain(|&k, _| k.peer != *peer); + request_to_column_indices.retain(|&k, _| k.peer != *peer); } } From e259ecdf912e1605440ebd7708d6c4d00c22d5b6 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Fri, 5 Sep 2025 12:58:49 -0700 Subject: [PATCH 29/49] More renamings --- .../src/sync/block_sidecar_coupling.rs | 26 +++++++++++-------- .../network/src/sync/network_context.rs | 22 +++++++++------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index c155609e81e..b0c2588e292 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -55,7 +55,7 @@ enum RangeBlockDataRequest { ByRangeRequest>, >, /// The column indices corresponding to the request - column_peers: HashMap>, + request_to_column_indices: HashMap>, expected_custody_columns: Vec, attempt: usize, }, @@ -115,13 +115,13 @@ impl RangeBlockComponentsRequest { let block_data_request = if let Some(blobs_req_id) = blobs_req_id { RangeBlockDataRequest::Blobs(ByRangeRequest::Active(blobs_req_id)) } else if let Some((requests, expected_custody_columns)) = data_columns { - let column_peers: HashMap<_, _> = requests.into_iter().collect(); + let request_to_column_indices: HashMap<_, _> = requests.into_iter().collect(); RangeBlockDataRequest::DataColumns { - requests: column_peers + requests: request_to_column_indices .keys() .map(|id| (*id, ByRangeRequest::Active(*id))) .collect(), - column_peers, + request_to_column_indices, expected_custody_columns, attempt: 0, } @@ -151,14 +151,17 @@ impl RangeBlockComponentsRequest { block_blob: self.block_peer, data_columns: match &self.block_data_request { RangeBlockDataRequest::NoData | RangeBlockDataRequest::Blobs(_) => HashMap::new(), - RangeBlockDataRequest::DataColumns { column_peers, .. 
} => column_peers + RangeBlockDataRequest::DataColumns { + request_to_column_indices, + .. + } => request_to_column_indices .iter() .map(|(k, v)| (k.peer, v.clone())) .collect(), RangeBlockDataRequest::DataColumnsFromRoot { - request_to_column_indices: column_peers, + request_to_column_indices, .. - } => column_peers + } => request_to_column_indices .iter() .map(|(k, v)| (k.peer, v.clone())) .collect(), @@ -176,12 +179,12 @@ impl RangeBlockComponentsRequest { RangeBlockDataRequest::DataColumns { requests, expected_custody_columns: _, - column_peers, + request_to_column_indices, attempt: _, } => { for (request, columns) in failed_column_requests.into_iter() { requests.insert(request, ByRangeRequest::Active(request)); - column_peers.insert(request, columns); + request_to_column_indices.insert(request, columns); } Ok(()) } @@ -362,7 +365,7 @@ impl RangeBlockComponentsRequest { RangeBlockDataRequest::DataColumns { requests, expected_custody_columns, - column_peers, + request_to_column_indices, attempt, } => { let mut data_columns = vec![]; @@ -380,7 +383,7 @@ impl RangeBlockComponentsRequest { // Note: this assumes that only 1 peer is responsible for a column // with a batch. - for (id, columns) in column_peers { + for (id, columns) in request_to_column_indices.iter() { for column in columns { column_to_peer_id.insert(*column, id.peer); } @@ -406,6 +409,7 @@ impl RangeBlockComponentsRequest { // delete it from the entries as we are going to make // a separate attempt for those components. requests.retain(|&k, _| k.peer != *peer); + request_to_column_indices.retain(|&k, _| k.peer != *peer); } } diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 187a81e9b1f..c4ba20a2ccb 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -225,13 +225,13 @@ pub struct SyncNetworkContext { components_by_range_requests: FnvHashMap>, - // todo(pawan): make this a bounded queue, make the types better, add better docs // A hashmap with the key being the parent request and the value being the data column by root // requests that we have to retry because of one of the following reasons: // 1. The root requests couldn't be made after the parent blocks request because there were no // column peers available // 2. The root request errored (either peer sent an RPC error or an empty response) - requests_to_retry: HashMap, + pending_column_by_root_range_requests: + HashMap, /// Whether the ee is online. If it's not, we don't allow access to the /// `beacon_processor_send`. 
@@ -314,7 +314,7 @@ impl SyncNetworkContext { data_columns_by_range_requests: ActiveRequests::new("data_columns_by_range"), custody_by_root_requests: <_>::default(), components_by_range_requests: FnvHashMap::default(), - requests_to_retry: Default::default(), + pending_column_by_root_range_requests: Default::default(), network_beacon_processor, chain, fork_context, @@ -345,7 +345,7 @@ impl SyncNetworkContext { custody_by_root_requests: _, // components_by_range_requests is a meta request of various _by_range requests components_by_range_requests: _, - requests_to_retry: _, + pending_column_by_root_range_requests: _, execution_engine_state: _, network_beacon_processor: _, chain: _, @@ -452,7 +452,7 @@ impl SyncNetworkContext { components_by_range_requests: _, execution_engine_state: _, network_beacon_processor: _, - requests_to_retry: _, + pending_column_by_root_range_requests: _, chain: _, fork_context: _, // Don't use a fallback match. We want to be sure that all requests are considered when @@ -567,7 +567,8 @@ impl SyncNetworkContext { let active_requests = self.active_request_count_by_peer(); // Collect entries to process and remove from requests_to_retry - let entries_to_process: Vec<_> = self.requests_to_retry.drain().collect(); + let entries_to_process: Vec<_> = + self.pending_column_by_root_range_requests.drain().collect(); let mut entries_to_keep = Vec::new(); for (parent_request, requests) in entries_to_process { @@ -637,7 +638,8 @@ impl SyncNetworkContext { } // Re-insert entries that still need to be retried - self.requests_to_retry.extend(entries_to_keep); + self.pending_column_by_root_range_requests + .extend(entries_to_keep); Ok(()) } @@ -1820,12 +1822,12 @@ impl SyncNetworkContext { let mut peer_to_columns: HashMap> = HashMap::new(); let mut no_peers_for_column: Vec = Vec::new(); for column in self.chain.sampling_columns_for_epoch(batch_epoch).iter() { - let data_column = DataColumnSubnetId::new(*column); + let subnet_id = DataColumnSubnetId::new(*column); if let Some(custody_peer) = self .network_globals() .peers .read() - .good_custody_subnet_peer_range_sync(data_column, batch_epoch) + .good_custody_subnet_peer_range_sync(subnet_id, batch_epoch) .next() { peer_to_columns @@ -1878,7 +1880,7 @@ impl SyncNetworkContext { indices: no_peers_for_column, }; - self.requests_to_retry + self.pending_column_by_root_range_requests .insert(id.parent_request_id, data_columns_by_root_request); } From 04398ad267e2cc304e4bc291b32d43305cde6c1c Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Mon, 8 Sep 2025 15:37:26 -0700 Subject: [PATCH 30/49] Fix some more issues from review --- beacon_node/network/src/sync/manager.rs | 2 +- .../network/src/sync/network_context.rs | 6 +++--- .../network/src/sync/range_sync/batch.rs | 2 +- .../network/src/sync/range_sync/chain.rs | 18 +++++------------- 4 files changed, 10 insertions(+), 18 deletions(-) diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 2b376402a19..c68c506b8bb 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -415,7 +415,7 @@ impl SyncManager { } // Try to make range requests that we failed to make because of lack of peers. - let _ = self.network.retry_pending_requests(); + let _ = self.network.retry_pending_root_range_requests(); } /// Trigger range sync for a set of peers that claim to have imported a head unknown to us. 
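The map renamed above holds columns-by-root requests that could not be sent yet, either because no custody peer was available once the parent blocks arrived or because an earlier attempt errored; `retry_pending_root_range_requests` drains it and re-attempts whatever now has a peer, driven from the sync manager as the hunk above shows. A minimal sketch of that defer-and-retry pattern, assuming illustrative stand-in types (`ParentRequestId`, `PendingColumnsByRoot`, the `has_peer_for` and `send` closures) rather than Lighthouse's real request IDs:

use std::collections::HashMap;

type ParentRequestId = u32;

#[derive(Debug)]
struct PendingColumnsByRoot {
    // Roots of the blocks whose columns are still needed.
    block_roots: Vec<[u8; 32]>,
    // Column indices that still lack a custody peer.
    indices: Vec<u64>,
}

#[derive(Default)]
struct RetryQueue {
    pending: HashMap<ParentRequestId, PendingColumnsByRoot>,
}

impl RetryQueue {
    // Park a request that could not be sent because no custody peer was available.
    fn defer(&mut self, parent: ParentRequestId, request: PendingColumnsByRoot) {
        self.pending.insert(parent, request);
    }

    // Drain the queue, send whatever now has a peer, and keep the rest queued.
    // `has_peer_for` stands in for the custody-peer lookup; `send` for the RPC call.
    fn retry(
        &mut self,
        has_peer_for: impl Fn(u64) -> bool,
        mut send: impl FnMut(ParentRequestId, &[[u8; 32]], u64),
    ) {
        let entries: Vec<_> = self.pending.drain().collect();
        for (parent, mut request) in entries {
            let roots = request.block_roots.clone();
            request.indices.retain(|&index| {
                if has_peer_for(index) {
                    send(parent, roots.as_slice(), index);
                    false // sent, drop from the pending set
                } else {
                    true // still no peer, keep it queued
                }
            });
            if !request.indices.is_empty() {
                self.pending.insert(parent, request);
            }
        }
    }
}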
diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index c4ba20a2ccb..999fafcff8c 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -34,6 +34,7 @@ use lighthouse_network::service::api_types::{ use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource}; use lighthouse_tracing::SPAN_OUTGOING_RANGE_REQUEST; use parking_lot::RwLock; +use rand::seq::IteratorRandom; pub use requests::LookupVerifyError; use requests::{ ActiveRequests, BlobsByRangeRequestItems, BlobsByRootRequestItems, BlocksByRangeRequestItems, @@ -563,7 +564,7 @@ impl SyncNetworkContext { /// /// This function must be manually invoked at regular intervals or when a new peer /// gets added. - pub fn retry_pending_requests(&mut self) -> Result<(), String> { + pub fn retry_pending_root_range_requests(&mut self) -> Result<(), String> { let active_requests = self.active_request_count_by_peer(); // Collect entries to process and remove from requests_to_retry @@ -1827,8 +1828,7 @@ impl SyncNetworkContext { .network_globals() .peers .read() - .good_custody_subnet_peer_range_sync(subnet_id, batch_epoch) - .next() + .good_custody_subnet_peer_range_sync(subnet_id, batch_epoch).choose(&mut rand::rng()) { peer_to_columns .entry(*custody_peer) diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index fb7689ed392..695262523a5 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -146,7 +146,7 @@ pub enum BatchState { /// The batch has been completely downloaded and is ready for processing. AwaitingProcessing(ResponsiblePeers, Vec>, Instant), /// The batch is being processed. - Processing(Attempt, ResponsiblePeers), // todo(pawan): attempt contains the peer, remove that + Processing(Attempt, ResponsiblePeers), /// The batch was successfully processed and is waiting to be validated. /// /// It is not sufficient to process a batch successfully to consider it correct. 
This is diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index e762bfb55f0..8006a501de7 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -990,7 +990,7 @@ impl SyncingChain { .collect(); debug!( ?awaiting_downloads, - src, "Attempting to send batches awaiting downlaod" + src, "Attempting to send batches awaiting download" ); for batch_id in awaiting_downloads { @@ -1225,20 +1225,12 @@ impl SyncingChain { ) -> bool { if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) { // Require peers on all sampling column subnets before sending batches + let sampling_subnets = network.network_globals().sampling_subnets(); network .network_globals() - .sampling_subnets() - .iter() - .all(|subnet_id| { - let peer_count = network - .network_globals() - .peers - .read() - .good_custody_subnet_peer_range_sync(*subnet_id, epoch) - .count(); - - peer_count > 0 - }) + .peers + .read() + .has_good_custody_range_sync_peer(&sampling_subnets, epoch) } else { true } From bf09d57e29a45409172b5e7cd267785a06544e59 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 11 Sep 2025 15:06:07 -0700 Subject: [PATCH 31/49] Fix some issues from lion's review --- .../network/src/sync/backfill_sync/mod.rs | 16 +++--- .../src/sync/block_sidecar_coupling.rs | 10 ++-- .../network/src/sync/network_context.rs | 4 +- .../network/src/sync/range_sync/batch.rs | 41 +++++++------- .../network/src/sync/range_sync/chain.rs | 53 ++++++------------- .../network/src/sync/range_sync/mod.rs | 2 +- .../network/src/sync/range_sync/range.rs | 10 ++-- 7 files changed, 56 insertions(+), 80 deletions(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index dc27892092f..e953244976b 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -16,7 +16,7 @@ use crate::sync::network_context::{ }; use crate::sync::range_sync::{ BatchConfig, BatchId, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState, - ResponsiblePeers, + BatchPeers, }; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes}; @@ -383,7 +383,7 @@ impl BackFillSync { batch_id: BatchId, request_id: Id, blocks: Vec>, - responsible_peers: ResponsiblePeers, + batch_peers: BatchPeers, ) -> Result { // check if we have this batch let Some(batch) = self.batches.get_mut(&batch_id) else { @@ -402,7 +402,7 @@ impl BackFillSync { return Ok(ProcessResult::Successful); } - match batch.download_completed(blocks, responsible_peers) { + match batch.download_completed(blocks, batch_peers) { Ok(received) => { let awaiting_batches = self.processing_target.saturating_sub(batch_id) / BACKFILL_EPOCHS_PER_BATCH; @@ -558,7 +558,7 @@ impl BackFillSync { } }; - let Some(responsible_peers) = batch.processing_peers() else { + let Some(batch_peers) = batch.processing_peers() else { self.fail_sync(BackFillError::BatchInvalidState( batch_id, String::from("Peer does not exist"), @@ -570,7 +570,7 @@ impl BackFillSync { ?result, %batch, batch_epoch = %batch_id, - ?responsible_peers, + ?batch_peers, // client = %network.client_type(peer), "Backfill batch processed" ); @@ -616,7 +616,7 @@ impl BackFillSync { penalty, faulty_component, } => { - let Some(responsible_peers) = batch.responsible_peers() else { + let Some(batch_peers) = batch.processing_peers() else { error!(?batch_id, "Responsible 
peers not found for a failed batch"); return self .fail_sync(BackFillError::BatchProcessingFailed(batch_id)) @@ -625,11 +625,11 @@ impl BackFillSync { // Penalize the peer appropriately. match faulty_component { Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { - network.report_peer(responsible_peers.block_blob, *penalty, "faulty_batch"); + network.report_peer(batch_peers.block_and_blob, *penalty, "faulty_batch"); } // todo(pawan): clean this up Some(FaultyComponent::Columns(faulty_columns)) => { - for (peer, columns) in responsible_peers.data_columns.iter() { + for (peer, columns) in batch_peers.data_columns.iter() { for faulty_column in faulty_columns { if columns.contains(faulty_column) { network.report_peer(*peer, *penalty, "faulty_batch"); diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index b0c2588e292..6a1c162e674 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -1,5 +1,5 @@ use crate::sync::network_context::MAX_COLUMN_RETRIES; -use crate::sync::range_sync::ResponsiblePeers; +use crate::sync::range_sync::BatchPeers; use beacon_chain::{ block_verification_types::RpcBlock, data_column_verification::CustodyDataColumn, get_block_root, }; @@ -146,9 +146,9 @@ impl RangeBlockComponentsRequest { } /// Returns the peers that we requested the blocks, blobs and columns for this component. - pub fn responsible_peers(&self) -> ResponsiblePeers { - ResponsiblePeers { - block_blob: self.block_peer, + pub fn responsible_peers(&self) -> BatchPeers { + BatchPeers { + block_and_blob: self.block_peer, data_columns: match &self.block_data_request { RangeBlockDataRequest::NoData | RangeBlockDataRequest::Blobs(_) => HashMap::new(), RangeBlockDataRequest::DataColumns { @@ -224,7 +224,7 @@ impl RangeBlockComponentsRequest { Ok(()) } - _ => Err("Invalid initialization".to_string()), + _ => Err("Invalid state: expected DataColumnsFromRoot".to_string()), } } diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 999fafcff8c..840e6ab6ae2 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -18,7 +18,7 @@ use crate::sync::block_sidecar_coupling::CouplingError; use crate::sync::network_context::requests::{ BlobsByRootSingleBlockRequest, DataColumnsByRootRangeRequestItems, }; -use crate::sync::range_sync::ResponsiblePeers; +use crate::sync::range_sync::BatchPeers; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState}; use custody::CustodyRequestResult; @@ -950,7 +950,7 @@ impl SyncNetworkContext { range_block_component: RangeBlockComponent, ) -> Option<( Result>, RpcResponseError>, - ResponsiblePeers, + BatchPeers, )> { let Entry::Occupied(mut entry) = self.components_by_range_requests.entry(id) else { metrics::inc_counter_vec(&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS, &["range_blocks"]); diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 695262523a5..d2fa0d4eb96 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -131,8 +131,10 @@ impl fmt::Display for BatchInfo { /// /// This is used for penalizing in case of invalid batches. 
#[derive(Debug, Clone)] -pub struct ResponsiblePeers { - pub block_blob: PeerId, +pub struct BatchPeers { + /// Note: we send the blob request to the same peer as the block request + /// Hence, block and blob peers would be the same. + pub block_and_blob: PeerId, pub data_columns: HashMap>, } @@ -144,9 +146,9 @@ pub enum BatchState { /// The batch is being downloaded. Downloading(Id), /// The batch has been completely downloaded and is ready for processing. - AwaitingProcessing(ResponsiblePeers, Vec>, Instant), + AwaitingProcessing(BatchPeers, Vec>, Instant), /// The batch is being processed. - Processing(Attempt, ResponsiblePeers), + Processing(Attempt, BatchPeers), /// The batch was successfully processed and is waiting to be validated. /// /// It is not sufficient to process a batch successfully to consider it correct. This is @@ -223,7 +225,7 @@ impl BatchInfo { } /// Returns the peers that are currently responsible for progressing the state of the batch. - pub fn processing_peers(&self) -> Option<&ResponsiblePeers> { + pub fn processing_peers(&self) -> Option<&BatchPeers> { match &self.state { BatchState::AwaitingDownload | BatchState::Failed @@ -287,7 +289,7 @@ impl BatchInfo { pub fn download_completed( &mut self, blocks: Vec>, - responsible_peers: ResponsiblePeers, + responsible_peers: BatchPeers, ) -> Result { match self.state.poison() { BatchState::Downloading(_) => { @@ -364,7 +366,7 @@ impl BatchInfo { match self.state.poison() { BatchState::AwaitingProcessing(responsible_peers, blocks, start_instant) => { self.state = BatchState::Processing( - Attempt::new::(responsible_peers.block_blob, &blocks), + Attempt::new::(responsible_peers.block_and_blob, &blocks), responsible_peers, ); Ok((blocks, start_instant.elapsed())) @@ -380,17 +382,6 @@ impl BatchInfo { } } - pub fn responsible_peers(&self) -> Option<&ResponsiblePeers> { - match &self.state { - BatchState::AwaitingDownload - | BatchState::Failed - | BatchState::Poisoned - | BatchState::Downloading(_) - | BatchState::AwaitingValidation(_) => None, - BatchState::AwaitingProcessing(r, _, _) | BatchState::Processing(_, r) => Some(r), - } - } - pub fn processing_completed( &mut self, processing_result: BatchProcessingResult, @@ -486,19 +477,23 @@ impl Attempt { impl std::fmt::Debug for BatchState { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - BatchState::Processing(Attempt { peer_id, hash: _ }, responsible_peers) => { - write!(f, "Processing({}) {:?}", peer_id, responsible_peers) + BatchState::Processing(Attempt { peer_id, hash: _ }, batch_peers) => { + write!( + f, + "Processing({}) {}", + peer_id, batch_peers.block_and_blob + ) } BatchState::AwaitingValidation(Attempt { peer_id, hash: _ }) => { write!(f, "AwaitingValidation({})", peer_id) } BatchState::AwaitingDownload => f.write_str("AwaitingDownload"), BatchState::Failed => f.write_str("Failed"), - BatchState::AwaitingProcessing(responsible_peers, blocks, _) => { + BatchState::AwaitingProcessing(batch_peers, blocks, _) => { write!( f, - "AwaitingProcessing({:?}, {:?} blocks)", - responsible_peers, + "AwaitingProcessing({}, {:?} blocks)", + batch_peers.block_and_blob, blocks.len() ) } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 8006a501de7..9d954c0dc72 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -5,7 +5,7 @@ use crate::network_beacon_processor::ChainSegmentProcessId; use 
crate::sync::block_sidecar_coupling::CouplingError; use crate::sync::manager::FaultyComponent; use crate::sync::network_context::{RangeRequestId, RpcRequestSendError, RpcResponseError}; -use crate::sync::range_sync::batch::ResponsiblePeers; +use crate::sync::range_sync::batch::BatchPeers; use crate::sync::{BatchOperationOutcome, BatchProcessResult, network_context::SyncNetworkContext}; use beacon_chain::BeaconChainTypes; use beacon_chain::block_verification_types::RpcBlock; @@ -225,10 +225,10 @@ impl SyncingChain { pub fn on_block_response( &mut self, network: &mut SyncNetworkContext, + batch_peers: BatchPeers, batch_id: BatchId, request_id: Id, blocks: Vec>, - responsible_peers: ResponsiblePeers, ) -> ProcessingResult { let _guard = self.span.clone().entered(); // check if we have this batch @@ -255,8 +255,7 @@ impl SyncingChain { // A stream termination has been sent. This batch has ended. Process a completed batch. // Remove the request from the peer's active batches - // TODO(das): should use peer group here https://github.com/sigp/lighthouse/issues/6258 - let received = batch.download_completed(blocks, responsible_peers.clone())?; + let received = batch.download_completed(blocks, batch_peers.clone())?; let awaiting_batches = batch_id .saturating_sub(self.optimistic_start.unwrap_or(self.processing_target)) / EPOCHS_PER_BATCH; @@ -265,7 +264,7 @@ impl SyncingChain { blocks = received, batch_state = self.visualize_batch_state(), %awaiting_batches, - ?responsible_peers, + ?batch_peers, "Batch downloaded" ); @@ -548,7 +547,7 @@ impl SyncingChain { penalty, faulty_component, } => { - let Some(responsible_peers) = batch.responsible_peers() else { + let Some(batch_peers) = batch.processing_peers() else { crit!( current_state = ?batch.state(), "Inconsistent state, batch must have been in processing state" @@ -561,10 +560,10 @@ impl SyncingChain { // Penalize the peer appropriately. match faulty_component { Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { - network.report_peer(responsible_peers.block_blob, *penalty, "faulty_batch"); + network.report_peer(batch_peers.block_and_blob, *penalty, "faulty_batch"); } Some(FaultyComponent::Columns(faulty_columns)) => { - for (peer, columns) in responsible_peers.data_columns.iter() { + for (peer, columns) in batch_peers.data_columns.iter() { for faulty_column in faulty_columns { if columns.contains(faulty_column) { network.report_peer(*peer, *penalty, "faulty_batch"); @@ -879,10 +878,10 @@ impl SyncingChain { pub fn inject_error( &mut self, network: &mut SyncNetworkContext, + batch_peers: BatchPeers, batch_id: BatchId, request_id: Id, err: RpcResponseError, - responsible_peers: ResponsiblePeers, ) -> ProcessingResult { let _guard = self.span.clone().entered(); let batch_state = self.visualize_batch_state(); @@ -934,7 +933,7 @@ impl SyncingChain { debug!( batch_epoch = %batch_id, batch_state = ?batch.state(), - ?responsible_peers, + ?batch_peers, %request_id, ?batch_state, "Batch not expecting block" @@ -945,12 +944,12 @@ impl SyncingChain { batch_epoch = %batch_id, batch_state = ?batch.state(), error = ?err, - ?responsible_peers, + ?batch_peers, %request_id, "Batch download error" ); if let BatchOperationOutcome::Failed { blacklist } = - batch.download_failed(Some(responsible_peers.block_blob))? + batch.download_failed(Some(batch_peers.block_and_blob))? 
{ return Err(RemoveChain::ChainFailed { blacklist, @@ -961,7 +960,7 @@ impl SyncingChain { } else { debug!( batch_epoch = %batch_id, - ?responsible_peers, + ?batch_peers, %request_id, batch_state, "Batch not found" @@ -1078,7 +1077,7 @@ impl SyncingChain { }, } } else { - debug!(?self.to_be_downloaded, ?self.processing_target,"Did not get batch"); + debug!(?self.to_be_downloaded, ?self.processing_target, "Did not get batch"); } Ok(KeepChain) @@ -1144,6 +1143,10 @@ impl SyncingChain { network: &mut SyncNetworkContext, ) -> Result { let _guard = self.span.clone().entered(); + debug!("Resuming chain"); + // attempt to download any batches stuck in the `AwaitingDownload` state because of + // a lack of peers earlier + self.attempt_send_awaiting_download_batches(network, "resume")?; // Request more batches if needed. self.request_batches(network)?; // If there is any batch ready for processing, send it. @@ -1190,28 +1193,6 @@ impl SyncingChain { self.send_batch(network, batch_id)?; } - // Force requesting the `processing_batch` to progress sync if required - if !self.batches.contains_key(&self.processing_target) { - debug!(?self.processing_target,"Forcing requesting processing_target to progress sync"); - if !self.good_peers_on_sampling_subnets(self.processing_target, network) { - debug!( - src = "request_batches_processing", - "Waiting for peers to be available on sampling column subnets" - ); - return Ok(KeepChain); - } - - if let Entry::Vacant(entry) = self.batches.entry(self.processing_target) { - let batch_type = network.batch_type(self.processing_target); - let processing_batch = - BatchInfo::new(&self.processing_target, EPOCHS_PER_BATCH, batch_type); - entry.insert(processing_batch); - self.send_batch(network, self.processing_target)?; - } else { - self.attempt_send_awaiting_download_batches(network, "request_batches_processing")?; - } - } - // No more batches, simply stop Ok(KeepChain) } diff --git a/beacon_node/network/src/sync/range_sync/mod.rs b/beacon_node/network/src/sync/range_sync/mod.rs index 04b622cb42f..265840166ca 100644 --- a/beacon_node/network/src/sync/range_sync/mod.rs +++ b/beacon_node/network/src/sync/range_sync/mod.rs @@ -9,7 +9,7 @@ mod sync_type; pub use batch::{ BatchConfig, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState, - ByRangeRequestType, ResponsiblePeers, + ByRangeRequestType, BatchPeers, }; pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH}; #[cfg(test)] diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index 703164d6874..8f18f4e8f06 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -46,7 +46,7 @@ use crate::metrics; use crate::status::ToStatusMessage; use crate::sync::BatchProcessResult; use crate::sync::network_context::{RpcResponseError, SyncNetworkContext}; -use crate::sync::range_sync::ResponsiblePeers; +use crate::sync::range_sync::BatchPeers; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes}; use lighthouse_network::rpc::GoodbyeReason; @@ -204,7 +204,7 @@ where pub fn blocks_by_range_response( &mut self, network: &mut SyncNetworkContext, - responsible_peers: ResponsiblePeers, + batch_peers: BatchPeers, chain_id: ChainId, batch_id: BatchId, request_id: Id, @@ -212,7 +212,7 @@ where ) { // check if this chunk removes the chain match self.chains.call_by_id(chain_id, |chain| { - chain.on_block_response(network, batch_id, request_id, blocks, 
responsible_peers) + chain.on_block_response(network, batch_peers, batch_id, request_id, blocks) }) { Ok((removed_chain, sync_type)) => { if let Some((removed_chain, remove_reason)) = removed_chain { @@ -296,7 +296,7 @@ where pub fn inject_error( &mut self, network: &mut SyncNetworkContext, - responsible_peers: ResponsiblePeers, + batch_peers: BatchPeers, batch_id: BatchId, chain_id: ChainId, request_id: Id, @@ -304,7 +304,7 @@ where ) { // check that this request is pending match self.chains.call_by_id(chain_id, |chain| { - chain.inject_error(network, batch_id, request_id, err, responsible_peers) + chain.inject_error(network, batch_peers, batch_id, request_id, err) }) { Ok((removed_chain, sync_type)) => { if let Some((removed_chain, remove_reason)) = removed_chain { From cffbd34f3f8c7ea40887e9ff24563a3c687e11be Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 11 Sep 2025 18:19:16 -0700 Subject: [PATCH 32/49] Reduce code duplication --- .../src/service/api_types.rs | 13 ++ .../network/src/sync/backfill_sync/mod.rs | 8 +- .../network/src/sync/network_context.rs | 136 ++++-------------- .../network/src/sync/range_sync/batch.rs | 8 ++ .../network/src/sync/range_sync/chain.rs | 8 +- 5 files changed, 58 insertions(+), 115 deletions(-) diff --git a/beacon_node/lighthouse_network/src/service/api_types.rs b/beacon_node/lighthouse_network/src/service/api_types.rs index 645ab69ce50..efcbcaf9561 100644 --- a/beacon_node/lighthouse_network/src/service/api_types.rs +++ b/beacon_node/lighthouse_network/src/service/api_types.rs @@ -9,6 +9,12 @@ use types::{ pub type Id = u32; +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub enum RangeRequestType { + ForwardSync, + BackfillSync, +} + #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] pub struct SingleLookupReqId { pub lookup_id: Id, @@ -111,6 +117,13 @@ impl RangeRequestId { } => *batch_id, } } + + pub fn batch_type(&self) -> RangeRequestType { + match &self { + RangeRequestId::BackfillSync { .. } => RangeRequestType::BackfillSync, + RangeRequestId::RangeSync { .. } => RangeRequestType::ForwardSync, + } + } } // TODO(das) refactor in a separate PR. 
We might be able to remove this and replace diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index e953244976b..6c94c1821a7 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -15,12 +15,12 @@ use crate::sync::network_context::{ RangeRequestId, RpcRequestSendError, RpcResponseError, SyncNetworkContext, }; use crate::sync::range_sync::{ - BatchConfig, BatchId, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState, - BatchPeers, + BatchConfig, BatchId, BatchInfo, BatchOperationOutcome, BatchPeers, BatchProcessingResult, + BatchState, }; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes}; -use lighthouse_network::service::api_types::Id; +use lighthouse_network::service::api_types::{Id, RangeRequestType}; use lighthouse_network::types::{BackFillState, NetworkGlobals}; use lighthouse_network::{PeerAction, PeerId}; use logging::crit; @@ -1114,7 +1114,7 @@ impl BackFillSync { self.include_next_batch(network) } Entry::Vacant(entry) => { - let batch_type = network.batch_type(batch_id); + let batch_type = network.batch_type(batch_id, RangeRequestType::BackfillSync); entry.insert(BatchInfo::new( &batch_id, BACKFILL_EPOCHS_PER_BATCH, diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 840e6ab6ae2..a68927d811d 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -29,7 +29,7 @@ pub use lighthouse_network::service::api_types::RangeRequestId; use lighthouse_network::service::api_types::{ AppRequestId, BlobsByRangeRequestId, BlocksByRangeRequestId, ComponentsByRangeRequestId, CustodyId, CustodyRequester, DataColumnsByRangeRequestId, DataColumnsByRootRequestId, - DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId, + DataColumnsByRootRequester, Id, RangeRequestType, SingleLookupReqId, SyncRequestId, }; use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource}; use lighthouse_tracing::SPAN_OUTGOING_RANGE_REQUEST; @@ -768,6 +768,17 @@ impl SyncNetworkContext { .transpose()?; let epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch()); + + let data_column_by_root_range_request = + // with this variant, we request columns by root after we receive + // a successful blocks by range response. + if matches!(batch_type, ByRangeRequestType::BlocksAndColumnsSeparate) { + Some(HashSet::from_iter( + self.chain.sampling_columns_for_epoch(epoch).iter().copied(), + )) + } else { + None + }; let info = RangeBlockComponentsRequest::new( blocks_req_id, blobs_req_id, @@ -777,108 +788,7 @@ impl SyncNetworkContext { self.chain.sampling_columns_for_epoch(epoch).to_vec(), ) }), - // We are requesting data columns by range here - None, - range_request_span, - ); - self.components_by_range_requests.insert(id, info); - - Ok(id.id) - } - - /// A blocks by range request sent by the range sync algorithm - /// - /// This function is used when we want to request data columns by root instead of range. - /// Pre-fulu, it works similar to `Self::block_components_by_range_request`. 
- pub fn block_components_by_range_request_without_components( - &mut self, - batch_type: ByRangeRequestType, - request: BlocksByRangeRequest, - requester: RangeRequestId, - peers: &HashSet, - peers_to_deprioritize: &HashSet, - ) -> Result { - let range_request_span = debug_span!( - parent: None, - SPAN_OUTGOING_RANGE_REQUEST, - range_req_id = %requester, - peers = peers.len() - ); - let _guard = range_request_span.clone().entered(); - let active_request_count_by_peer = self.active_request_count_by_peer(); - - let Some(block_peer) = peers - .iter() - .map(|peer| { - ( - // If contains -> 1 (order after), not contains -> 0 (order first) - peers_to_deprioritize.contains(peer), - // Prefer peers with less overall requests - active_request_count_by_peer.get(peer).copied().unwrap_or(0), - // Random factor to break ties, otherwise the PeerID breaks ties - rand::random::(), - peer, - ) - }) - .min() - .map(|(_, _, _, peer)| *peer) - else { - // Backfill and forward sync handle this condition gracefully. - // - Backfill sync: will pause waiting for more peers to join - // - Forward sync: can never happen as the chain is dropped when removing the last peer. - return Err(RpcRequestSendError::NoPeer(NoPeerError::BlockPeer)); - }; - - // Create the overall components_by_range request ID before its individual components - let id = ComponentsByRangeRequestId { - id: self.next_id(), - requester, - }; - - let blocks_req_id = self.send_blocks_by_range_request( - block_peer, - request.clone(), - id, - new_range_request_span!( - self, - "outgoing_blocks_by_range", - range_request_span.clone(), - block_peer - ), - )?; - - let blobs_req_id = if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) { - Some(self.send_blobs_by_range_request( - block_peer, - BlobsByRangeRequest { - start_slot: *request.start_slot(), - count: *request.count(), - }, - id, - new_range_request_span!( - self, - "outgoing_blobs_by_range", - range_request_span.clone(), - block_peer - ), - )?) - } else { - None - }; - - let epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch()); - let info = RangeBlockComponentsRequest::new( - blocks_req_id, - blobs_req_id, - None, - // request data columns by root only if this batch requires requesting columns - if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) { - Some(HashSet::from_iter( - self.chain.sampling_columns_for_epoch(epoch).iter().copied(), - )) - } else { - None - }, + data_column_by_root_range_request, range_request_span, ); self.components_by_range_requests.insert(id, info); @@ -1618,7 +1528,11 @@ impl SyncNetworkContext { /// Check whether a batch for this epoch (and only this epoch) should request just blocks or /// blocks and blobs. - pub fn batch_type(&self, epoch: types::Epoch) -> ByRangeRequestType { + pub fn batch_type( + &self, + epoch: types::Epoch, + request_type: RangeRequestType, + ) -> ByRangeRequestType { // Induces a compile time panic if this doesn't hold true. #[allow(clippy::assertions_on_constants)] const _: () = assert!( @@ -1632,7 +1546,14 @@ impl SyncNetworkContext { .data_availability_checker .data_columns_required_for_epoch(epoch) { - ByRangeRequestType::BlocksAndColumns + match request_type { + // Currently, we download blocks and columns separately when we forward sync as + // requesting columns by root is less ambiguous when there are multiple heads. + // For backfill, since there is just one chain, it makes more sense to download + // blocks and columns together. 
+ RangeRequestType::BackfillSync => ByRangeRequestType::BlocksAndColumns, + RangeRequestType::ForwardSync => ByRangeRequestType::BlocksAndColumnsSeparate, + } } else if self .chain .data_availability_checker @@ -1775,7 +1696,7 @@ impl SyncNetworkContext { let batch_epoch = id.batch_id(); // Return early if no columns are required for this epoch if !matches!( - self.batch_type(batch_epoch), + self.batch_type(batch_epoch, id.parent_request_id.requester.batch_type()), ByRangeRequestType::BlocksAndColumns ) { return Ok(()); @@ -1828,7 +1749,8 @@ impl SyncNetworkContext { .network_globals() .peers .read() - .good_custody_subnet_peer_range_sync(subnet_id, batch_epoch).choose(&mut rand::rng()) + .good_custody_subnet_peer_range_sync(subnet_id, batch_epoch) + .choose(&mut rand::rng()) { peer_to_columns .entry(*custody_peer) diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index d2fa0d4eb96..748cf8ac1e4 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -21,7 +21,15 @@ const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3; #[derive(Debug, Copy, Clone, Display)] #[strum(serialize_all = "snake_case")] pub enum ByRangeRequestType { + /// This variant requests the blocks and columns + /// simaltaneously and then tries to couple the + /// responses. BlocksAndColumns, + /// This variant requests the blocks first using + /// a byrange request and then requests the data columns + /// for the received blocks using the `DataColumnsByRoot` + /// root request. + BlocksAndColumnsSeparate, BlocksAndBlobs, Blocks, } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 9d954c0dc72..894c2756206 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -9,7 +9,7 @@ use crate::sync::range_sync::batch::BatchPeers; use crate::sync::{BatchOperationOutcome, BatchProcessResult, network_context::SyncNetworkContext}; use beacon_chain::BeaconChainTypes; use beacon_chain::block_verification_types::RpcBlock; -use lighthouse_network::service::api_types::Id; +use lighthouse_network::service::api_types::{Id, RangeRequestType}; use lighthouse_network::{PeerAction, PeerId}; use lighthouse_tracing::SPAN_SYNCING_CHAIN; use logging::crit; @@ -1025,7 +1025,7 @@ impl SyncingChain { .cloned() .collect::>(); - match network.block_components_by_range_request_without_components( + match network.block_components_by_range_request( batch_type, request, RangeRequestId::RangeSync { @@ -1173,7 +1173,7 @@ impl SyncingChain { } if let Entry::Vacant(entry) = self.batches.entry(epoch) { - let batch_type = network.batch_type(epoch); + let batch_type = network.batch_type(epoch, RangeRequestType::ForwardSync); let optimistic_batch = BatchInfo::new(&epoch, EPOCHS_PER_BATCH, batch_type); entry.insert(optimistic_batch); self.send_batch(network, epoch)?; @@ -1270,7 +1270,7 @@ impl SyncingChain { self.include_next_batch(network) } Entry::Vacant(entry) => { - let batch_type = network.batch_type(next_batch_id); + let batch_type = network.batch_type(next_batch_id, RangeRequestType::ForwardSync); entry.insert(BatchInfo::new(&next_batch_id, EPOCHS_PER_BATCH, batch_type)); self.to_be_downloaded += EPOCHS_PER_BATCH; Some(next_batch_id) From 08bba3f3beda4e2de048edb2ccd4f426acbb7570 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 11 Sep 2025 18:24:25 -0700 Subject: [PATCH 33/49] fmt --- 
beacon_node/network/src/sync/range_sync/batch.rs | 6 +----- beacon_node/network/src/sync/range_sync/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 748cf8ac1e4..24f21a9441d 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -486,11 +486,7 @@ impl std::fmt::Debug for BatchState { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { BatchState::Processing(Attempt { peer_id, hash: _ }, batch_peers) => { - write!( - f, - "Processing({}) {}", - peer_id, batch_peers.block_and_blob - ) + write!(f, "Processing({}) {}", peer_id, batch_peers.block_and_blob) } BatchState::AwaitingValidation(Attempt { peer_id, hash: _ }) => { write!(f, "AwaitingValidation({})", peer_id) diff --git a/beacon_node/network/src/sync/range_sync/mod.rs b/beacon_node/network/src/sync/range_sync/mod.rs index 265840166ca..1218e0cd09c 100644 --- a/beacon_node/network/src/sync/range_sync/mod.rs +++ b/beacon_node/network/src/sync/range_sync/mod.rs @@ -8,8 +8,8 @@ mod range; mod sync_type; pub use batch::{ - BatchConfig, BatchInfo, BatchOperationOutcome, BatchProcessingResult, BatchState, - ByRangeRequestType, BatchPeers, + BatchConfig, BatchInfo, BatchOperationOutcome, BatchPeers, BatchProcessingResult, BatchState, + ByRangeRequestType, }; pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH}; #[cfg(test)] From 9db4c3071de79c22889769531766963eb6ffc751 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Tue, 16 Sep 2025 16:38:19 -0700 Subject: [PATCH 34/49] Fix small bug --- beacon_node/network/src/sync/network_context.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index a68927d811d..7b76e3c32ff 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -1697,14 +1697,7 @@ impl SyncNetworkContext { // Return early if no columns are required for this epoch if !matches!( self.batch_type(batch_epoch, id.parent_request_id.requester.batch_type()), - ByRangeRequestType::BlocksAndColumns - ) { - return Ok(()); - } - // Return early if this is a backfill batch, backfill batches are handled by range requests instead of root - if matches!( - id.parent_request_id.requester, - RangeRequestId::BackfillSync { .. 
} + ByRangeRequestType::BlocksAndColumnsSeparate ) { return Ok(()); } From e3aed89749a5f0a92ba8a45d48c3e34f181c1cb6 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Tue, 16 Sep 2025 17:46:21 -0700 Subject: [PATCH 35/49] Remove retry test that we do not use anymore --- .../src/sync/block_sidecar_coupling.rs | 90 ------------------- 1 file changed, 90 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 6a1c162e674..2400e05c800 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -676,7 +676,6 @@ impl ByRangeRequest { #[cfg(test)] mod tests { use super::RangeBlockComponentsRequest; - use crate::sync::network_context::MAX_COLUMN_RETRIES; use beacon_chain::test_utils::{ NumBlobs, generate_rand_block_and_blobs, generate_rand_block_and_data_columns, test_spec, }; @@ -1100,93 +1099,4 @@ mod tests { let rpc_blocks = result.unwrap(); assert_eq!(rpc_blocks.len(), 2); } - - #[test] - fn max_retries_exceeded_behavior() { - // GIVEN: A request where peers consistently fail to provide required columns - let spec = test_spec::(); - let expected_custody_columns = vec![1, 2]; - let mut rng = XorShiftRng::from_seed([42; 16]); - let blocks = (0..1) - .map(|_| { - generate_rand_block_and_data_columns::( - ForkName::Fulu, - NumBlobs::Number(1), - &mut rng, - &spec, - ) - }) - .collect::>(); - - let components_id = components_id(); - let blocks_req_id = blocks_id(components_id); - let columns_req_id = expected_custody_columns - .iter() - .enumerate() - .map(|(i, column)| (columns_id(i as Id, components_id), vec![*column])) - .collect::>(); - let mut info = RangeBlockComponentsRequest::::new( - blocks_req_id, - None, - Some((columns_req_id.clone(), expected_custody_columns.clone())), - None, - Span::none(), - ); - - // AND: All blocks are received - info.add_blocks( - blocks_req_id, - blocks.iter().map(|b| b.0.clone().into()).collect(), - ) - .unwrap(); - - // AND: Only partial custody columns are provided (column 1 but not 2) - let (req1, _) = columns_req_id.first().unwrap(); - info.add_custody_columns( - *req1, - blocks - .iter() - .flat_map(|b| b.1.iter().filter(|d| d.index == 1).cloned()) - .collect(), - ) - .unwrap(); - - // AND: Column 2 request completes with empty data (persistent peer failure) - let (req2, _) = columns_req_id.get(1).unwrap(); - info.add_custody_columns(*req2, vec![]).unwrap(); - - // WHEN: Multiple retry attempts are made (up to max retries) - for _ in 0..MAX_COLUMN_RETRIES { - let result = info.responses(&spec).unwrap(); - assert!(result.is_err()); - - if let Err(super::CouplingError::DataColumnPeerFailure { - exceeded_retries, .. 
- }) = &result - && *exceeded_retries - { - break; - } - } - - // AND: One final attempt after exceeding max retries - let result = info.responses(&spec).unwrap(); - - // THEN: Should fail with exceeded_retries = true - assert!(result.is_err()); - if let Err(super::CouplingError::DataColumnPeerFailure { - error: _, - faulty_peers, - action, - exceeded_retries, - }) = result - { - assert_eq!(faulty_peers.len(), 1); // column 2 missing - assert_eq!(faulty_peers[0].0, 2); // column index 2 - assert!(matches!(action, PeerAction::LowToleranceError)); - assert!(exceeded_retries); // Should be true after max retries - } else { - panic!("Expected PeerFailure error with exceeded_retries=true"); - } - } } From b3b3756e6f2565acb80599b5cc0c00cd3a333cbd Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 17 Sep 2025 15:01:46 -0700 Subject: [PATCH 36/49] Fix tests --- beacon_node/network/src/sync/tests/range.rs | 84 +++++++++++++++++++-- 1 file changed, 78 insertions(+), 6 deletions(-) diff --git a/beacon_node/network/src/sync/tests/range.rs b/beacon_node/network/src/sync/tests/range.rs index cb728a90c1b..516b66c45eb 100644 --- a/beacon_node/network/src/sync/tests/range.rs +++ b/beacon_node/network/src/sync/tests/range.rs @@ -16,7 +16,7 @@ use lighthouse_network::rpc::methods::{ }; use lighthouse_network::service::api_types::{ AppRequestId, BlobsByRangeRequestId, BlocksByRangeRequestId, DataColumnsByRangeRequestId, - SyncRequestId, + DataColumnsByRootRequestId, SyncRequestId, }; use lighthouse_network::{PeerId, SyncInfo}; use std::time::Duration; @@ -36,6 +36,7 @@ enum ByRangeDataRequestIds { PreDeneb, PrePeerDAS(BlobsByRangeRequestId, PeerId), PostPeerDAS(Vec<(DataColumnsByRangeRequestId, PeerId)>), + PostPeerDASByRoot(Vec<(DataColumnsByRootRequestId, PeerId)>), } /// Sync tests are usually written in the form: @@ -233,7 +234,8 @@ impl TestRig { }); let by_range_data_requests = if self.after_fulu() { - let mut data_columns_requests = vec![]; + // First check for DataColumnsByRange requests (old paradigm) + let mut data_columns_range_requests = vec![]; while let Ok(data_columns_request) = self.pop_received_network_event(|ev| match ev { NetworkMessage::SendRequest { peer_id, @@ -245,12 +247,34 @@ impl TestRig { } if filter_f(*peer_id, *start_slot) => Some((*id, *peer_id)), _ => None, }) { - data_columns_requests.push(data_columns_request); + data_columns_range_requests.push(data_columns_request); } - if data_columns_requests.is_empty() { - panic!("Found zero DataColumnsByRange requests, filter {request_filter:?}"); + + // If we found range requests, use the `ByRangeRequestType::BlocksAndColumns` paradigm + if !data_columns_range_requests.is_empty() { + ByRangeDataRequestIds::PostPeerDAS(data_columns_range_requests) + } else { + // Try to find the byroot requests associated with the `ByRangeRequestType::BlocksAndColumnsSeparate` + let mut data_columns_root_requests = vec![]; + while let Ok(data_columns_request) = self.pop_received_network_event(|ev| match ev { + NetworkMessage::SendRequest { + peer_id, + request: RequestType::DataColumnsByRoot(_), + app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRoot(id)), + } => Some((*id, *peer_id)), + _ => None, + }) { + data_columns_root_requests.push(data_columns_request); + } + + if !data_columns_root_requests.is_empty() { + ByRangeDataRequestIds::PostPeerDASByRoot(data_columns_root_requests) + } else { + // No data column requests found - this is expected for the new paradigm + // since DataColumnsByRoot requests are sent after blocks are 
received + ByRangeDataRequestIds::PostPeerDASByRoot(vec![]) + } } - ByRangeDataRequestIds::PostPeerDAS(data_columns_requests) } else if self.after_deneb() { let (id, peer) = self .pop_received_network_event(|ev| match ev { @@ -318,11 +342,54 @@ impl TestRig { }); } } + ByRangeDataRequestIds::PostPeerDASByRoot(data_column_req_ids) => { + // Complete the DataColumnsByRoot requests with stream termination + for (id, peer_id) in data_column_req_ids { + self.log(&format!( + "Completing DataColumnsByRoot request {id:?} with empty stream" + )); + self.send_sync_message(SyncMessage::RpcDataColumn { + sync_request_id: SyncRequestId::DataColumnsByRoot(id), + peer_id, + data_column: None, + seen_timestamp: D, + }); + } + } } blocks_req_id.parent_request_id.requester } + fn find_and_complete_data_columns_by_root_requests(&mut self) { + // In the new paradigm, DataColumnsByRoot requests are sent after blocks are received + // We need to complete any pending DataColumnsByRoot requests + let mut data_columns_root_requests = vec![]; + while let Ok(data_columns_request) = self.pop_received_network_event(|ev| match ev { + NetworkMessage::SendRequest { + peer_id, + request: RequestType::DataColumnsByRoot(_), + app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRoot(id)), + } => Some((*id, *peer_id)), + _ => None, + }) { + data_columns_root_requests.push(data_columns_request); + } + + // Complete the DataColumnsByRoot requests + for (id, peer_id) in data_columns_root_requests { + self.log(&format!( + "Completing DataColumnsByRoot request {id:?} with empty stream" + )); + self.send_sync_message(SyncMessage::RpcDataColumn { + sync_request_id: SyncRequestId::DataColumnsByRoot(id), + peer_id, + data_column: None, + seen_timestamp: D, + }); + } + } + fn find_and_complete_processing_chain_segment(&mut self, id: ChainSegmentProcessId) { self.pop_received_processor_event(|ev| { (ev.work_type() == WorkType::ChainSegment).then_some(()) @@ -366,6 +433,11 @@ impl TestRig { }; self.find_and_complete_processing_chain_segment(id); + + // In the new paradigm, DataColumnsByRoot requests are sent after blocks are processed + // We need to complete any pending DataColumnsByRoot requests + self.find_and_complete_data_columns_by_root_requests(); + if epoch < last_epoch - 1 { self.assert_state(RangeSyncType::Finalized); } else { From 2f35c360b2ccc8adf83f71fd4759e673c38cbf92 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 17 Sep 2025 15:13:40 -0700 Subject: [PATCH 37/49] Add some metrics --- beacon_node/network/src/metrics.rs | 6 ++++++ beacon_node/network/src/sync/network_context.rs | 13 +++++++++++++ 2 files changed, 19 insertions(+) diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index a2b5af8b086..6878d1f0755 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -484,6 +484,12 @@ pub static SYNC_ACTIVE_NETWORK_REQUESTS: LazyLock> = LazyLoc &["type"], ) }); +pub static SYNC_PENDING_ROOT_RANGE_REQUESTS: LazyLock> = LazyLock::new(|| { + try_create_int_gauge( + "sync_pending_root_range_requests", + "Current count of pending columns by root requests waiting for peers", + ) +}); pub static SYNC_UNKNOWN_NETWORK_REQUESTS: LazyLock> = LazyLock::new(|| { try_create_int_counter_vec( "sync_unknwon_network_request", diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 5a70f61a39a..3c8e01b7e77 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ 
b/beacon_node/network/src/sync/network_context.rs @@ -638,6 +638,10 @@ impl SyncNetworkContext { } } + metrics::set_gauge( + &metrics::SYNC_PENDING_ROOT_RANGE_REQUESTS, + self.pending_column_by_root_range_requests.len() as i64, + ); // Re-insert entries that still need to be retried self.pending_column_by_root_range_requests .extend(entries_to_keep); @@ -1801,6 +1805,11 @@ impl SyncNetworkContext { self.pending_column_by_root_range_requests .insert(id.parent_request_id, data_columns_by_root_request); + + metrics::set_gauge( + &metrics::SYNC_PENDING_ROOT_RANGE_REQUESTS, + self.pending_column_by_root_range_requests.len() as i64, + ); } // Insert the requests into the existing block parent request @@ -2074,6 +2083,10 @@ impl SyncNetworkContext { "data_columns_by_range", self.data_columns_by_range_requests.len(), ), + ( + "data_columns_by_root_range", + self.data_columns_by_root_range_requests.len(), + ), ("custody_by_root", self.custody_by_root_requests.len()), ( "components_by_range", From aa6a1bc850884476018ab647dae4d9413b8a307a Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 24 Sep 2025 12:16:05 -0700 Subject: [PATCH 38/49] Create a custom penalize_sync_peer method for clarity --- .../beacon_chain/src/block_verification.rs | 35 ++++++++++++++++++- .../gossip_methods.rs | 6 ++-- .../network_beacon_processor/sync_methods.rs | 20 ++--------- .../network/src/sync/block_lookups/mod.rs | 4 ++- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 1d10fae0a49..97b850e917e 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -418,7 +418,7 @@ pub enum ExecutionPayloadError { } impl ExecutionPayloadError { - pub fn penalize_peer(&self) -> bool { + pub fn penalize_gossip_peer(&self) -> bool { // This match statement should never have a default case so that we are // always forced to consider here whether or not to penalize a peer when // we add a new error condition. @@ -447,6 +447,39 @@ impl ExecutionPayloadError { ExecutionPayloadError::UnverifiedNonOptimisticCandidate => false, } } + + pub fn penalize_sync_peer(&self) -> bool { + // This match statement should never have a default case so that we are + // always forced to consider here whether or not to penalize a peer when + // we add a new error condition. + match self { + // The peer has nothing to do with this error, do not penalize them. + ExecutionPayloadError::NoExecutionConnection => false, + // The peer has nothing to do with this error, do not penalize them. + ExecutionPayloadError::RequestFailed(_) => false, + // For the sync case, we do not want a peer to keep sending us blocks that our + // execution engine considers invalid. + // + // Also, we ask peers for blocks over sync/rpc only when they indicate + // that they have fully validated a given block (using their status message). + // + // Hence, we should penalize for this error in the sync case. + ExecutionPayloadError::RejectedByExecutionEngine { .. } => true, + // There is no reason for an honest peer to propagate a block with an invalid + // payload time stamp. + ExecutionPayloadError::InvalidPayloadTimestamp { .. } => true, + // We do not want to receive these blocks over rpc even though the gossip + // case is still allowed. + ExecutionPayloadError::InvalidTerminalPoWBlock { .. 
} => true, + // We should penalize RPC blocks, since even an optimistic node shouldn't + // verify this block. + ExecutionPayloadError::InvalidActivationEpoch { .. } => true, + // As per `Self::InvalidActivationEpoch`. + ExecutionPayloadError::InvalidTerminalBlockHash { .. } => true, + // Do not penalize the peer since it's not their fault that *we're* optimistic. + ExecutionPayloadError::UnverifiedNonOptimisticCandidate => false, + } + } } impl From for ExecutionPayloadError { diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 5fc94c29587..20ed7a884a2 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -1330,7 +1330,9 @@ impl NetworkBeaconProcessor { self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); return None; } - Err(ref e @ BlockError::ExecutionPayloadError(ref epe)) if !epe.penalize_peer() => { + Err(ref e @ BlockError::ExecutionPayloadError(ref epe)) + if !epe.penalize_gossip_peer() => + { debug!(error = %e, "Could not verify block for gossip. Ignoring the block"); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); return None; @@ -1562,7 +1564,7 @@ impl NetworkBeaconProcessor { "Block with unknown parent attempted to be processed" ); } - Err(e @ BlockError::ExecutionPayloadError(epe)) if !epe.penalize_peer() => { + Err(e @ BlockError::ExecutionPayloadError(epe)) if !epe.penalize_gossip_peer() => { debug!( error = %e, "Failed to verify execution payload" diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 897220ae8cd..c8bc1b0ef44 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -11,8 +11,7 @@ use beacon_chain::data_availability_checker::AvailabilityCheckError; use beacon_chain::data_availability_checker::MaybeAvailableBlock; use beacon_chain::{ AvailabilityProcessingStatus, BeaconChainTypes, BlockError, ChainSegmentResult, - ExecutionPayloadError, HistoricalBlockError, NotifyExecutionLayer, - validator_monitor::get_slot_delay_ms, + HistoricalBlockError, NotifyExecutionLayer, validator_monitor::get_slot_delay_ms, }; use beacon_processor::{ AsyncFn, BlockingFn, DuplicateCache, @@ -773,7 +772,7 @@ impl NetworkBeaconProcessor { Err(ChainSegmentFailed { message: format!("Block has an unknown parent: {}", parent_root), // Peers are faulty if they send non-sequential blocks. - peer_action: Some(PeerAction::LowToleranceError), // todo(pawan): revise this + peer_action: Some(PeerAction::LowToleranceError), faulty_component: Some(FaultyComponent::Blocks), }) } @@ -852,20 +851,7 @@ impl NetworkBeaconProcessor { }) } ref err @ BlockError::ExecutionPayloadError(ref epe) => { - if matches!(epe, ExecutionPayloadError::RejectedByExecutionEngine { .. }) { - debug!( - error = ?err, - "Invalid execution payload rejected by EE" - ); - Err(ChainSegmentFailed { - message: format!( - "Peer sent a block containing invalid execution payload. Reason: {:?}", - err - ), - peer_action: Some(PeerAction::LowToleranceError), - faulty_component: Some(FaultyComponent::Blocks), // todo(pawan): recheck this - }) - } else if !epe.penalize_peer() { + if !epe.penalize_sync_peer() { // These errors indicate an issue with the EL and not the `ChainSegment`. 
// Pause the syncing while the EL recovers debug!( diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index f8ffd298caf..dfc106383ed 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -617,7 +617,9 @@ impl BlockLookups { request_state.revert_to_awaiting_processing()?; Action::ParentUnknown { parent_root } } - ref e @ BlockError::ExecutionPayloadError(ref epe) if !epe.penalize_peer() => { + ref e @ BlockError::ExecutionPayloadError(ref epe) + if !epe.penalize_sync_peer() => + { // These errors indicate that the execution layer is offline // and failed to validate the execution payload. Do not downscore peer. debug!( From 4b0b6550ae8b1b046d998024f8427f5f00212002 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Wed, 24 Sep 2025 12:31:06 -0700 Subject: [PATCH 39/49] Fix nits --- .../network/src/sync/block_sidecar_coupling.rs | 14 +++++++++----- beacon_node/network/src/sync/network_context.rs | 8 ++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 2400e05c800..8f5e6aafeca 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -34,6 +34,9 @@ use types::{ pub struct RangeBlockComponentsRequest { /// Blocks we have received awaiting for their corresponding sidecar. blocks_request: ByRangeRequest>>>, + /// We store the peer that we requested the blocks from for this particular `RangeBlockComponentsRequest`. + /// This is to ensure that we penalize the block peer if the blocks turn out to be invalid + /// during processing. block_peer: PeerId, /// Sidecars we have received awaiting for their corresponding block. block_data_request: RangeBlockDataRequest, @@ -49,7 +52,8 @@ enum ByRangeRequest { enum RangeBlockDataRequest { NoData, Blobs(ByRangeRequest>>>), - DataColumns { + /// These are data columns fetched by a range request. + DataColumnsFromRange { requests: HashMap< DataColumnsByRangeRequestId, ByRangeRequest>, @@ -98,13 +102,13 @@ impl RangeBlockComponentsRequest { /// * `blocks_req_id` - Request ID for the blocks /// * `blobs_req_id` - Optional request ID for blobs (pre-Fulu fork) /// * `data_columns` - Optional tuple of (request_id->column_indices pairs, expected_custody_columns) for Fulu fork - /// * `request_columns_by_root` - Creates an uninitialized `RangeBlockDataRequest::DataColumnsFromRoot` variant if this is true. + /// * `data_columns_by_root` - Creates an uninitialized `RangeBlockDataRequest::DataColumnsFromRoot` variant if this is `Some`. /// Note: this is only relevant is `data_columns == None`. 
#[allow(clippy::type_complexity)] pub fn new( blocks_req_id: BlocksByRangeRequestId, blobs_req_id: Option, - data_columns: Option<( + data_columns_by_range: Option<( Vec<(DataColumnsByRangeRequestId, Vec)>, Vec, )>, @@ -114,7 +118,7 @@ impl RangeBlockComponentsRequest { let block_peer = blocks_req_id.peer_id; let block_data_request = if let Some(blobs_req_id) = blobs_req_id { RangeBlockDataRequest::Blobs(ByRangeRequest::Active(blobs_req_id)) - } else if let Some((requests, expected_custody_columns)) = data_columns { + } else if let Some((requests, expected_custody_columns)) = data_columns_by_range { let request_to_column_indices: HashMap<_, _> = requests.into_iter().collect(); RangeBlockDataRequest::DataColumns { requests: request_to_column_indices @@ -268,7 +272,7 @@ impl RangeBlockComponentsRequest { match &mut self.block_data_request { RangeBlockDataRequest::NoData => Err("received blobs but expected no data".to_owned()), RangeBlockDataRequest::DataColumnsFromRoot { .. } => { - Err("received blobs but expected no data columns by root".to_owned()) + Err("received blobs but expected data columns by root".to_owned()) } RangeBlockDataRequest::Blobs(req) => req.finish(req_id, blobs), RangeBlockDataRequest::DataColumns { .. } => { diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 3c8e01b7e77..217208138af 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -514,7 +514,7 @@ impl SyncNetworkContext { // Attempt to find all required custody peers to request the failed columns from let columns_by_range_peers_to_request = self - .select_columns_by_range_peers_to_request( + .select_column_peers_to_request( failed_columns, peers, active_request_count_by_peer, @@ -586,7 +586,7 @@ impl SyncNetworkContext { .cloned() .collect(); - match self.select_columns_by_range_peers_to_request( + match self.select_column_peers_to_request( &custody_indices, &synced_peers, active_requests.clone(), @@ -699,7 +699,7 @@ impl SyncNetworkContext { .iter() .cloned() .collect(); - Some(self.select_columns_by_range_peers_to_request( + Some(self.select_column_peers_to_request( &column_indexes, peers, active_request_count_by_peer, @@ -800,7 +800,7 @@ impl SyncNetworkContext { Ok(id.id) } - fn select_columns_by_range_peers_to_request( + fn select_column_peers_to_request( &self, custody_indexes: &HashSet, peers: &HashSet, From 7650032898178a8e33a0a5c37411a749ec0b671d Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 25 Sep 2025 23:52:47 +0200 Subject: [PATCH 40/49] Rename DataColumnsFromRange --- .../network/src/sync/block_sidecar_coupling.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 8f5e6aafeca..9caf84be20d 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -120,7 +120,7 @@ impl RangeBlockComponentsRequest { RangeBlockDataRequest::Blobs(ByRangeRequest::Active(blobs_req_id)) } else if let Some((requests, expected_custody_columns)) = data_columns_by_range { let request_to_column_indices: HashMap<_, _> = requests.into_iter().collect(); - RangeBlockDataRequest::DataColumns { + RangeBlockDataRequest::DataColumnsFromRange { requests: request_to_column_indices .keys() .map(|id| (*id, ByRangeRequest::Active(*id))) @@ -155,7 
+155,7 @@ impl RangeBlockComponentsRequest { block_and_blob: self.block_peer, data_columns: match &self.block_data_request { RangeBlockDataRequest::NoData | RangeBlockDataRequest::Blobs(_) => HashMap::new(), - RangeBlockDataRequest::DataColumns { + RangeBlockDataRequest::DataColumnsFromRange { request_to_column_indices, .. } => request_to_column_indices @@ -180,7 +180,7 @@ impl RangeBlockComponentsRequest { failed_column_requests: Vec<(DataColumnsByRangeRequestId, Vec)>, ) -> Result<(), String> { match &mut self.block_data_request { - RangeBlockDataRequest::DataColumns { + RangeBlockDataRequest::DataColumnsFromRange { requests, expected_custody_columns: _, request_to_column_indices, @@ -275,7 +275,7 @@ impl RangeBlockComponentsRequest { Err("received blobs but expected data columns by root".to_owned()) } RangeBlockDataRequest::Blobs(req) => req.finish(req_id, blobs), - RangeBlockDataRequest::DataColumns { .. } => { + RangeBlockDataRequest::DataColumnsFromRange { .. } => { Err("received blobs but expected data columns".to_owned()) } } @@ -300,7 +300,7 @@ impl RangeBlockComponentsRequest { RangeBlockDataRequest::DataColumnsFromRoot { .. } => { Err("received data columns by root but expected range".to_owned()) } - RangeBlockDataRequest::DataColumns { requests, .. } => { + RangeBlockDataRequest::DataColumnsFromRange { requests, .. } => { let req = requests .get_mut(&req_id) .ok_or(format!("unknown data columns by range req_id {req_id}"))?; @@ -325,7 +325,7 @@ impl RangeBlockComponentsRequest { RangeBlockDataRequest::Blobs(_) => { Err("received data columns but expected blobs".to_owned()) } - RangeBlockDataRequest::DataColumns { .. } => { + RangeBlockDataRequest::DataColumnsFromRange { .. } => { Err("received data columns by range but expected root".to_owned()) } RangeBlockDataRequest::DataColumnsFromRoot { requests, .. 
} => { @@ -366,7 +366,7 @@ impl RangeBlockComponentsRequest { )) } - RangeBlockDataRequest::DataColumns { + RangeBlockDataRequest::DataColumnsFromRange { requests, expected_custody_columns, request_to_column_indices, From 7488755e833d725de4de6e450cbb84aa0a3968b8 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 25 Sep 2025 22:09:06 +0200 Subject: [PATCH 41/49] De-duplicate data columns by root request type --- .../lighthouse_network/src/rpc/methods.rs | 12 ++ .../src/sync/block_sidecar_coupling.rs | 2 +- beacon_node/network/src/sync/manager.rs | 26 ++-- .../network/src/sync/network_context.rs | 115 +++--------------- .../src/sync/network_context/custody.rs | 66 +++++----- .../src/sync/network_context/requests.rs | 5 +- .../requests/data_columns_by_root.rs | 76 +----------- 7 files changed, 76 insertions(+), 226 deletions(-) diff --git a/beacon_node/lighthouse_network/src/rpc/methods.rs b/beacon_node/lighthouse_network/src/rpc/methods.rs index 9319973e597..4b930a091f4 100644 --- a/beacon_node/lighthouse_network/src/rpc/methods.rs +++ b/beacon_node/lighthouse_network/src/rpc/methods.rs @@ -535,6 +535,18 @@ impl DataColumnsByRootRequest { Ok(Self { data_column_ids }) } + pub fn from_single_block(block_root: Hash256, indices: Vec) -> Result { + let columns = VariableList::new(indices) + .map_err(|_| "Number of indices exceeds total number of columns")?; + DataColumnsByRootRequest::new( + vec![DataColumnsByRootIdentifier { + block_root, + columns, + }], + 1, + ) + } + pub fn max_requested(&self) -> usize { self.data_column_ids.iter().map(|id| id.columns.len()).sum() } diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 9caf84be20d..fd221efc99d 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -53,7 +53,7 @@ enum RangeBlockDataRequest { NoData, Blobs(ByRangeRequest>>>), /// These are data columns fetched by a range request. 
- DataColumnsFromRange { + DataColumns { requests: HashMap< DataColumnsByRangeRequestId, ByRangeRequest>, diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 34de8aa45bc..2bf4f831e82 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -1129,24 +1129,20 @@ impl SyncManager { peer_id: PeerId, data_column: RpcEvent>>, ) { - match req_id.requester { - DataColumnsByRootRequester::Custody(custody_id) => { - if let Some(resp) = - self.network - .on_data_columns_by_root_response(req_id, peer_id, data_column) - && let Some(result) = self + if let Some(resp) = + self.network + .on_data_columns_by_root_response(req_id, peer_id, data_column) + { + match req_id.requester { + DataColumnsByRootRequester::Custody(custody_id) => { + if let Some(result) = self .network .on_custody_by_root_response(custody_id, req_id, peer_id, resp) - { - self.on_custody_by_root_result(custody_id.requester, result); + { + self.on_custody_by_root_result(custody_id.requester, result); + } } - } - DataColumnsByRootRequester::RangeSync { parent } => { - if let Some(resp) = self.network.on_data_columns_by_root_range_response( - req_id, - peer_id, - data_column, - ) { + DataColumnsByRootRequester::RangeSync { parent } => { self.on_range_components_response( parent, peer_id, diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 217208138af..1aa3813284b 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -2,7 +2,7 @@ //! channel and stores a global RPC ID to perform requests. use self::custody::{ActiveCustodyRequest, Error as CustodyRequestError}; -pub use self::requests::{BlocksByRootSingleRequest, DataColumnsByRootSingleBlockRequest}; +pub use self::requests::BlocksByRootSingleRequest; use super::SyncMessage; use super::block_sidecar_coupling::RangeBlockComponentsRequest; use super::manager::BlockProcessType; @@ -15,9 +15,7 @@ use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use crate::sync::block_lookups::SingleLookupId; use crate::sync::block_sidecar_coupling::CouplingError; -use crate::sync::network_context::requests::{ - BlobsByRootSingleBlockRequest, DataColumnsByRootRangeRequestItems, -}; +use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest; use crate::sync::range_sync::BatchPeers; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState}; @@ -217,8 +215,6 @@ pub struct SyncNetworkContext { /// A mapping of active DataColumnsByRange requests data_columns_by_range_requests: ActiveRequests>, - data_columns_by_root_range_requests: - ActiveRequests>, /// Mapping of active custody column requests for a block root custody_by_root_requests: FnvHashMap>, @@ -309,7 +305,6 @@ impl SyncNetworkContext { blocks_by_root_requests: ActiveRequests::new("blocks_by_root"), blobs_by_root_requests: ActiveRequests::new("blobs_by_root"), data_columns_by_root_requests: ActiveRequests::new("data_columns_by_root"), - data_columns_by_root_range_requests: ActiveRequests::new("data_columns_by_root_range"), blocks_by_range_requests: ActiveRequests::new("blocks_by_range"), blobs_by_range_requests: ActiveRequests::new("blobs_by_range"), data_columns_by_range_requests: ActiveRequests::new("data_columns_by_range"), @@ -341,7 +336,6 @@ impl SyncNetworkContext { blocks_by_range_requests, blobs_by_range_requests, 
data_columns_by_range_requests, - data_columns_by_root_range_requests, // custody_by_root_requests is a meta request of data_columns_by_root_requests custody_by_root_requests: _, // components_by_range_requests is a meta request of various _by_range requests @@ -378,18 +372,12 @@ impl SyncNetworkContext { .into_iter() .map(|req_id| SyncRequestId::DataColumnsByRange(*req_id)); - let data_column_by_root_range_ids = data_columns_by_root_range_requests - .active_requests_of_peer(peer_id) - .into_iter() - .map(|req_id| SyncRequestId::DataColumnsByRoot(*req_id)); - blocks_by_root_ids .chain(blobs_by_root_ids) .chain(data_column_by_root_ids) .chain(blocks_by_range_ids) .chain(blobs_by_range_ids) .chain(data_column_by_range_ids) - .chain(data_column_by_root_range_ids) .collect() } @@ -446,7 +434,6 @@ impl SyncNetworkContext { blocks_by_range_requests, blobs_by_range_requests, data_columns_by_range_requests, - data_columns_by_root_range_requests, // custody_by_root_requests is a meta request of data_columns_by_root_requests custody_by_root_requests: _, // components_by_range_requests is a meta request of various _by_range requests @@ -469,7 +456,6 @@ impl SyncNetworkContext { .chain(blocks_by_range_requests.iter_request_peers()) .chain(blobs_by_range_requests.iter_request_peers()) .chain(data_columns_by_range_requests.iter_request_peers()) - .chain(data_columns_by_root_range_requests.iter_request_peers()) { *active_request_count_by_peer.entry(peer_id).or_default() += 1; } @@ -600,11 +586,11 @@ impl SyncNetworkContext { }; data_column_requests.push(( - self.send_data_columns_by_root_range_requests( + self.send_data_columns_by_root_request( + requester, peer, data_columns_by_root_request, - requester, - Span::none(), + true, ) .map_err(|e| { format!("Failed to send data columns by root request {:?}", e) @@ -1161,13 +1147,13 @@ impl SyncNetworkContext { } /// Request to send a single `data_columns_by_root` request to the network. - pub fn data_column_lookup_request( + pub fn send_data_columns_by_root_request( &mut self, requester: DataColumnsByRootRequester, peer_id: PeerId, - request: DataColumnsByRootSingleBlockRequest, + request: DataColumnsByRootBatchBlockRequest, expect_max_responses: bool, - ) -> Result, &'static str> { + ) -> Result { let id = DataColumnsByRootRequestId { id: self.next_id(), requester, @@ -1177,17 +1163,18 @@ impl SyncNetworkContext { self.send_network_msg(NetworkMessage::SendRequest { peer_id, request: RequestType::DataColumnsByRoot( - request.clone().try_into_request::( - self.fork_context.current_fork_name(), - &self.chain.spec, - )?, + request + .clone() + .try_into_request(self.fork_context.current_fork_name(), &self.chain.spec) + .map_err(|_| "invalid count of data column indices")?, ), app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRoot(id)), - })?; + }) + .map_err(|_| "network send error")?; debug!( method = "DataColumnsByRoot", - block_root = ?request.block_root, + block_roots = ?request.block_roots, indices = ?request.indices, peer = %peer_id, %id, @@ -1199,12 +1186,10 @@ impl SyncNetworkContext { peer_id, expect_max_responses, DataColumnsByRootRequestItems::new(request), - // Span is tracked in `self.custody_columns_by_root_requests` in the - // `ActiveCustodyRequest` struct. Span::none(), ); - Ok(LookupRequestResult::RequestSent(id)) + Ok(id) } /// Request to fetch all needed custody columns of a specific block. 
This function may not send @@ -1420,51 +1405,6 @@ impl SyncNetworkContext { Ok((id, requested_columns)) } - /// Send `DataColumnsByRoot` requests for progressing range sync. - fn send_data_columns_by_root_range_requests( - &mut self, - peer_id: PeerId, - request: DataColumnsByRootBatchBlockRequest, - requester: DataColumnsByRootRequester, - request_span: Span, - ) -> Result { - let id = DataColumnsByRootRequestId { - id: self.next_id(), - requester, - peer: peer_id, - }; - - self.send_network_msg(NetworkMessage::SendRequest { - peer_id, - request: RequestType::DataColumnsByRoot( - request - .clone() - .try_into_request(self.fork_context.current_fork_name(), &self.chain.spec) - .map_err(|e| RpcRequestSendError::InternalError(e.to_string()))?, - ), - app_request_id: AppRequestId::Sync(SyncRequestId::DataColumnsByRoot(id)), - }) - .map_err(|_| RpcRequestSendError::InternalError("network send error".to_owned()))?; - - debug!( - method = "DataColumnsByRoot", - ?request, - peer = %peer_id, - %id, - "Sync RPC request sent" - ); - - self.data_columns_by_root_range_requests.insert( - id, - peer_id, - // true = we are only requesting if we know there are blobs. - true, - DataColumnsByRootRangeRequestItems::new(request), - request_span, - ); - Ok(id) - } - pub fn is_execution_engine_online(&self) -> bool { self.execution_engine_state == EngineState::Online } @@ -1665,19 +1605,6 @@ impl SyncNetworkContext { self.on_rpc_response_result(id, "DataColumnsByRoot", resp, peer_id, |_| 1) } - #[allow(clippy::type_complexity)] - pub(crate) fn on_data_columns_by_root_range_response( - &mut self, - id: DataColumnsByRootRequestId, - peer_id: PeerId, - rpc_event: RpcEvent>>, - ) -> Option>>>> { - let resp = self - .data_columns_by_root_range_requests - .on_response(id, rpc_event); - self.on_rpc_response_result(id, "DataColumnsByRootRange", resp, peer_id, |b| b.len()) - } - /// Requests data columns for the given blocks by root. 
/// /// We request by root because it is much easier to reason about @@ -1775,11 +1702,11 @@ impl SyncNetworkContext { }; data_column_requests.push(( - self.send_data_columns_by_root_range_requests( + self.send_data_columns_by_root_request( + requester, peer, data_columns_by_root_request, - requester, - Span::none(), + true, ) .map_err(|e| { RpcResponseError::InternalError(format!( @@ -2083,10 +2010,6 @@ impl SyncNetworkContext { "data_columns_by_range", self.data_columns_by_range_requests.len(), ), - ( - "data_columns_by_root_range", - self.data_columns_by_root_range_requests.len(), - ), ("custody_by_root", self.custody_by_root_requests.len()), ( "components_by_range", diff --git a/beacon_node/network/src/sync/network_context/custody.rs b/beacon_node/network/src/sync/network_context/custody.rs index d8d30fd1941..147948a20ee 100644 --- a/beacon_node/network/src/sync/network_context/custody.rs +++ b/beacon_node/network/src/sync/network_context/custody.rs @@ -1,5 +1,5 @@ use crate::sync::network_context::{ - DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest, + DataColumnsByRootBatchBlockRequest, DataColumnsByRootRequestId, }; use beacon_chain::BeaconChainTypes; use beacon_chain::validator_monitor::timestamp_now; @@ -16,7 +16,7 @@ use tracing::{Span, debug, debug_span, warn}; use types::{DataColumnSidecar, Hash256, data_column_sidecar::ColumnIndex}; use types::{DataColumnSidecarList, EthSpec}; -use super::{LookupRequestResult, PeerGroup, RpcResponseResult, SyncNetworkContext}; +use super::{PeerGroup, RpcResponseResult, SyncNetworkContext}; const MAX_STALE_NO_PEERS_DURATION: Duration = Duration::from_secs(30); @@ -279,12 +279,12 @@ impl ActiveCustodyRequest { } for (peer_id, indices) in columns_to_request_by_peer.into_iter() { - let request_result = cx - .data_column_lookup_request( + let req_id = cx + .send_data_columns_by_root_request( DataColumnsByRootRequester::Custody(self.custody_id), peer_id, - DataColumnsByRootSingleBlockRequest { - block_root: self.block_root, + DataColumnsByRootBatchBlockRequest { + block_roots: vec![self.block_root], indices: indices.clone(), }, // If peer is in the lookup peer set, it claims to have imported the block and @@ -295,38 +295,32 @@ impl ActiveCustodyRequest { ) .map_err(Error::SendFailed)?; - match request_result { - LookupRequestResult::RequestSent(req_id) => { - *self.peer_attempts.entry(peer_id).or_insert(0) += 1; + *self.peer_attempts.entry(peer_id).or_insert(0) += 1; - let client = cx.network_globals().client(&peer_id).kind; - let batch_columns_req_span = debug_span!( - "batch_columns_req", - %peer_id, - %client, - ); - let _guard = batch_columns_req_span.clone().entered(); - for column_index in &indices { - let column_request = self - .column_requests - .get_mut(column_index) - // Should never happen: column_index is iterated from column_requests - .ok_or(Error::BadState("unknown column_index".to_owned()))?; - - column_request.on_download_start(req_id)?; - } - - self.active_batch_columns_requests.insert( - req_id, - ActiveBatchColumnsRequest { - indices, - span: batch_columns_req_span, - }, - ); - } - LookupRequestResult::NoRequestNeeded(_) => unreachable!(), - LookupRequestResult::Pending(_) => unreachable!(), + let client = cx.network_globals().client(&peer_id).kind; + let batch_columns_req_span = debug_span!( + "batch_columns_req", + %peer_id, + %client, + ); + let _guard = batch_columns_req_span.clone().entered(); + for column_index in &indices { + let column_request = self + .column_requests + .get_mut(column_index) + // Should 
never happen: column_index is iterated from column_requests + .ok_or(Error::BadState("unknown column_index".to_owned()))?; + + column_request.on_download_start(req_id)?; } + + self.active_batch_columns_requests.insert( + req_id, + ActiveBatchColumnsRequest { + indices, + span: batch_columns_req_span, + }, + ); } Ok(None) diff --git a/beacon_node/network/src/sync/network_context/requests.rs b/beacon_node/network/src/sync/network_context/requests.rs index 950fc3db312..2134860ef44 100644 --- a/beacon_node/network/src/sync/network_context/requests.rs +++ b/beacon_node/network/src/sync/network_context/requests.rs @@ -12,10 +12,7 @@ pub use blobs_by_root::{BlobsByRootRequestItems, BlobsByRootSingleBlockRequest}; pub use blocks_by_range::BlocksByRangeRequestItems; pub use blocks_by_root::{BlocksByRootRequestItems, BlocksByRootSingleRequest}; pub use data_columns_by_range::DataColumnsByRangeRequestItems; -pub use data_columns_by_root::{ - DataColumnsByRootBatchBlockRequest, DataColumnsByRootRangeRequestItems, - DataColumnsByRootRequestItems, DataColumnsByRootSingleBlockRequest, -}; +pub use data_columns_by_root::{DataColumnsByRootBatchBlockRequest, DataColumnsByRootRequestItems}; use crate::metrics; diff --git a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs index 22a91e23792..c8bea7cc186 100644 --- a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs +++ b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs @@ -34,84 +34,12 @@ impl DataColumnsByRootBatchBlockRequest { } } -#[derive(Debug, Clone)] -pub struct DataColumnsByRootSingleBlockRequest { - pub block_root: Hash256, - pub indices: Vec, -} - -impl DataColumnsByRootSingleBlockRequest { - pub fn try_into_request( - self, - fork_name: ForkName, - spec: &ChainSpec, - ) -> Result, &'static str> { - let columns = VariableList::new(self.indices) - .map_err(|_| "Number of indices exceeds total number of columns")?; - DataColumnsByRootRequest::new( - vec![DataColumnsByRootIdentifier { - block_root: self.block_root, - columns, - }], - spec.max_request_blocks(fork_name), - ) - } -} - pub struct DataColumnsByRootRequestItems { - request: DataColumnsByRootSingleBlockRequest, - items: Vec>>, -} - -impl DataColumnsByRootRequestItems { - pub fn new(request: DataColumnsByRootSingleBlockRequest) -> Self { - Self { - request, - items: vec![], - } - } -} - -impl ActiveRequestItems for DataColumnsByRootRequestItems { - type Item = Arc>; - - /// Appends a chunk to this multi-item request. If all expected chunks are received, this - /// method returns `Some`, resolving the request before the stream terminator. 
- /// The active request SHOULD be dropped after `add_response` returns an error - fn add(&mut self, data_column: Self::Item) -> Result { - let block_root = data_column.block_root(); - if self.request.block_root != block_root { - return Err(LookupVerifyError::UnrequestedBlockRoot(block_root)); - } - if !data_column.verify_inclusion_proof() { - return Err(LookupVerifyError::InvalidInclusionProof); - } - if !self.request.indices.contains(&data_column.index) { - return Err(LookupVerifyError::UnrequestedIndex(data_column.index)); - } - if self.items.iter().any(|d| d.index == data_column.index) { - return Err(LookupVerifyError::DuplicatedData( - data_column.slot(), - data_column.index, - )); - } - - self.items.push(data_column); - - Ok(self.items.len() >= self.request.indices.len()) - } - - fn consume(&mut self) -> Vec { - std::mem::take(&mut self.items) - } -} - -pub struct DataColumnsByRootRangeRequestItems { request: DataColumnsByRootBatchBlockRequest, items: HashMap>>>, } -impl DataColumnsByRootRangeRequestItems { +impl DataColumnsByRootRequestItems { pub fn new(request: DataColumnsByRootBatchBlockRequest) -> Self { Self { request, @@ -120,7 +48,7 @@ impl DataColumnsByRootRangeRequestItems { } } -impl ActiveRequestItems for DataColumnsByRootRangeRequestItems { +impl ActiveRequestItems for DataColumnsByRootRequestItems { type Item = Arc>; /// Appends a chunk to this multi-item request. If all expected chunks are received, this From c2aa4ae8be614d183edfb2c3848bed58d36749ca Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 25 Sep 2025 23:58:31 +0200 Subject: [PATCH 42/49] Revert type change in UnexpectedRequestId --- .../network/src/sync/network_context/custody.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/beacon_node/network/src/sync/network_context/custody.rs b/beacon_node/network/src/sync/network_context/custody.rs index 147948a20ee..eb34aae56c9 100644 --- a/beacon_node/network/src/sync/network_context/custody.rs +++ b/beacon_node/network/src/sync/network_context/custody.rs @@ -5,7 +5,7 @@ use beacon_chain::BeaconChainTypes; use beacon_chain::validator_monitor::timestamp_now; use fnv::FnvHashMap; use lighthouse_network::PeerId; -use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester, Id}; +use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester}; use lighthouse_tracing::SPAN_OUTGOING_CUSTODY_REQUEST; use parking_lot::RwLock; use std::collections::HashSet; @@ -46,8 +46,8 @@ pub enum Error { /// There should only exist a single request at a time. Having multiple requests is a bug and /// can result in undefined state, so it's treated as a hard error and the lookup is dropped. 
UnexpectedRequestId { - expected_req_id: Id, - req_id: Id, + expected_req_id: DataColumnsByRootRequestId, + req_id: DataColumnsByRootRequestId, }, } @@ -424,8 +424,8 @@ impl ColumnRequest { Status::Downloading(expected_req_id) => { if req_id != *expected_req_id { return Err(Error::UnexpectedRequestId { - expected_req_id: expected_req_id.id, - req_id: req_id.id, + expected_req_id: *expected_req_id, + req_id, }); } self.status = Status::NotStarted(Instant::now()); @@ -457,8 +457,8 @@ impl ColumnRequest { Status::Downloading(expected_req_id) => { if req_id != *expected_req_id { return Err(Error::UnexpectedRequestId { - expected_req_id: expected_req_id.id, - req_id: req_id.id, + expected_req_id: *expected_req_id, + req_id, }); } self.status = Status::Downloaded(peer_id, data_column, seen_timestamp); From cf46d103cddbeee37bcf33952726a3f50865b52a Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 25 Sep 2025 15:54:32 -0700 Subject: [PATCH 43/49] Fix issues from review --- .../network/src/sync/backfill_sync/mod.rs | 37 ++++++++++--------- .../network/src/sync/range_sync/chain.rs | 10 +---- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index 6c94c1821a7..c4bd55ff8e1 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -616,28 +616,29 @@ impl BackFillSync { penalty, faulty_component, } => { - let Some(batch_peers) = batch.processing_peers() else { - error!(?batch_id, "Responsible peers not found for a failed batch"); - return self - .fail_sync(BackFillError::BatchProcessingFailed(batch_id)) - .map(|_| ProcessResult::Successful); - }; - // Penalize the peer appropriately. - match faulty_component { - Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { - network.report_peer(batch_peers.block_and_blob, *penalty, "faulty_batch"); - } - // todo(pawan): clean this up - Some(FaultyComponent::Columns(faulty_columns)) => { - for (peer, columns) in batch_peers.data_columns.iter() { - for faulty_column in faulty_columns { - if columns.contains(faulty_column) { - network.report_peer(*peer, *penalty, "faulty_batch"); + if let Some(batch_peers) = batch.processing_peers() { + // Penalize the peer appropriately. + match faulty_component { + Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { + network.report_peer( + batch_peers.block_and_blob, + *penalty, + "faulty_batch", + ); + } + Some(FaultyComponent::Columns(faulty_columns)) => { + for (peer, columns) in batch_peers.data_columns.iter() { + for faulty_column in faulty_columns { + if columns.contains(faulty_column) { + network.report_peer(*peer, *penalty, "faulty_batch"); + } } } } + None => {} } - None => {} + } else { + warn!(?batch_id, "Responsible peers not found for a failed batch"); } match batch.processing_completed(BatchProcessingResult::FaultyFailure) { Err(e) => { diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index b2eb1ef1b9c..8013a38ff62 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -476,18 +476,10 @@ impl SyncingChain { } }; - let peers = batch.processing_peers().cloned().ok_or_else(|| { - RemoveChain::WrongBatchState(format!( - "Processing target is in wrong state: {:?}", - batch.state(), - )) - })?; - // Log the process result and the batch for debugging purposes. 
debug!( result = ?result, batch_epoch = %batch_id, - ?peers, batch_state = ?batch_state, ?batch, "Batch processing result" ); @@ -554,7 +546,7 @@ impl SyncingChain { faulty_component, } => { let Some(batch_peers) = batch.processing_peers() else { - crit!( + warn!( current_state = ?batch.state(), "Inconsistent state, batch must have been in processing state" ); From d99df0af4f59c0c9a11b32cb26cb95fa8b8944c4 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Thu, 25 Sep 2025 12:52:07 +1000 Subject: [PATCH 44/49] Only send data column subnet discovery requests after PeerDAS is scheduled (#8109) #8105 (to be confirmed) I noticed a large number of failed discovery requests after deploying the latest `unstable` to some of our testnet and mainnet nodes. This is because of a recent PeerDAS change to attempt to maintain sufficient peers across data column subnets - this shouldn't be enabled on networks without PeerDAS scheduled, otherwise it will keep retrying discovery on these subnets and never succeed. Also removed some unused files. Co-Authored-By: Jimmy Chen Co-Authored-By: Jimmy Chen --- .../src/peer_manager/mod.rs | 69 +- .../src/subnet_service/attestation_subnets.rs | 681 ------------------ .../src/subnet_service/sync_subnets.rs | 345 --------- 3 files changed, 67 insertions(+), 1028 deletions(-) delete mode 100644 beacon_node/network/src/subnet_service/attestation_subnets.rs delete mode 100644 beacon_node/network/src/subnet_service/sync_subnets.rs diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 592fccdc741..ad16bb0421c 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -23,6 +23,7 @@ pub use libp2p::identity::Keypair; pub mod peerdb; use crate::peer_manager::peerdb::client::ClientKind; +use crate::types::GossipKind; use libp2p::multiaddr; use network_utils::discovery_metrics; use network_utils::enr_ext::{EnrExt, peer_id_to_node_id}; @@ -1434,8 +1435,16 @@ impl PeerManager { // Update peer score metrics; self.update_peer_score_metrics(); - // Maintain minimum count for custody peers. - self.maintain_custody_peers(); + // Maintain minimum count for custody peers if we are subscribed to any data column topics (i.e. PeerDAS activated) + let peerdas_enabled = self + .network_globals + .gossipsub_subscriptions + .read() + .iter() + .any(|topic| matches!(topic.kind(), &GossipKind::DataColumnSidecar(_))); + if peerdas_enabled { + self.maintain_custody_peers(); + } // Maintain minimum count for sync committee peers. 
self.maintain_sync_committee_peers(); @@ -3140,4 +3149,60 @@ mod tests { }) } } + + #[tokio::test] + async fn test_custody_peer_logic_only_runs_when_peerdas_enabled() { + use crate::types::{GossipEncoding, GossipTopic}; + + let mut peer_manager = build_peer_manager(5).await; + + // Set up sampling subnets so maintain_custody_peers would have work to do + *peer_manager.network_globals.sampling_subnets.write() = std::collections::HashSet::from([ + DataColumnSubnetId::new(0), + DataColumnSubnetId::new(1), + ]); + + // Test 1: No data column subscriptions - custody peer logic should NOT run + peer_manager.heartbeat(); + + // Should be no new DiscoverSubnetPeers events since PeerDAS is not enabled + let discovery_events: Vec<_> = peer_manager + .events + .iter() + .filter(|event| matches!(event, PeerManagerEvent::DiscoverSubnetPeers(_))) + .collect(); + assert!( + discovery_events.is_empty(), + "Should not generate discovery events when PeerDAS is disabled, but found: {:?}", + discovery_events + ); + + // Test 2: Add data column subscription - custody peer logic should run + let data_column_topic = GossipTopic::new( + GossipKind::DataColumnSidecar(DataColumnSubnetId::new(0)), + GossipEncoding::SSZSnappy, + [0, 0, 0, 0], // fork_digest + ); + peer_manager + .network_globals + .gossipsub_subscriptions + .write() + .insert(data_column_topic); + + // Clear any existing events to isolate the test + peer_manager.events.clear(); + + peer_manager.heartbeat(); + + // Should now have DiscoverSubnetPeers events since PeerDAS is enabled + let discovery_events: Vec<_> = peer_manager + .events + .iter() + .filter(|event| matches!(event, PeerManagerEvent::DiscoverSubnetPeers(_))) + .collect(); + assert!( + !discovery_events.is_empty(), + "Should generate discovery events when PeerDAS is enabled, but found no discovery events" + ); + } } diff --git a/beacon_node/network/src/subnet_service/attestation_subnets.rs b/beacon_node/network/src/subnet_service/attestation_subnets.rs deleted file mode 100644 index 0da27c6a21f..00000000000 --- a/beacon_node/network/src/subnet_service/attestation_subnets.rs +++ /dev/null @@ -1,681 +0,0 @@ -//! This service keeps track of which shard subnet the beacon node should be subscribed to at any -//! given time. It schedules subscriptions to shard subnets, requests peer discoveries and -//! determines whether attestations should be aggregated and/or passed to the beacon node. - -use super::SubnetServiceMessage; -use std::collections::HashSet; -use std::collections::{HashMap, VecDeque}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; - -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use delay_map::{HashMapDelay, HashSetDelay}; -use futures::prelude::*; -use lighthouse_network::{discv5::enr::NodeId, NetworkConfig, Subnet, SubnetDiscovery}; -use slot_clock::SlotClock; -use tracing::{debug, error, info, trace, warn}; -use types::{Attestation, EthSpec, Slot, SubnetId, ValidatorSubscription}; - -use crate::metrics; - -/// The minimum number of slots ahead that we attempt to discover peers for a subscription. If the -/// slot is less than this number, skip the peer discovery process. -/// Subnet discovery query takes at most 30 secs, 2 slots take 24s. -pub(crate) const MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD: u64 = 2; -/// The fraction of a slot that we subscribe to a subnet before the required slot. -/// -/// Currently a whole slot ahead. 
-const ADVANCE_SUBSCRIBE_SLOT_FRACTION: u32 = 1; - -/// The number of slots after an aggregator duty where we remove the entry from -/// `aggregate_validators_on_subnet` delay map. -const UNSUBSCRIBE_AFTER_AGGREGATOR_DUTY: u32 = 2; - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub(crate) enum SubscriptionKind { - /// Long lived subscriptions. - /// - /// These have a longer duration and are advertised in our ENR. - LongLived, - /// Short lived subscriptions. - /// - /// Subscribing to these subnets has a short duration and we don't advertise it in our ENR. - ShortLived, -} - -/// A particular subnet at a given slot. -#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy)] -pub struct ExactSubnet { - /// The `SubnetId` associated with this subnet. - pub subnet_id: SubnetId, - /// The `Slot` associated with this subnet. - pub slot: Slot, -} - -pub struct AttestationService { - /// Queued events to return to the driving service. - events: VecDeque, - - /// A reference to the beacon chain to process received attestations. - pub(crate) beacon_chain: Arc>, - - /// Subnets we are currently subscribed to as short lived subscriptions. - /// - /// Once they expire, we unsubscribe from these. - /// We subscribe to subnets when we are an aggregator for an exact subnet. - short_lived_subscriptions: HashMapDelay, - - /// Subnets we are currently subscribed to as long lived subscriptions. - /// - /// We advertise these in our ENR. When these expire, the subnet is removed from our ENR. - /// These are required of all beacon nodes. The exact number is determined by the chain - /// specification. - long_lived_subscriptions: HashSet, - - /// Short lived subscriptions that need to be executed in the future. - scheduled_short_lived_subscriptions: HashSetDelay, - - /// A collection timeouts to track the existence of aggregate validator subscriptions at an - /// `ExactSubnet`. - aggregate_validators_on_subnet: Option>, - - /// The waker for the current thread. - waker: Option, - - /// The discovery mechanism of lighthouse is disabled. - discovery_disabled: bool, - - /// We are always subscribed to all subnets. - subscribe_all_subnets: bool, - - /// Our Discv5 node_id. - node_id: NodeId, - - /// Future used to manage subscribing and unsubscribing from long lived subnets. - next_long_lived_subscription_event: Pin>, - - /// Whether this node is a block proposer-only node. - proposer_only: bool, -} - -impl AttestationService { - /* Public functions */ - - /// Establish the service based on the passed configuration. 
- pub fn new(beacon_chain: Arc>, node_id: NodeId, config: &NetworkConfig) -> Self { - let slot_duration = beacon_chain.slot_clock.slot_duration(); - - if config.subscribe_all_subnets { - info!("Subscribing to all subnets"); - } else { - info!( - subnets_per_node = beacon_chain.spec.subnets_per_node, - subscription_duration_in_epochs = beacon_chain.spec.epochs_per_subnet_subscription, - "Deterministic long lived subnets enabled" - ); - } - - let track_validators = !config.import_all_attestations; - let aggregate_validators_on_subnet = - track_validators.then(|| HashSetDelay::new(slot_duration)); - let mut service = AttestationService { - events: VecDeque::with_capacity(10), - beacon_chain, - short_lived_subscriptions: HashMapDelay::new(slot_duration), - long_lived_subscriptions: HashSet::default(), - scheduled_short_lived_subscriptions: HashSetDelay::default(), - aggregate_validators_on_subnet, - waker: None, - discovery_disabled: config.disable_discovery, - subscribe_all_subnets: config.subscribe_all_subnets, - node_id, - next_long_lived_subscription_event: { - // Set a dummy sleep. Calculating the current subnet subscriptions will update this - // value with a smarter timing - Box::pin(tokio::time::sleep(Duration::from_secs(1))) - }, - proposer_only: config.proposer_only, - }; - - // If we are not subscribed to all subnets, handle the deterministic set of subnets - if !config.subscribe_all_subnets { - service.recompute_long_lived_subnets(); - } - - service - } - - /// Return count of all currently subscribed subnets (long-lived **and** short-lived). - #[cfg(test)] - pub fn subscription_count(&self) -> usize { - if self.subscribe_all_subnets { - self.beacon_chain.spec.attestation_subnet_count as usize - } else { - let count = self - .short_lived_subscriptions - .keys() - .chain(self.long_lived_subscriptions.iter()) - .collect::>() - .len(); - count - } - } - - /// Returns whether we are subscribed to a subnet for testing purposes. - #[cfg(test)] - pub(crate) fn is_subscribed( - &self, - subnet_id: &SubnetId, - subscription_kind: SubscriptionKind, - ) -> bool { - match subscription_kind { - SubscriptionKind::LongLived => self.long_lived_subscriptions.contains(subnet_id), - SubscriptionKind::ShortLived => self.short_lived_subscriptions.contains_key(subnet_id), - } - } - - #[cfg(test)] - pub(crate) fn long_lived_subscriptions(&self) -> &HashSet { - &self.long_lived_subscriptions - } - - /// Processes a list of validator subscriptions. - /// - /// This will: - /// - Register new validators as being known. - /// - Search for peers for required subnets. - /// - Request subscriptions for subnets on specific slots when required. - /// - Build the timeouts for each of these events. - /// - /// This returns a result simply for the ergonomics of using ?. The result can be - /// safely dropped. - pub fn validator_subscriptions( - &mut self, - subscriptions: impl Iterator, - ) -> Result<(), String> { - // If the node is in a proposer-only state, we ignore all subnet subscriptions. - if self.proposer_only { - return Ok(()); - } - - // Maps each subnet_id subscription to it's highest slot - let mut subnets_to_discover: HashMap = HashMap::new(); - - // Registers the validator with the attestation service. 
- for subscription in subscriptions { - metrics::inc_counter(&metrics::SUBNET_SUBSCRIPTION_REQUESTS); - - trace!(?subscription, "Validator subscription"); - - // Compute the subnet that is associated with this subscription - let subnet_id = match SubnetId::compute_subnet::( - subscription.slot, - subscription.attestation_committee_index, - subscription.committee_count_at_slot, - &self.beacon_chain.spec, - ) { - Ok(subnet_id) => subnet_id, - Err(e) => { - warn!( - error = ?e, - "Failed to compute subnet id for validator subscription" - ); - continue; - } - }; - // Ensure each subnet_id inserted into the map has the highest slot as it's value. - // Higher slot corresponds to higher min_ttl in the `SubnetDiscovery` entry. - if let Some(slot) = subnets_to_discover.get(&subnet_id) { - if subscription.slot > *slot { - subnets_to_discover.insert(subnet_id, subscription.slot); - } - } else if !self.discovery_disabled { - subnets_to_discover.insert(subnet_id, subscription.slot); - } - - let exact_subnet = ExactSubnet { - subnet_id, - slot: subscription.slot, - }; - - // Determine if the validator is an aggregator. If so, we subscribe to the subnet and - // if successful add the validator to a mapping of known aggregators for that exact - // subnet. - - if subscription.is_aggregator { - metrics::inc_counter(&metrics::SUBNET_SUBSCRIPTION_AGGREGATOR_REQUESTS); - if let Err(e) = self.subscribe_to_short_lived_subnet(exact_subnet) { - warn!(error = e, "Subscription to subnet error"); - } else { - trace!(?exact_subnet, "Subscribed to subnet for aggregator duties"); - } - } - } - - // If the discovery mechanism isn't disabled, attempt to set up a peer discovery for the - // required subnets. - if !self.discovery_disabled { - if let Err(e) = self.discover_peers_request( - subnets_to_discover - .into_iter() - .map(|(subnet_id, slot)| ExactSubnet { subnet_id, slot }), - ) { - warn!(error = e, "Discovery lookup request error"); - }; - } - - Ok(()) - } - - fn recompute_long_lived_subnets(&mut self) { - // Ensure the next computation is scheduled even if assigning subnets fails. - let next_subscription_event = self - .recompute_long_lived_subnets_inner() - .unwrap_or_else(|_| self.beacon_chain.slot_clock.slot_duration()); - - debug!("Recomputing deterministic long lived subnets"); - self.next_long_lived_subscription_event = - Box::pin(tokio::time::sleep(next_subscription_event)); - - if let Some(waker) = self.waker.as_ref() { - waker.wake_by_ref(); - } - } - - /// Gets the long lived subnets the node should be subscribed to during the current epoch and - /// the remaining duration for which they remain valid. 
- fn recompute_long_lived_subnets_inner(&mut self) -> Result { - let current_epoch = self.beacon_chain.epoch().map_err(|e| { - if !self - .beacon_chain - .slot_clock - .is_prior_to_genesis() - .unwrap_or(false) - { - error!(err = ?e,"Failed to get the current epoch from clock") - } - })?; - - let (subnets, next_subscription_epoch) = SubnetId::compute_subnets_for_epoch::( - self.node_id.raw(), - current_epoch, - &self.beacon_chain.spec, - ) - .map_err(|e| error!(err = e, "Could not compute subnets for current epoch"))?; - - let next_subscription_slot = - next_subscription_epoch.start_slot(T::EthSpec::slots_per_epoch()); - let next_subscription_event = self - .beacon_chain - .slot_clock - .duration_to_slot(next_subscription_slot) - .ok_or_else(|| { - error!("Failed to compute duration to next to long lived subscription event") - })?; - - self.update_long_lived_subnets(subnets.collect()); - - Ok(next_subscription_event) - } - - /// Updates the long lived subnets. - /// - /// New subnets are registered as subscribed, removed subnets as unsubscribed and the Enr - /// updated accordingly. - fn update_long_lived_subnets(&mut self, mut subnets: HashSet) { - info!(subnets = ?subnets.iter().collect::>(),"Subscribing to long-lived subnets"); - for subnet in &subnets { - // Add the events for those subnets that are new as long lived subscriptions. - if !self.long_lived_subscriptions.contains(subnet) { - // Check if this subnet is new and send the subscription event if needed. - if !self.short_lived_subscriptions.contains_key(subnet) { - debug!( - ?subnet, - subscription_kind = ?SubscriptionKind::LongLived, - "Subscribing to subnet" - ); - self.queue_event(SubnetServiceMessage::Subscribe(Subnet::Attestation( - *subnet, - ))); - } - self.queue_event(SubnetServiceMessage::EnrAdd(Subnet::Attestation(*subnet))); - if !self.discovery_disabled { - self.queue_event(SubnetServiceMessage::DiscoverPeers(vec![SubnetDiscovery { - subnet: Subnet::Attestation(*subnet), - min_ttl: None, - }])) - } - } - } - - // Update the long_lived_subnets set and check for subnets that are being removed - std::mem::swap(&mut self.long_lived_subscriptions, &mut subnets); - for subnet in subnets { - if !self.long_lived_subscriptions.contains(&subnet) { - self.handle_removed_subnet(subnet, SubscriptionKind::LongLived); - } - } - } - - /// Checks if we have subscribed aggregate validators for the subnet. If not, checks the gossip - /// verification, re-propagates and returns false. - pub fn should_process_attestation( - &self, - subnet: SubnetId, - attestation: &Attestation, - ) -> bool { - // Proposer-only mode does not need to process attestations - if self.proposer_only { - return false; - } - self.aggregate_validators_on_subnet - .as_ref() - .map(|tracked_vals| { - tracked_vals.contains_key(&ExactSubnet { - subnet_id: subnet, - slot: attestation.data().slot, - }) - }) - .unwrap_or(true) - } - - /* Internal private functions */ - - /// Adds an event to the event queue and notifies that this service is ready to be polled - /// again. - fn queue_event(&mut self, ev: SubnetServiceMessage) { - self.events.push_back(ev); - if let Some(waker) = &self.waker { - waker.wake_by_ref() - } - } - /// Checks if there are currently queued discovery requests and the time required to make the - /// request. - /// - /// If there is sufficient time, queues a peer discovery request for all the required subnets. 
- fn discover_peers_request( - &mut self, - exact_subnets: impl Iterator, - ) -> Result<(), &'static str> { - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let discovery_subnets: Vec = exact_subnets - .filter_map(|exact_subnet| { - // Check if there is enough time to perform a discovery lookup. - if exact_subnet.slot - >= current_slot.saturating_add(MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD) - { - // Send out an event to start looking for peers. - // Require the peer for an additional slot to ensure we keep the peer for the - // duration of the subscription. - let min_ttl = self - .beacon_chain - .slot_clock - .duration_to_slot(exact_subnet.slot + 1) - .map(|duration| std::time::Instant::now() + duration); - Some(SubnetDiscovery { - subnet: Subnet::Attestation(exact_subnet.subnet_id), - min_ttl, - }) - } else { - // We may want to check the global PeerInfo to see estimated timeouts for each - // peer before they can be removed. - warn!( - subnet_id = ?exact_subnet, - "Not enough time for a discovery search" - ); - None - } - }) - .collect(); - - if !discovery_subnets.is_empty() { - self.queue_event(SubnetServiceMessage::DiscoverPeers(discovery_subnets)); - } - Ok(()) - } - - // Subscribes to the subnet if it should be done immediately, or schedules it if required. - fn subscribe_to_short_lived_subnet( - &mut self, - ExactSubnet { subnet_id, slot }: ExactSubnet, - ) -> Result<(), &'static str> { - let slot_duration = self.beacon_chain.slot_clock.slot_duration(); - - // The short time we schedule the subscription before it's actually required. This - // ensures we are subscribed on time, and allows consecutive subscriptions to the same - // subnet to overlap, reducing subnet churn. - let advance_subscription_duration = slot_duration / ADVANCE_SUBSCRIBE_SLOT_FRACTION; - // The time to the required slot. - let time_to_subscription_slot = self - .beacon_chain - .slot_clock - .duration_to_slot(slot) - .unwrap_or_default(); // If this is a past slot we will just get a 0 duration. - - // Calculate how long before we need to subscribe to the subnet. - let time_to_subscription_start = - time_to_subscription_slot.saturating_sub(advance_subscription_duration); - - // The time after a duty slot where we no longer need it in the `aggregate_validators_on_subnet` - // delay map. - let time_to_unsubscribe = - time_to_subscription_slot + UNSUBSCRIBE_AFTER_AGGREGATOR_DUTY * slot_duration; - if let Some(tracked_vals) = self.aggregate_validators_on_subnet.as_mut() { - tracked_vals.insert_at(ExactSubnet { subnet_id, slot }, time_to_unsubscribe); - } - - // If the subscription should be done in the future, schedule it. Otherwise subscribe - // immediately. - if time_to_subscription_start.is_zero() { - // This is a current or past slot, we subscribe immediately. - self.subscribe_to_short_lived_subnet_immediately(subnet_id, slot + 1)?; - } else { - // This is a future slot, schedule subscribing. - trace!(subnet = ?subnet_id, ?time_to_subscription_start,"Scheduling subnet subscription"); - self.scheduled_short_lived_subscriptions - .insert_at(ExactSubnet { subnet_id, slot }, time_to_subscription_start); - } - - Ok(()) - } - - /* A collection of functions that handle the various timeouts */ - - /// Registers a subnet as subscribed. - /// - /// Checks that the time in which the subscription would end is not in the past. If we are - /// already subscribed, extends the timeout if necessary. 
If this is a new subscription, we send - /// out the appropriate events. - /// - /// On determinist long lived subnets, this is only used for short lived subscriptions. - fn subscribe_to_short_lived_subnet_immediately( - &mut self, - subnet_id: SubnetId, - end_slot: Slot, - ) -> Result<(), &'static str> { - if self.subscribe_all_subnets { - // Case not handled by this service. - return Ok(()); - } - - let time_to_subscription_end = self - .beacon_chain - .slot_clock - .duration_to_slot(end_slot) - .unwrap_or_default(); - - // First check this is worth doing. - if time_to_subscription_end.is_zero() { - return Err("Time when subscription would end has already passed."); - } - - let subscription_kind = SubscriptionKind::ShortLived; - - // We need to check and add a subscription for the right kind, regardless of the presence - // of the subnet as a subscription of the other kind. This is mainly since long lived - // subscriptions can be removed at any time when a validator goes offline. - - let (subscriptions, already_subscribed_as_other_kind) = ( - &mut self.short_lived_subscriptions, - self.long_lived_subscriptions.contains(&subnet_id), - ); - - match subscriptions.get(&subnet_id) { - Some(current_end_slot) => { - // We are already subscribed. Check if we need to extend the subscription. - if &end_slot > current_end_slot { - trace!( - subnet = ?subnet_id, - prev_end_slot = %current_end_slot, - new_end_slot = %end_slot, - ?subscription_kind, - "Extending subscription to subnet" - ); - subscriptions.insert_at(subnet_id, end_slot, time_to_subscription_end); - } - } - None => { - // This is a new subscription. Add with the corresponding timeout and send the - // notification. - subscriptions.insert_at(subnet_id, end_slot, time_to_subscription_end); - - // Inform of the subscription. - if !already_subscribed_as_other_kind { - debug!( - subnet = ?subnet_id, - %end_slot, - ?subscription_kind, - "Subscribing to subnet" - ); - self.queue_event(SubnetServiceMessage::Subscribe(Subnet::Attestation( - subnet_id, - ))); - } - } - } - - Ok(()) - } - - // Unsubscribes from a subnet that was removed if it does not continue to exist as a - // subscription of the other kind. For long lived subscriptions, it also removes the - // advertisement from our ENR. - fn handle_removed_subnet(&mut self, subnet_id: SubnetId, subscription_kind: SubscriptionKind) { - let exists_in_other_subscriptions = match subscription_kind { - SubscriptionKind::LongLived => self.short_lived_subscriptions.contains_key(&subnet_id), - SubscriptionKind::ShortLived => self.long_lived_subscriptions.contains(&subnet_id), - }; - - if !exists_in_other_subscriptions { - // Subscription no longer exists as short lived or long lived. - debug!( - subnet = ?subnet_id, - ?subscription_kind, - "Unsubscribing from subnet" - ); - self.queue_event(SubnetServiceMessage::Unsubscribe(Subnet::Attestation( - subnet_id, - ))); - } - - if subscription_kind == SubscriptionKind::LongLived { - // Remove from our ENR even if we remain subscribed in other way. - self.queue_event(SubnetServiceMessage::EnrRemove(Subnet::Attestation( - subnet_id, - ))); - } - } -} - -impl Stream for AttestationService { - type Item = SubnetServiceMessage; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - // Update the waker if needed. - if let Some(waker) = &self.waker { - if !waker.will_wake(cx.waker()) { - self.waker = Some(cx.waker().clone()); - } - } else { - self.waker = Some(cx.waker().clone()); - } - - // Send out any generated events. 
- if let Some(event) = self.events.pop_front() { - return Poll::Ready(Some(event)); - } - - // If we aren't subscribed to all subnets, handle the deterministic long-lived subnets - if !self.subscribe_all_subnets { - match self.next_long_lived_subscription_event.as_mut().poll(cx) { - Poll::Ready(_) => { - self.recompute_long_lived_subnets(); - // We re-wake the task as there could be other subscriptions to process - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Pending => {} - } - } - - // Process scheduled subscriptions that might be ready, since those can extend a soon to - // expire subscription. - match self.scheduled_short_lived_subscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok(ExactSubnet { subnet_id, slot }))) => { - if let Err(e) = - self.subscribe_to_short_lived_subnet_immediately(subnet_id, slot + 1) - { - debug!(subnet = ?subnet_id, err = e,"Failed to subscribe to short lived subnet"); - } - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Ready(Some(Err(e))) => { - error!( - error = e, - "Failed to check for scheduled subnet subscriptions" - ); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // Finally process any expired subscriptions. - match self.short_lived_subscriptions.poll_next_unpin(cx) { - Poll::Ready(Some(Ok((subnet_id, _end_slot)))) => { - self.handle_removed_subnet(subnet_id, SubscriptionKind::ShortLived); - // We re-wake the task as there could be other subscriptions to process - self.waker - .as_ref() - .expect("Waker has been set") - .wake_by_ref(); - } - Poll::Ready(Some(Err(e))) => { - error!(error = e, "Failed to check for subnet unsubscription times"); - } - Poll::Ready(None) | Poll::Pending => {} - } - - // Poll to remove entries on expiration, no need to act on expiration events. - if let Some(tracked_vals) = self.aggregate_validators_on_subnet.as_mut() { - if let Poll::Ready(Some(Err(e))) = tracked_vals.poll_next_unpin(cx) { - error!( - error = e, - "Failed to check for aggregate validator on subnet expirations" - ); - } - } - - Poll::Pending - } -} diff --git a/beacon_node/network/src/subnet_service/sync_subnets.rs b/beacon_node/network/src/subnet_service/sync_subnets.rs deleted file mode 100644 index 6b3834e1958..00000000000 --- a/beacon_node/network/src/subnet_service/sync_subnets.rs +++ /dev/null @@ -1,345 +0,0 @@ -//! This service keeps track of which sync committee subnet the beacon node should be subscribed to at any -//! given time. It schedules subscriptions to sync committee subnets and requests peer discoveries. - -use std::collections::{hash_map::Entry, HashMap, VecDeque}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; - -use futures::prelude::*; -use tracing::{debug, error, trace, warn}; - -use super::SubnetServiceMessage; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use delay_map::HashSetDelay; -use lighthouse_network::{NetworkConfig, Subnet, SubnetDiscovery}; -use slot_clock::SlotClock; -use types::{Epoch, EthSpec, SyncCommitteeSubscription, SyncSubnetId}; - -use crate::metrics; - -/// The minimum number of slots ahead that we attempt to discover peers for a subscription. If the -/// slot is less than this number, skip the peer discovery process. -/// Subnet discovery query takes at most 30 secs, 2 slots take 24s. -const MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD: u64 = 2; - -/// A particular subnet at a given slot. 
-#[derive(PartialEq, Eq, Hash, Clone, Debug)] -pub struct ExactSubnet { - /// The `SyncSubnetId` associated with this subnet. - pub subnet_id: SyncSubnetId, - /// The epoch until which we need to stay subscribed to the subnet. - pub until_epoch: Epoch, -} -pub struct SyncCommitteeService { - /// Queued events to return to the driving service. - events: VecDeque, - - /// A reference to the beacon chain to process received attestations. - pub(crate) beacon_chain: Arc>, - - /// The collection of all currently subscribed subnets. - subscriptions: HashMap, - - /// A collection of timeouts for when to unsubscribe from a subnet. - unsubscriptions: HashSetDelay, - - /// The waker for the current thread. - waker: Option, - - /// The discovery mechanism of lighthouse is disabled. - discovery_disabled: bool, - - /// We are always subscribed to all subnets. - subscribe_all_subnets: bool, - - /// Whether this node is a block proposer-only node. - proposer_only: bool, -} - -impl SyncCommitteeService { - /* Public functions */ - - pub fn new(beacon_chain: Arc>, config: &NetworkConfig) -> Self { - let spec = &beacon_chain.spec; - let epoch_duration_secs = - beacon_chain.slot_clock.slot_duration().as_secs() * T::EthSpec::slots_per_epoch(); - let default_timeout = - epoch_duration_secs.saturating_mul(spec.epochs_per_sync_committee_period.as_u64()); - - SyncCommitteeService { - events: VecDeque::with_capacity(10), - beacon_chain, - subscriptions: HashMap::new(), - unsubscriptions: HashSetDelay::new(Duration::from_secs(default_timeout)), - waker: None, - subscribe_all_subnets: config.subscribe_all_subnets, - discovery_disabled: config.disable_discovery, - proposer_only: config.proposer_only, - } - } - - /// Return count of all currently subscribed subnets. - #[cfg(test)] - pub fn subscription_count(&self) -> usize { - use types::consts::altair::SYNC_COMMITTEE_SUBNET_COUNT; - if self.subscribe_all_subnets { - SYNC_COMMITTEE_SUBNET_COUNT as usize - } else { - self.subscriptions.len() - } - } - - /// Processes a list of sync committee subscriptions. - /// - /// This will: - /// - Search for peers for required subnets. - /// - Request subscriptions required subnets. - /// - Build the timeouts for each of these events. - /// - /// This returns a result simply for the ergonomics of using ?. The result can be - /// safely dropped. - pub fn validator_subscriptions( - &mut self, - subscriptions: Vec, - ) -> Result<(), String> { - // A proposer-only node does not subscribe to any sync-committees - if self.proposer_only { - return Ok(()); - } - - let mut subnets_to_discover = Vec::new(); - for subscription in subscriptions { - metrics::inc_counter(&metrics::SYNC_COMMITTEE_SUBSCRIPTION_REQUESTS); - //NOTE: We assume all subscriptions have been verified before reaching this service - - // Registers the validator with the subnet service. - // This will subscribe to long-lived random subnets if required. 
- trace!(?subscription, "Sync committee subscription"); - - let subnet_ids = match SyncSubnetId::compute_subnets_for_sync_committee::( - &subscription.sync_committee_indices, - ) { - Ok(subnet_ids) => subnet_ids, - Err(e) => { - warn!( - error = ?e, - validator_index = subscription.validator_index, - "Failed to compute subnet id for sync committee subscription" - ); - continue; - } - }; - - for subnet_id in subnet_ids { - let exact_subnet = ExactSubnet { - subnet_id, - until_epoch: subscription.until_epoch, - }; - subnets_to_discover.push(exact_subnet.clone()); - if let Err(e) = self.subscribe_to_subnet(exact_subnet.clone()) { - warn!( - error = e, - validator_index = subscription.validator_index, - "Subscription to sync subnet error" - ); - } else { - trace!( - ?exact_subnet, - validator_index = subscription.validator_index, - "Subscribed to subnet for sync committee duties" - ); - } - } - } - // If the discovery mechanism isn't disabled, attempt to set up a peer discovery for the - // required subnets. - if !self.discovery_disabled { - if let Err(e) = self.discover_peers_request(subnets_to_discover.iter()) { - warn!(error = e, "Discovery lookup request error"); - }; - } - - // pre-emptively wake the thread to check for new events - if let Some(waker) = &self.waker { - waker.wake_by_ref(); - } - Ok(()) - } - - /* Internal private functions */ - - /// Checks if there are currently queued discovery requests and the time required to make the - /// request. - /// - /// If there is sufficient time, queues a peer discovery request for all the required subnets. - fn discover_peers_request<'a>( - &mut self, - exact_subnets: impl Iterator, - ) -> Result<(), &'static str> { - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let slots_per_epoch = T::EthSpec::slots_per_epoch(); - - let discovery_subnets: Vec = exact_subnets - .filter_map(|exact_subnet| { - let until_slot = exact_subnet.until_epoch.end_slot(slots_per_epoch); - // check if there is enough time to perform a discovery lookup - if until_slot >= current_slot.saturating_add(MIN_PEER_DISCOVERY_SLOT_LOOK_AHEAD) { - // if the slot is more than epoch away, add an event to start looking for peers - // add one slot to ensure we keep the peer for the subscription slot - let min_ttl = self - .beacon_chain - .slot_clock - .duration_to_slot(until_slot + 1) - .map(|duration| std::time::Instant::now() + duration); - Some(SubnetDiscovery { - subnet: Subnet::SyncCommittee(exact_subnet.subnet_id), - min_ttl, - }) - } else { - // We may want to check the global PeerInfo to see estimated timeouts for each - // peer before they can be removed. - warn!( - subnet_id = ?exact_subnet, - "Not enough time for a discovery search" - ); - None - } - }) - .collect(); - - if !discovery_subnets.is_empty() { - self.events - .push_back(SubnetServiceMessage::DiscoverPeers(discovery_subnets)); - } - Ok(()) - } - - /// Adds a subscription event and an associated unsubscription event if required. - fn subscribe_to_subnet(&mut self, exact_subnet: ExactSubnet) -> Result<(), &'static str> { - // Return if we have subscribed to all subnets - if self.subscribe_all_subnets { - return Ok(()); - } - - // Return if we already have a subscription for exact_subnet - if self.subscriptions.get(&exact_subnet.subnet_id) == Some(&exact_subnet.until_epoch) { - return Ok(()); - } - - // Return if we already have subscription set to expire later than the current request. 
- if let Some(until_epoch) = self.subscriptions.get(&exact_subnet.subnet_id) { - if *until_epoch >= exact_subnet.until_epoch { - return Ok(()); - } - } - - // initialise timing variables - let current_slot = self - .beacon_chain - .slot_clock - .now() - .ok_or("Could not get the current slot")?; - - let slots_per_epoch = T::EthSpec::slots_per_epoch(); - let until_slot = exact_subnet.until_epoch.end_slot(slots_per_epoch); - // Calculate the duration to the unsubscription event. - let expected_end_subscription_duration = if current_slot >= until_slot { - warn!( - %current_slot, - ?exact_subnet, - "Sync committee subscription is past expiration" - ); - return Ok(()); - } else { - let slot_duration = self.beacon_chain.slot_clock.slot_duration(); - - // the duration until we no longer need this subscription. We assume a single slot is - // sufficient. - self.beacon_chain - .slot_clock - .duration_to_slot(until_slot) - .ok_or("Unable to determine duration to unsubscription slot")? - + slot_duration - }; - - if let Entry::Vacant(e) = self.subscriptions.entry(exact_subnet.subnet_id) { - // We are not currently subscribed and have no waiting subscription, create one - debug!(subnet = *exact_subnet.subnet_id, until_epoch = ?exact_subnet.until_epoch, "Subscribing to subnet"); - e.insert(exact_subnet.until_epoch); - self.events - .push_back(SubnetServiceMessage::Subscribe(Subnet::SyncCommittee( - exact_subnet.subnet_id, - ))); - - // add the subnet to the ENR bitfield - self.events - .push_back(SubnetServiceMessage::EnrAdd(Subnet::SyncCommittee( - exact_subnet.subnet_id, - ))); - - // add an unsubscription event to remove ourselves from the subnet once completed - self.unsubscriptions - .insert_at(exact_subnet.subnet_id, expected_end_subscription_duration); - } else { - // We are already subscribed, extend the unsubscription duration - self.unsubscriptions - .update_timeout(&exact_subnet.subnet_id, expected_end_subscription_duration); - } - - Ok(()) - } - - /// A queued unsubscription is ready. 
-    fn handle_unsubscriptions(&mut self, subnet_id: SyncSubnetId) {
-        debug!(subnet = *subnet_id, "Unsubscribing from subnet");
-
-        self.subscriptions.remove(&subnet_id);
-        self.events
-            .push_back(SubnetServiceMessage::Unsubscribe(Subnet::SyncCommittee(
-                subnet_id,
-            )));
-
-        self.events
-            .push_back(SubnetServiceMessage::EnrRemove(Subnet::SyncCommittee(
-                subnet_id,
-            )));
-    }
-}
-
-impl<T: BeaconChainTypes> Stream for SyncCommitteeService<T> {
-    type Item = SubnetServiceMessage;
-
-    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        // update the waker if needed
-        if let Some(waker) = &self.waker {
-            if !waker.will_wake(cx.waker()) {
-                self.waker = Some(cx.waker().clone());
-            }
-        } else {
-            self.waker = Some(cx.waker().clone());
-        }
-
-        // process any un-subscription events
-        match self.unsubscriptions.poll_next_unpin(cx) {
-            Poll::Ready(Some(Ok(exact_subnet))) => self.handle_unsubscriptions(exact_subnet),
-            Poll::Ready(Some(Err(e))) => {
-                error!(error = e, "Failed to check for subnet unsubscription times");
-            }
-            Poll::Ready(None) | Poll::Pending => {}
-        }
-
-        // process any generated events
-        if let Some(event) = self.events.pop_front() {
-            return Poll::Ready(Some(event));
-        }
-
-        Poll::Pending
-    }
-}

From 3f8998f11fecc864882e98142b84b67be74e1572 Mon Sep 17 00:00:00 2001
From: Lion - dapplion <35266934+dapplion@users.noreply.github.com>
Date: Thu, 25 Sep 2025 05:52:27 +0200
Subject: [PATCH 45/49] Only mark block lookups as pending if block is
 importing from gossip (#8112)

- PR https://github.com/sigp/lighthouse/pull/8045 introduced a regression in
  how lookup sync interacts with the da_checker. Now on `unstable`, block
  import from the HTTP API also inserts the block into the da_checker while
  the block is being execution verified. If lookup sync finds the block in the
  da_checker in `NotValidated` state it expects a `GossipBlockProcessResult`
  message sometime later. That message is only sent after block import via
  gossip. I confirmed in our node's logs that 4/4 cases of stuck lookups were
  caused by this sequence of events:
  - Receive block through the API, insert into da_checker in fn process_block
    via put_pre_execution_block
  - Create lookup and leave it in AwaitingDownload(block in processing cache)
    state
  - Block from the HTTP API finishes importing
  - Lookup is left stuck

Closes https://github.com/sigp/lighthouse/issues/8104

- https://github.com/sigp/lighthouse/pull/8110 was my initial solution
  attempt, but we can't send the `GossipBlockProcessResult` event from the
  `http_api` crate without adding new channels, which seems messy. For a given
  node it's rare that a lookup is created at the same time that a block is
  being published. This PR solves https://github.com/sigp/lighthouse/issues/8104
  by allowing lookup sync to import the block twice in that case.
Co-Authored-By: dapplion <35266934+dapplion@users.noreply.github.com> --- .../beacon_chain/src/beacon_block_streamer.rs | 2 +- beacon_node/beacon_chain/src/beacon_chain.rs | 9 +++-- .../src/data_availability_checker.rs | 7 ++-- .../overflow_lru_cache.rs | 37 +++++++++++++------ .../sync/block_lookups/single_block_lookup.rs | 2 +- .../network/src/sync/network_context.rs | 34 +++++++++++------ beacon_node/network/src/sync/tests/lookups.rs | 6 +-- consensus/types/src/beacon_block.rs | 1 + 8 files changed, 64 insertions(+), 34 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_block_streamer.rs b/beacon_node/beacon_chain/src/beacon_block_streamer.rs index d4ce38927b2..c816a0b29f3 100644 --- a/beacon_node/beacon_chain/src/beacon_block_streamer.rs +++ b/beacon_node/beacon_chain/src/beacon_block_streamer.rs @@ -404,7 +404,7 @@ impl BeaconBlockStreamer { if self.check_caches == CheckCaches::Yes { match self.beacon_chain.get_block_process_status(&root) { BlockProcessStatus::Unknown => None, - BlockProcessStatus::NotValidated(block) + BlockProcessStatus::NotValidated(block, _) | BlockProcessStatus::ExecutionValidated(block) => { metrics::inc_counter(&metrics::BEACON_REQRESP_PRE_IMPORT_CACHE_HITS); Some(block) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 4f0c6aada0a..08e0d1c6745 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -334,7 +334,7 @@ pub enum BlockProcessStatus { /// Block is not in any pre-import cache. Block may be in the data-base or in the fork-choice. Unknown, /// Block is currently processing but not yet validated. - NotValidated(Arc>), + NotValidated(Arc>, BlockImportSource), /// Block is fully valid, but not yet imported. It's cached in the da_checker while awaiting /// missing block components. ExecutionValidated(Arc>), @@ -3351,8 +3351,11 @@ impl BeaconChain { ); } - self.data_availability_checker - .put_pre_execution_block(block_root, unverified_block.block_cloned())?; + self.data_availability_checker.put_pre_execution_block( + block_root, + unverified_block.block_cloned(), + block_source, + )?; // Start the Prometheus timer. let _full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index a0ad1c2112d..43b7d8f7ea3 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -21,8 +21,8 @@ use task_executor::TaskExecutor; use tracing::{debug, error, instrument}; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ - BlobSidecarList, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, Hash256, - SignedBeaconBlock, Slot, + BlobSidecarList, BlockImportSource, ChainSpec, DataColumnSidecar, DataColumnSidecarList, Epoch, + EthSpec, Hash256, SignedBeaconBlock, Slot, }; mod error; @@ -354,9 +354,10 @@ impl DataAvailabilityChecker { &self, block_root: Hash256, block: Arc>, + source: BlockImportSource, ) -> Result<(), Error> { self.availability_cache - .put_pre_execution_block(block_root, block) + .put_pre_execution_block(block_root, block, source) } /// Removes a pre-execution block from the cache. 
diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index bb440096627..42f6dbd8567 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -19,13 +19,14 @@ use tracing::{Span, debug, debug_span}; use types::beacon_block_body::KzgCommitments; use types::blob_sidecar::BlobIdentifier; use types::{ - BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, Epoch, EthSpec, - Hash256, RuntimeFixedVector, RuntimeVariableList, SignedBeaconBlock, + BlobSidecar, BlockImportSource, ChainSpec, ColumnIndex, DataColumnSidecar, + DataColumnSidecarList, Epoch, EthSpec, Hash256, RuntimeFixedVector, RuntimeVariableList, + SignedBeaconBlock, }; #[derive(Clone)] pub enum CachedBlock { - PreExecution(Arc>), + PreExecution(Arc>, BlockImportSource), Executed(Box>), } @@ -42,7 +43,7 @@ impl CachedBlock { fn as_block(&self) -> &SignedBeaconBlock { match self { - CachedBlock::PreExecution(b) => b, + CachedBlock::PreExecution(b, _) => b, CachedBlock::Executed(b) => b.as_block(), } } @@ -135,9 +136,13 @@ impl PendingComponents { /// Inserts a pre-execution block into the cache. /// This does NOT override an existing executed block. - pub fn insert_pre_execution_block(&mut self, block: Arc>) { + pub fn insert_pre_execution_block( + &mut self, + block: Arc>, + source: BlockImportSource, + ) { if self.block.is_none() { - self.block = Some(CachedBlock::PreExecution(block)) + self.block = Some(CachedBlock::PreExecution(block, source)) } } @@ -433,7 +438,9 @@ impl DataAvailabilityCheckerInner { .peek(block_root) .and_then(|pending_components| { pending_components.block.as_ref().map(|block| match block { - CachedBlock::PreExecution(b) => BlockProcessStatus::NotValidated(b.clone()), + CachedBlock::PreExecution(b, source) => { + BlockProcessStatus::NotValidated(b.clone(), *source) + } CachedBlock::Executed(b) => { BlockProcessStatus::ExecutionValidated(b.block_cloned()) } @@ -693,11 +700,12 @@ impl DataAvailabilityCheckerInner { &self, block_root: Hash256, block: Arc>, + source: BlockImportSource, ) -> Result<(), AvailabilityCheckError> { let epoch = block.epoch(); let pending_components = self.update_or_insert_pending_components(block_root, epoch, |pending_components| { - pending_components.insert_pre_execution_block(block); + pending_components.insert_pre_execution_block(block, source); Ok(()) })?; @@ -718,7 +726,7 @@ impl DataAvailabilityCheckerInner { /// This does NOT remove an existing executed block. pub fn remove_pre_execution_block(&self, block_root: &Hash256) { // The read lock is immediately dropped so we can safely remove the block from the cache. 
- if let Some(BlockProcessStatus::NotValidated(_)) = self.get_cached_block(block_root) { + if let Some(BlockProcessStatus::NotValidated(_, _)) = self.get_cached_block(block_root) { self.critical.write().pop(block_root); } } @@ -1459,9 +1467,13 @@ mod pending_components_tests { let mut pending_component = >::empty(block_root, max_len); let pre_execution_block = Arc::new(pre_execution_block); - pending_component.insert_pre_execution_block(pre_execution_block.clone()); + pending_component + .insert_pre_execution_block(pre_execution_block.clone(), BlockImportSource::Gossip); assert!( - matches!(pending_component.block, Some(CachedBlock::PreExecution(_))), + matches!( + pending_component.block, + Some(CachedBlock::PreExecution(_, _)) + ), "pre execution block inserted" ); @@ -1471,7 +1483,8 @@ mod pending_components_tests { "executed block inserted" ); - pending_component.insert_pre_execution_block(pre_execution_block); + pending_component + .insert_pre_execution_block(pre_execution_block, BlockImportSource::Gossip); assert!( matches!(pending_component.block, Some(CachedBlock::Executed(_))), "executed block should remain" diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 36509d2563e..8fb3248a871 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -219,7 +219,7 @@ impl SingleBlockLookup { // can assert that this is the correct value of `blob_kzg_commitments_count`. match cx.chain.get_block_process_status(&self.block_root) { BlockProcessStatus::Unknown => None, - BlockProcessStatus::NotValidated(block) + BlockProcessStatus::NotValidated(block, _) | BlockProcessStatus::ExecutionValidated(block) => Some(block.clone()), } }) { diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 1aa3813284b..20b927724ec 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -52,8 +52,8 @@ use tokio::sync::mpsc; use tracing::{Span, debug, debug_span, error, warn}; use types::blob_sidecar::FixedBlobSidecarList; use types::{ - BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, DataColumnSubnetId, - EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot, + BlobSidecar, BlockImportSource, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, + DataColumnSubnetId, Epoch, EthSpec, ForkContext, Hash256, SignedBeaconBlock, Slot, }; pub mod custody; @@ -975,14 +975,26 @@ impl SyncNetworkContext { match self.chain.get_block_process_status(&block_root) { // Unknown block, continue request to download BlockProcessStatus::Unknown => {} - // Block is known are currently processing, expect a future event with the result of - // processing. - BlockProcessStatus::NotValidated { .. } => { - // Lookup sync event safety: If the block is currently in the processing cache, we - // are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will - // make progress on this lookup - return Ok(LookupRequestResult::Pending("block in processing cache")); - } + // Block is known and currently processing. Imports from gossip and HTTP API insert the + // block in the da_cache. However, HTTP API is unable to notify sync when it completes + // block import. Returning `Pending` here will result in stuck lookups if the block is + // importing from sync. 
+ BlockProcessStatus::NotValidated(_, source) => match source { + BlockImportSource::Gossip => { + // Lookup sync event safety: If the block is currently in the processing cache, we + // are guaranteed to receive a `SyncMessage::GossipBlockProcessResult` that will + // make progress on this lookup + return Ok(LookupRequestResult::Pending("block in processing cache")); + } + BlockImportSource::Lookup + | BlockImportSource::RangeSync + | BlockImportSource::HttpApi => { + // Lookup, RangeSync or HttpApi block import don't emit the GossipBlockProcessResult + // event. If a lookup happens to be created during block import from one of + // those sources just import the block twice. Otherwise the lookup will get + // stuck. Double imports are fine, they just waste resources. + } + }, // Block is fully validated. If it's not yet imported it's waiting for missing block // components. Consider this request completed and do nothing. BlockProcessStatus::ExecutionValidated { .. } => { @@ -1478,7 +1490,7 @@ impl SyncNetworkContext { /// blocks and blobs. pub fn batch_type( &self, - epoch: types::Epoch, + epoch: Epoch, request_type: RangeRequestType, ) -> ByRangeRequestType { // Induces a compile time panic if this doesn't hold true. diff --git a/beacon_node/network/src/sync/tests/lookups.rs b/beacon_node/network/src/sync/tests/lookups.rs index 27968a06351..fc641861754 100644 --- a/beacon_node/network/src/sync/tests/lookups.rs +++ b/beacon_node/network/src/sync/tests/lookups.rs @@ -41,8 +41,8 @@ use slot_clock::{SlotClock, TestingSlotClock}; use tokio::sync::mpsc; use tracing::info; use types::{ - BeaconState, BeaconStateBase, BlobSidecar, DataColumnSidecar, EthSpec, ForkContext, ForkName, - Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot, + BeaconState, BeaconStateBase, BlobSidecar, BlockImportSource, DataColumnSidecar, EthSpec, + ForkContext, ForkName, Hash256, MinimalEthSpec as E, SignedBeaconBlock, Slot, data_column_sidecar::ColumnIndex, test_utils::{SeedableRng, TestRandom, XorShiftRng}, }; @@ -1113,7 +1113,7 @@ impl TestRig { self.harness .chain .data_availability_checker - .put_pre_execution_block(block.canonical_root(), block) + .put_pre_execution_block(block.canonical_root(), block, BlockImportSource::Gossip) .unwrap(); } diff --git a/consensus/types/src/beacon_block.rs b/consensus/types/src/beacon_block.rs index f4e4e369661..61c32dd4ac9 100644 --- a/consensus/types/src/beacon_block.rs +++ b/consensus/types/src/beacon_block.rs @@ -843,6 +843,7 @@ impl<'de, E: EthSpec, Payload: AbstractExecPayload> ContextDeserialize<'de, F } } +#[derive(Clone, Copy)] pub enum BlockImportSource { Gossip, Lookup, From 421e954c291fa78e6713776d1f1ef948231005dd Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 25 Sep 2025 16:05:45 -0700 Subject: [PATCH 46/49] Revert "Revert type change in UnexpectedRequestId" This reverts commit 6ea14016f3d164456bc4c3cae0355ab532fe1a86. 
--- .../network/src/sync/network_context/custody.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/beacon_node/network/src/sync/network_context/custody.rs b/beacon_node/network/src/sync/network_context/custody.rs index eb34aae56c9..147948a20ee 100644 --- a/beacon_node/network/src/sync/network_context/custody.rs +++ b/beacon_node/network/src/sync/network_context/custody.rs @@ -5,7 +5,7 @@ use beacon_chain::BeaconChainTypes; use beacon_chain::validator_monitor::timestamp_now; use fnv::FnvHashMap; use lighthouse_network::PeerId; -use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester}; +use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester, Id}; use lighthouse_tracing::SPAN_OUTGOING_CUSTODY_REQUEST; use parking_lot::RwLock; use std::collections::HashSet; @@ -46,8 +46,8 @@ pub enum Error { /// There should only exist a single request at a time. Having multiple requests is a bug and /// can result in undefined state, so it's treated as a hard error and the lookup is dropped. UnexpectedRequestId { - expected_req_id: DataColumnsByRootRequestId, - req_id: DataColumnsByRootRequestId, + expected_req_id: Id, + req_id: Id, }, } @@ -424,8 +424,8 @@ impl ColumnRequest { Status::Downloading(expected_req_id) => { if req_id != *expected_req_id { return Err(Error::UnexpectedRequestId { - expected_req_id: *expected_req_id, - req_id, + expected_req_id: expected_req_id.id, + req_id: req_id.id, }); } self.status = Status::NotStarted(Instant::now()); @@ -457,8 +457,8 @@ impl ColumnRequest { Status::Downloading(expected_req_id) => { if req_id != *expected_req_id { return Err(Error::UnexpectedRequestId { - expected_req_id: *expected_req_id, - req_id, + expected_req_id: expected_req_id.id, + req_id: req_id.id, }); } self.status = Status::Downloaded(peer_id, data_column, seen_timestamp); From 826a06eb632cea424853facef5edb0151a6ef4e5 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 25 Sep 2025 16:21:55 -0700 Subject: [PATCH 47/49] Fix variant name --- beacon_node/network/src/sync/block_sidecar_coupling.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index fd221efc99d..9caf84be20d 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -53,7 +53,7 @@ enum RangeBlockDataRequest { NoData, Blobs(ByRangeRequest>>>), /// These are data columns fetched by a range request. 
- DataColumns { + DataColumnsFromRange { requests: HashMap< DataColumnsByRangeRequestId, ByRangeRequest>, From 5c562c6543353aea0e4c71cf496cc3f362d7d47e Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Mon, 6 Oct 2025 11:17:57 -0700 Subject: [PATCH 48/49] Fix some more issues --- .../network/src/sync/backfill_sync/mod.rs | 9 ---- .../src/sync/block_sidecar_coupling.rs | 6 ++- .../network/src/sync/range_sync/chain.rs | 43 ++++++++++--------- 3 files changed, 26 insertions(+), 32 deletions(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index c4bd55ff8e1..f92c666832b 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -558,19 +558,10 @@ impl BackFillSync { } }; - let Some(batch_peers) = batch.processing_peers() else { - self.fail_sync(BackFillError::BatchInvalidState( - batch_id, - String::from("Peer does not exist"), - ))?; - return Ok(ProcessResult::Successful); - }; - debug!( ?result, %batch, batch_epoch = %batch_id, - ?batch_peers, // client = %network.client_type(peer), "Backfill batch processed" ); diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 9caf84be20d..2981b08be79 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -594,7 +594,8 @@ impl RangeBlockComponentsRequest { return Err(CouplingError::DataColumnPeerFailure { error: format!("No columns for block {block_root:?} with data"), faulty_peers: responsible_peers, - action: PeerAction::LowToleranceError, + // The block peer might be malcicious so don't downscore the column peer too bad + action: PeerAction::MidToleranceError, exceeded_retries, }); @@ -619,7 +620,8 @@ impl RangeBlockComponentsRequest { return Err(CouplingError::DataColumnPeerFailure { error: format!("Peers did not return column for block_root {block_root:?} {naughty_peers:?}"), faulty_peers: naughty_peers, - action: PeerAction::LowToleranceError, + // The block peer might be malcicious so don't downscore the column peer too bad + action: PeerAction::MidToleranceError, exceeded_retries }); } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 8013a38ff62..08413120559 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -545,32 +545,33 @@ impl SyncingChain { penalty, faulty_component, } => { - let Some(batch_peers) = batch.processing_peers() else { + if let Some(batch_peers) = batch.processing_peers() { + // Penalize the peer appropriately. + match faulty_component { + Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { + network.report_peer( + batch_peers.block_and_blob, + *penalty, + "faulty_batch", + ); + } + Some(FaultyComponent::Columns(faulty_columns)) => { + for (peer, columns) in batch_peers.data_columns.iter() { + for faulty_column in faulty_columns { + if columns.contains(faulty_column) { + network.report_peer(*peer, *penalty, "faulty_batch"); + } + } + } + } + None => {} + } + } else { warn!( current_state = ?batch.state(), "Inconsistent state, batch must have been in processing state" ); - return Err(RemoveChain::ChainFailed { - blacklist: false, - failing_batch: batch_id, - }); }; - // Penalize the peer appropriately. 
- match faulty_component { - Some(FaultyComponent::Blocks) | Some(FaultyComponent::Blobs) => { - network.report_peer(batch_peers.block_and_blob, *penalty, "faulty_batch"); - } - Some(FaultyComponent::Columns(faulty_columns)) => { - for (peer, columns) in batch_peers.data_columns.iter() { - for faulty_column in faulty_columns { - if columns.contains(faulty_column) { - network.report_peer(*peer, *penalty, "faulty_batch"); - } - } - } - } - None => {} - } // Check if this batch is allowed to continue match batch.processing_completed(BatchProcessingResult::FaultyFailure)? { From 9b2de095c5b825a77953b007e2f5f58cd5ecbff8 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Mon, 6 Oct 2025 17:22:44 -0700 Subject: [PATCH 49/49] Rethink peer scoring --- .../src/sync/block_sidecar_coupling.rs | 4 +-- .../network/src/sync/network_context.rs | 36 ++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index 2981b08be79..40bd9717a3c 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -595,7 +595,7 @@ impl RangeBlockComponentsRequest { error: format!("No columns for block {block_root:?} with data"), faulty_peers: responsible_peers, // The block peer might be malcicious so don't downscore the column peer too bad - action: PeerAction::MidToleranceError, + action: PeerAction::HighToleranceError, exceeded_retries, }); @@ -621,7 +621,7 @@ impl RangeBlockComponentsRequest { error: format!("Peers did not return column for block_root {block_root:?} {naughty_peers:?}"), faulty_peers: naughty_peers, // The block peer might be malcicious so don't downscore the column peer too bad - action: PeerAction::MidToleranceError, + action: PeerAction::HighToleranceError, exceeded_retries }); } diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index b02dd2f850b..7870a0b4904 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -1854,7 +1854,41 @@ impl SyncNetworkContext { } if let Some(Err(RpcResponseError::VerifyError(e))) = &resp { - self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); + warn!(?e, "Verification error on rpc response"); + match e { + LookupVerifyError::NotEnoughResponsesReturned { .. } => { + // This is a special case because in the case of a columns by root requests, there are 3 cases + // 1. the columns peer is honest and doesn't have the columns that we requested from it + // because its on a different chain. + // 2. the columns peer is honest but the block peer maliciously fed us bogus blocks for which + // there are no corresponding columns. + // 3. The column peer is buggy but non-malicious + // + // There is no way to differentiate between these 3 cases until we can verify the block + // before requesting the columns. + // Hence, we currently do not downscore them with a `LowToleranceError`. + // + // However, since majority of these errors are of type 3 currently, we downscore these errors with a + // HighTolerance error to avoid getting stuck in sync with buggy peers. 
+ if method.contains("DataColumns") { + self.report_peer(peer_id, PeerAction::HighToleranceError, e.into()) + } else { + self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()) + } + } + LookupVerifyError::UnrequestedSlot(_) + | LookupVerifyError::DuplicatedData(_, _) + | LookupVerifyError::TooManyResponses + | LookupVerifyError::UnrequestedBlockRoot(_) + | LookupVerifyError::UnrequestedIndex(_) => { + // Recoverable errors, don't downscore heavily + self.report_peer(peer_id, PeerAction::HighToleranceError, e.into()) + } + LookupVerifyError::InternalError(_) => {} // do not downscore peer for internal errors + LookupVerifyError::InvalidInclusionProof => { + self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()) + } + } } resp }
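
A standalone sketch of the rule PATCH 45 introduces: a lookup may only be parked as
"pending" when the cached block was imported via gossip, because only the gossip
import path notifies sync when processing completes. The enum, function and
assertions below are simplified, hypothetical stand-ins for illustration; they are
not the Lighthouse types or API.

// Illustrative only: minimal stand-ins, not the lighthouse_network/beacon_chain types.
#[derive(Clone, Copy, Debug, PartialEq)]
enum BlockImportSource {
    Gossip,
    Lookup,
    RangeSync,
    HttpApi,
}

#[derive(Debug, PartialEq)]
enum LookupRequestResult {
    // Park the lookup and wait for a later "block processed" notification.
    Pending(&'static str),
    // Download (and possibly re-import) the block ourselves.
    RequestBlock,
}

// Only gossip import emits a completion event back to sync, so only a
// gossip-sourced cached block may leave the lookup pending; any other source
// must fall through to a (possibly duplicate) download and import.
fn lookup_action_for_cached_block(source: BlockImportSource) -> LookupRequestResult {
    match source {
        BlockImportSource::Gossip => LookupRequestResult::Pending("block in processing cache"),
        BlockImportSource::Lookup | BlockImportSource::RangeSync | BlockImportSource::HttpApi => {
            LookupRequestResult::RequestBlock
        }
    }
}

fn main() {
    // A block published over the HTTP API must not park the lookup, otherwise
    // it waits forever for an event that is never emitted.
    assert_eq!(
        lookup_action_for_cached_block(BlockImportSource::HttpApi),
        LookupRequestResult::RequestBlock
    );
    assert_eq!(
        lookup_action_for_cached_block(BlockImportSource::Gossip),
        LookupRequestResult::Pending("block in processing cache")
    );
    println!("lookup is parked only for gossip-imported blocks");
}

The duplicate import in the non-gossip case wastes some work, but per the PATCH 45
message it is the cheaper trade-off compared to threading a new notification channel
out of the `http_api` crate.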
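
A standalone sketch of the scoring tiers PATCH 49 describes: a missing-response
failure on a DataColumns request cannot yet distinguish a buggy-but-honest column
peer (or one on a different chain) from a malicious block peer, so it is penalised
gently; recoverable protocol slips get a high-tolerance penalty; internal errors are
never penalised; and provably invalid data such as a bad inclusion proof gets the
low-tolerance penalty. The `VerifyError` and `PeerAction` enums below are
simplified, hypothetical stand-ins rather than the `lighthouse_network` definitions.

// Illustrative only: reduced set of error variants and actions.
#[derive(Debug)]
enum VerifyError {
    NotEnoughResponsesReturned,
    UnrequestedSlot,
    DuplicatedData,
    TooManyResponses,
    InternalError,
    InvalidInclusionProof,
}

#[derive(Debug, PartialEq)]
enum PeerAction {
    // Small penalty that tolerates buggy-but-honest peers.
    HighToleranceError,
    // Heavy penalty reserved for provably bad responses.
    LowToleranceError,
    // Our own fault; never punish the peer.
    NoAction,
}

// Map a verification failure to a penalty tier, mirroring the rationale in the
// patch: stay gentle wherever the blame could lie with a different peer.
fn score(error: &VerifyError, is_data_columns_request: bool) -> PeerAction {
    match error {
        VerifyError::NotEnoughResponsesReturned if is_data_columns_request => {
            PeerAction::HighToleranceError
        }
        VerifyError::NotEnoughResponsesReturned => PeerAction::LowToleranceError,
        VerifyError::UnrequestedSlot
        | VerifyError::DuplicatedData
        | VerifyError::TooManyResponses => PeerAction::HighToleranceError,
        VerifyError::InternalError => PeerAction::NoAction,
        VerifyError::InvalidInclusionProof => PeerAction::LowToleranceError,
    }
}

fn main() {
    assert_eq!(
        score(&VerifyError::NotEnoughResponsesReturned, true),
        PeerAction::HighToleranceError
    );
    assert_eq!(
        score(&VerifyError::InvalidInclusionProof, false),
        PeerAction::LowToleranceError
    );
    println!("scoring tiers match the behaviour described in the patch");
}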