
Commit d71ea30

agnxsh and tersec authored
reconstruction timeout + vcus detection on no column sidecars in quarantine (#7610)
* reconstruction timeout + vcus detection on no column sidecars in quarantine
* reworked some logging
* Update beacon_chain/spec/peerdas_helpers.nim
* Update beacon_chain/spec/peerdas_helpers.nim

Co-authored-by: tersec <[email protected]>
1 parent ebeb7e6 commit d71ea30

3 files changed: 34 additions & 15 deletions

beacon_chain/gossip_processing/block_processor.nim

Lines changed: 0 additions & 2 deletions

@@ -222,13 +222,11 @@ proc verifySidecars(
 
 proc storeSidecars(self: BlockProcessor, sidecarsOpt: Opt[BlobSidecars]) =
   if sidecarsOpt.isSome():
-    debug "Inserting blobs into database", blobs = sidecarsOpt[].len
     for b in sidecarsOpt[]:
       self.consensusManager.dag.db.putBlobSidecar(b[])
 
 proc storeSidecars(self: BlockProcessor, sidecarsOpt: Opt[DataColumnSidecars]) =
   if sidecarsOpt.isSome():
-    debug "Inserting columns into database", columns = sidecarsOpt[].len
     for c in sidecarsOpt[]:
       self.consensusManager.dag.db.putDataColumnSidecar(c[])
 

beacon_chain/nimbus_beacon_node.nim

Lines changed: 4 additions & 4 deletions

@@ -9,7 +9,7 @@
 
 import
   system/ansi_c,
-  std/[os, random, terminal, times],
+  std/[os, random, strutils, terminal, times],
   chronos, chronicles,
   metrics, metrics/chronos_httpserver,
   stew/[byteutils, io2],
@@ -1726,7 +1726,7 @@ proc reconstructDataColumns(node: BeaconNode, slot: Slot) =
       if node.dag.db.getDataColumnSidecar(forkyBlck.root, i, colData):
         columns.add(newClone(colData))
         indices.incl(i)
-  debug "Stored data columns", columns = indices.len
+  debug "PeerDAS: Data columns before reconstruction", columns = indices.len
 
   # Make sure the node has obtained 50%+ of all the columns
   if columns.lenu64 < (maxColCount div 2):
@@ -1742,7 +1742,7 @@ proc reconstructDataColumns(node: BeaconNode, slot: Slot) =
   # Reconstruct columns
   let recovered = recover_cells_and_proofs_parallel(
       node.batchVerifier[].taskpool, columns).valueOr:
-    error "Error in data column reconstruction"
+    error "Data column reconstruction incomplete"
     return
   let rowCount = recovered.len
   var reconCounter = 0
@@ -1975,7 +1975,7 @@ proc onSlotEnd(node: BeaconNode, slot: Slot) {.async.} =
 
   if (not node.config.peerdasSupernode) and
       (slot.epoch() + 1).start_slot() - slot == 1 and
-      node.quarantine.sidecarless.len == 0 and
+      node.dataColumnQuarantine[].len == 0 and
       node.attachedValidatorBalanceTotal > 0.Gwei:
     # Detect new validator custody at the last slot of every epoch
     node.validatorCustody.detectNewValidatorCustody(slot,
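
The condition above gates the validator custody detection (the "vcus detection" in the commit title): it now fires only when the data column quarantine is empty and, as before, only at the last slot of each epoch via `(slot.epoch() + 1).start_slot() - slot == 1`. A minimal sketch of that slot arithmetic follows, using plain uint64 values in place of the spec's Slot/Epoch types and assuming the mainnet SLOTS_PER_EPOCH of 32; the helper names are illustrative, not Nimbus APIs.

# Sketch only: plain integers stand in for the spec's Slot/Epoch types.
const SLOTS_PER_EPOCH = 32'u64   # mainnet preset (assumption for this sketch)

func epochOf(slot: uint64): uint64 =
  slot div SLOTS_PER_EPOCH

func startSlotOf(epoch: uint64): uint64 =
  epoch * SLOTS_PER_EPOCH

func isLastSlotOfEpoch(slot: uint64): bool =
  # Mirrors `(slot.epoch() + 1).start_slot() - slot == 1`:
  # the next epoch's first slot is exactly one slot away.
  startSlotOf(epochOf(slot) + 1) - slot == 1

when isMainModule:
  doAssert isLastSlotOfEpoch(31)       # slot 31 closes epoch 0
  doAssert isLastSlotOfEpoch(63)       # slot 63 closes epoch 1
  doAssert not isLastSlotOfEpoch(32)   # slot 32 opens epoch 1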

beacon_chain/spec/peerdas_helpers.nim

Lines changed: 30 additions & 9 deletions

@@ -9,7 +9,7 @@
 
 # Uncategorized helper functions from the spec
 import
-  chronicles, results, taskpools,
+  chronos, chronicles, results, taskpools,
   eth/p2p/discoveryv5/node,
   kzg4844/kzg,
   ssz_serialization/[
@@ -163,30 +163,51 @@ proc recover_cells_and_proofs_parallel*(
 
   for column in dataColumns:
     if not (blobCount == column.column.len):
-      return err ("DataColumns do not have the same length")
+      return err("DataColumns do not have the same length")
 
-  # spawn threads for recovery
   var
-    pendingFuts = newSeq[Flowvar[Result[CellsAndProofs, void]]](blobCount)
+    pendingFuts: seq[Flowvar[Result[CellsAndProofs, void]]]
     res = newSeq[CellsAndProofs](blobCount)
-  for blobIdx in 0..<blobCount:
+
+  let startTime = Moment.now()
+  const reconstructionTimeout = 2.seconds
+
+  # ---- Spawn phase with time limit ----
+  for blobIdx in 0 ..< blobCount:
+    let now = Moment.now()
+    if (now - startTime) > reconstructionTimeout:
+      debug "PeerDAS reconstruction timed out while preparing columns",
+        spawned = pendingFuts.len, total = blobCount
+      break # Stop spawning new tasks
+
     var
       cellIndices = newSeq[CellIndex](columnCount)
       cells = newSeq[Cell](columnCount)
     for i in 0 ..< dataColumns.len:
       cellIndices[i] = dataColumns[i][].index
       cells[i] = dataColumns[i][].column[blobIdx]
-    pendingFuts[blobIdx] =
-      tp.spawn recoverCellsAndKzgProofsTask(cellIndices, cells)
+    pendingFuts.add(tp.spawn recoverCellsAndKzgProofsTask(cellIndices, cells))
+
+  # ---- Sync phase ----
+  for i in 0 ..< pendingFuts.len:
+    let now = Moment.now()
+    if (now - startTime) > reconstructionTimeout:
+      debug "PeerDAS reconstruction timed out",
+        completed = i, totalSpawned = pendingFuts.len
+      return err("Data column reconstruction timed out")
 
-  # sync threads
-  for i in 0..<blobCount:
     let futRes = sync pendingFuts[i]
     if futRes.isErr:
      return err("KZG cells and proofs recovery failed")
+
     res[i] = futRes.get
+
+  if pendingFuts.len < blobCount:
+    return err("Data column reconstruction timed out")
+
   ok(res)
 
+
 proc assemble_data_column_sidecars*(
   signed_beacon_block: fulu.SignedBeaconBlock | gloas.SignedBeaconBlock,
   blobs: seq[KzgBlob], cell_proofs: seq[KzgProof]): seq[fulu.DataColumnSidecar] =
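
The reworked recover_cells_and_proofs_parallel above enforces a 2-second budget in two places: while queueing recovery tasks (spawn phase) and while collecting their results (sync phase), so a node that cannot finish in time returns an error instead of stalling the slot. Below is a self-contained sketch of that two-phase budget pattern using nim-taskpools and chronos timing; slowSquare, squareAllWithBudget and the 200-millisecond budget are illustrative stand-ins, not the node's actual task or timeout.

# Sketch of the spawn/sync timeout pattern, with a dummy task in place
# of recoverCellsAndKzgProofsTask. Requires the taskpools and chronos
# packages; compile with --threads:on.
import std/os
import chronos           # Moment, Duration, milliseconds
import taskpools

proc slowSquare(x: int): int =
  sleep(50)              # stand-in for an expensive KZG recovery
  x * x

proc squareAllWithBudget(tp: Taskpool, inputs: seq[int]): seq[int] =
  const budget = 200.milliseconds   # illustrative budget, not the node's value
  let start = Moment.now()
  var pending: seq[Flowvar[int]]

  # Spawn phase: stop queueing new tasks once the budget is spent.
  for x in inputs:
    if Moment.now() - start > budget:
      break
    pending.add(tp.spawn slowSquare(x))

  # Sync phase: stop waiting on results once the budget is spent.
  for i in 0 ..< pending.len:
    if Moment.now() - start > budget:
      return @[]         # the real code returns err(...) here
    let value = sync pending[i]
    result.add(value)

  # If spawning was cut short, the result set is incomplete.
  if pending.len < inputs.len:
    return @[]

when isMainModule:
  let tp = Taskpool.new()
  echo squareAllWithBudget(tp, @[1, 2, 3, 4, 5, 6, 7, 8])
  tp.shutdown()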
