Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
a680ce1
API: Add participation key generation endpoint to algod API (#5781)
winder Oct 17, 2023
6c482ab
API: minor style improvements in keygen code. (#5786)
winder Oct 17, 2023
26facb5
AVM: Reorganize the crypto opcodes a bit to simplify incentive work (…
jannotti Oct 18, 2023
ad162dc
config: Update description for IncomingConnectionsLimit (#5789)
ohill Oct 19, 2023
51db0fa
tests: Fix flaky TestAccountSelected test (#5788)
ohill Oct 19, 2023
d4b4086
CICD: Fix RPM repository updating (#5790)
onetechnical Oct 19, 2023
40c16e5
cleanup: Use Go 1.19 atomic types (#5792)
ohill Oct 23, 2023
b18a78e
docs: Add comment to initConsensusProtocols (#5791)
winder Oct 23, 2023
29072a0
follower: update follower node error messages. (#5797)
winder Oct 23, 2023
c4e94fe
Feature: Catchup Eval Stake Exception Round Handling (#5795)
gmalouf Oct 23, 2023
19c1bf5
avm: preserve line/column for assembler warnings (#5796)
dragmz Oct 24, 2023
4b824ee
catchpoint: store certs with blocks during catchpoint restore (#5798)
ohill Oct 24, 2023
70cf5b0
tools: improve heapwatch chart drawing scripts (#5801)
algorandskiy Oct 25, 2023
1aeb841
Merge remote-tracking branch 'origin/rel/stable' into relstable3.19.0…
Algo-devops-service Oct 25, 2023
5b2349f
Bump Version, Remove buildnumber.dat and genesistimestamp.dat files.
Algo-devops-service Oct 25, 2023
8c54c22
Merge pull request #5804 from Algo-devops-service/relstable3.19.0-rem…
algojohnlee Oct 26, 2023
f5d901a
catchup: pause catchup if ledger lagging behind (#5794)
algorandskiy Oct 26, 2023
92af372
tests: add refreshAvailablePeers catchup unit test (#5815)
algorandskiy Nov 2, 2023
af2a7ee
typo: fix comment in catchup/universalFetcher (#5811)
eltociear Nov 6, 2023
5a2ef5e
ledger: support WaitWithCancel for unsuccessful WaitForBlock API call…
ohill Nov 6, 2023
c1207a4
ledger: make catchpoint generation backward compatible (#5598)
algorandskiy Nov 6, 2023
7ebb9f4
catchup: Provide more information to client when requested block not …
cce Nov 8, 2023
03efd42
catchup: fetchAndWrite/fetchRound quit early on errNoBlockForRound (#…
algorandskiy Nov 8, 2023
3605084
build: regenerate api routes and resolve merge error in catchup (#5821)
algorandskiy Nov 8, 2023
8e30dd4
txHandler: applications rate limiter (#5734)
algorandskiy Nov 9, 2023
22b7096
catchup: Dynamic parallel catchup (#5802)
winder Nov 9, 2023
1bb78de
Build: bump github.com/consensys/gnark-crypto from 0.12.0 to 0.12.1 (…
dependabot[bot] Nov 9, 2023
3d3440c
Update the Version, BuildNumber, genesistimestamp.data
Algo-devops-service Nov 9, 2023
8165f5c
Merge pull request #5826 from Algo-devops-service/relbeta3.20.0
algojohnlee Nov 9, 2023
90b10d2
algocfg: Add print option to algocfg. (#5824)
winder Nov 13, 2023
243223f
typo: fix typos in md files (#5834)
omahs Nov 15, 2023
5d047a4
ledger: rollback vetting historical stateproof blocks (#5830)
algorandskiy Nov 16, 2023
948f08b
config: move crash and stateproof DB defaults to hot dir (#5817)
cce Nov 16, 2023
c7d958b
Merge branch 'master' into relbeta3.20.0
Algo-devops-service Nov 16, 2023
fd6e88c
Merge pull request #5835 from Algo-devops-service/relbeta3.20.0
algojohnlee Nov 16, 2023
7040500
catchup: use specialized backoff behavior for follower mode (#5836)
cce Nov 16, 2023
ff0ee44
statetrie: nibbles (#5759)
bbroder-uji Nov 16, 2023
03e1fd2
Merge branch 'master' into relbeta3.20.0
Algo-devops-service Nov 16, 2023
07e0a1b
catchup: increase followLatestBackoff to 100ms (#5838)
cce Nov 16, 2023
d22419d
Merge branch 'master' into relbeta3.20.0
onetechnical Nov 17, 2023
c724cd3
Merge pull request #5837 from Algo-devops-service/relbeta3.20.0
algojohnlee Nov 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Please refer to our [CONTRIBUTING](CONTRIBUTING.md) document.

## Project Layout

`go-algorand` is split into various subsystems containing varius packages.
`go-algorand` is split into various subsystems containing various packages.

### Core

Expand Down
31 changes: 17 additions & 14 deletions catchup/catchpointService.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

"github.com/algorand/go-deadlock"

"github.com/algorand/go-algorand/agreement"
"github.com/algorand/go-algorand/config"
"github.com/algorand/go-algorand/data/basics"
"github.com/algorand/go-algorand/data/bookkeeping"
Expand Down Expand Up @@ -370,6 +371,7 @@ func (cs *CatchpointCatchupService) processStageLatestBlockDownload() (err error

attemptsCount := 0
var blk *bookkeeping.Block
var cert *agreement.Certificate
// check to see if the current ledger might have this block. If so, we should try this first instead of downloading anything.
if ledgerBlock, err := cs.ledger.Block(blockRound); err == nil {
blk = &ledgerBlock
Expand All @@ -384,7 +386,7 @@ func (cs *CatchpointCatchupService) processStageLatestBlockDownload() (err error
blockDownloadDuration := time.Duration(0)
if blk == nil {
var stop bool
blk, blockDownloadDuration, psp, stop, err = cs.fetchBlock(blockRound, uint64(attemptsCount))
blk, cert, blockDownloadDuration, psp, stop, err = cs.fetchBlock(blockRound, uint64(attemptsCount))
if stop {
return err
} else if blk == nil {
Expand Down Expand Up @@ -462,7 +464,7 @@ func (cs *CatchpointCatchupService) processStageLatestBlockDownload() (err error
return cs.abort(fmt.Errorf("processStageLatestBlockDownload failed when calling StoreBalancesRound : %v", err))
}

err = cs.ledgerAccessor.StoreFirstBlock(cs.ctx, blk)
err = cs.ledgerAccessor.StoreFirstBlock(cs.ctx, blk, cert)
if err != nil {
if attemptsCount <= cs.config.CatchupBlockDownloadRetryAttempts {
// try again.
Expand Down Expand Up @@ -542,6 +544,7 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
prevBlock := &topBlock
blocksFetched := uint64(1) // we already got the first block in the previous step.
var blk *bookkeeping.Block
var cert *agreement.Certificate
for retryCount := uint64(1); blocksFetched <= lookback; {
if err := cs.ctx.Err(); err != nil {
return cs.stopOrAbort()
Expand All @@ -564,7 +567,7 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
blockDownloadDuration := time.Duration(0)
if blk == nil {
var stop bool
blk, blockDownloadDuration, psp, stop, err = cs.fetchBlock(topBlock.Round()-basics.Round(blocksFetched), retryCount)
blk, cert, blockDownloadDuration, psp, stop, err = cs.fetchBlock(topBlock.Round()-basics.Round(blocksFetched), retryCount)
if stop {
return err
} else if blk == nil {
Expand Down Expand Up @@ -624,7 +627,7 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
}

// all good, persist and move on.
err = cs.ledgerAccessor.StoreBlock(cs.ctx, blk)
err = cs.ledgerAccessor.StoreBlock(cs.ctx, blk, cert)
if err != nil {
cs.log.Warnf("processStageBlocksDownload failed to store downloaded staging block for round %d", blk.Round())
cs.updateBlockRetrievalStatistics(-1, -1)
Expand All @@ -649,17 +652,17 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
// fetchBlock uses the internal peer selector blocksDownloadPeerSelector to pick a peer and then attempt to fetch the block requested from that peer.
// The method return stop=true if the caller should exit the current operation
// If the method return a nil block, the caller is expected to retry the operation, increasing the retry counter as needed.
func (cs *CatchpointCatchupService) fetchBlock(round basics.Round, retryCount uint64) (blk *bookkeeping.Block, downloadDuration time.Duration, psp *peerSelectorPeer, stop bool, err error) {
func (cs *CatchpointCatchupService) fetchBlock(round basics.Round, retryCount uint64) (blk *bookkeeping.Block, cert *agreement.Certificate, downloadDuration time.Duration, psp *peerSelectorPeer, stop bool, err error) {
psp, err = cs.blocksDownloadPeerSelector.getNextPeer()
if err != nil {
if err == errPeerSelectorNoPeerPoolsAvailable {
cs.log.Infof("fetchBlock: unable to obtain a list of peers to retrieve the latest block from; will retry shortly.")
// this is a possible on startup, since the network package might have yet to retrieve the list of peers.
time.Sleep(noPeersAvailableSleepInterval)
return nil, time.Duration(0), psp, false, nil
return nil, nil, time.Duration(0), psp, false, nil
}
err = fmt.Errorf("fetchBlock: unable to obtain a list of peers to retrieve the latest block from : %w", err)
return nil, time.Duration(0), psp, true, cs.abort(err)
return nil, nil, time.Duration(0), psp, true, cs.abort(err)
}
peer := psp.Peer

Expand All @@ -669,26 +672,26 @@ func (cs *CatchpointCatchupService) fetchBlock(round basics.Round, retryCount ui
cs.blocksDownloadPeerSelector.rankPeer(psp, peerRankInvalidDownload)
if retryCount <= uint64(cs.config.CatchupBlockDownloadRetryAttempts) {
// try again.
return nil, time.Duration(0), psp, false, nil
return nil, nil, time.Duration(0), psp, false, nil
}
return nil, time.Duration(0), psp, true, cs.abort(fmt.Errorf("fetchBlock: recurring non-HTTP peer was provided by the peer selector"))
return nil, nil, time.Duration(0), psp, true, cs.abort(fmt.Errorf("fetchBlock: recurring non-HTTP peer was provided by the peer selector"))
}
fetcher := makeUniversalBlockFetcher(cs.log, cs.net, cs.config)
blk, _, downloadDuration, err = fetcher.fetchBlock(cs.ctx, round, httpPeer)
blk, cert, downloadDuration, err = fetcher.fetchBlock(cs.ctx, round, httpPeer)
if err != nil {
if cs.ctx.Err() != nil {
return nil, time.Duration(0), psp, true, cs.stopOrAbort()
return nil, nil, time.Duration(0), psp, true, cs.stopOrAbort()
}
if retryCount <= uint64(cs.config.CatchupBlockDownloadRetryAttempts) {
// try again.
cs.log.Infof("Failed to download block %d on attempt %d out of %d. %v", round, retryCount, cs.config.CatchupBlockDownloadRetryAttempts, err)
cs.blocksDownloadPeerSelector.rankPeer(psp, peerRankDownloadFailed)
return nil, time.Duration(0), psp, false, nil
return nil, nil, time.Duration(0), psp, false, nil
}
return nil, time.Duration(0), psp, true, cs.abort(fmt.Errorf("fetchBlock failed after multiple blocks download attempts"))
return nil, nil, time.Duration(0), psp, true, cs.abort(fmt.Errorf("fetchBlock failed after multiple blocks download attempts"))
}
// success
return blk, downloadDuration, psp, false, nil
return blk, cert, downloadDuration, psp, false, nil
}

// processStageLedgerDownload is the fifth catchpoint catchup stage. It completes the catchup process, swap the new tables and restart the node functionality.
Expand Down
50 changes: 31 additions & 19 deletions catchup/peerSelector.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ const (
peerRank4LowBlockTime = 801
peerRank4HighBlockTime = 999

// peerRankNoBlockForRound is used for responses failed because of no block for round
// This indicates a peer is either behind or a block has not happened yet, or does not have a block that is old enough.
peerRankNoBlockForRound = 2000

// peerRankDownloadFailed is used for responses which could be temporary, such as missing files, or such that we don't
// have clear resolution
peerRankDownloadFailed = 10000
Expand Down Expand Up @@ -110,11 +114,13 @@ type peerPool struct {
// client to provide feedback regarding the peer's performance, and to have the subsequent
// query(s) take advantage of that intel.
type peerSelector struct {
mu deadlock.Mutex
net peersRetriever
mu deadlock.Mutex
net peersRetriever
// peerClasses is the list of peer classes we want to have in the peerSelector.
peerClasses []peerClass
pools []peerPool
counter uint64
// pools is the list of peer pools, each pool contains a list of peers with the same rank.
pools []peerPool
counter uint64
}

// historicStats stores the past windowSize ranks for the peer passed
Expand All @@ -141,7 +147,7 @@ func makeHistoricStatus(windowSize int, class peerClass) *historicStats {
// that will determine the rank of the peer.
hs := historicStats{
windowSize: windowSize,
rankSamples: make([]int, windowSize, windowSize),
rankSamples: make([]int, windowSize),
requestGaps: make([]uint64, 0, windowSize),
rankSum: uint64(class.initialRank) * uint64(windowSize),
gapSum: 0.0}
Expand Down Expand Up @@ -227,18 +233,24 @@ func (hs *historicStats) push(value int, counter uint64, class peerClass) (avera

// Download may fail for various reasons. Give it additional tries
// and see if it recovers/improves.
if value == peerRankDownloadFailed {
factor := float64(1.0)
switch value {
// - Set the rank to the class upper bound multiplied
// by the number of downloadFailures.
// - Each downloadFailure increments the counter, and
// each non-failure decrements it, until it gets to 0.
// - When the peer is consistently failing to
// download, the value added to rankSum will
// increase at an increasing rate to evict the peer
// from the class sooner.
case peerRankNoBlockForRound:
// for the no block errors apply very smooth rank increase
factor = 0.1
fallthrough
case peerRankDownloadFailed:
hs.downloadFailures++
// - Set the rank to the class upper bound multiplied
// by the number of downloadFailures.
// - Each downloadFailure increments the counter, and
// each non-failure decrements it, until it gets to 0.
// - When the peer is consistently failing to
// download, the value added to rankSum will
// increase at an increasing rate to evict the peer
// from the class sooner.
value = upperBound(class) * int(math.Exp2(float64(hs.downloadFailures)))
} else {
value = upperBound(class) * int(math.Exp2(float64(hs.downloadFailures)*factor))
default:
if hs.downloadFailures > 0 {
hs.downloadFailures--
}
Expand All @@ -250,12 +262,12 @@ func (hs *historicStats) push(value int, counter uint64, class peerClass) (avera
// The average performance of the peer
average := float64(hs.rankSum) / float64(len(hs.rankSamples))

if int(average) > upperBound(class) && initialRank == peerRankDownloadFailed {
if int(average) > upperBound(class) && (initialRank == peerRankDownloadFailed || initialRank == peerRankNoBlockForRound) {
// peerRankDownloadFailed will be delayed, to give the peer
// additional time to improve. If does not improve over time,
// the average will exceed the class limit. At this point,
// it will be pushed down to download failed class.
return peerRankDownloadFailed
return initialRank
}

// A penalty is added relative to how freequently the peer is used
Expand Down Expand Up @@ -468,7 +480,7 @@ func (ps *peerSelector) refreshAvailablePeers() {
for peerIdx := len(pool.peers) - 1; peerIdx >= 0; peerIdx-- {
peer := pool.peers[peerIdx].peer
if peerAddress := peerAddress(peer); peerAddress != "" {
if toRemove, _ := existingPeers[pool.peers[peerIdx].class.peerClass][peerAddress]; toRemove {
if toRemove := existingPeers[pool.peers[peerIdx].class.peerClass][peerAddress]; toRemove {
// need to be removed.
pool.peers = append(pool.peers[:peerIdx], pool.peers[peerIdx+1:]...)
}
Expand Down
Loading