diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ab7a81ef3..585af17d02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - Update `EthGetBlockByNumber` to return a pointer to ethtypes.EthBlock or nil for null rounds. ([filecoin-project/lotus#12529](https://github.com/filecoin-project/lotus/pull/12529)) - Reduce size of embedded genesis CAR files by removing WASM actor blocks and compressing with zstd. This reduces the `lotus` binary size by approximately 10 MiB. ([filecoin-project/lotus#12439](https://github.com/filecoin-project/lotus/pull/12439)) - Add ChainSafe operated Calibration archival node to the bootstrap list ([filecoin-project/lotus#12517](https://github.com/filecoin-project/lotus/pull/12517)) +- Fix hotloop in F3 participation API ([filecoin-project/lotus#12575](https://github.com/filecoin-project/lotus/pull/12575)) ## Bug Fixes diff --git a/itests/f3_test.go b/itests/f3_test.go index 0a96fa4d54..727b539782 100644 --- a/itests/f3_test.go +++ b/itests/f3_test.go @@ -46,7 +46,7 @@ func TestF3_Enabled(t *testing.T) { blocktime := 100 * time.Millisecond e := setup(t, blocktime) - e.waitTillF3Instance(3, 25*time.Second) + e.waitTillF3Instance(modules.F3LeaseTerm+1, 40*time.Second) } // Test that checks that F3 can be rebootsrapped by changing the manifest diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go index b37ebd52ba..63e9f70a0c 100644 --- a/node/modules/storageminer.go +++ b/node/modules/storageminer.go @@ -47,6 +47,9 @@ import ( "github.com/filecoin-project/lotus/storage/wdpost" ) +// F3LeaseTerm is the number of instances the miner will attempt to lease from nodes. 
+const F3LeaseTerm = 5 + type UuidWrapper struct { v1api.FullNode } @@ -380,15 +383,28 @@ func newF3Participator(node v1api.FullNode, participant dtypes.MinerAddress, bac func (p *f3Participator) participate(ctx context.Context) error { for ctx.Err() == nil { - if ticket, err := p.tryGetF3ParticipationTicket(ctx); err != nil { - return err - } else if lease, participating, err := p.tryF3Participate(ctx, ticket); err != nil { + start := time.Now() + ticket, err := p.tryGetF3ParticipationTicket(ctx) + if err != nil { return err - } else if !participating { - continue - } else if err := p.awaitLeaseExpiry(ctx, lease); err != nil { + } + lease, participating, err := p.tryF3Participate(ctx, ticket) + if err != nil { return err } + if participating { + if err := p.awaitLeaseExpiry(ctx, lease); err != nil { + return err + } + } + const minPeriod = 500 * time.Millisecond + if sinceLastLoop := time.Since(start); sinceLastLoop < minPeriod { + select { + case <-time.After(minPeriod - sinceLastLoop): + case <-ctx.Done(): + return ctx.Err() + } + } log.Info("Restarting F3 participation") } return ctx.Err() @@ -449,7 +465,11 @@ func (p *f3Participator) tryF3Participate(ctx context.Context, ticket api.F3Part p.backOff(ctx) continue default: - log.Infow("Successfully acquired F3 participation lease.", "issuer", lease.Issuer, "expiry", lease.ValidityTerm) + log.Infow("Successfully acquired F3 participation lease.", + "issuer", lease.Issuer, + "not-before", lease.FromInstance, + "not-after", lease.FromInstance+lease.ValidityTerm, + ) p.previousTicket = ticket return lease, true, nil } @@ -485,8 +505,8 @@ func (p *f3Participator) awaitLeaseExpiry(ctx context.Context, lease api.F3Parti } log.Errorw("Failed to check F3 progress while awaiting lease expiry. 
Retrying after backoff.", "attempts", p.backoff.Attempt(), "backoff", p.backoff.Duration(), "err", err) p.backOff(ctx) - case progress.ID+2 >= lease.ValidityTerm: - log.Infof("F3 progressed (%d) to within two instances of lease expiry (%d). Restarting participation.", progress.ID, lease.ValidityTerm) + case progress.ID+2 >= lease.FromInstance+lease.ValidityTerm: + log.Infof("F3 progressed (%d) to within two instances of lease expiry (%d+%d). Restarting participation.", progress.ID, lease.FromInstance, lease.ValidityTerm) return nil default: remainingInstanceLease := lease.ValidityTerm - progress.ID @@ -529,8 +549,6 @@ func F3Participation(mctx helpers.MetricsCtx, lc fx.Lifecycle, node v1api.FullNo // checkProgressInterval defines the duration between progress checks in normal operation mode. // This interval is used when there are no errors in retrieving the current progress. checkProgressInterval = 10 * time.Second - // leaseTerm The number of instances the miner will attempt to lease from nodes. - leaseTerm = 5 ) participator := newF3Participator( @@ -543,7 +561,7 @@ func F3Participation(mctx helpers.MetricsCtx, lc fx.Lifecycle, node v1api.FullNo }, checkProgressMaxAttempts, checkProgressInterval, - leaseTerm, + F3LeaseTerm, ) ctx, cancel := context.WithCancel(mctx)