Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(f3): fix hot loop in F3 participation #12575

Merged
merged 4 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- Update `EthGetBlockByNumber` to return a pointer to ethtypes.EthBlock or nil for null rounds. ([filecoin-project/lotus#12529](https://github.com/filecoin-project/lotus/pull/12529))
- Reduce size of embedded genesis CAR files by removing WASM actor blocks and compressing with zstd. This reduces the `lotus` binary size by approximately 10 MiB. ([filecoin-project/lotus#12439](https://github.com/filecoin-project/lotus/pull/12439))
- Add ChainSafe operated Calibration archival node to the bootstrap list ([filecoin-project/lotus#12517](https://github.com/filecoin-project/lotus/pull/12517))
- Fix hot loop in F3 participation API ([filecoin-project/lotus#12575](https://github.com/filecoin-project/lotus/pull/12575))

## Bug Fixes

Expand Down
2 changes: 1 addition & 1 deletion itests/f3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func TestF3_Enabled(t *testing.T) {
blocktime := 100 * time.Millisecond
e := setup(t, blocktime)

e.waitTillF3Instance(3, 25*time.Second)
e.waitTillF3Instance(modules.F3LeaseTerm+1, 40*time.Second)
}

// Test that checks that F3 can be rebootstrapped by changing the manifest
Expand Down
42 changes: 30 additions & 12 deletions node/modules/storageminer.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ import (
"github.com/filecoin-project/lotus/storage/wdpost"
)

// F3LeaseTerm is the number of instances the miner will attempt to lease from nodes.
const F3LeaseTerm = 5

// UuidWrapper embeds v1api.FullNode, so the embedded node's methods are
// promoted onto the wrapper.
type UuidWrapper struct {
v1api.FullNode
}
Expand Down Expand Up @@ -380,15 +383,28 @@ func newF3Participator(node v1api.FullNode, participant dtypes.MinerAddress, bac

func (p *f3Participator) participate(ctx context.Context) error {
for ctx.Err() == nil {
if ticket, err := p.tryGetF3ParticipationTicket(ctx); err != nil {
return err
} else if lease, participating, err := p.tryF3Participate(ctx, ticket); err != nil {
start := time.Now()
ticket, err := p.tryGetF3ParticipationTicket(ctx)
if err != nil {
return err
} else if !participating {
continue
} else if err := p.awaitLeaseExpiry(ctx, lease); err != nil {
}
lease, participating, err := p.tryF3Participate(ctx, ticket)
if err != nil {
return err
}
if participating {
if err := p.awaitLeaseExpiry(ctx, lease); err != nil {
return err
}
}
const minPeriod = 500 * time.Millisecond
if sinceLastLoop := time.Since(start); sinceLastLoop < minPeriod {
select {
case <-time.After(minPeriod - sinceLastLoop):
case <-ctx.Done():
Stebalien marked this conversation as resolved.
Show resolved Hide resolved
return ctx.Err()
}
}
log.Info("Restarting F3 participation")
}
return ctx.Err()
Expand Down Expand Up @@ -449,7 +465,11 @@ func (p *f3Participator) tryF3Participate(ctx context.Context, ticket api.F3Part
p.backOff(ctx)
continue
default:
log.Infow("Successfully acquired F3 participation lease.", "issuer", lease.Issuer, "expiry", lease.ValidityTerm)
log.Infow("Successfully acquired F3 participation lease.",
"issuer", lease.Issuer,
"not-before", lease.FromInstance,
"not-after", lease.FromInstance+lease.ValidityTerm,
)
p.previousTicket = ticket
return lease, true, nil
}
Expand Down Expand Up @@ -485,8 +505,8 @@ func (p *f3Participator) awaitLeaseExpiry(ctx context.Context, lease api.F3Parti
}
log.Errorw("Failed to check F3 progress while awaiting lease expiry. Retrying after backoff.", "attempts", p.backoff.Attempt(), "backoff", p.backoff.Duration(), "err", err)
p.backOff(ctx)
case progress.ID+2 >= lease.ValidityTerm:
log.Infof("F3 progressed (%d) to within two instances of lease expiry (%d). Restarting participation.", progress.ID, lease.ValidityTerm)
case progress.ID+2 >= lease.FromInstance+lease.ValidityTerm:
log.Infof("F3 progressed (%d) to within two instances of lease expiry (%d+%d). Restarting participation.", progress.ID, lease.FromInstance, lease.ValidityTerm)
return nil
default:
remainingInstanceLease := lease.ValidityTerm - progress.ID
Expand Down Expand Up @@ -529,8 +549,6 @@ func F3Participation(mctx helpers.MetricsCtx, lc fx.Lifecycle, node v1api.FullNo
// checkProgressInterval defines the duration between progress checks in normal operation mode.
// This interval is used when there are no errors in retrieving the current progress.
checkProgressInterval = 10 * time.Second
// leaseTerm The number of instances the miner will attempt to lease from nodes.
leaseTerm = 5
)

participator := newF3Participator(
Expand All @@ -543,7 +561,7 @@ func F3Participation(mctx helpers.MetricsCtx, lc fx.Lifecycle, node v1api.FullNo
},
checkProgressMaxAttempts,
checkProgressInterval,
leaseTerm,
F3LeaseTerm,
)

ctx, cancel := context.WithCancel(mctx)
Expand Down
Loading