Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

op-proposer: retry failed output proposals #11291

Merged
merged 5 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions op-e2e/actions/l2_proposer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package actions
import (
"context"
"crypto/ecdsa"
"encoding/binary"
"math/big"
"time"

Expand Down Expand Up @@ -208,18 +209,12 @@ func toCallArg(msg ethereum.CallMsg) interface{} {

func (p *L2Proposer) fetchNextOutput(t Testing) (*eth.OutputResponse, bool, error) {
if e2eutils.UseFaultProofs() {
blockNumber, err := p.driver.FetchCurrentBlockNumber(t.Ctx())
output, err := p.driver.FetchDGFOutput(t.Ctx())
if err != nil {
return nil, false, err
}

output, _, err := p.driver.FetchOutput(t.Ctx(), blockNumber)
if err != nil {
return nil, false, err
}

encodedBlockNumber := make([]byte, 32)
copy(encodedBlockNumber[32-len(blockNumber.Bytes()):], blockNumber.Bytes())
binary.BigEndian.PutUint64(encodedBlockNumber[24:], output.BlockRef.Number)
game, err := p.disputeGameFactory.Games(&bind.CallOpts{}, p.driver.Cfg.DisputeGameType, output.OutputRoot, encodedBlockNumber)
if err != nil {
return nil, false, err
Expand All @@ -230,7 +225,7 @@ func (p *L2Proposer) fetchNextOutput(t Testing) (*eth.OutputResponse, bool, erro

return output, true, nil
} else {
return p.driver.FetchNextOutputInfo(t.Ctx())
return p.driver.FetchL2OOOutput(t.Ctx())
}
}

Expand Down
8 changes: 4 additions & 4 deletions op-proposer/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ var (
}
PollIntervalFlag = &cli.DurationFlag{
Name: "poll-interval",
Usage: "How frequently to poll L2 for new blocks",
Value: 6 * time.Second,
Usage: "How frequently to poll L2 for new blocks (legacy L2OO)",
Value: 12 * time.Second,
EnvVars: prefixEnvVars("POLL_INTERVAL"),
}
AllowNonFinalizedFlag = &cli.BoolFlag{
Expand All @@ -62,8 +62,8 @@ var (
}
OutputRetryIntervalFlag = &cli.DurationFlag{
Name: "output-retry-interval",
Usage: "Interval between retrying output fetch if one fails",
Value: time.Minute,
Usage: "Interval between retrying output fetching (DGF)",
Value: 12 * time.Second,
EnvVars: prefixEnvVars("OUTPUT_RETRY_INTERVAL"),
}
DisputeGameTypeFlag = &cli.UintFlag{
Expand Down
164 changes: 97 additions & 67 deletions op-proposer/proposer/driver.go
bitwiseguy marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"github.com/ethereum-optimism/optimism/op-proposer/metrics"
"github.com/ethereum-optimism/optimism/op-service/dial"
"github.com/ethereum-optimism/optimism/op-service/eth"
"github.com/ethereum-optimism/optimism/op-service/retry"
"github.com/ethereum-optimism/optimism/op-service/txmgr"
)

Expand Down Expand Up @@ -213,9 +212,12 @@ func (l *L2OutputSubmitter) StopL2OutputSubmitting() error {
return nil
}

// FetchNextOutputInfo gets the block number of the next proposal.
// It returns: the next block number, if the proposal should be made, error
func (l *L2OutputSubmitter) FetchNextOutputInfo(ctx context.Context) (*eth.OutputResponse, bool, error) {
// FetchL2OOOutput gets the next output proposal for the L2OO.
// It queries the L2OO for the earliest next block number that should be proposed.
// It returns the output to propose, and whether the proposal should be submitted at all.
// The passed context is expected to be a lifecycle context. A network timeout
// context will be derived from it.
func (l *L2OutputSubmitter) FetchL2OOOutput(ctx context.Context) (*eth.OutputResponse, bool, error) {
if l.l2ooContract == nil {
return nil, false, fmt.Errorf("L2OutputOracle contract not set, cannot fetch next output info")
}
Expand All @@ -226,88 +228,91 @@ func (l *L2OutputSubmitter) FetchNextOutputInfo(ctx context.Context) (*eth.Outpu
From: l.Txmgr.From(),
Context: cCtx,
}
nextCheckpointBlock, err := l.l2ooContract.NextBlockNumber(callOpts)
nextCheckpointBlockBig, err := l.l2ooContract.NextBlockNumber(callOpts)
if err != nil {
l.Log.Error("Proposer unable to get next block number", "err", err)
return nil, false, err
return nil, false, fmt.Errorf("querying next block number: %w", err)
}
nextCheckpointBlock := nextCheckpointBlockBig.Uint64()
// Fetch the current L2 heads
currentBlockNumber, err := l.FetchCurrentBlockNumber(ctx)
if err != nil {
return nil, false, err
}

// Ensure that we do not submit a block in the future
if currentBlockNumber.Cmp(nextCheckpointBlock) < 0 {
if currentBlockNumber < nextCheckpointBlock {
l.Log.Debug("Proposer submission interval has not elapsed", "currentBlockNumber", currentBlockNumber, "nextBlockNumber", nextCheckpointBlock)
return nil, false, nil
}

return l.FetchOutput(ctx, nextCheckpointBlock)
output, err := l.FetchOutput(ctx, nextCheckpointBlock)
if err != nil {
return nil, false, fmt.Errorf("fetching output: %w", err)
}

// Always propose if it's part of the Finalized L2 chain. Or if allowed, if it's part of the safe L2 chain.
if output.BlockRef.Number > output.Status.FinalizedL2.Number && (!l.Cfg.AllowNonFinalized || output.BlockRef.Number > output.Status.SafeL2.Number) {
l.Log.Debug("Not proposing yet, L2 block is not ready for proposal",
"l2_proposal", output.BlockRef,
"l2_safe", output.Status.SafeL2,
"l2_finalized", output.Status.FinalizedL2,
"allow_non_finalized", l.Cfg.AllowNonFinalized)
return output, false, nil
}
return output, true, nil
sebastianst marked this conversation as resolved.
Show resolved Hide resolved
}

// FetchDGFOutput gets the next output proposal for the DGF.
// The passed context is expected to be a lifecycle context. A network timeout
// context will be derived from it.
func (l *L2OutputSubmitter) FetchDGFOutput(ctx context.Context) (*eth.OutputResponse, error) {
ctx, cancel := context.WithTimeout(ctx, l.Cfg.NetworkTimeout)
defer cancel()

blockNum, err := l.FetchCurrentBlockNumber(ctx)
if err != nil {
return nil, err
}
return l.FetchOutput(ctx, blockNum)
}

// FetchCurrentBlockNumber gets the current block number from the [L2OutputSubmitter]'s [RollupClient]. If the `AllowNonFinalized` configuration
// option is set, it will return the safe head block number, and if not, it will return the finalized head block number.
func (l *L2OutputSubmitter) FetchCurrentBlockNumber(ctx context.Context) (*big.Int, error) {
func (l *L2OutputSubmitter) FetchCurrentBlockNumber(ctx context.Context) (uint64, error) {
rollupClient, err := l.RollupProvider.RollupClient(ctx)
if err != nil {
l.Log.Error("Proposer unable to get rollup client", "err", err)
return nil, err
return 0, fmt.Errorf("getting rollup client: %w", err)
}

cCtx, cancel := context.WithTimeout(ctx, l.Cfg.NetworkTimeout)
defer cancel()

status, err := rollupClient.SyncStatus(cCtx)
status, err := rollupClient.SyncStatus(ctx)
if err != nil {
l.Log.Error("Proposer unable to get sync status", "err", err)
return nil, err
return 0, fmt.Errorf("getting sync status: %w", err)
}

// Use either the finalized or safe head depending on the config. Finalized head is default & safer.
var currentBlockNumber *big.Int
if l.Cfg.AllowNonFinalized {
currentBlockNumber = new(big.Int).SetUint64(status.SafeL2.Number)
} else {
currentBlockNumber = new(big.Int).SetUint64(status.FinalizedL2.Number)
return status.SafeL2.Number, nil
}
return currentBlockNumber, nil
return status.FinalizedL2.Number, nil
}

func (l *L2OutputSubmitter) FetchOutput(ctx context.Context, block *big.Int) (*eth.OutputResponse, bool, error) {
func (l *L2OutputSubmitter) FetchOutput(ctx context.Context, block uint64) (*eth.OutputResponse, error) {
rollupClient, err := l.RollupProvider.RollupClient(ctx)
if err != nil {
l.Log.Error("Proposer unable to get rollup client", "err", err)
return nil, false, err
return nil, fmt.Errorf("getting rollup client: %w", err)
}

cCtx, cancel := context.WithTimeout(ctx, l.Cfg.NetworkTimeout)
defer cancel()

output, err := rollupClient.OutputAtBlock(cCtx, block.Uint64())
output, err := rollupClient.OutputAtBlock(ctx, block)
if err != nil {
l.Log.Error("Failed to fetch output at block", "block", block, "err", err)
return nil, false, err
return nil, fmt.Errorf("fetching output at block %d: %w", block, err)
}
if output.Version != supportedL2OutputVersion {
l.Log.Error("Unsupported l2 output version", "output_version", output.Version, "supported_version", supportedL2OutputVersion)
return nil, false, errors.New("unsupported l2 output version")
return nil, fmt.Errorf("unsupported l2 output version: %v, supported: %v", output.Version, supportedL2OutputVersion)
}
if output.BlockRef.Number != block.Uint64() { // sanity check, e.g. in case of bad RPC caching
l.Log.Error("Invalid blockNumber", "next_block", block, "output_block", output.BlockRef.Number)
return nil, false, errors.New("invalid blockNumber")
if onum := output.BlockRef.Number; onum != block { // sanity check, e.g. in case of bad RPC caching
return nil, fmt.Errorf("output block number %d mismatches requested %d", output.BlockRef.Number, block)
}

// Always propose if it's part of the Finalized L2 chain. Or if allowed, if it's part of the safe L2 chain.
if output.BlockRef.Number > output.Status.FinalizedL2.Number && (!l.Cfg.AllowNonFinalized || output.BlockRef.Number > output.Status.SafeL2.Number) {
l.Log.Debug("Not proposing yet, L2 block is not ready for proposal",
"l2_proposal", output.BlockRef,
"l2_safe", output.Status.SafeL2,
"l2_finalized", output.Status.FinalizedL2,
"allow_non_finalized", l.Cfg.AllowNonFinalized)
return nil, false, nil
}
return output, true, nil
return output, nil
}

// ProposeL2OutputTxData creates the transaction data for the ProposeL2Output function
Expand Down Expand Up @@ -456,19 +461,33 @@ func (l *L2OutputSubmitter) waitNodeSync() error {
return dial.WaitRollupSync(l.ctx, l.Log, rollupClient, l1head, time.Second*12)
}

// The loopL2OO regularly polls the L2OO for the next block to propose,
// and if the current finalized (or safe) block is past that next block, it
// proposes it.
func (l *L2OutputSubmitter) loopL2OO(ctx context.Context) {
defer l.Log.Info("loopL2OO returning")
ticker := time.NewTicker(l.Cfg.PollInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
output, shouldPropose, err := retry.Do2(ctx, 10, &retry.FixedStrategy{Dur: l.Cfg.OutputRetryInterval}, func() (*eth.OutputResponse, bool, error) {
ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
defer cancel()
return l.FetchNextOutputInfo(ctx)
})
if err != nil || !shouldPropose {
break
// prioritize quit signal
select {
case <-l.done:
return
default:
}

// A note on retrying: the outer ticker already runs on a short
// poll interval, which has a default value of 6 seconds. So no
// retry logic is needed around output fetching here.
output, shouldPropose, err := l.FetchL2OOOutput(ctx)
if err != nil {
l.Log.Warn("Error getting L2OO output", "err", err)
continue
} else if !shouldPropose {
// debug logging already in FetchL2OOOutput
continue
}

l.proposeOutput(ctx, output)
Expand All @@ -478,26 +497,37 @@ func (l *L2OutputSubmitter) loopL2OO(ctx context.Context) {
}
}

// The loopDGF proposes a new output every proposal interval. It does _not_ query
// the DGF for when to next propose, as the DGF doesn't have the concept of a
// proposal interval, like in the L2OO case. For this reason, it has to keep track
// of the interval itself, for which it uses an internal ticker.
func (l *L2OutputSubmitter) loopDGF(ctx context.Context) {
defer l.Log.Info("loopDGF returning")
ticker := time.NewTicker(l.Cfg.ProposalInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
blockNumber, err := retry.Do(ctx, 10, &retry.FixedStrategy{Dur: l.Cfg.OutputRetryInterval}, func() (*big.Int, error) {
return l.FetchCurrentBlockNumber(ctx)
})
if err != nil {
break
// prioritize quit signal
select {
case <-l.done:
return
default:
}

output, shouldPropose, err := retry.Do2(ctx, 10, &retry.FixedStrategy{Dur: l.Cfg.OutputRetryInterval}, func() (*eth.OutputResponse, bool, error) {
ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
defer cancel()
return l.FetchOutput(ctx, blockNumber)
})
if err != nil || !shouldPropose {
break
var (
output *eth.OutputResponse
err error
)
// A note on retrying: because the proposal interval is usually much
// larger than the interval at which to retry proposing on a failed attempt,
// we want to keep retrying getting the output proposal until we succeed.
for output == nil || err != nil {
output, err = l.FetchDGFOutput(ctx)
if err != nil {
sebastianst marked this conversation as resolved.
Show resolved Hide resolved
l.Log.Warn("Error getting DGF output, retrying...", "err", err)
time.Sleep(l.Cfg.OutputRetryInterval)
sebastianst marked this conversation as resolved.
Show resolved Hide resolved
}
}

l.proposeOutput(ctx, output)
Expand Down
Loading