-
Notifications
You must be signed in to change notification settings - Fork 3.9k
op-node/recover-mode: handle l1 origin close to tip gracefully #18556
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
543df5b
ea66bee
dbaa3ab
7333916
acc6c33
e1bfc22
0aa6adc
9e95a84
90abe3d
16f5198
dd9492e
418a985
3b22b34
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| package sequencer | ||
|
|
||
| import ( | ||
| "log/slog" | ||
| "testing" | ||
|
|
||
| "github.com/ethereum-optimism/optimism/op-devstack/compat" | ||
| "github.com/ethereum-optimism/optimism/op-devstack/presets" | ||
| ) | ||
|
|
||
| // TestMain creates the test-setups against the shared backend | ||
| func TestMain(m *testing.M) { | ||
| presets.DoMain(m, presets.WithMinimal(), | ||
| presets.WithCompatibleTypes(compat.SysGo), | ||
| presets.WithLogLevel(slog.LevelDebug), | ||
| ) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| package sequencer | ||
|
|
||
| import ( | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/ethereum-optimism/optimism/op-devstack/devtest" | ||
| "github.com/ethereum-optimism/optimism/op-devstack/presets" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| // TestRecoverModeWhenChainHealthy checks that the chain | ||
| // can progress as normal when recover mode is activated. | ||
| // Recover mode is designed to recover from a sequencing | ||
| // window expiry when there are ample L1 blocks to eagerly | ||
| // progress the l1 origin to. But when the l1 origin is | ||
| // close to the tip of the l1 chain, the eagerness would cause | ||
| // a delay in unsafe block production while the sequencer waits | ||
| // for the next l1 origin to become available. Recover mode | ||
| // has since been patched, and the sequencer will not demand the | ||
| // next l1 origin until it is actually available. This tests | ||
| // protects against a regeression in that behavior. | ||
| func TestRecoverModeWhenChainHealthy(gt *testing.T) { | ||
| t := devtest.ParallelT(gt) | ||
| sys := presets.NewMinimal(t) | ||
| tracer := t.Tracer() | ||
| ctx := t.Ctx() | ||
|
|
||
| sys.L2CL.SetSequencerRecoverMode(true) | ||
| blockTime := sys.L2Chain.Escape().RollupConfig().BlockTime | ||
| numL2Blocks := uint64(20) | ||
| waitTime := time.Duration(blockTime*numL2Blocks+5) * time.Second | ||
|
|
||
| num := sys.L2CL.SyncStatus().UnsafeL2.Number | ||
| new_num := num | ||
| require.Eventually(t, func() bool { | ||
| ctx, span := tracer.Start(ctx, "check head") | ||
| defer span.End() | ||
|
|
||
| new_num, num = sys.L2CL.SyncStatus().UnsafeL2.Number, new_num | ||
| t.Logger().InfoContext(ctx, "unsafe head", "number", new_num, "safe head", sys.L2CL.SyncStatus().SafeL2.Number) | ||
| return new_num >= numL2Blocks | ||
| }, waitTime, time.Duration(blockTime)*time.Second) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,26 +5,46 @@ import ( | |
|
|
||
| actionsHelpers "github.com/ethereum-optimism/optimism/op-e2e/actions/helpers" | ||
| "github.com/ethereum-optimism/optimism/op-e2e/actions/proofs/helpers" | ||
| "github.com/ethereum-optimism/optimism/op-e2e/e2eutils" | ||
| "github.com/ethereum-optimism/optimism/op-program/client/claim" | ||
| "github.com/ethereum/go-ethereum/common" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| // Run a test that proves a deposit-only block generated due to sequence window expiry. | ||
| // Run a test that proves a deposit-only block generated due to sequence window expiry, | ||
| // and then recovers the chain using sequencer recover mode. | ||
| func runSequenceWindowExpireTest(gt *testing.T, testCfg *helpers.TestCfg[any]) { | ||
| t := actionsHelpers.NewDefaultTesting(gt) | ||
| tp := helpers.NewTestParams() | ||
| env := helpers.NewL2FaultProofEnv(t, testCfg, tp, helpers.NewBatcherCfg()) | ||
| const SEQUENCER_WINDOW_SIZE = 50 // (short, to keep test fast) | ||
| tp := helpers.NewTestParams(func(p *e2eutils.TestParams) { | ||
| p.SequencerWindowSize = SEQUENCER_WINDOW_SIZE | ||
| }) | ||
|
|
||
| // It seems more difficult (almost impossible) to recover from sequencing window expiry with span batches, | ||
| // since the singular batches within are invalidated _atomically_. | ||
| // That is to say, if the oldest batch in the span batch fails the sequencing window check | ||
| // (l1 origin + seq window < l1 inclusion) | ||
| // All following batches are invalidated / dropped as well. | ||
| // https://github.com/ethereum-optimism/optimism/blob/73339162d78a1ebf2daadab01736382eed6f4527/op-node/rollup/derive/batches.go#L96-L100 | ||
| // | ||
| // If the same blocks were batched with singular batches, the validation rules are different | ||
| // https://github.com/ethereum-optimism/optimism/blob/73339162d78a1ebf2daadab01736382eed6f4527/op-node/rollup/derive/batches.go#L83-L86 | ||
| // In the case of recover mode, the noTxPool=true condition means autoderviation actually fills | ||
| // the gap with identical blocks anyway, meaning the following batches are actually still valid. | ||
| bc := helpers.NewBatcherCfg() | ||
| bc.ForceSubmitSingularBatch = true | ||
|
|
||
| // Mine an empty block for gas estimation purposes. | ||
| env := helpers.NewL2FaultProofEnv(t, testCfg, tp, bc) | ||
|
|
||
| // Mine an empty L1 block for gas estimation purposes. | ||
| env.Miner.ActEmptyBlock(t) | ||
|
|
||
| // Expire the sequence window by building `SequenceWindow + 1` empty blocks on L1. | ||
| for i := 0; i < int(tp.SequencerWindowSize)+1; i++ { | ||
| env.Alice.L1.ActResetTxOpts(t) | ||
| env.Alice.ActDeposit(t) | ||
|
|
||
| env.Miner.ActL1StartBlock(12)(t) | ||
| env.Miner.ActL1StartBlock(tp.L1BlockTime)(t) | ||
| env.Miner.ActL1IncludeTx(env.Alice.Address())(t) | ||
| env.Miner.ActL1EndBlock(t) | ||
|
|
||
|
|
@@ -44,8 +64,104 @@ func runSequenceWindowExpireTest(gt *testing.T, testCfg *helpers.TestCfg[any]) { | |
| l2SafeHead = env.Engine.L2Chain().CurrentSafeBlock() | ||
| require.Greater(t, l2SafeHead.Number.Uint64(), uint64(0)) | ||
|
|
||
| // Run the FPP on one of the auto-derived blocks. | ||
| env.RunFaultProofProgram(t, l2SafeHead.Number.Uint64()/2, testCfg.CheckResult, testCfg.InputParams...) | ||
|
|
||
| // Set recover mode on the sequencer: | ||
| env.Sequencer.ActSetRecoverMode(t, true) | ||
|
Comment on lines
+69
to
+70
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I manually confirmed that the test fails without recover mode enabled. |
||
| // Since recover mode only affects the L2 CL (op-node), | ||
| // it won't stop the test environment injecting transactions | ||
| // directly into the engine. So we will force the engine | ||
| // to ignore such injections if recover mode is enabled. | ||
| env.Engine.EngineApi.SetForceEmpty(true) | ||
|
|
||
| // Define "lag" as the difference between the current L1 block number and the safe L2 block's L1 origin number. | ||
| computeLag := func() int { | ||
| ss := env.Sequencer.SyncStatus() | ||
| return int(ss.CurrentL1.Number - ss.SafeL2.L1Origin.Number) | ||
| } | ||
|
|
||
| // Define "drift" as the difference between the current L2 block's timestamp and the unsafe L2 block's L1 origin's timestamp. | ||
| computeDrift := func() int { | ||
| ss := env.Sequencer.SyncStatus() | ||
| l2header, err := env.Engine.EthClient().HeaderByHash(t.Ctx(), ss.UnsafeL2.Hash) | ||
| require.NoError(t, err) | ||
| l1header, err := env.Miner.EthClient().HeaderByHash(t.Ctx(), ss.UnsafeL2.L1Origin.Hash) | ||
| require.NoError(t, err) | ||
| t.Log("l2header.Time", l2header.Time) | ||
| t.Log("l1header.Time", l1header.Time) | ||
| return int(l2header.Time) - int(l1header.Time) | ||
| } | ||
|
|
||
| // Build both chains and assert the L1 origin catches back up with the tip of the L1 chain. | ||
| lag := computeLag() | ||
| t.Log("lag", lag) | ||
| drift := computeDrift() | ||
| t.Log("drift", drift) | ||
| require.GreaterOrEqual(t, uint64(lag), tp.SequencerWindowSize, "Lag is less than sequencing window size") | ||
| numL1Blocks := 0 | ||
| timeout := tp.SequencerWindowSize * 50 | ||
|
|
||
| for numL1Blocks < int(timeout) { | ||
| for range 100 * tp.L1BlockTime / env.Sd.RollupCfg.BlockTime { // go at 100x real time | ||
| err := env.Sequencer.ActMaybeL2StartBlock(t) | ||
| if err != nil { | ||
| break | ||
| } | ||
| env.Bob.L2.ActResetTxOpts(t) | ||
| env.Bob.L2.ActMakeTx(t) | ||
| env.Engine.ActL2IncludeTx(env.Bob.Address())(t) | ||
| // RecoverMode (enabled above) should prevent this | ||
| // transaction from being included in the block, which | ||
| // is critical for recover mode to work. | ||
| env.Sequencer.ActL2EndBlock(t) | ||
| drift = computeDrift() | ||
| t.Log("drift", drift) | ||
| } | ||
| env.BatchMineAndSync(t) // Mines 1 block on L1 | ||
| numL1Blocks++ | ||
| lag = computeLag() | ||
| t.Log("lag", lag) | ||
| drift = computeDrift() | ||
| t.Log("drift", drift) | ||
| if lag == 1 { // A lag of 1 is the minimum possible. | ||
| break | ||
| } | ||
| } | ||
|
|
||
| if uint64(numL1Blocks) >= timeout { | ||
| t.Fatal("L1 Origin did not catch up to tip within %d L1 blocks (lag is %d)", numL1Blocks, lag) | ||
| } else { | ||
| t.Logf("L1 Origin caught up to within %d blocks of the tip within %d L1 blocks (sequencing window size %d)", | ||
| lag, numL1Blocks, tp.SequencerWindowSize) | ||
| } | ||
|
|
||
| switch { | ||
| case drift == 0: | ||
| t.Fatal("drift is zero, this implies the unsafe l2 head is pinned to the l1 head") | ||
| case drift > int(tp.MaxSequencerDrift): | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that |
||
| t.Fatal("drift is too high") | ||
| default: | ||
| t.Log("drift", drift) | ||
| } | ||
|
|
||
| // Disable recover mode so we can get some user transactions in again. | ||
| env.Sequencer.ActSetRecoverMode(t, false) | ||
| env.Engine.EngineApi.SetForceEmpty(false) | ||
| l2SafeBefore := env.Sequencer.L2Safe() | ||
| env.Sequencer.ActL2StartBlock(t) | ||
| env.Bob.L2.ActResetTxOpts(t) | ||
| env.Bob.L2.ActMakeTx(t) | ||
| env.Engine.ActL2IncludeTx(env.Bob.Address())(t) | ||
| env.Sequencer.ActL2EndBlock(t) | ||
| env.BatchMineAndSync(t) | ||
| l2Safe := env.Sequencer.L2Safe() | ||
| require.Equal(t, l2Safe.Number, l2SafeBefore.Number+1, "safe chain did not progress with user transactions") | ||
| l2SafeBlock, err := env.Engine.EthClient().BlockByHash(t.Ctx(), l2Safe.Hash) | ||
| require.NoError(t, err) | ||
| // Assert safe block has at least two transactions | ||
| require.GreaterOrEqual(t, len(l2SafeBlock.Transactions()), 2, "safe block did not have at least two transactions") | ||
|
|
||
| env.RunFaultProofProgram(t, l2Safe.Number, testCfg.CheckResult, testCfg.InputParams...) | ||
| } | ||
|
|
||
| // Runs a that proves a block in a chain where the batcher opens a channel, the sequence window expires, and then the | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,7 +135,13 @@ func (los *L1OriginSelector) CurrentAndNextOrigin(ctx context.Context, l2Head et | |
| } | ||
| los.currentOrigin = currentOrigin | ||
| nextOrigin, err := los.l1.L1BlockRefByNumber(ctx, currentOrigin.Number+1) | ||
| if err != nil { | ||
| if errors.Is(err, ethereum.NotFound) { | ||
| // If the next origin is not found, it means we are at the end of the chain. | ||
| // In this case, we set the next origin to an empty block reference. | ||
| los.log.Error("next L1 origin not found, recover mode likely brought L1 origin up to the tip of the chain", "error", err) | ||
| los.nextOrigin = eth.L1BlockRef{} | ||
| return los.currentOrigin, los.nextOrigin, nil | ||
| } else if err != nil { | ||
|
Comment on lines
+138
to
+144
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The action test added in this PR fails without this patch.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The failure is due to this error I'm not sure what this would mean outside of the action test environment, but I think it is probably related to what we saw with #18350.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is just because the action test helper being called requires block building to succeed, and it doesn't because of the unavailability of the next l1 origin. So it does not really tally with the sequencer drift violation, I haven't yet manage to repro that. But still, it is better to avoid the temporary errors and continue block building with the old origin when we get into this situation.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Modified the test to not get stuck on this, if it can't build a block we just wait until more l1 blocks are available. This means the test actually passes without the patch. But I added an acceptance test which still fails without the patch. |
||
| return eth.L1BlockRef{}, eth.L1BlockRef{}, | ||
| derive.NewTemporaryError(fmt.Errorf("failed to fetch next L1 origin: %w", err)) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I manually confirmed the test fails without forcing singular batches.