diff --git a/justfile b/justfile index 14665b5f726a7..00822da84c73d 100644 --- a/justfile +++ b/justfile @@ -39,4 +39,3 @@ update-op-geth ref: go mod edit -replace=github.com/ethereum/go-ethereum=github.com/ethereum-optimism/op-geth@"$ver"; \ go mod tidy; \ echo "Updated op-geth to $ver" - diff --git a/op-acceptance-tests/tests/sequencer/init_test.go b/op-acceptance-tests/tests/sequencer/init_test.go new file mode 100644 index 0000000000000..ed9fb2d955542 --- /dev/null +++ b/op-acceptance-tests/tests/sequencer/init_test.go @@ -0,0 +1,17 @@ +package sequencer + +import ( + "log/slog" + "testing" + + "github.com/ethereum-optimism/optimism/op-devstack/compat" + "github.com/ethereum-optimism/optimism/op-devstack/presets" +) + +// TestMain creates the test-setups against the shared backend +func TestMain(m *testing.M) { + presets.DoMain(m, presets.WithMinimal(), + presets.WithCompatibleTypes(compat.SysGo), + presets.WithLogLevel(slog.LevelDebug), + ) +} diff --git a/op-acceptance-tests/tests/sequencer/recover_mode_test.go b/op-acceptance-tests/tests/sequencer/recover_mode_test.go new file mode 100644 index 0000000000000..a0f3382e181c9 --- /dev/null +++ b/op-acceptance-tests/tests/sequencer/recover_mode_test.go @@ -0,0 +1,45 @@ +package sequencer + +import ( + "testing" + "time" + + "github.com/ethereum-optimism/optimism/op-devstack/devtest" + "github.com/ethereum-optimism/optimism/op-devstack/presets" + "github.com/stretchr/testify/require" +) + +// TestRecoverModeWhenChainHealthy checks that the chain +// can progress as normal when recover mode is activated. +// Recover mode is designed to recover from a sequencing +// window expiry when there are ample L1 blocks to eagerly +// progress the l1 origin to. But when the l1 origin is +// close to the tip of the l1 chain, the eagerness would cause +// a delay in unsafe block production while the sequencer waits +// for the next l1 origin to become available. 
Recover mode
+// has since been patched, and the sequencer will not demand the
+// next l1 origin until it is actually available. This test
+// protects against a regression in that behavior.
+func TestRecoverModeWhenChainHealthy(gt *testing.T) {
+	t := devtest.ParallelT(gt)
+	sys := presets.NewMinimal(t)
+	tracer := t.Tracer()
+	ctx := t.Ctx()
+
+	err := sys.L2CL.SetSequencerRecoverMode(true)
+	require.NoError(t, err)
+	blockTime := sys.L2Chain.Escape().RollupConfig().BlockTime
+	numL2Blocks := uint64(20)
+	waitTime := time.Duration(blockTime*numL2Blocks+5) * time.Second
+
+	num := sys.L2CL.SyncStatus().UnsafeL2.Number
+	new_num := num
+	require.Eventually(t, func() bool {
+		ctx, span := tracer.Start(ctx, "check head")
+		defer span.End()
+
+		new_num, num = sys.L2CL.SyncStatus().UnsafeL2.Number, new_num
+		t.Logger().InfoContext(ctx, "unsafe head", "number", new_num, "safe head", sys.L2CL.SyncStatus().SafeL2.Number)
+		return new_num >= numL2Blocks
+	}, waitTime, time.Duration(blockTime)*time.Second)
+}
diff --git a/op-devstack/dsl/l2_cl.go b/op-devstack/dsl/l2_cl.go
index 862394fc522c4..2d9874d3987c2 100644
--- a/op-devstack/dsl/l2_cl.go
+++ b/op-devstack/dsl/l2_cl.go
@@ -91,6 +91,10 @@ func (cl *L2CLNode) StopSequencer() common.Hash {
 	return unsafeHead
 }
 
+func (cl *L2CLNode) SetSequencerRecoverMode(b bool) error {
+	return cl.inner.RollupAPI().SetRecoverMode(cl.ctx, b)
+}
+
 func (cl *L2CLNode) SyncStatus() *eth.SyncStatus {
 	ctx, cancel := context.WithTimeout(cl.ctx, DefaultTimeout)
 	defer cancel()
diff --git a/op-e2e/actions/helpers/l2_batcher.go b/op-e2e/actions/helpers/l2_batcher.go
index e0d8eac5a284e..2e1a941721d39 100644
--- a/op-e2e/actions/helpers/l2_batcher.go
+++ b/op-e2e/actions/helpers/l2_batcher.go
@@ -270,7 +270,7 @@ func (s *L2Batcher) Buffer(t Testing, bufferOpts ...BufferOption) error {
 		}
 	}
 
-	s.ActCreateChannel(t, s.rollupCfg.IsDelta(block.Time()), options.channelModifiers...)
+ s.ActCreateChannel(t, s.rollupCfg.IsDelta(block.Time()) && !s.l2BatcherCfg.ForceSubmitSingularBatch, options.channelModifiers...) if _, err := s.L2ChannelOut.AddBlock(s.rollupCfg, block); err != nil { return err diff --git a/op-e2e/actions/helpers/l2_sequencer.go b/op-e2e/actions/helpers/l2_sequencer.go index 97d8100b1a5d6..ef3d4e29dfbfc 100644 --- a/op-e2e/actions/helpers/l2_sequencer.go +++ b/op-e2e/actions/helpers/l2_sequencer.go @@ -40,8 +40,8 @@ func (m *MockL1OriginSelector) FindL1Origin(ctx context.Context, l2Head eth.L2Bl return m.actual.FindL1Origin(ctx, l2Head) } -func (m *MockL1OriginSelector) SetRecoverMode(bool) { - // noop +func (m *MockL1OriginSelector) SetRecoverMode(b bool) { + m.actual.SetRecoverMode(b) } // L2Sequencer is an actor that functions like a rollup node, @@ -98,20 +98,28 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, blobSrc deri // ActL2StartBlock starts building of a new L2 block on top of the head func (s *L2Sequencer) ActL2StartBlock(t Testing) { + err := s.ActMaybeL2StartBlock(t) + require.NoError(t, err, "failed to start block building") +} + +// ActMaybeL2StartBlock tries to start building a new L2 block on top of the head +func (s *L2Sequencer) ActMaybeL2StartBlock(t Testing) error { require.NoError(t, s.drainer.Drain()) // can't build when other work is still blocking if !s.L2PipelineIdle { t.InvalidAction("cannot start L2 build when derivation is not idle") - return + return nil } if s.l2Building { t.InvalidAction("already started building L2 block") - return + return nil } s.synchronousEvents.Emit(t.Ctx(), sequencing.SequencerActionEvent{}) - require.NoError(t, s.drainer.DrainUntil(event.Is[engine.BuildStartedEvent], false), - "failed to start block building") - + err := s.drainer.DrainUntil(event.Is[engine.BuildStartedEvent], false) + if err != nil { + return err + } s.l2Building = true + return nil } // ActL2EndBlock completes a new L2 block and applies it to the L2 chain as new canonical unsafe 
head
@@ -272,3 +280,7 @@ func (s *L2Sequencer) ActBuildL2ToInterop(t Testing) {
 		s.ActL2EmptyBlock(t)
 	}
 }
+
+func (s *L2Sequencer) ActSetRecoverMode(t Testing, b bool) {
+	s.sequencer.SetRecoverMode(b)
+}
diff --git a/op-e2e/actions/helpers/utils.go b/op-e2e/actions/helpers/utils.go
index 9fa5d0438741c..e8dc404ea164a 100644
--- a/op-e2e/actions/helpers/utils.go
+++ b/op-e2e/actions/helpers/utils.go
@@ -15,7 +15,7 @@ func DefaultRollupTestParams() *e2eutils.TestParams {
 		MaxSequencerDrift:   40,
 		SequencerWindowSize: 120,
 		ChannelTimeout:      120,
-		L1BlockTime:         15,
+		L1BlockTime:         12, // Many of the action helpers assume a 12s L1 block time
 		AllocType:           config.DefaultAllocType,
 	}
 }
diff --git a/op-e2e/actions/proofs/helpers/env.go b/op-e2e/actions/proofs/helpers/env.go
index 6872c1d0e1efa..d700763d7f79f 100644
--- a/op-e2e/actions/proofs/helpers/env.go
+++ b/op-e2e/actions/proofs/helpers/env.go
@@ -243,15 +243,13 @@ func (env *L2FaultProofEnv) BatchAndMine(t helpers.Testing) {
 // Returns the L2 Safe Block Reference
 func (env *L2FaultProofEnv) BatchMineAndSync(t helpers.Testing) eth.L2BlockRef {
 	t.Helper()
-	id := env.Miner.UnsafeID()
 	env.BatchAndMine(t)
 	env.Sequencer.ActL1HeadSignal(t)
 	env.Sequencer.ActL2PipelineFull(t)
 
 	// Assertions
 	syncStatus := env.Sequencer.SyncStatus()
-	require.Equal(t, syncStatus.UnsafeL2.L1Origin, id, "UnsafeL2.L1Origin should equal L1 Unsafe ID before batch submitted")
 	require.Equal(t, syncStatus.UnsafeL2, syncStatus.SafeL2, "UnsafeL2 should equal SafeL2")
 
 	return syncStatus.SafeL2
diff --git a/op-e2e/actions/proofs/sequence_window_expiry_test.go b/op-e2e/actions/proofs/sequence_window_expiry_test.go
index cb702fe8eb6a4..cd614afb97abc 100644
--- a/op-e2e/actions/proofs/sequence_window_expiry_test.go
+++ b/op-e2e/actions/proofs/sequence_window_expiry_test.go
@@ -5,18 +5,39 @@ import (
 	actionsHelpers "github.com/ethereum-optimism/optimism/op-e2e/actions/helpers"
 	"github.com/ethereum-optimism/optimism/op-e2e/actions/proofs/helpers"
+
"github.com/ethereum-optimism/optimism/op-e2e/e2eutils"
 	"github.com/ethereum-optimism/optimism/op-program/client/claim"
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/stretchr/testify/require"
 )
 
-// Run a test that proves a deposit-only block generated due to sequence window expiry.
+// Run a test that proves a deposit-only block generated due to sequence window expiry,
+// and then recovers the chain using sequencer recover mode.
 func runSequenceWindowExpireTest(gt *testing.T, testCfg *helpers.TestCfg[any]) {
 	t := actionsHelpers.NewDefaultTesting(gt)
-	tp := helpers.NewTestParams()
-	env := helpers.NewL2FaultProofEnv(t, testCfg, tp, helpers.NewBatcherCfg())
+	const SEQUENCER_WINDOW_SIZE = 50 // (short, to keep test fast)
+	tp := helpers.NewTestParams(func(p *e2eutils.TestParams) {
+		p.SequencerWindowSize = SEQUENCER_WINDOW_SIZE
+		p.MaxSequencerDrift = 1800 // use 1800 seconds (30 minutes), which is the protocol constant since Fjord
+	})
+
+	// It seems more difficult (almost impossible) to recover from sequencing window expiry with span batches,
+	// since the singular batches within are invalidated _atomically_.
+	// That is to say, if the oldest batch in the span batch fails the sequencing window check
+	// (l1 origin + seq window < l1 inclusion),
+	// all following batches are invalidated / dropped as well.
+	// https://github.com/ethereum-optimism/optimism/blob/73339162d78a1ebf2daadab01736382eed6f4527/op-node/rollup/derive/batches.go#L96-L100
+	//
+	// If the same blocks were batched with singular batches, the validation rules are different:
+	// https://github.com/ethereum-optimism/optimism/blob/73339162d78a1ebf2daadab01736382eed6f4527/op-node/rollup/derive/batches.go#L83-L86
+	// In the case of recover mode, the noTxPool=true condition means autoderivation actually fills
+	// the gap with identical blocks anyway, meaning the following batches are actually still valid.
+ bc := helpers.NewBatcherCfg() + bc.ForceSubmitSingularBatch = true - // Mine an empty block for gas estimation purposes. + env := helpers.NewL2FaultProofEnv(t, testCfg, tp, bc) + + // Mine an empty L1 block for gas estimation purposes. env.Miner.ActEmptyBlock(t) // Expire the sequence window by building `SequenceWindow + 1` empty blocks on L1. @@ -24,7 +45,7 @@ func runSequenceWindowExpireTest(gt *testing.T, testCfg *helpers.TestCfg[any]) { env.Alice.L1.ActResetTxOpts(t) env.Alice.ActDeposit(t) - env.Miner.ActL1StartBlock(12)(t) + env.Miner.ActL1StartBlock(tp.L1BlockTime)(t) env.Miner.ActL1IncludeTx(env.Alice.Address())(t) env.Miner.ActL1EndBlock(t) @@ -42,10 +63,107 @@ func runSequenceWindowExpireTest(gt *testing.T, testCfg *helpers.TestCfg[any]) { // Ensure the safe head advanced forcefully. l2SafeHead = env.Engine.L2Chain().CurrentSafeBlock() - require.Greater(t, l2SafeHead.Number.Uint64(), uint64(0)) + require.Greater(t, l2SafeHead.Number.Uint64(), uint64(0), + "The safe head failed to progress after the sequencing window expired (expected deposit-only blocks to be derived).") - // Run the FPP on one of the auto-derived blocks. env.RunFaultProofProgram(t, l2SafeHead.Number.Uint64()/2, testCfg.CheckResult, testCfg.InputParams...) + + // Set recover mode on the sequencer: + env.Sequencer.ActSetRecoverMode(t, true) + // Since recover mode only affects the L2 CL (op-node), + // it won't stop the test environment injecting transactions + // directly into the engine. So we will force the engine + // to ignore such injections if recover mode is enabled. + env.Engine.EngineApi.SetForceEmpty(true) + + // Define "lag" as the difference between the current L1 block number and the safe L2 block's L1 origin number. + computeLag := func() int { + ss := env.Sequencer.SyncStatus() + return int(ss.CurrentL1.Number - ss.SafeL2.L1Origin.Number) + } + + // Define "drift" as the difference between the current L2 block's timestamp and the unsafe L2 block's L1 origin's timestamp. 
+ computeDrift := func() int { + ss := env.Sequencer.SyncStatus() + l2header, err := env.Engine.EthClient().HeaderByHash(t.Ctx(), ss.UnsafeL2.Hash) + require.NoError(t, err) + l1header, err := env.Miner.EthClient().HeaderByHash(t.Ctx(), ss.UnsafeL2.L1Origin.Hash) + require.NoError(t, err) + t.Log("l2header.Time", l2header.Time) + t.Log("l1header.Time", l1header.Time) + return int(l2header.Time) - int(l1header.Time) + } + + // Build both chains and assert the L1 origin catches back up with the tip of the L1 chain. + lag := computeLag() + t.Log("lag", lag) + drift := computeDrift() + t.Log("drift", drift) + require.GreaterOrEqual(t, uint64(lag), tp.SequencerWindowSize, "Lag is less than sequencing window size") + numL1Blocks := 0 + timeout := tp.SequencerWindowSize * 50 + + for numL1Blocks < int(timeout) { + for range 100 * tp.L1BlockTime / env.Sd.RollupCfg.BlockTime { // go at 100x real time + err := env.Sequencer.ActMaybeL2StartBlock(t) + if err != nil { + break + } + env.Bob.L2.ActResetTxOpts(t) + env.Bob.L2.ActMakeTx(t) + env.Engine.ActL2IncludeTx(env.Bob.Address())(t) + // RecoverMode (enabled above) should prevent this + // transaction from being included in the block, which + // is critical for recover mode to work. + env.Sequencer.ActL2EndBlock(t) + drift = computeDrift() + t.Log("drift", drift) + } + env.BatchMineAndSync(t) // Mines 1 block on L1 + numL1Blocks++ + lag = computeLag() + t.Log("lag", lag) + drift = computeDrift() + t.Log("drift", drift) + if lag == 1 { // A lag of 1 is the minimum possible. 
+			break
+		}
+	}
+
+	if uint64(numL1Blocks) >= timeout {
+		t.Fatalf("L1 Origin did not catch up to tip within %d L1 blocks (lag is %d)", numL1Blocks, lag)
+	} else {
+		t.Logf("L1 Origin caught up to within %d blocks of the tip within %d L1 blocks (sequencing window size %d)",
+			lag, numL1Blocks, tp.SequencerWindowSize)
+	}
+
+	switch {
+	case drift == 0:
+		t.Fatal("drift is zero, this implies the unsafe l2 head is pinned to the l1 head")
+	case drift > int(tp.MaxSequencerDrift):
+		t.Fatal("drift is too high")
+	default:
+		t.Log("drift", drift)
+	}
+
+	// Disable recover mode so we can get some user transactions in again.
+	env.Sequencer.ActSetRecoverMode(t, false)
+	env.Engine.EngineApi.SetForceEmpty(false)
+	l2SafeBefore := env.Sequencer.L2Safe()
+	env.Sequencer.ActL2StartBlock(t)
+	env.Bob.L2.ActResetTxOpts(t)
+	env.Bob.L2.ActMakeTx(t)
+	env.Engine.ActL2IncludeTx(env.Bob.Address())(t)
+	env.Sequencer.ActL2EndBlock(t)
+	env.BatchMineAndSync(t)
+	l2Safe := env.Sequencer.L2Safe()
+	require.Equal(t, l2Safe.Number, l2SafeBefore.Number+1, "safe chain did not progress with user transactions")
+	l2SafeBlock, err := env.Engine.EthClient().BlockByHash(t.Ctx(), l2Safe.Hash)
+	require.NoError(t, err)
+	// Assert safe block has at least two transactions
+	require.GreaterOrEqual(t, len(l2SafeBlock.Transactions()), 2, "safe block did not have at least two transactions")
+
+	env.RunFaultProofProgram(t, l2Safe.Number, testCfg.CheckResult, testCfg.InputParams...)
} // Runs a that proves a block in a chain where the batcher opens a channel, the sequence window expires, and then the diff --git a/op-node/rollup/sequencing/origin_selector.go b/op-node/rollup/sequencing/origin_selector.go index c5f23407d7652..368226db34e26 100644 --- a/op-node/rollup/sequencing/origin_selector.go +++ b/op-node/rollup/sequencing/origin_selector.go @@ -70,80 +70,51 @@ func (los *L1OriginSelector) OnEvent(ctx context.Context, ev event.Event) bool { return true } -// FindL1Origin determines what the next L1 Origin should be. -// The L1 Origin is either the L2 Head's Origin, or the following L1 block -// if the next L2 block's time is greater than or equal to the L2 Head's Origin. -// The origin selection relies purely on block numbers and it is the caller's -// responsibility to detect and handle L1 reorgs. +// FindL1Origin determines what the L1 Origin for the next L2 Block should be. +// It wraps the FindL1OriginOfNextL2Block function and handles caching and network requests. func (los *L1OriginSelector) FindL1Origin(ctx context.Context, l2Head eth.L2BlockRef) (eth.L1BlockRef, error) { + recoverMode := los.recoverMode.Load() + // Get cached values for currentOrigin and nextOrigin currentOrigin, nextOrigin, err := los.CurrentAndNextOrigin(ctx, l2Head) if err != nil { return eth.L1BlockRef{}, err } - - // If the next L2 block time is greater than the next origin block's time, we can choose to - // start building on top of the next origin. Sequencer implementation has some leeway here and - // could decide to continue to build on top of the previous origin until the Sequencer runs out - // of slack. For simplicity, we implement our Sequencer to always start building on the latest - // L1 block when we can. 
- if nextOrigin != (eth.L1BlockRef{}) && l2Head.Time+los.cfg.BlockTime >= nextOrigin.Time { - return nextOrigin, nil - } - - msd := los.spec.MaxSequencerDrift(currentOrigin.Time) - log := los.log.New("current", currentOrigin, "current_time", currentOrigin.Time, - "l2_head", l2Head, "l2_head_time", l2Head.Time, "max_seq_drift", msd) - - pastSeqDrift := l2Head.Time+los.cfg.BlockTime-currentOrigin.Time > msd - - // If we are not past the max sequencer drift, we can just return the current origin. - if !pastSeqDrift { - return currentOrigin, nil - } - - // Otherwise, we need to find the next L1 origin block in order to continue producing blocks. - log.Warn("Next L2 block time is past the sequencer drift + current origin time") - - if nextOrigin == (eth.L1BlockRef{}) { - fetchCtx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - - // If the next origin is not set, we need to fetch it now. - nextOrigin, err = los.fetch(fetchCtx, currentOrigin.Number+1) - if err != nil { - return eth.L1BlockRef{}, fmt.Errorf("cannot build next L2 block past current L1 origin %s by more than sequencer time drift, and failed to find next L1 origin: %w", currentOrigin, err) + // Try to find the L1 origin given the current data in cache + o, err := los.findL1OriginOfNextL2Block( + l2Head, + currentOrigin, + nextOrigin, + recoverMode) + + // If the cache doesn't have the next origin, but we now + // know we definitely need it, fetch it and try again. + if errors.Is(err, ErrNextL1OriginRequired) { + nextOrigin, err = los.fetch(ctx, currentOrigin.Number+1) + if err == nil || (recoverMode && errors.Is(err, ethereum.NotFound)) { + // If we got the origin, or we are in recover mode and the origin is not found + // (because we recovered the l1 origin up to the l1 tip) + // try again with matchAutoDerivation = false. 
+ return los.findL1OriginOfNextL2Block( + l2Head, + currentOrigin, + nextOrigin, + false) + } else { + return eth.L1BlockRef{}, ErrNextL1OriginRequired } } - - // If the next origin is ahead of the L2 head, we must return the current origin. - if l2Head.Time+los.cfg.BlockTime < nextOrigin.Time { - return currentOrigin, nil - } - - return nextOrigin, nil + return o, err } +// CurrentAndNextOrigin returns the current cached values for the current L1 origin for the supplied l2Head, and its successor. +// It only performs a fetch to L1 if the cache is invalid. +// The cache can be updated asynchronously by other methods on L1OriginSelector. +// The returned currentOrigin should _always_ be non-empty, because it is populated from l2Head whose +// l1Origin is first specified in the rollup.Config.Genesis.L1 and progressed to non-empty values thereafter. func (los *L1OriginSelector) CurrentAndNextOrigin(ctx context.Context, l2Head eth.L2BlockRef) (eth.L1BlockRef, eth.L1BlockRef, error) { los.mu.Lock() defer los.mu.Unlock() - if los.recoverMode.Load() { - currentOrigin, err := los.l1.L1BlockRefByHash(ctx, l2Head.L1Origin.Hash) - if err != nil { - return eth.L1BlockRef{}, eth.L1BlockRef{}, - derive.NewTemporaryError(fmt.Errorf("failed to fetch current L1 origin: %w", err)) - } - los.currentOrigin = currentOrigin - nextOrigin, err := los.l1.L1BlockRefByNumber(ctx, currentOrigin.Number+1) - if err != nil { - return eth.L1BlockRef{}, eth.L1BlockRef{}, - derive.NewTemporaryError(fmt.Errorf("failed to fetch next L1 origin: %w", err)) - } - los.nextOrigin = nextOrigin - los.log.Info("origin selector in recover mode", "current_origin", los.currentOrigin, "next_origin", los.nextOrigin, "l2_head", l2Head) - return los.currentOrigin, los.nextOrigin, nil - } - if l2Head.L1Origin == los.currentOrigin.ID() { // Most likely outcome: the L2 head is still on the current origin. 
} else if l2Head.L1Origin == los.nextOrigin.ID() { @@ -238,3 +209,69 @@ func (los *L1OriginSelector) reset() { los.currentOrigin = eth.L1BlockRef{} los.nextOrigin = eth.L1BlockRef{} } + +var ( + ErrInvalidL1Origin = fmt.Errorf("origin-selector: currentL1Origin.Hash != l2Head.L1Origin.Hash") + ErrNextL1OriginOrphaned = fmt.Errorf("origin-selector: nextL1Origin.ParentHash != currentL1Origin.Hash") + ErrNextL1OriginRequired = fmt.Errorf("origin-selector: nextL1Origin not supplied but required to satisfy constraints") +) + +// FindL1OriginOfNextL2Block finds the L1 origin of the next L2 block. +// It returns an error if there is no way to build a block satisfying +// derivation constraints with the supplied data. +// You can pass an empty nextL1Origin if it is not yet available +// removing the need for block building to wait on the result of network calls. +// This method is designed to be pure (it only reads the cfg property of the receiver) +// and should not have any side effects. +func (los *L1OriginSelector) findL1OriginOfNextL2Block( + l2Head eth.L2BlockRef, + currentL1Origin eth.L1BlockRef, nextL1Origin eth.L1BlockRef, + matchAutoDerivation bool) (eth.L1BlockRef, error) { + + if (currentL1Origin == eth.L1BlockRef{}) { + // This would indicate a programming error, since the currentL1Origin + // should _always_ be available. + // The first value (for block 1) is specified in rollup.Config.Genesis.L1 + // and it is then only updated to non-empty values. 
+ panic("origin-selector: currentL1Origin is empty") + } + if l2Head.L1Origin.Hash != currentL1Origin.Hash { + return currentL1Origin, ErrInvalidL1Origin + } + if (nextL1Origin != eth.L1BlockRef{} && nextL1Origin.ParentHash != currentL1Origin.Hash) { + return nextL1Origin, ErrNextL1OriginOrphaned + } + + l2BlockTime := los.cfg.BlockTime + maxDrift := rollup.NewChainSpec(los.cfg).MaxSequencerDrift(currentL1Origin.Time) + nextL2BlockTime := l2Head.Time + l2BlockTime + driftCurrent := int64(nextL2BlockTime) - int64(currentL1Origin.Time) + + if (nextL1Origin == eth.L1BlockRef{}) { + if matchAutoDerivation { + // See https://github.com/ethereum-optimism/optimism/blob/ce9fa62d0c0325304fc37d91d87aa2e16a7f8356/op-node/rollup/derive/base_batch_stage.go#L186-L205 + // We need the next L1 origin to decide whether we can eagerly adopt it. + // NOTE: This can cause unsafe block production to slow to the rate of L1 block production, if the L1 origin is caught up to the L1 Head. + // Code higher up the call stack should ensure that matchAutoDerivation is false under such conditions. + return eth.L1BlockRef{}, ErrNextL1OriginRequired + } else { + // If we don't yet have the nextL1Origin, stick with the current L1 origin unless doing so would exceed the maximum drift. + if driftCurrent > int64(maxDrift) { + // Return an error so the caller knows it needs to fetch the next l1 origin now. + return eth.L1BlockRef{}, fmt.Errorf("%w: drift of next L2 block would exceed maximum %d unless nextl1Origin is adopted", ErrNextL1OriginRequired, maxDrift) + } + return currentL1Origin, nil + } + } + + driftNext := int64(nextL2BlockTime) - int64(nextL1Origin.Time) + + // Progress to l1OriginChild if doing so would respect the requirement + // that L2 blocks cannot point to a future L1 block (negative drift). + if driftNext >= 0 { + return nextL1Origin, nil + } else { + // If we cannot adopt the l1OriginChild, use the current l1 origin. 
+ return currentL1Origin, nil + } +} diff --git a/op-node/rollup/sequencing/origin_selector_test.go b/op-node/rollup/sequencing/origin_selector_test.go index be8a0dc76255a..5ab947df69fed 100644 --- a/op-node/rollup/sequencing/origin_selector_test.go +++ b/op-node/rollup/sequencing/origin_selector_test.go @@ -7,7 +7,6 @@ import ( "github.com/ethereum-optimism/optimism/op-node/rollup" "github.com/ethereum-optimism/optimism/op-node/rollup/confdepth" - "github.com/ethereum-optimism/optimism/op-node/rollup/derive" "github.com/ethereum-optimism/optimism/op-node/rollup/engine" "github.com/ethereum-optimism/optimism/op-service/eth" "github.com/ethereum-optimism/optimism/op-service/testlog" @@ -208,18 +207,14 @@ func TestOriginSelectorAdvances(t *testing.T) { } if recoverMode { - // In recovery mode (only) we make two RPC calls. + // In recovery mode (only) we make an RPC call to find the next origin. // First, cover the case where the nextOrigin // is not ready yet by simulating a NotFound error. - l1.ExpectL1BlockRefByHash(c.Hash, c, nil) l1.ExpectL1BlockRefByNumber(d.Number, eth.BlockRef{}, ethereum.NotFound) - _, err := s.FindL1Origin(ctx, l2Head) - require.ErrorIs(t, err, derive.ErrTemporary) - require.ErrorIs(t, err, ethereum.NotFound) + requireL1OriginAt(l2Head, c) // Now, simulate the block being ready, and ensure // that the origin advances to the next block. - l1.ExpectL1BlockRefByHash(c.Hash, c, nil) l1.ExpectL1BlockRefByNumber(d.Number, d, nil) requireL1OriginAt(l2Head, d) } else { @@ -344,7 +339,7 @@ func TestOriginSelectorFetchesNextOrigin(t *testing.T) { // // There are 3 blocks [a, b, c]. After advancing to b, a reorg is simulated // where b is reorged and replaced by providing a `c` next that has a different parent hash. -// The origin should still provide c as the next origin so upstream services can detect the reorg. +// A sentinel error should be returned. 
func TestOriginSelectorHandlesReorg(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -384,6 +379,11 @@ func TestOriginSelectorHandlesReorg(t *testing.T) { require.Equal(t, l1ref, next) } + requireFindL1OriginError := func(e error) { + _, err := s.FindL1Origin(ctx, l2Head) + require.ErrorIs(t, err, e) + } + requireFindl1OriginEqual(a) // Selection is stable until the next origin is fetched @@ -413,9 +413,8 @@ func TestOriginSelectorHandlesReorg(t *testing.T) { handled = s.OnEvent(context.Background(), engine.ForkchoiceUpdateEvent{UnsafeL2Head: l2Head}) require.True(t, handled) - // The next origin should be `c` now, otherwise an upstream service cannot detect the reorg - // and the origin will be stuck at `b` - requireFindl1OriginEqual(c) + // We shuold get a sentinel error + requireFindL1OriginError(ErrNextL1OriginOrphaned) } // TestOriginSelectorRespectsOriginTiming ensures that the origin selector @@ -593,7 +592,7 @@ func TestOriginSelectorStrictConfDepth(t *testing.T) { s := NewL1OriginSelector(ctx, log, cfg, confDepthL1) _, err := s.FindL1Origin(ctx, l2Head) - require.ErrorContains(t, err, "sequencer time drift") + require.ErrorIs(t, err, ErrNextL1OriginRequired) } func u64ptr(n uint64) *uint64 { @@ -779,11 +778,11 @@ func TestOriginSelectorHandlesLateL1Blocks(t *testing.T) { s := NewL1OriginSelector(ctx, log, cfg, confDepthL1) _, err := s.FindL1Origin(ctx, l2Head) - require.ErrorContains(t, err, "sequencer time drift") + require.ErrorIs(t, err, ErrNextL1OriginRequired) l1Head = c _, err = s.FindL1Origin(ctx, l2Head) - require.ErrorContains(t, err, "sequencer time drift") + require.ErrorIs(t, err, ErrNextL1OriginRequired) l1Head = d next, err := s.FindL1Origin(ctx, l2Head) @@ -811,3 +810,196 @@ func TestOriginSelectorMiscEvent(t *testing.T) { handled := s.OnEvent(context.Background(), rollup.L1TemporaryErrorEvent{}) require.False(t, handled) } + +func TestFindL1OriginOfNextL2Block(t *testing.T) { + cfg := &rollup.Config{ 
+ MaxSequencerDrift: 1800, // Use Fjord constant value + BlockTime: 2, + } + + los := NewL1OriginSelector(context.Background(), testlog.Logger(t, log.LevelDebug), cfg, &testutils.MockL1Source{}) + + require.Panics(t, func() { + _, _ = los.findL1OriginOfNextL2Block( + eth.L2BlockRef{}, + eth.L1BlockRef{}, + eth.L1BlockRef{}, + false) + }) + + type testCase struct { + name string + l2Head eth.L2BlockRef + currentL1Origin eth.L1BlockRef + nextL1Origin eth.L1BlockRef + matchAutoderivation bool + expectedResult eth.L1BlockRef + expectedError error + } + + tcs := []testCase{} + + // Scenarios with valid data, no drift concerns + // but availability of next l1 origin is modulated. + // + // L1 chain: a100(1200) <- [ a101(1212) ] + // /\ + // L2 chain \_ b1000(1220) + a100 := eth.L1BlockRef{ + Number: 100, + Hash: common.Hash{'a', '1', '0', '0'}, + Time: 1200, + } + a101 := eth.L1BlockRef{ + Number: 101, + ParentHash: a100.Hash, + Hash: common.Hash{'a', '0', '0'}, + Time: 1212, + } + b1000 := eth.L2BlockRef{ + Number: 1000, + Hash: common.Hash{'b', '1', '0', '0', '0'}, + L1Origin: a100.ID(), + Time: 1220, + } + + tcs = append(tcs, + testCase{ + name: "normal operation, progress because we can", + l2Head: b1000, + currentL1Origin: a100, + nextL1Origin: a101, + expectedResult: a101, + }, + testCase{ + name: "recover mode, progress because we can", + l2Head: b1000, + currentL1Origin: a100, + nextL1Origin: a101, + expectedResult: a101, + matchAutoderivation: true, + }, + testCase{ + name: "normal operation, don't need to progress", + l2Head: b1000, + currentL1Origin: a100, + expectedResult: a100, + }, + testCase{ + name: "recover mode, need to progress but can't", + l2Head: b1000, + currentL1Origin: a100, + matchAutoderivation: true, + expectedError: ErrNextL1OriginRequired, + }, + ) + + // Bad input data / reorg scenarios + // L1 chain: c100(1200) <-[x]- c101(1212) + // /\ + // L2 chain \_[x]- d/e1000(1220) + c100 := eth.L1BlockRef{ + Number: 100, + Hash: common.Hash{'a', 
'1', '0', '0'}, + Time: 1200, + } + c101 := eth.L1BlockRef{ + Number: 101, + ParentHash: common.Hash{}, // does not point to c100 + Hash: common.Hash{'a', '0', '0'}, + Time: 1212, + } + d1000 := eth.L2BlockRef{ + Number: 1000, + L1Origin: c100.ID(), + Hash: common.Hash{'d', '1', '0', '0', '0'}, + Time: 1220, + } + e1000 := eth.L2BlockRef{ + Number: 1000, + L1Origin: eth.BlockID{}, // does not point to c100 + Hash: common.Hash{'d', '1', '0', '0', '0'}, + Time: 1220, + } + tcs = append(tcs, + testCase{ + name: "L1 reorg", + currentL1Origin: c100, + nextL1Origin: c101, + l2Head: d1000, + expectedResult: c101, + expectedError: ErrNextL1OriginOrphaned, + }, + testCase{ + name: "Invalid l1 origin", + currentL1Origin: c100, + nextL1Origin: c101, + l2Head: e1000, + expectedResult: c100, + expectedError: ErrInvalidL1Origin, + }) + + // Drift at maximum, + // L1 chain: a100(1200) <- [ a101(1212) ] + // /\ + // L2 chain \_ f1000(3000) + f1000 := eth.L2BlockRef{ + Number: 1000, + L1Origin: a100.ID(), + Hash: common.Hash{'f', '1', '0', '0', '0'}, + Time: 3000, + } + tcs = append(tcs, + testCase{ + name: "Drift at maximum, nextL1Origin available", + currentL1Origin: a100, + nextL1Origin: a101, + l2Head: f1000, + expectedResult: a101, + }, + testCase{ + name: "Drift at maximum, nextL1Origin unavailable", + currentL1Origin: a100, + l2Head: f1000, + expectedError: ErrNextL1OriginRequired, + }) + + // Negative drift, + // L1 chain: a100(1200) <- a101(1212) + // /\ + // L2 chain \_ g1000(1200) + // Current drift is 0 + // adopting the nextLOrigin would make it negative (add 2 subtract 12) + g1000 := eth.L2BlockRef{ + Number: 1000, + L1Origin: a100.ID(), + Hash: common.Hash{'g', '1', '0', '0', '0'}, + Time: 1200, + } + tcs = append(tcs, + testCase{ + name: "Negative drift", + currentL1Origin: a100, + nextL1Origin: a101, + l2Head: g1000, + expectedResult: a100, + }) + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + result, err := los.findL1OriginOfNextL2Block( + 
tc.l2Head, + tc.currentL1Origin, + tc.nextL1Origin, + tc.matchAutoderivation) + if tc.expectedError != nil { + require.ErrorIs(t, err, tc.expectedError) + } else { + require.NoError(t, err) + } + if result != tc.expectedResult { + t.Errorf("expected result %v, got %v", tc.expectedResult, result) + } + }) + } +} diff --git a/op-node/rollup/sequencing/sequencer.go b/op-node/rollup/sequencing/sequencer.go index 9e2b348c93b84..76e743af1dd46 100644 --- a/op-node/rollup/sequencing/sequencer.go +++ b/op-node/rollup/sequencing/sequencer.go @@ -503,21 +503,23 @@ func (d *Sequencer) startBuildingBlock() { // Figure out which L1 origin block we're going to be building on top of. l1Origin, err := d.l1OriginSelector.FindL1Origin(ctx, l2Head) - if err != nil { - d.nextAction = d.timeNow().Add(time.Second) - d.nextActionOK = d.active.Load() - d.log.Error("Error finding next L1 Origin", "err", err) - d.emitter.Emit(d.ctx, rollup.L1TemporaryErrorEvent{Err: err}) - return - } - - if !(l2Head.L1Origin.Hash == l1Origin.ParentHash || l2Head.L1Origin.Hash == l1Origin.Hash) { + switch { + case err == nil: + case errors.Is(err, ErrInvalidL1Origin), errors.Is(err, ErrNextL1OriginOrphaned): d.metrics.RecordSequencerInconsistentL1Origin(l2Head.L1Origin, l1Origin.ID()) d.emitter.Emit(d.ctx, rollup.ResetEvent{ Err: fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin), }) return + case errors.Is(err, ErrNextL1OriginRequired): + fallthrough + default: + d.nextAction = d.timeNow().Add(time.Second) + d.nextActionOK = d.active.Load() + d.log.Error("Error finding next L1 Origin", "err", err) + d.emitter.Emit(d.ctx, rollup.L1TemporaryErrorEvent{Err: err}) + return } d.log.Info("Started sequencing new block", "parent", l2Head, "l1Origin", l1Origin)