diff --git a/devnet-sdk/devstack/dsl/params.go b/devnet-sdk/devstack/dsl/params.go new file mode 100644 index 0000000000000..099e016656fc6 --- /dev/null +++ b/devnet-sdk/devstack/dsl/params.go @@ -0,0 +1,5 @@ +package dsl + +import "time" + +const DefaultTimeout = 30 * time.Second diff --git a/op-devstack/dsl/params.go b/op-devstack/dsl/params.go index 1b91cf8843181..099e016656fc6 100644 --- a/op-devstack/dsl/params.go +++ b/op-devstack/dsl/params.go @@ -2,4 +2,4 @@ package dsl import "time" -const defaultTimeout = 30 * time.Second +const DefaultTimeout = 30 * time.Second diff --git a/op-devstack/dsl/supervisor.go b/op-devstack/dsl/supervisor.go index b63ac7c475dcc..abb34124f1042 100644 --- a/op-devstack/dsl/supervisor.go +++ b/op-devstack/dsl/supervisor.go @@ -48,7 +48,7 @@ func WithAllLocalUnsafeHeadsAdvancedBy(blocks uint64) func(cfg *VerifySyncStatus func (s *Supervisor) VerifySyncStatus(opts ...func(config *VerifySyncStatusConfig)) { cfg := applyOpts(VerifySyncStatusConfig{}, opts...) 
initial := s.fetchSyncStatus() - ctx, cancel := context.WithTimeout(s.ctx, defaultTimeout) + ctx, cancel := context.WithTimeout(s.ctx, DefaultTimeout) defer cancel() err := wait.For(ctx, 1*time.Second, func() (bool, error) { status := s.fetchSyncStatus() @@ -69,7 +69,7 @@ func (s *Supervisor) VerifySyncStatus(opts ...func(config *VerifySyncStatusConfi func (s *Supervisor) fetchSyncStatus() eth.SupervisorSyncStatus { s.log.Debug("Fetching supervisor sync status") - ctx, cancel := context.WithTimeout(s.ctx, defaultTimeout) + ctx, cancel := context.WithTimeout(s.ctx, DefaultTimeout) defer cancel() syncStatus, err := retry.Do[eth.SupervisorSyncStatus](ctx, 2, retry.Fixed(500*time.Millisecond), func() (eth.SupervisorSyncStatus, error) { syncStatus, err := s.inner.QueryAPI().SyncStatus(s.ctx) @@ -87,7 +87,7 @@ func (s *Supervisor) fetchSyncStatus() eth.SupervisorSyncStatus { } func (s *Supervisor) SafeBlockID(chainID eth.ChainID) eth.BlockID { - ctx, cancel := context.WithTimeout(s.ctx, defaultTimeout) + ctx, cancel := context.WithTimeout(s.ctx, DefaultTimeout) defer cancel() syncStatus, err := retry.Do[eth.SupervisorSyncStatus](ctx, 2, retry.Fixed(500*time.Millisecond), func() (eth.SupervisorSyncStatus, error) { syncStatus, err := s.inner.QueryAPI().SyncStatus(s.ctx) diff --git a/op-devstack/sysgo/sync_test.go b/op-devstack/sysgo/sync_test.go index 38588b9de04cf..c30948463a251 100644 --- a/op-devstack/sysgo/sync_test.go +++ b/op-devstack/sysgo/sync_test.go @@ -10,6 +10,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum-optimism/optimism/op-devstack/devtest" + "github.com/ethereum-optimism/optimism/op-devstack/dsl" "github.com/ethereum-optimism/optimism/op-devstack/shim" "github.com/ethereum-optimism/optimism/op-devstack/stack" "github.com/ethereum-optimism/optimism/op-service/eth" @@ -220,6 +221,40 @@ func TestL2CLSyncP2P(gt *testing.T) { } } +func queryBlockFromEL(el stack.ELNode, label eth.BlockLabel) eth.BlockRef { + ctx, cancel := 
context.WithTimeout(el.T().Ctx(), dsl.DefaultTimeout) + defer cancel() + block, err := el.EthClient().BlockRefByLabel(ctx, label) + el.T().Require().NoError(err) + return block +} + +func queryBlockFromELByNumber(el stack.ELNode, num uint64) eth.BlockRef { + ctx, cancel := context.WithTimeout(el.T().Ctx(), dsl.DefaultTimeout) + defer cancel() + block, err := el.EthClient().BlockRefByNumber(ctx, num) + el.T().Require().NoError(err) + return block +} + +func querySyncStatusFromCL(cl stack.L2CLNode) *eth.SyncStatus { + ctx, cancel := context.WithTimeout(cl.T().Ctx(), dsl.DefaultTimeout) + defer cancel() + block, err := cl.RollupAPI().SyncStatus(ctx) + cl.T().Require().NoError(err) + return block +} + +func querySyncStatusFromSupervisor(supervisor stack.Supervisor, chainID eth.ChainID) *eth.SupervisorChainSyncStatus { + ctx, cancel := context.WithTimeout(supervisor.T().Ctx(), dsl.DefaultTimeout) + defer cancel() + viewAll, err := supervisor.QueryAPI().SyncStatus(ctx) + supervisor.T().Require().NoError(err) + view, ok := viewAll.Chains[chainID] + supervisor.T().Require().True(ok, "chain sync status not found from supervisor sync status") + return view +} + // TestUnsafeChainUnknownToL2CL tests the below scenario: // supervisor unsafe ahead of L2CL unsafe, aka L2CL processes new blocks first. 
// To create this out-of-sync scenario, we follow the steps below: @@ -259,6 +294,7 @@ func TestUnsafeChainUnknownToL2CL(gt *testing.T) { waitTime := time.Duration(blockTime+1) * time.Second { logger := system.T().Logger() + require := system.T().Require() elA := system.L2Network(ids.L2A).L2ELNode(ids.L2AEL) elA2 := system.L2Network(ids.L2A).L2ELNode(ids.L2A2EL) @@ -266,42 +302,13 @@ func TestUnsafeChainUnknownToL2CL(gt *testing.T) { clA2 := system.L2Network(ids.L2A).L2CLNode(ids.L2A2CL) supervisor := system.Supervisor(ids.Supervisor) - queryEL := func(label eth.BlockLabel) (eth.BlockRef, eth.BlockRef) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) - blockA, err := elA.EthClient().BlockRefByLabel(ctx, label) - require.NoError(t, err) - blockA2, err := elA2.EthClient().BlockRefByLabel(ctx, label) - require.NoError(t, err) - cancel() - logger.Info("chain A", "blockNum", blockA.Number, "block", blockA) - logger.Info("chain A2", "blockNum", blockA2.Number, "block", blockA2) - return blockA, blockA2 - } - - queryCL := func() (*eth.SyncStatus, *eth.SyncStatus) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) - syncA, err := clA.RollupAPI().SyncStatus(ctx) - require.NoError(t, err) - syncA2, err := clA2.RollupAPI().SyncStatus(ctx) - require.NoError(t, err) - cancel() - logger.Info("chain A", "sync", syncA) - logger.Info("chain A2", "sync", syncA2) - return syncA, syncA2 - } - - querySupervisor := func(chainID eth.ChainID) eth.BlockID { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) - view, err := supervisor.QueryAPI().LocalUnsafe(ctx, chainID) - require.NoError(t, err) - cancel() - return view - } - targetBlockNum1 := uint64(10) logger.Info("wait until reaching target block", "blockNum", targetBlockNum1) - require.Eventually(t, func() bool { - blockA, blockA2 := queryEL(eth.Unsafe) + require.Eventually(func() bool { + blockA := queryBlockFromEL(elA, eth.Unsafe) + blockA2 := 
queryBlockFromEL(elA2, eth.Unsafe) + logger.Info("chain A", "blockNum", blockA.Number, "block", blockA) + logger.Info("chain A2", "blockNum", blockA2.Number, "block", blockA2) return blockA.Number >= targetBlockNum1 && blockA2.Number >= targetBlockNum1 }, 30*time.Second, waitTime) @@ -309,26 +316,30 @@ func TestUnsafeChainUnknownToL2CL(gt *testing.T) { DisconnectL2CLP2P(ids.L2ACL, ids.L2A2CL).AfterDeploy(orch) // verifier lost its P2P connection with sequencer, and will advance its unsafe head by reading L1 but not by P2P - _, prevblockA2 := queryEL(eth.Unsafe) + prevblockA2 := queryBlockFromEL(elA2, eth.Unsafe) targetBlockNum2 := prevblockA2.Number + 5 logger.Info("make sure verifier advances safe head by reading L1", "blockNum", targetBlockNum2) - require.Eventually(t, func() bool { - _, syncA2 := queryCL() + require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + logger.Info("chain A", "sync", syncA) + logger.Info("chain A2", "sync", syncA2) // unsafe head and safe head both advanced from last observed unsafe head return syncA2.SafeL2.Number == syncA2.UnsafeL2.Number && syncA2.SafeL2.Number > targetBlockNum2 }, 60*time.Second, waitTime) logger.Info("verifier heads will lag compared from sequencer heads and supervisor view") - require.Never(t, func() bool { - syncA, syncA2 := queryCL() - chainAView := querySupervisor(elA2.ChainID()) + require.Never(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + chainAView := querySyncStatusFromSupervisor(supervisor, elA2.ChainID()) // unsafe head will always lagged check := syncA.UnsafeL2.Number > syncA2.UnsafeL2.Number // safe head may be matched or lagged check = check && syncA.SafeL2.Number >= syncA2.SafeL2.Number // unsafe head may be matched or lagged compared to supervisor unsafe head view for chain A - check = check && chainAView.Number >= syncA2.UnsafeL2.Number - logger.Info("unsafe head sync status", "sequencer", 
syncA.UnsafeL2.Number, "supervisor", chainAView.Number, "verifier", syncA2.UnsafeL2.Number) + check = check && chainAView.LocalUnsafe.Number >= syncA2.UnsafeL2.Number + logger.Info("unsafe head sync status", "sequencer", syncA.UnsafeL2.Number, "supervisor", chainAView.LocalUnsafe.Number, "verifier", syncA2.UnsafeL2.Number) return !check }, 15*time.Second, waitTime) @@ -336,9 +347,361 @@ func TestUnsafeChainUnknownToL2CL(gt *testing.T) { WithL2CLP2PConnection(ids.L2ACL, ids.L2A2CL).AfterDeploy(orch) logger.Info("verifier catchs up sequencer unsafe chain with was unknown for verifier") - require.Eventually(t, func() bool { - blockA, blockA2 := queryEL(eth.Unsafe) + require.Eventually(func() bool { + blockA := queryBlockFromEL(elA, eth.Unsafe) + blockA2 := queryBlockFromEL(elA2, eth.Unsafe) return blockA.Number == blockA2.Number && blockA.Hash == blockA2.Hash }, 10*time.Second, waitTime) } } + +// TestUnsafeChainKnownToL2CL tests the below scenario: +// supervisor cross-safe ahead of L2CL cross-safe, aka L2CL can "skip" forward to match safety of supervisor. +// To create this out-of-sync scenario, we follow the steps below: +// 1. Make sequencer (L2CL), verifier (L2CL), and supervisor sync for a few blocks. +// - Sequencer and verifier are connected via P2P, which makes their unsafe heads in sync. +// - Both L2CLs are in managed mode, digesting L1 blocks from the supervisor and reporting unsafe and safe blocks back to the supervisor. +// - Wait enough for both L2CLs advance safe heads. +// 2. Disconnect the P2P connection between the sequencer and verifier. +// - The verifier will not receive unsafe heads via P2P, and can only update unsafe heads matching with safe heads by reading L1 batches. +// - The verifier safe head will lag behind or match the sequencer and supervisor because all three components share the same L1 view. +// 3. Stop verifier L2CL +// - The verifier will not be able to advance unsafe head and safe head. 
+// - The sequencer will advance unsafe head and safe head, as well as stay synced with the supervisor. +// 4. Wait until sequencer and supervisor diverged enough from the verifier. +// - To make the verifier hold unsafe blocks which are already viewed as safe by sequencer and supervisor, we wait. +// - Wait until supervisor viewed safe head number is sufficiently larger than the stopped verifier's safe head view. +// 5. Restart the verifier. +// - The verifier will not sync via P2P but is only able to advance unsafe and safe heads by reading L1 batches. +// - The verifier will quickly catch up with the sequencer safe head as well as the supervisor. +// - The verifier will "skip" processing already known unsafe blocks, and consolidate them into safe blocks. +func TestUnsafeChainKnownToL2CL(gt *testing.T) { + var ids DefaultRedundancyInteropSystemIDs + opt := DefaultRedundancyInteropSystem(&ids) + + logger := testlog.Logger(gt, log.LevelInfo) + + p := devtest.NewP(context.Background(), logger, func() { + gt.Helper() + gt.FailNow() + }) + gt.Cleanup(p.Close) + + orch := NewOrchestrator(p) + stack.ApplyOptionLifecycle(opt, orch) + + t := devtest.SerialT(gt) + system := shim.NewSystem(t) + orch.Hydrate(system) + + control := orch.controlPlane + + blockTime := system.L2Network(ids.L2A).RollupConfig().BlockTime + + waitTime := time.Duration(blockTime+1) * time.Second + { + logger := system.T().Logger() + require := system.T().Require() + + elA := system.L2Network(ids.L2A).L2ELNode(ids.L2AEL) + elA2 := system.L2Network(ids.L2A).L2ELNode(ids.L2A2EL) + clA := system.L2Network(ids.L2A).L2CLNode(ids.L2ACL) + clA2 := system.L2Network(ids.L2A).L2CLNode(ids.L2A2CL) + supervisor := system.Supervisor(ids.Supervisor) + + logger.Info("make sure verifier safe head advances") + targetBlockNum1 := uint64(5) + require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + logger.Info("chain A", "sync", syncA) + logger.Info("chain A2", "sync", syncA2) + 
return syncA2.SafeL2.Number > targetBlockNum1 + }, 60*time.Second, waitTime) + + safeA2 := queryBlockFromEL(elA2, eth.Safe) + logger.Info("verifier advanced safe head", "number", safeA2.Number) + unsafeA2 := queryBlockFromEL(elA2, eth.Unsafe) + logger.Info("verifier advanced unsafe head", "number", unsafeA2.Number) + + // For making verifier stop advancing unsafe head via P2P + logger.Info("disconnect p2p between L2CLs") + DisconnectL2CLP2P(ids.L2ACL, ids.L2A2CL).AfterDeploy(orch) + + // For making verifier not sync at all + logger.Info("stop verifier") + control.L2CLNodeState(ids.L2A2CL, stack.Stop) + + targetBlockNum2 := safeA2.Number + 10 + logger.Info("wait until supervisor reaches safe head", "target", targetBlockNum2) + var chainAView *eth.SupervisorChainSyncStatus + require.Eventually(func() bool { + chainAView = querySyncStatusFromSupervisor(supervisor, elA2.ChainID()) + logger.Info("supervisor safe head", "number", chainAView.Safe.Number) + return chainAView.Safe.Number > targetBlockNum2 + }, 60*time.Second, waitTime) + + // Restarted verifier will advance its unsafe head by reading L1 but not by P2P + logger.Info("restart verifier") + control.L2CLNodeState(ids.L2A2CL, stack.Start) + + // Query from EL because initializing CL may return null sync status + safeA2 = queryBlockFromEL(elA2, eth.Safe) + logger.Info("verifier safe head after restart", "number", safeA2.Number) + unsafeA2 = queryBlockFromEL(elA2, eth.Unsafe) + logger.Info("verifier unsafe head after restart", "number", unsafeA2.Number) + + // Make sure there are unsafe blocks to be consolidated: + // To check verifier does not have to process blocks since unsafe blocks are already processed + require.Greater(unsafeA2.Number, safeA2.Number) + + logger.Info("make sure verifier unsafe head was consolidated to safe") + require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + chainAView = querySyncStatusFromSupervisor(supervisor, elA2.ChainID()) + 
blockA2 := queryBlockFromEL(elA2, eth.Safe) + blockA := queryBlockFromELByNumber(elA, blockA2.Number) + require.Equal(blockA.Hash, blockA2.Hash) + logger.Info("safe head sync status", "sequencer CL", syncA.SafeL2.Number, "supervisor", chainAView.Safe.Number, "verifier CL", syncA2.SafeL2.Number, "verifier EL", blockA2.Number) + // verifier consolidated every previously known unsafe head to safe head + return syncA2.SafeL2.Number >= unsafeA2.Number + }, 60*time.Second, waitTime) + + delta := uint64(10) + safeA := queryBlockFromEL(elA, eth.Safe) + targetBlockNum3 := safeA.Number + delta + logger.Info("make sure verifier unsafe head advances due to safe head advances", "target", targetBlockNum3, "delta", delta) + require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + chainAView = querySyncStatusFromSupervisor(supervisor, elA2.ChainID()) + logger.Info("unsafe head sync status", "sequencer CL", syncA.UnsafeL2.Number, "supervisor", chainAView.LocalUnsafe.Number, "verifier CL", syncA2.UnsafeL2.Number) + return syncA2.UnsafeL2.Number >= targetBlockNum3 + }, 40*time.Second, waitTime) + + // make sure the resulting chain viewed by verifier did not reorg + block := queryBlockFromELByNumber(elA2, unsafeA2.Number) + require.Equal(unsafeA2.Hash, block.Hash) + } +} + +// TestL2CLAheadOfSupervisor tests the below scenario: +// L2CL ahead of supervisor, aka supervisor needs to reset the L2CL, to reproduce old data. Currently supervisor has only managed mode implemented, so the supervisor will ask the L2CL to reset back. +// To create this out-of-sync scenario, we follow the steps below: +// 0. System setup +// - Two supervisors initialized, each managing two L2CLs per chain. +// - Primary supervisor manages sequencer L2CLs for chain A, B. +// - Backup supervisor manages verifier L2CLs for chain A, B. +// - Each chain's L2CL pair is connected via P2P. +// 1. 
Make sequencers (L2CL), verifiers (L2CL), and supervisors sync for a few blocks. +// - Sequencer and verifier are connected via P2P, which makes their unsafe heads in sync. +// - Both L2CLs are in managed mode, digesting L1 blocks from the supervisor and reporting unsafe and safe blocks back to the supervisor. +// - Wait enough for both L2CLs advance unsafe heads. +// 2. Stop backup supervisor. +// - Verifiers stops advancing safe heads because there is no supervisor to provide them L1 data. +// - Verifiers advances unsafe head because they still have P2P connection with each sequencers. +// - Wait enough to make sequencers and primary supervisor advance safe head enough. +// 3. Connect verifiers (L2CL) to primary supervisor. +// - Primary supervisor has safe heads synced with sequencers. +// - After connection, verifiers will sync with primary supervisor, matching supervisor safe head view. +// - Backup supervisor and verifiers becomes out-of-sync with safe heads. +// - Every L2CLs advance safe head. +// 4. Stop primary supervisor. +// - Every L2CL safe heads will stop advancing. +// - For disconnecting every L2CLs from the supervisor. +// 5. Restart primary supervisor and reconnect sequencers (L2CL) to primary supervisor. +// - Sequencers will resume advancing safe heads, but not verifiers. +// 6. Restart backup supervisor and reconnect verifiers (L2CL) to backup supervisor. +// - Backup supervisor will compare its safe head knowledge with L2CLs, and find out L2CLs are ahead of the backup supervisor. +// - Backup supervisor asks the verifiers (L2CL) to rewind(reset) back to match backup supervisor safe head view. +// - After rewinding(reset), verifier will advance safe heads again because backup supervisor gives L1 data to the verifiers. 
+// - Wait until verifiers advance safe head enough +func TestL2CLAheadOfSupervisor(gt *testing.T) { + var ids MultiSupervisorInteropSystemIDs + opt := MultiSupervisorInteropSystem(&ids) + + logger := testlog.Logger(gt, log.LevelInfo) + + p := devtest.NewP(context.Background(), logger, func() { + gt.Helper() + gt.FailNow() + }) + gt.Cleanup(p.Close) + + orch := NewOrchestrator(p) + stack.ApplyOptionLifecycle(opt, orch) + + t := devtest.SerialT(gt) + system := shim.NewSystem(t) + orch.Hydrate(system) + + control := orch.controlPlane + + blockTime := system.L2Network(ids.L2A).RollupConfig().BlockTime + + waitTime := time.Duration(blockTime+1) * time.Second + { + logger := system.T().Logger() + require := system.T().Require() + + elA := system.L2Network(ids.L2A).L2ELNode(ids.L2AEL) + elA2 := system.L2Network(ids.L2A).L2ELNode(ids.L2A2EL) + elB := system.L2Network(ids.L2B).L2ELNode(ids.L2BEL) + elB2 := system.L2Network(ids.L2B).L2ELNode(ids.L2B2EL) + clA := system.L2Network(ids.L2A).L2CLNode(ids.L2ACL) + clA2 := system.L2Network(ids.L2A).L2CLNode(ids.L2A2CL) + clB := system.L2Network(ids.L2B).L2CLNode(ids.L2BCL) + clB2 := system.L2Network(ids.L2B).L2CLNode(ids.L2B2CL) + supervisorBackup := system.Supervisor(ids.SupervisorBackup) + + targetBlockNum1 := max(querySyncStatusFromCL(clA).UnsafeL2.Number, querySyncStatusFromCL(clB).UnsafeL2.Number) + 10 + logger.Info("make sure verifiers advances unsafe head", "target", targetBlockNum1) + require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + syncB := querySyncStatusFromCL(clB) + syncB2 := querySyncStatusFromCL(clB2) + logger.Info("chain A", "unsafe", syncA.UnsafeL2, "safe", syncA.SafeL2) + logger.Info("chain A2", "unsafe", syncA2.UnsafeL2, "safe", syncA2.SafeL2) + logger.Info("chain B", "unsafe", syncB.UnsafeL2, "safe", syncB.SafeL2) + logger.Info("chain B2", "unsafe", syncB2.UnsafeL2, "safe", syncB2.SafeL2) + check := syncA2.UnsafeL2.Number > targetBlockNum1 + check 
= check && syncB2.UnsafeL2.Number > targetBlockNum1 + return check + }, 60*time.Second, waitTime) + + chainAView := querySyncStatusFromSupervisor(supervisorBackup, clA2.ID().ChainID) + chainBView := querySyncStatusFromSupervisor(supervisorBackup, clB2.ID().ChainID) + + logger.Info("stop backup supervisor") + control.SupervisorState(ids.SupervisorBackup, stack.Stop) + // backup supervisor will only know until these safe heads when restart + A2SafeHead := querySyncStatusFromCL(clA2).SafeL2 + B2SafeHead := querySyncStatusFromCL(clB2).SafeL2 + require.Equal(chainAView.Safe.Hash, A2SafeHead.Hash) + require.Equal(chainBView.Safe.Hash, B2SafeHead.Hash) + logger.Info("backup supervisor(stopped) safe head view", "chainA", A2SafeHead, "chainB", B2SafeHead) + + targetBlockNum2 := max(querySyncStatusFromCL(clA).SafeL2.Number, querySyncStatusFromCL(clB).SafeL2.Number) + 10 + logger.Info("sequencers advances safe heads but not verifiers", "target", targetBlockNum2) + require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + syncB := querySyncStatusFromCL(clB) + syncB2 := querySyncStatusFromCL(clB2) + logger.Info("chain A", "unsafe", syncA.UnsafeL2, "safe", syncA.SafeL2) + logger.Info("chain A2", "unsafe", syncA2.UnsafeL2, "safe", syncA2.SafeL2) + logger.Info("chain B", "unsafe", syncB.UnsafeL2, "safe", syncB.SafeL2) + logger.Info("chain B2", "unsafe", syncB2.UnsafeL2, "safe", syncB2.SafeL2) + // verifier CLs cannot advance their safe head because backup supervisor is down + require.Equal(A2SafeHead, syncA2.SafeL2, "verifier safe head advanced") + require.Equal(B2SafeHead, syncB2.SafeL2, "verifier safe head advanced") + check := syncA.SafeL2.Number > targetBlockNum2 + check = check && syncB.SafeL2.Number > targetBlockNum2 + return check + }, 60*time.Second, waitTime) + + logger.Info("connect verifier CLs to primary supervisor to advance verifier safe heads") + WithManagedBySupervisor(ids.L2A2CL, 
ids.Supervisor).AfterDeploy(orch) + WithManagedBySupervisor(ids.L2B2CL, ids.Supervisor).AfterDeploy(orch) + + targetBlockNum3 := max(querySyncStatusFromCL(clA).SafeL2.Number, querySyncStatusFromCL(clB).SafeL2.Number) + 10 + logger.Info("every CLs advance safe heads", "target", targetBlockNum3) + require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + syncB := querySyncStatusFromCL(clB) + syncB2 := querySyncStatusFromCL(clB2) + logger.Info("chain A", "unsafe", syncA.UnsafeL2, "safe", syncA.SafeL2) + logger.Info("chain A2", "unsafe", syncA2.UnsafeL2, "safe", syncA2.SafeL2) + logger.Info("chain B", "unsafe", syncB.UnsafeL2, "safe", syncB.SafeL2) + logger.Info("chain B2", "unsafe", syncB2.UnsafeL2, "safe", syncB2.SafeL2) + check := syncA.SafeL2.Number > targetBlockNum3 + check = check && syncA2.SafeL2.Number > targetBlockNum3 + check = check && syncB.SafeL2.Number > targetBlockNum3 + check = check && syncB2.SafeL2.Number > targetBlockNum3 + return check + }, 60*time.Second, waitTime) + + logger.Info("stop primary supervisor to disconnect every CL connection") + control.SupervisorState(ids.Supervisor, stack.Stop) + // make sure supervisor halt + time.Sleep(waitTime) + + logger.Info("restart primary supervisor") + control.SupervisorState(ids.Supervisor, stack.Start) + // make sure supervisor initialize + time.Sleep(waitTime) + + targetBlockNum4 := max(querySyncStatusFromCL(clA).SafeL2.Number, querySyncStatusFromCL(clB).SafeL2.Number) + logger.Info("no CL connected to supervisor so every CL safe head will not advance", "target", targetBlockNum4) + require.Never(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + syncB := querySyncStatusFromCL(clB) + syncB2 := querySyncStatusFromCL(clB2) + logger.Info("chain A", "unsafe", syncA.UnsafeL2, "safe", syncA.SafeL2) + logger.Info("chain A2", "unsafe", syncA2.UnsafeL2, "safe", syncA2.SafeL2) + logger.Info("chain B", "unsafe", 
syncB.UnsafeL2, "safe", syncB.SafeL2) + logger.Info("chain B2", "unsafe", syncB2.UnsafeL2, "safe", syncB2.SafeL2) + check := syncA.SafeL2.Number > targetBlockNum4 + check = check && syncA2.SafeL2.Number > targetBlockNum4 + check = check && syncB.SafeL2.Number > targetBlockNum4 + check = check && syncB2.SafeL2.Number > targetBlockNum4 + return check + }, 5*time.Second, waitTime) + + // save sync status for rewind check + syncA2 := querySyncStatusFromCL(clA2) + syncB2 := querySyncStatusFromCL(clB2) + + logger.Info("reconnect sequencer CLs to primary supervisor") + WithManagedBySupervisor(ids.L2ACL, ids.Supervisor).AfterDeploy(orch) + WithManagedBySupervisor(ids.L2BCL, ids.Supervisor).AfterDeploy(orch) + + logger.Info("restart backup supervisor") + control.SupervisorState(ids.SupervisorBackup, stack.Start) + // make sure supervisor initializes + time.Sleep(waitTime) + + logger.Info("reconnect verifier CLs to backup supervisor") + WithManagedBySupervisor(ids.L2A2CL, ids.SupervisorBackup).AfterDeploy(orch) + WithManagedBySupervisor(ids.L2B2CL, ids.SupervisorBackup).AfterDeploy(orch) + + logger.Info("check verifier CLs safe head rewinded") + // wait for backup supervisor manage verifier CLs + time.Sleep(waitTime) + syncA2Rewinded := querySyncStatusFromCL(clA2) + syncB2Rewinded := querySyncStatusFromCL(clB2) + // check safe head rewinded(reset) + require.Greater(syncA2.SafeL2.Number, syncA2Rewinded.SafeL2.Number) + require.Greater(syncB2.SafeL2.Number, syncB2Rewinded.SafeL2.Number) + // also check rewinded(reset) safe head number is close enough with backup supervisor knowledge before L1 sync + tolerance := uint64(3) + require.Greater(A2SafeHead.Number+tolerance, syncA2Rewinded.SafeL2.Number) + require.Greater(B2SafeHead.Number+tolerance, syncB2Rewinded.SafeL2.Number) + + targetBlockNum5 := max(querySyncStatusFromCL(clA).SafeL2.Number, querySyncStatusFromCL(clB).SafeL2.Number) + 10 + logger.Info("every CLs advance safe heads", "target", targetBlockNum5) + 
require.Eventually(func() bool { + syncA := querySyncStatusFromCL(clA) + syncA2 := querySyncStatusFromCL(clA2) + syncB := querySyncStatusFromCL(clB) + syncB2 := querySyncStatusFromCL(clB2) + logger.Info("chain A", "unsafe", syncA.UnsafeL2, "safe", syncA.SafeL2) + logger.Info("chain A2", "unsafe", syncA2.UnsafeL2, "safe", syncA2.SafeL2) + logger.Info("chain B", "unsafe", syncB.UnsafeL2, "safe", syncB.SafeL2) + logger.Info("chain B2", "unsafe", syncB2.UnsafeL2, "safe", syncB2.SafeL2) + check := syncA.SafeL2.Number > targetBlockNum5 + check = check && syncA2.SafeL2.Number > targetBlockNum5 + check = check && syncB.SafeL2.Number > targetBlockNum5 + check = check && syncB2.SafeL2.Number > targetBlockNum5 + return check + }, 60*time.Second, waitTime) + + // Make sure each chain did not diverge + require.Equal(queryBlockFromELByNumber(elA, targetBlockNum5).Hash, queryBlockFromELByNumber(elA2, targetBlockNum5).Hash) + require.Equal(queryBlockFromELByNumber(elB, targetBlockNum5).Hash, queryBlockFromELByNumber(elB2, targetBlockNum5).Hash) + } +} diff --git a/op-devstack/sysgo/system.go b/op-devstack/sysgo/system.go index e5ff67dffad76..f1aab457bf602 100644 --- a/op-devstack/sysgo/system.go +++ b/op-devstack/sysgo/system.go @@ -155,3 +155,59 @@ func DefaultRedundancyInteropSystem(dest *DefaultRedundancyInteropSystemIDs) sta return opt } + +type MultiSupervisorInteropSystemIDs struct { + DefaultRedundancyInteropSystemIDs + + SupervisorBackup stack.SupervisorID + + L2B2CL stack.L2CLNodeID + L2B2EL stack.L2ELNodeID +} + +func MultiSupervisorInteropSystem(dest *MultiSupervisorInteropSystemIDs) stack.Option[*Orchestrator] { + l1ID := eth.ChainIDFromUInt64(900) + l2AID := eth.ChainIDFromUInt64(901) + l2BID := eth.ChainIDFromUInt64(902) + ids := MultiSupervisorInteropSystemIDs{ + DefaultRedundancyInteropSystemIDs: DefaultRedundancyInteropSystemIDs{ + DefaultInteropSystemIDs: NewDefaultInteropSystemIDs(l1ID, l2AID, l2BID), + L2A2CL: stack.L2CLNodeID{Key: "verifier", ChainID: l2AID}, 
+ L2A2EL: stack.L2ELNodeID{Key: "verifier", ChainID: l2AID}, + }, + SupervisorBackup: "backup", + L2B2CL: stack.L2CLNodeID{Key: "verifier", ChainID: l2BID}, + L2B2EL: stack.L2ELNodeID{Key: "verifier", ChainID: l2BID}, + } + + // start with default interop system + var parentIds DefaultInteropSystemIDs + opt := stack.Combine[*Orchestrator]() + opt.Add(DefaultInteropSystem(&parentIds)) + + // add backup supervisor + opt.Add(WithSupervisor(ids.SupervisorBackup, ids.Cluster, ids.L1EL)) + + opt.Add(WithL2ELNode(ids.L2A2EL, &ids.SupervisorBackup)) + opt.Add(WithL2CLNode(ids.L2A2CL, false, ids.L1CL, ids.L1EL, ids.L2A2EL)) + + opt.Add(WithL2ELNode(ids.L2B2EL, &ids.SupervisorBackup)) + opt.Add(WithL2CLNode(ids.L2B2CL, false, ids.L1CL, ids.L1EL, ids.L2B2EL)) + + // verifier must also be managed or it cannot advance + // we attach the verifier L2CL to the backup supervisor + opt.Add(WithManagedBySupervisor(ids.L2A2CL, ids.SupervisorBackup)) + opt.Add(WithManagedBySupervisor(ids.L2B2CL, ids.SupervisorBackup)) + + // P2P connect L2CL nodes + opt.Add(WithL2CLP2PConnection(ids.L2ACL, ids.L2A2CL)) + opt.Add(WithL2CLP2PConnection(ids.L2BCL, ids.L2B2CL)) + + // Upon evaluation of the option, export the contents we created. + // Ids here are static, but other things may be exported too. + opt.Add(stack.Finally(func(orch *Orchestrator, hook stack.SystemHook) { + *dest = ids + })) + + return opt +}