diff --git a/op-acceptance-tests/tests/sync/elsync/gap_elp2p/init_test.go b/op-acceptance-tests/tests/sync/elsync/gap_elp2p/init_test.go new file mode 100644 index 00000000000..280fdd55ad5 --- /dev/null +++ b/op-acceptance-tests/tests/sync/elsync/gap_elp2p/init_test.go @@ -0,0 +1,22 @@ +package gap_elp2p + +import ( + "testing" + + bss "github.com/ethereum-optimism/optimism/op-batcher/batcher" + "github.com/ethereum-optimism/optimism/op-devstack/compat" + "github.com/ethereum-optimism/optimism/op-devstack/presets" + "github.com/ethereum-optimism/optimism/op-devstack/stack" + "github.com/ethereum-optimism/optimism/op-devstack/sysgo" +) + +func TestMain(m *testing.M) { + // No ELP2P, CLP2P to control the supply of unsafe payload to the CL + presets.DoMain(m, presets.WithSingleChainMultiNodeWithoutP2P(), + presets.WithCompatibleTypes(compat.SysGo), + stack.MakeCommon(sysgo.WithBatcherOption(func(id stack.L2BatcherID, cfg *bss.CLIConfig) { + // For stopping derivation, not to advance safe heads + cfg.Stopped = true + })), + ) +} diff --git a/op-acceptance-tests/tests/sync/elsync/gap_elp2p/sync_test.go b/op-acceptance-tests/tests/sync/elsync/gap_elp2p/sync_test.go new file mode 100644 index 00000000000..f3961fa94cc --- /dev/null +++ b/op-acceptance-tests/tests/sync/elsync/gap_elp2p/sync_test.go @@ -0,0 +1,232 @@ +package gap_elp2p + +import ( + "testing" + + "github.com/ethereum-optimism/optimism/op-devstack/devtest" + "github.com/ethereum-optimism/optimism/op-devstack/presets" + "github.com/ethereum-optimism/optimism/op-service/eth" + "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/types" + "github.com/ethereum/go-ethereum" +) + +// TestL2ELP2PCanonicalChainAdvancedByFCU verifies the interaction between NewPayload, +// ForkchoiceUpdate (FCU), and ELP2P/EL sync in a multi-node L2 test network. +// +// Scenario +// - Start a single-chain, multi-node system without ELP2P connectivity for L2ELB. +// - Advance the reference node (L2EL) so it is ahead of L2ELB. +// +// Expectations covered by this test +// +// NewPayload without parents present: +// - Does NOT trigger EL sync. +// - Returns SYNCING for future blocks (startNum+3/5/4/6). +// +// NewPayload on a non canonical chain with available state: +// - Can extend a non canonical chain (startNum+1 then +2) and returns VALID. +// - These non canonical chain blocks are retrievable by hash but remain non-canonical +// (BlockRefByNumber returns NotFound) until FCU marks them valid. +// +// FCU promoting non canonical chain to canonical: +// - FCU to startNum+2 marks the previously imported non canonical blocks valid +// and advances L2ELB canonical head to startNum+2. +// +// FCU targeting a block that cannot yet be validated (missing ancestors): +// - Triggers EL sync on L2EL (skeleton/backfill logs), returns SYNCING, +// and does not advance the head while ELP2P is still unavailable. +// +// Enabling ELP2P and eventual validation: +// - After peering L2ELB with L2EL, FCU to startNum+4 eventually becomes VALID +// once EL sync completes; the test waits for canonicalization and confirms head advances. +// - Subsequent gaps (to startNum+6, then +8) are resolved by FCU with +// WaitUntilValid, advancing the canonical head each time. +// +// NewPayload still does not initiate EL sync: +// - A NewPayload to startNum+10 returns SYNCING and the block remains unknown by number +// until an FCU is issued, which initially returns SYNCING. +// +// Insights +// - NewPayload alone never initiates EL sync, but can build a non canonical chain if state exists. +// - FCU is the mechanism that (a) promotes non canonical chain blocks to canonical when they are +// already fully validated, and (b) triggers EL sync when ancestors are missing. +// - Previously submitted NewPayloads that returned SYNCING are not retained to automatically +// assemble a non canonical chain later. +// - With ELP2P enabled, repeated FCU attempts eventually validate and advance the canonical chain. +func TestL2ELP2PCanonicalChainAdvancedByFCU(gt *testing.T) { + t := devtest.SerialT(gt) + sys := presets.NewSingleChainMultiNodeWithoutCheck(t) + require := t.Require() + logger := t.Logger() + + // Advance few blocks to make sure reference node advanced + sys.L2CL.Advanced(types.LocalUnsafe, 10, 30) + + sys.L2CLB.Stop() + + // At this point, L2ELB has no ELP2P, and L2CL connection + startNum := sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number + + // NewPayload does not trigger the EL Sync + // Example logs from L2EL(geth) + // New skeleton head announced + // Ignoring payload with missing parent + targetNum := startNum + 3 + sys.L2ELB.NewPayload(sys.L2EL, targetNum).IsSyncing() + + // NewPayload does not trigger the EL Sync + // Example logs from L2EL(geth) + // New skeleton head announced + // Ignoring payload with missing parent + targetNum = startNum + 5 + sys.L2ELB.NewPayload(sys.L2EL, targetNum).IsSyncing() + + // NewPayload does not trigger the EL Sync + // Example logs from L2EL(geth) + // New skeleton head announced + // Ignoring payload with missing parent + targetNum = startNum + 4 + sys.L2ELB.NewPayload(sys.L2EL, targetNum).IsSyncing() + + // NewPayload can extend non canonical chain because L2EL has state for startNum and can validate payload + // Example logs from L2EL(geth) + // Inserting block without sethead + // Persisted trie from memory database + // Imported new potential chain segment + targetNum = startNum + 1 + sys.L2ELB.NewPayload(sys.L2EL, targetNum).IsValid() + logger.Info("Non canonical chain advanced", "number", targetNum) + + // NewPayload can extend non canonical chain because L2EL has state for startNum+1 and can validate payload + // Example logs from L2EL(geth) + // Inserting block without sethead + // Persisted trie from memory database + // Imported new potential chain segment + targetNum = startNum + 2 + sys.L2ELB.NewPayload(sys.L2EL, targetNum).IsValid() + logger.Info("Non canonical chain advanced", "number", targetNum) + + // Non canonical chain can be fetched via blockhash + blockRef := sys.L2EL.BlockRefByNumber(targetNum) + nonCan := sys.L2ELB.BlockRefByHash(blockRef.Hash) + require.Equal(uint64(targetNum), nonCan.Number) + require.Equal(blockRef.Hash, nonCan.Hash) + // Still targetNum block is non canonicalized + _, err := sys.L2ELB.Escape().L2EthClient().BlockRefByNumber(t.Ctx(), targetNum) + require.ErrorIs(err, ethereum.NotFound) + + // Previously inserted payloads are not used to make non-canonical chain automatically + blockRef = sys.L2EL.BlockRefByNumber(startNum + 3) + _, err = sys.L2ELB.Escape().EthClient().BlockRefByHash(t.Ctx(), blockRef.Hash) + require.ErrorIs(err, ethereum.NotFound) + blockRef = sys.L2EL.BlockRefByNumber(startNum + 5) + _, err = sys.L2ELB.Escape().EthClient().BlockRefByHash(t.Ctx(), blockRef.Hash) + require.ErrorIs(err, ethereum.NotFound) + + // No FCU yet so head not advanced yet + require.Equal(startNum, sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // NewPayload does not trigger the EL Sync + // Example logs from L2EL(geth) + // New skeleton head announced + // Ignoring payload with missing parent + targetNum = startNum + 6 + sys.L2ELB.NewPayload(sys.L2EL, targetNum).IsSyncing() + + // FCU marks startNum + 2 as valid, promoting non canonical blocks to canonical blocks + // Example logs from L2EL(geth) + // Extend chain + // Chain head was updated + targetNum = startNum + 2 + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).IsValid() + logger.Info("Canonical chain advanced", "number", targetNum) + + // Head advanced, canonical head bumped + require.Equal(uint64(targetNum), sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // FCU to target block which cannot be validated, triggers EL Sync but ELP2P not yet available + // Example logs from L2EL(geth) + // New skeleton head announced + // created initial skeleton subchain + // Starting reverse header sync cycle + // Block synchronisation started + // Backfilling with the network + targetNum = startNum + 3 + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).IsSyncing() + + // head not advanced + require.Equal(uint64(startNum+2), sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // FCU to target block which cannot be validated + // Example logs from L2EL(geth) + // New skeleton head announced + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).IsSyncing() + + // head not advanced + require.Equal(uint64(startNum+2), sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // FCU to target block which cannot be validated + // Example logs from L2EL(geth) + // New skeleton head announced + targetNum = startNum + 4 + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).IsSyncing() + + // head not advanced + require.Equal(uint64(startNum+2), sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // Finally peer for enabling ELP2P + sys.L2ELB.PeerWith(sys.L2EL) + + // We allow three attempts. Most of the time, two attempts are enough + // At first attempt, L2EL starts EL Sync, returing SYNCING. + // Before second attempt, L2EL finishes EL Sync, and updates targetNum as canonical + // At second attempt, L2EL returns VALID since targetNum is already canonical + attempts := 3 + + // FCU to target block which can be eventually validated, because ELP2P enabled + // Example logs from L2EL(geth) + // New skeleton head announced + // Backfilling with the network + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).IsSyncing() + + // Wait until L2EL finishes EL Sync and canonicalizes until targetNum + sys.L2ELB.Reached(eth.Unsafe, targetNum, 3) + + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).WaitUntilValid(attempts) + logger.Info("Canonical chain advanced", "number", targetNum) + + // head advanced + require.Equal(uint64(targetNum), sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // FCU to target block which can be eventually validated, because ELP2P enabled + // Example logs from L2EL(geth) + // "Restarting sync cycle" reason="chain gapped, head: 4, newHead: 6" + targetNum = startNum + 6 + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).WaitUntilValid(attempts) + logger.Info("Canonical chain advanced", "number", targetNum) + + // head advanced + require.Equal(uint64(targetNum), sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // FCU to target block which can be eventually validated, because ELP2P enabled + // Example logs from L2EL(geth) + // "Restarting sync cycle" reason="chain gapped, head: 6, newHead: 8" + targetNum = startNum + 8 + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).WaitUntilValid(attempts) + logger.Info("Canonical chain advanced", "number", targetNum) + + // head advanced + require.Equal(uint64(targetNum), sys.L2ELB.BlockRefByLabel(eth.Unsafe).Number) + + // NewPayload does not trigger EL Sync + targetNum = startNum + 10 + sys.L2ELB.NewPayload(sys.L2EL, targetNum).IsSyncing() + _, err = sys.L2ELB.Escape().L2EthClient().BlockRefByNumber(t.Ctx(), targetNum) + require.ErrorIs(err, ethereum.NotFound) + sys.L2ELB.ForkchoiceUpdate(sys.L2EL, targetNum, 0, 0, nil).IsSyncing() + + t.Cleanup(func() { + sys.L2CLB.Start() + sys.L2ELB.DisconnectPeerWith(sys.L2EL) + }) +} diff --git a/op-devstack/dsl/engine.go b/op-devstack/dsl/engine.go index 8cb26507322..b01659e7b6c 100644 --- a/op-devstack/dsl/engine.go +++ b/op-devstack/dsl/engine.go @@ -1,8 +1,12 @@ package dsl import ( + "errors" + "time" + "github.com/ethereum-optimism/optimism/op-devstack/devtest" "github.com/ethereum-optimism/optimism/op-service/eth" + "github.com/ethereum-optimism/optimism/op-service/retry" ) type NewPayloadResult struct { @@ -30,9 +34,10 @@ func (r *NewPayloadResult) IsValid() *NewPayloadResult { } type ForkchoiceUpdateResult struct { - T devtest.T - Result *eth.ForkchoiceUpdatedResult - Err error + T devtest.T + Refresh func() + Result *eth.ForkchoiceUpdatedResult + Err error } func (r *ForkchoiceUpdateResult) IsForkchoiceUpdatedStatus(status eth.ExecutePayloadStatus) *ForkchoiceUpdateResult { @@ -52,3 +57,19 @@ func (r *ForkchoiceUpdateResult) IsValid() *ForkchoiceUpdateResult { r.T.Require().NoError(r.Err) return r } + +func (r *ForkchoiceUpdateResult) WaitUntilValid(attempts int) *ForkchoiceUpdateResult { + tryCnt := 0 + err := retry.Do0(r.T.Ctx(), attempts, &retry.FixedStrategy{Dur: 1 * time.Second}, + func() error { + r.Refresh() + tryCnt += 1 + if r.Result.PayloadStatus.Status != eth.ExecutionValid { + r.T.Log("Wait for FCU to return valid", "status", r.Result.PayloadStatus.Status, "try_count", tryCnt) + return errors.New("still syncing") + } + return nil + }) + r.T.Require().NoError(err) + return r +} diff --git a/op-devstack/dsl/l2_el.go b/op-devstack/dsl/l2_el.go index af38089dfad..5d72ad4328b 100644 --- a/op-devstack/dsl/l2_el.go +++ b/op-devstack/dsl/l2_el.go @@ -218,6 +218,10 @@ func (el *L2ELNode) PeerWith(peer *L2ELNode) { sysgo.ConnectP2P(el.ctx, el.require, el.inner.L2EthClient().RPC(), peer.inner.L2EthClient().RPC()) } +func (el *L2ELNode) DisconnectPeerWith(peer *L2ELNode) { + sysgo.DisconnectP2P(el.ctx, el.require, el.inner.L2EthClient().RPC(), peer.inner.L2EthClient().RPC()) +} + func (el *L2ELNode) PayloadByNumber(number uint64) *eth.ExecutionPayloadEnvelope { payload, err := el.inner.L2EthExtendedClient().PayloadByNumber(el.ctx, number) el.require.NoError(err, "failed to get payload") @@ -226,6 +230,7 @@ func (el *L2ELNode) PayloadByNumber(number uint64) *eth.ExecutionPayloadEnvelope // NewPayload fetches payload for target number from the reference EL Node, and inserts the payload func (el *L2ELNode) NewPayload(refNode *L2ELNode, number uint64) *NewPayloadResult { + el.log.Info("NewPayload", "number", number, "refNode", refNode) payload := refNode.PayloadByNumber(number) status, err := el.inner.L2EngineClient().NewPayload(el.ctx, payload.ExecutionPayload, payload.ParentBeaconBlockRoot) return &NewPayloadResult{T: el.t, Status: status, Err: err} @@ -233,11 +238,19 @@ func (el *L2ELNode) NewPayload(refNode *L2ELNode, number uint64) *NewPayloadResu // ForkchoiceUpdate fetches FCU target hashes from the reference EL node, and FCU update with attributes func (el *L2ELNode) ForkchoiceUpdate(refNode *L2ELNode, unsafe, safe, finalized uint64, attr *eth.PayloadAttributes) *ForkchoiceUpdateResult { - state := ð.ForkchoiceState{ - HeadBlockHash: refNode.BlockRefByNumber(unsafe).Hash, - SafeBlockHash: refNode.BlockRefByNumber(safe).Hash, - FinalizedBlockHash: refNode.BlockRefByNumber(finalized).Hash, + result := &ForkchoiceUpdateResult{T: el.t} + refresh := func() { + el.log.Info("ForkchoiceUpdate", "unsafe", unsafe, "safe", safe, "finalized", finalized, "attr", attr, "refNode", refNode) + state := ð.ForkchoiceState{ + HeadBlockHash: refNode.BlockRefByNumber(unsafe).Hash, + SafeBlockHash: refNode.BlockRefByNumber(safe).Hash, + FinalizedBlockHash: refNode.BlockRefByNumber(finalized).Hash, + } + res, err := el.inner.L2EngineClient().ForkchoiceUpdate(el.ctx, state, attr) + result.Result = res + result.Err = err } - result, err := el.inner.L2EngineClient().ForkchoiceUpdate(el.ctx, state, attr) - return &ForkchoiceUpdateResult{T: el.t, Result: result, Err: err} + result.Refresh = refresh + result.Refresh() + return result } diff --git a/op-devstack/sysgo/l2_el_p2p_util.go b/op-devstack/sysgo/l2_el_p2p_util.go index fc7adf04f36..a97633d069f 100644 --- a/op-devstack/sysgo/l2_el_p2p_util.go +++ b/op-devstack/sysgo/l2_el_p2p_util.go @@ -64,6 +64,29 @@ func ConnectP2P(ctx context.Context, require *testreq.Assertions, initiator RpcC require.NoError(err, "The peer was not connected") } +// DisconnectP2P disconnects a p2p peer connection between node1 and node2. +func DisconnectP2P(ctx context.Context, require *testreq.Assertions, initiator RpcCaller, acceptor RpcCaller) { + var targetInfo p2p.NodeInfo + require.NoError(acceptor.CallContext(ctx, &targetInfo, "admin_nodeInfo"), "get node info") + + var peerRemoved bool + require.NoError(initiator.CallContext(ctx, &peerRemoved, "admin_removePeer", targetInfo.ENR), "add peer") + require.True(peerRemoved, "should have removed peer successfully") + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + err := wait.For(ctx, time.Second, func() (bool, error) { + var peers []peer + if err := initiator.CallContext(ctx, &peers, "admin_peers"); err != nil { + return false, err + } + return !slices.ContainsFunc(peers, func(p peer) bool { + return p.ID == targetInfo.ID + }), nil + }) + require.NoError(err, "The peer was not removed") +} + type peer struct { ID string `json:"id"` }