diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 5884fe6fd..4b5d4fc3c 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -30,7 +30,6 @@ import ( "morph-l2/node/sequencer/mock" "morph-l2/node/sync" "morph-l2/node/types" - "morph-l2/node/validator" ) func main() { @@ -99,10 +98,6 @@ func L2NodeMain(ctx *cli.Context) error { if err != nil { return fmt.Errorf("failed to create syncer, error: %v", err) } - validatorCfg := validator.NewConfig() - if err := validatorCfg.SetCliContext(ctx); err != nil { - return fmt.Errorf("validator set cli context error: %v", err) - } l1Client, err := ethclient.Dial(derivationCfg.L1.Addr) if err != nil { return fmt.Errorf("dial l1 node error:%v", err) @@ -111,12 +106,7 @@ func L2NodeMain(ctx *cli.Context) error { if err != nil { return fmt.Errorf("NewRollup error:%v", err) } - vt, err := validator.NewValidator(validatorCfg, rollup, nodeConfig.Logger) - if err != nil { - return fmt.Errorf("new validator client error: %v", err) - } - - dvNode, err = derivation.NewDerivationClient(context.Background(), derivationCfg, syncer, store, vt, rollup, nodeConfig.Logger) + dvNode, err = derivation.NewDerivationClient(context.Background(), derivationCfg, syncer, store, rollup, nodeConfig.Logger) if err != nil { return fmt.Errorf("new derivation client error: %v", err) } diff --git a/node/db/keys.go b/node/db/keys.go index b0d50ddcd..336b4abcc 100644 --- a/node/db/keys.go +++ b/node/db/keys.go @@ -7,7 +7,12 @@ var ( L1MessagePrefix = []byte("l1") BatchBlockNumberPrefix = []byte("batch") - derivationL1HeightKey = []byte("LastDerivationL1Height") + derivationL1HeightKey = []byte("LastDerivationL1Height") + derivationL1BlockPrefix = []byte("derivL1Block") + + // SPEC-005: safe / finalized head anchors. Each value is an RLP-encoded HeadAnchor. + derivationSafeHeadKey = []byte("DerivationSafeHead") + derivationFinalizedHeadKey = []byte("DerivationFinalizedHead") ) // encodeBlockNumber encodes an L1 enqueue index as big endian uint64 @@ -26,3 +31,8 @@ func L1MessageKey(enqueueIndex uint64) []byte { func BatchBlockNumberKey(batchIndex uint64) []byte { return append(BatchBlockNumberPrefix, encodeEnqueueIndex(batchIndex)...) } + +// DerivationL1BlockKey = derivationL1BlockPrefix + l1Height (uint64 big endian) +func DerivationL1BlockKey(l1Height uint64) []byte { + return append(derivationL1BlockPrefix, encodeEnqueueIndex(l1Height)...) +} diff --git a/node/db/store.go b/node/db/store.go index 1a87a227c..cff4d319d 100644 --- a/node/db/store.go +++ b/node/db/store.go @@ -156,6 +156,120 @@ func (s *Store) WriteSyncedL1Messages(messages []types.L1Message, latestSynced u return batch.Write() } +// DerivationL1Block stores L1 block info for reorg detection. +type DerivationL1Block struct { + Number uint64 + Hash [32]byte +} + +// DerivationHeadAnchor pairs an L2 head with the L1 origin that justifies its +// current safety stage. Persisted form of derivation.HeadAnchor (kept in this +// package to avoid an import cycle between db and derivation). 
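+//
+// A persistence round-trip sketch (illustrative only, mirroring the helpers
+// below; the literal field values are made up):
+//
+//	anchor := &DerivationHeadAnchor{L2Number: 100, L1Number: 200}
+//	data, _ := rlp.EncodeToBytes(anchor) // value stored under the head key
+//	var got DerivationHeadAnchor
+//	_ = rlp.DecodeBytes(data, &got) // read path, see readHeadAnchor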
+type DerivationHeadAnchor struct { + L2Number uint64 + L2Hash [32]byte + L1Number uint64 + L1Hash [32]byte +} + +func (s *Store) writeHeadAnchor(key []byte, anchor *DerivationHeadAnchor) { + data, err := rlp.EncodeToBytes(anchor) + if err != nil { + panic(fmt.Sprintf("failed to RLP encode DerivationHeadAnchor, err: %v", err)) + } + if err := s.db.Put(key, data); err != nil { + panic(fmt.Sprintf("failed to write DerivationHeadAnchor, err: %v", err)) + } +} + +func (s *Store) readHeadAnchor(key []byte) *DerivationHeadAnchor { + data, err := s.db.Get(key) + if err != nil && !isNotFoundErr(err) { + panic(fmt.Sprintf("failed to read DerivationHeadAnchor, err: %v", err)) + } + if len(data) == 0 { + return nil + } + var anchor DerivationHeadAnchor + if err := rlp.DecodeBytes(data, &anchor); err != nil { + panic(fmt.Sprintf("invalid DerivationHeadAnchor RLP, err: %v", err)) + } + return &anchor +} + +// WriteDerivationSafeHead persists the safe-stage L2 head together with its L1 origin. +func (s *Store) WriteDerivationSafeHead(anchor *DerivationHeadAnchor) { + s.writeHeadAnchor(derivationSafeHeadKey, anchor) +} + +// ReadDerivationSafeHead reads the safe-stage L2 head, or nil if unset. +func (s *Store) ReadDerivationSafeHead() *DerivationHeadAnchor { + return s.readHeadAnchor(derivationSafeHeadKey) +} + +// WriteDerivationFinalizedHead persists the finalized-stage L2 head together with its L1 origin. +func (s *Store) WriteDerivationFinalizedHead(anchor *DerivationHeadAnchor) { + s.writeHeadAnchor(derivationFinalizedHeadKey, anchor) +} + +// ReadDerivationFinalizedHead reads the finalized-stage L2 head, or nil if unset. +func (s *Store) ReadDerivationFinalizedHead() *DerivationHeadAnchor { + return s.readHeadAnchor(derivationFinalizedHeadKey) +} + +func (s *Store) WriteDerivationL1Block(block *DerivationL1Block) { + data, err := rlp.EncodeToBytes(block) + if err != nil { + panic(fmt.Sprintf("failed to RLP encode DerivationL1Block, err: %v", err)) + } + if err := s.db.Put(DerivationL1BlockKey(block.Number), data); err != nil { + panic(fmt.Sprintf("failed to write DerivationL1Block, err: %v", err)) + } +} + +func (s *Store) ReadDerivationL1Block(l1Height uint64) *DerivationL1Block { + data, err := s.db.Get(DerivationL1BlockKey(l1Height)) + if err != nil && !isNotFoundErr(err) { + panic(fmt.Sprintf("failed to read DerivationL1Block, err: %v", err)) + } + if len(data) == 0 { + return nil + } + var block DerivationL1Block + if err := rlp.DecodeBytes(data, &block); err != nil { + panic(fmt.Sprintf("invalid DerivationL1Block RLP, err: %v", err)) + } + return &block +} + +func (s *Store) ReadDerivationL1BlockRange(from, to uint64) []*DerivationL1Block { + var blocks []*DerivationL1Block + for h := from; h <= to; h++ { + b := s.ReadDerivationL1Block(h) + if b != nil { + blocks = append(blocks, b) + } + } + return blocks +} + +func (s *Store) DeleteDerivationL1BlocksFrom(height uint64) { + batch := s.db.NewBatch() + for h := height; ; h++ { + key := DerivationL1BlockKey(h) + has, err := s.db.Has(key) + if err != nil || !has { + break + } + if err := batch.Delete(key); err != nil { + panic(fmt.Sprintf("failed to delete DerivationL1Block at %d, err: %v", h, err)) + } + } + if err := batch.Write(); err != nil { + panic(fmt.Sprintf("failed to write batch delete for DerivationL1Blocks, err: %v", err)) + } +} + func isNotFoundErr(err error) bool { return err.Error() == leveldb.ErrNotFound.Error() || err.Error() == types.ErrMemoryDBNotFound.Error() } diff --git a/node/derivation/DERIVATION_REFACTOR.md 
b/node/derivation/DERIVATION_REFACTOR.md new file mode 100644 index 000000000..9f94940a1 --- /dev/null +++ b/node/derivation/DERIVATION_REFACTOR.md @@ -0,0 +1,163 @@ +# Derivation Refactor: Batch Verification & L1 Reorg Detection + +## Background + +The derivation module is the core component that syncs L2 state from L1 batch data. Previously it only ran on validator nodes and used a challenge mechanism when state mismatches were detected. This refactor makes two fundamental changes: + +1. **L1 batch data is the source of truth** — when local L2 blocks don't match L1 batch data, roll back and re-derive from L1 instead of issuing a challenge. +2. **Support `latest` mode** for fetching L1 batches (instead of only `finalized`), with L1 reorg detection to handle the reduced confirmation window. + +## Design Principles + +- **L2 rollback is only triggered by batch data mismatch**, never by L1 reorg alone. + - L1 reorg → clean up DB → re-derive from reorg point → batch comparison decides if L2 needs rollback. + - Most L1 reorgs just re-include the same batch tx in a different block — L2 stays valid. +- **Derivation can run as a verification thread** — when blocks already exist locally (e.g. produced by sequencer), derivation compares them against L1 batch data instead of skipping. + +## What Changed + +### Removed + +| Item | Reason | +|------|--------| +| `validator` field in `Derivation` struct | Challenge mechanism removed | +| `validator.Validator` parameter in `NewDerivationClient()` | No longer needed | +| `ChallengeState` / `ChallengeEnable` logic in `derivationBlock()` | Replaced by rollback + re-derive | +| `validator` import in `node/cmd/node/main.go` | No longer referenced | + +### Added — L1 Reorg Detection + +When `confirmations` is not `finalized` (i.e. using `latest` or `safe`), each derivation loop checks recent L1 blocks for hash changes before processing new batches. + +**New DB layer** (`node/db/`): + +- `DerivationL1Block` struct — stores `{Number, Hash}` per L1 block +- `WriteDerivationL1Block` / `ReadDerivationL1Block` / `ReadDerivationL1BlockRange` / `DeleteDerivationL1BlocksFrom` +- DB key prefix: `derivL1Block` + uint64 big-endian height + +**New config** (`node/derivation/config.go`): + +- `ReorgCheckDepth uint64` — how many recent L1 blocks to verify each loop (default: 64) +- CLI flag: `--derivation.reorgCheckDepth` / env `MORPH_NODE_DERIVATION_REORG_CHECK_DEPTH` + +**New methods** (`node/derivation/derivation.go`): + +| Method | Purpose | +|--------|---------| +| `detectReorg(ctx)` | Iterates recent L1 block hashes from DB, compares against current L1 chain. Returns the height where a mismatch is found, or nil. | +| `handleL1Reorg(height)` | Cleans DB records from the reorg point and resets `latestDerivationL1Height`. Does NOT rollback L2 — the next derivation loop re-fetches batches and the normal comparison logic decides. | +| `recordL1Blocks(ctx, from, to)` | After each derivation round, records L1 block hashes for the processed range. 
|
+
+**Flow**:
+
+```text
+derivationBlock() loop start
+│
+├─ [if not finalized] detectReorg()
+│   ├─ no reorg → continue
+│   └─ reorg at height X → handleL1Reorg(X)
+│       ├─ DeleteDerivationL1BlocksFrom(X)
+│       ├─ WriteLatestDerivationL1Height(X-1)
+│       └─ return (next loop re-processes from X)
+│
+├─ fetch CommitBatch logs from L1
+├─ process each batch → derive() + verifyBatchRoots()
+├─ recordL1Blocks(start, end)
+└─ WriteLatestDerivationL1Height(end)
+```
+
+### Added — Batch Data Verification
+
+When `derive()` encounters an L2 block that already exists locally, it now **compares** the block against the L1 batch data instead of blindly skipping it.
+
+**New methods**:
+
+| Method | Purpose |
+|--------|---------|
+| `verifyBlockContext(localHeader, blockData)` | Compares timestamp, gasLimit, baseFee (and the batch-internal txsNum) between the local L2 block header and the batch block context. |
+| `verifyBatchRoots(batchInfo, lastHeader)` | Compares stateRoot and withdrawalRoot between the L1 batch and the last derived L2 block. Extracted from the old inline logic. |
+| `rollbackLocalChain(targetBlockNumber)` | **TODO stub** — will call geth `SetHead` API to rewind the L2 chain. |
+
+**`derive()` new flow for each block in batch**:
+
+```text
+block.Number <= latestBlockNumber?
+├─ YES (block exists)
+│   ├─ verifyBlockContext() passes → skip, continue
+│   └─ verifyBlockContext() fails
+│       ├─ IncBlockMismatchCount()
+│       ├─ rollbackLocalChain(block.Number - 1)
+│       └─ fall through to NewSafeL2Block (re-execute)
+│
+└─ NO (new block)
+    └─ NewSafeL2Block (execute normally)
+```
+
+**`derivationBlock()` batch-level verification**:
+
+```text
+After derive(batchInfo) completes:
+│
+├─ verifyBatchRoots() passes → normal
+└─ verifyBatchRoots() fails
+    ├─ IncRollbackCount()
+    ├─ rollbackLocalChain(firstBlockNumber - 1)
+    ├─ re-derive(batchInfo)
+    └─ verifyBatchRoots() again
+        ├─ passes → recovered
+        └─ fails → CRITICAL error, stop (manual intervention needed)
+```
+
+### Added — Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `morphnode_derivation_l1_reorg_detected_total` | Counter | L1 reorg detection count |
+| `morphnode_derivation_l2_rollback_total` | Counter | L2 rollbacks triggered by batch mismatch |
+| `morphnode_derivation_block_mismatch_total` | Counter | Block-level context mismatches |
+| `morphnode_derivation_halted` | Gauge | Set to 1 when derivation halts due to an unrecoverable batch mismatch (alert on this) |
+
+## Modified Files
+
+| File | Changes |
+|------|---------|
+| `node/derivation/derivation.go` | Core refactor: removed validator/challenge, added reorg detection, batch verification, rollback flow |
+| `node/derivation/database.go` | Extended `Reader`/`Writer` interfaces for L1 block hash tracking and head anchors |
+| `node/derivation/config.go` | Added `ReorgCheckDepth` config field |
+| `node/derivation/metrics.go` | Added the reorg / rollback / mismatch counters, the `halted` gauge, and the SPEC-005 head-stage and path-B metrics |
+| `node/db/keys.go` | Added `derivationL1BlockPrefix`, the head-anchor keys, and `DerivationL1BlockKey()` |
+| `node/db/store.go` | Added the `DerivationL1Block` and `DerivationHeadAnchor` structs with their read/write/delete helpers |
+| `node/flags/flags.go` | Added `DerivationReorgCheckDepth` CLI flag |
+| `node/cmd/node/main.go` | Removed the `validator` dependency from the `NewDerivationClient` call |
+| `node/ops-morph/docker-compose-validator.yml` | Dropped the now-unused `MORPH_NODE_VALIDATOR_PRIVATE_KEY` env var |
+
+## TODO (follow-up work)
+
+### `rollbackLocalChain()` — geth SetHead integration
+
+Currently a stub that returns an error. Any batch mismatch will be detected and logged, but the
+actual L2 chain rollback cannot proceed until this is implemented:
+
+1. Expose `SetL2Head(number uint64)` in `go-ethereum/eth/catalyst/l2_api.go`
+2.
Add `SetHead` method to `go-ethereum/ethclient/authclient` +3. Add `SetHead` method to `node/types/retryable_client.go` +4. Call `d.l2Client.SetHead(d.ctx, targetBlockNumber)` in `rollbackLocalChain()` + +Note: geth already has `BlockChain.SetHead(head uint64) error` — we just need to expose it through the engine API chain. + +### Transaction-level verification + +`verifyBlockContext` currently checks timestamp, gasLimit, baseFee, and batch-internal tx count +consistency. Full transaction hash comparison against local blocks requires `BlockByNumber` RPC +on `RetryableClient`, which is not yet exposed. State root verification in `verifyBatchRoots` +covers transaction execution correctness as an indirect check. + +### Concurrency safety + +When running as a verification thread alongside a sequencer, concurrent access between block production and rollback needs locking. This will be handled separately. + +## How to Test + +1. **Existing behavior preserved**: Set `--derivation.confirmations` to finalized (default) — reorg detection and L1 block hash recording are both skipped, batch verification still runs. +2. **Latest mode**: Set `--derivation.confirmations` to `-2` (latest) — reorg detection activates, L1 block hashes are tracked. +3. **Reorg detection**: Simulate by modifying a saved L1 block hash in DB — next loop should detect and clean up. +4. **Batch verification**: When an existing L2 block matches L1 batch data, it logs "block verified" and skips. When mismatched, it logs the error and returns (rollback stub returns error, preventing silent continuation). diff --git a/node/derivation/admin_rpc.go b/node/derivation/admin_rpc.go new file mode 100644 index 000000000..01bfa0750 --- /dev/null +++ b/node/derivation/admin_rpc.go @@ -0,0 +1,67 @@ +package derivation + +import ( + "context" + "errors" + "fmt" + + "github.com/morph-l2/go-ethereum/common" +) + +// SPEC-005 §3.6 / §5.1 admin RPC: operator-triggered rollback entry point. +// +// Exposes the ability to roll the local L2 chain back to a target (number, +// hash) pair. Per tech-design §3.3, the rollback **must** match by hash — +// rolling back to a number alone is unsafe because it can silently land +// on a different fork after a reorg. +// +// Authentication and the concrete wire-up (registering this with the +// node's existing admin RPC server) are blocked on SPEC-005 §8 #2: +// - dev-mode only (current default below) +// - operator-only via a node-local UNIX socket +// - signed multisig request +// All three options keep the same public method signature. + +// AdminAPI groups operator-only RPC entry points exposed by the +// derivation pipeline. +// +// TODO(spec-005-admin-rpc): wire this into morph/node/cmd/node/main.go +// once SPEC-005 §8 #2 (auth) is decided. Until then, AdminAPI is +// constructible but unregistered; tests can still exercise it directly. +type AdminAPI struct { + d *Derivation +} + +// NewAdminAPI returns the operator-only API surface bound to the given +// Derivation instance. +func NewAdminAPI(d *Derivation) *AdminAPI { + return &AdminAPI{d: d} +} + +// SetL2Head requests a rollback of the local L2 chain to the supplied +// (number, hash). The implementation must verify that hash matches the +// local block at the given number before delegating to the rollback +// executor (SPEC-005 §5.1 / §5.2). 
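+//
+// A wiring sketch for once the auth decision lands (illustrative; the
+// rpc.Server calls are standard go-ethereum API, while the method namespace
+// and listener choice are assumptions):
+//
+//	srv := rpc.NewServer()
+//	_ = srv.RegisterName("admindrv", NewAdminAPI(d))
+//	// mount srv on an operator-only endpoint, e.g. a node-local
+//	// UNIX socket, per the SPEC-005 §8 #2 options above.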
+// +// Returns an error if: +// - the (number, hash) does not match the local canonical chain; +// - the target is below finalized_head (SPEC-005 §3.6: halted); +// - the rollback executor itself fails (the node enters halted). +func (a *AdminAPI) SetL2Head(ctx context.Context, number uint64, hash common.Hash) error { + if a == nil || a.d == nil { + return errors.New("admin API not bound to a derivation instance") + } + + if err := a.d.checkRollbackBoundary(number); err != nil { + return err + } + + // TODO(spec-005-admin-rpc): + // 1. Authenticate the request (SPEC-005 §8 #2). + // 2. Verify hash matches local block at `number` via l2Client. + // 3. Acquire sequencerMutex.AcquireRollback() / defer release. + // 4. Call into rollbackLocalChain(number) — currently returns + // "not implemented" because the underlying go-ethereum + // hash-matched SetHead interface (SPEC-005 §8 #4) is not finalised. + return fmt.Errorf("admin SetL2Head not yet implemented (number=%d, hash=%s)", number, hash.Hex()) +} diff --git a/node/derivation/config.go b/node/derivation/config.go index 9d896f0b6..93bd5fd7f 100644 --- a/node/derivation/config.go +++ b/node/derivation/config.go @@ -29,6 +29,9 @@ const ( // DefaultLogProgressInterval is the frequency at which we log progress. DefaultLogProgressInterval = time.Second * 10 + + // DefaultReorgCheckDepth is the number of recent L1 blocks to check for reorgs. + DefaultReorgCheckDepth = uint64(64) ) type Config struct { @@ -41,6 +44,7 @@ type Config struct { PollInterval time.Duration `json:"poll_interval"` LogProgressInterval time.Duration `json:"log_progress_interval"` FetchBlockRange uint64 `json:"fetch_block_range"` + ReorgCheckDepth uint64 `json:"reorg_check_depth"` MetricsPort uint64 `json:"metrics_port"` MetricsHostname string `json:"metrics_hostname"` MetricsServerEnable bool `json:"metrics_server_enable"` @@ -54,6 +58,7 @@ func DefaultConfig() *Config { PollInterval: DefaultPollInterval, LogProgressInterval: DefaultLogProgressInterval, FetchBlockRange: DefaultFetchBlockRange, + ReorgCheckDepth: DefaultReorgCheckDepth, L2: new(types.L2Config), } } @@ -109,6 +114,9 @@ func (c *Config) SetCliContext(ctx *cli.Context) error { return errors.New("invalid fetchBlockRange") } } + if ctx.GlobalIsSet(flags.DerivationReorgCheckDepth.Name) { + c.ReorgCheckDepth = ctx.GlobalUint64(flags.DerivationReorgCheckDepth.Name) + } l2EthAddr := ctx.GlobalString(flags.L2EthAddr.Name) l2EngineAddr := ctx.GlobalString(flags.L2EngineAddr.Name) diff --git a/node/derivation/database.go b/node/derivation/database.go index a63f4eba1..c3922e935 100644 --- a/node/derivation/database.go +++ b/node/derivation/database.go @@ -1,6 +1,7 @@ package derivation import ( + "morph-l2/node/db" "morph-l2/node/sync" ) @@ -12,8 +13,18 @@ type Database interface { type Reader interface { ReadLatestDerivationL1Height() *uint64 + ReadDerivationL1Block(l1Height uint64) *db.DerivationL1Block + ReadDerivationL1BlockRange(from, to uint64) []*db.DerivationL1Block + // SPEC-005: safe / finalized head anchors. + ReadDerivationSafeHead() *db.DerivationHeadAnchor + ReadDerivationFinalizedHead() *db.DerivationHeadAnchor } type Writer interface { WriteLatestDerivationL1Height(latest uint64) + WriteDerivationL1Block(block *db.DerivationL1Block) + DeleteDerivationL1BlocksFrom(height uint64) + // SPEC-005: safe / finalized head anchors. 
+ WriteDerivationSafeHead(anchor *db.DerivationHeadAnchor) + WriteDerivationFinalizedHead(anchor *db.DerivationHeadAnchor) } diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index d5bf58681..e972bd426 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -27,7 +27,6 @@ import ( nodecommon "morph-l2/node/common" "morph-l2/node/sync" "morph-l2/node/types" - "morph-l2/node/validator" ) var ( @@ -42,7 +41,6 @@ type Derivation struct { RollupContractAddress common.Address confirmations rpc.BlockNumber l2Client *types.RetryableClient - validator *validator.Validator logger tmlog.Logger rollup *bindings.Rollup metrics *Metrics @@ -60,9 +58,11 @@ type Derivation struct { startHeight uint64 baseHeight uint64 fetchBlockRange uint64 + reorgCheckDepth uint64 pollInterval time.Duration logProgressInterval time.Duration stop chan struct{} + halted bool // set when an unrecoverable mismatch is detected but rollback is not yet implemented } type DeployContractBackend interface { @@ -72,7 +72,7 @@ type DeployContractBackend interface { ethereum.TransactionReader } -func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, db Database, validator *validator.Validator, rollup *bindings.Rollup, logger tmlog.Logger) (*Derivation, error) { +func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, db Database, rollup *bindings.Rollup, logger tmlog.Logger) (*Derivation, error) { l1Client, err := ethclient.Dial(cfg.L1.Addr) if err != nil { return nil, err @@ -122,7 +122,6 @@ func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, db: db, l1Client: l1Client, syncer: syncer, - validator: validator, rollup: rollup, rollupABI: rollupAbi, legacyRollupABI: legacyRollupAbi, @@ -136,6 +135,7 @@ func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, startHeight: cfg.StartHeight, baseHeight: cfg.BaseHeight, fetchBlockRange: cfg.FetchBlockRange, + reorgCheckDepth: cfg.ReorgCheckDepth, pollInterval: cfg.PollInterval, logProgressInterval: cfg.LogProgressInterval, metrics: metrics, @@ -182,6 +182,33 @@ func (d *Derivation) Stop() { } func (d *Derivation) derivationBlock(ctx context.Context) { + if d.halted { + d.logger.Error("derivation halted due to unrecoverable batch mismatch, manual intervention required") + return + } + + // Step 1: Check for L1 reorg (only meaningful when not using finalized) + if d.confirmations != rpc.FinalizedBlockNumber { + reorgAt, err := d.detectReorg(ctx) + if err != nil { + d.logger.Error("reorg detection failed", "err", err) + return + } + if reorgAt != nil { + d.logger.Info("L1 reorg detected, invoking reorg handler", "reorgAtL1Height", *reorgAt) + d.metrics.IncReorgCount() + if err := d.handleL1Reorg(*reorgAt); err != nil { + d.logger.Error("handle L1 reorg failed", "err", err) + } + // Always return after reorg detection — don't continue processing in + // the same loop. Let the next poll interval re-fetch from the reset + // height. This avoids recording potentially unstable L1 block hashes + // if the chain is still reorging. 
+ return + } + } + + // Step 2: Determine L1 scan range latestDerivation := d.db.ReadLatestDerivationL1Height() latest, err := d.getLatestConfirmedBlockNumber(d.ctx) if err != nil { @@ -201,7 +228,9 @@ func (d *Derivation) derivationBlock(ctx context.Context) { } else if latest-start >= d.fetchBlockRange { end = start + d.fetchBlockRange } - d.logger.Info("derivation start pull rollupData form l1", "startBlock", start, "end", end) + d.logger.Info("derivation start pull rollupData from l1", "startBlock", start, "end", end) + + // Step 3: Fetch CommitBatch logs logs, err := d.fetchRollupLog(ctx, start, end) if err != nil { d.logger.Error("eth_getLogs failed", "err", err) @@ -215,6 +244,7 @@ func (d *Derivation) derivationBlock(ctx context.Context) { d.metrics.SetLatestBatchIndex(latestBatchIndex.Uint64()) d.logger.Info("fetched rollup tx", "txNum", len(logs), "latestBatchIndex", latestBatchIndex) + // Step 4: Process each batch for _, lg := range logs { batchInfo, err := d.fetchRollupDataByTxHash(lg.TxHash, lg.BlockNumber) if err != nil { @@ -227,51 +257,75 @@ func (d *Derivation) derivationBlock(ctx context.Context) { d.logger.Info("fetch rollup transaction success", "txNonce", batchInfo.nonce, "txHash", batchInfo.txHash, "l1BlockNumber", batchInfo.l1BlockNumber, "firstL2BlockNumber", batchInfo.firstBlockNumber, "lastL2BlockNumber", batchInfo.lastBlockNumber) - // derivation + // Derive or verify blocks lastHeader, err := d.derive(batchInfo) if err != nil { d.logger.Error("derive blocks interrupt", "error", err) return } - // only last block of batch + if lastHeader == nil { + d.logger.Error("derive returned nil header, skipping empty batch", "batchIndex", batchInfo.batchIndex) + continue + } + d.logger.Info("batch derivation complete", "batch_index", batchInfo.batchIndex, "currentBatchEndBlock", lastHeader.Number.Uint64()) d.metrics.SetL2DeriveHeight(lastHeader.Number.Uint64()) d.metrics.SetSyncedBatchIndex(batchInfo.batchIndex) + if lastHeader.Number.Uint64() <= d.baseHeight { continue } - withdrawalRoot, err := d.L2ToL1MessagePasser.MessageRoot(&bind.CallOpts{ - BlockNumber: lastHeader.Number, - }) - if err != nil { - d.logger.Error("get withdrawal root failed", "error", err) - return - } - - rootMismatch := !bytes.Equal(lastHeader.Root.Bytes(), batchInfo.root.Bytes()) - withdrawalMismatch := !bytes.Equal(withdrawalRoot[:], batchInfo.withdrawalRoot.Bytes()) - if rootMismatch || withdrawalMismatch { + // Verify state root and withdrawal root against L1 batch data + if err := d.verifyBatchRoots(batchInfo, lastHeader); err != nil { + d.logger.Error("batch root verification failed, attempting rollback and re-derive", + "batchIndex", batchInfo.batchIndex, "error", err) d.metrics.SetBatchStatus(stateException) - // TODO The challenge switch is currently on and will be turned on in the future - if d.validator != nil && d.validator.ChallengeEnable() { - if err := d.validator.ChallengeState(batchInfo.batchIndex); err != nil { - d.logger.Error("challenge state failed", "batchIndex", batchInfo.batchIndex, "error", err) - return - } + d.metrics.IncRollbackCount() + + rollbackTarget := batchInfo.firstBlockNumber - 1 + if err := d.rollbackLocalChain(rollbackTarget); err != nil { + d.logger.Error("rollback failed, halting derivation to prevent infinite retry", + "target", rollbackTarget, "batchIndex", batchInfo.batchIndex, "error", err) + d.halted = true + d.metrics.SetHalted() + return } - d.logger.Error("root hash or withdrawal hash is not equal", - "originStateRootHash", batchInfo.root, - 
"deriveStateRootHash", lastHeader.Root.Hex(), - "batchWithdrawalRoot", batchInfo.withdrawalRoot.Hex(), - "deriveWithdrawalRoot", common.BytesToHash(withdrawalRoot[:]).Hex(), - ) - return + + // Re-derive the batch using L1 batch data as source of truth + lastHeader, err = d.derive(batchInfo) + if err != nil { + d.logger.Error("re-derive after rollback failed", "error", err) + return + } + if lastHeader == nil { + d.logger.Error("re-derive returned nil header after rollback", "batchIndex", batchInfo.batchIndex) + return + } + + // Verify again after re-derive + if err := d.verifyBatchRoots(batchInfo, lastHeader); err != nil { + d.logger.Error("CRITICAL: batch roots still mismatch after rollback and re-derive, halting derivation", + "batchIndex", batchInfo.batchIndex, "error", err) + d.halted = true + d.metrics.SetHalted() + return + } + d.logger.Info("rollback and re-derive succeeded", "batchIndex", batchInfo.batchIndex) } + d.metrics.SetBatchStatus(stateNormal) d.metrics.SetL1SyncHeight(lg.BlockNumber) } + // Step 5: Record L1 block hashes for reorg detection (only needed for non-finalized modes) + if d.confirmations != rpc.FinalizedBlockNumber { + if err := d.recordL1Blocks(ctx, start, end); err != nil { + d.logger.Error("recordL1Blocks failed, will retry next loop", "err", err) + return + } + } + d.db.WriteLatestDerivationL1Height(end) d.metrics.SetL1SyncHeight(end) d.logger.Info("write latest derivation l1 height success", "l1BlockNumber", end) @@ -555,13 +609,32 @@ func (d *Derivation) derive(rollupData *BatchInfo) (*eth.Header, error) { return nil, fmt.Errorf("get derivation geth block number error:%v", err) } if blockData.SafeL2Data.Number <= latestBlockNumber { - d.logger.Info("new L2 Data block number less than latestBlockNumber", "safeL2DataNumber", blockData.SafeL2Data.Number, "latestBlockNumber", latestBlockNumber) - lastHeader, err = d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(blockData.SafeL2Data.Number))) + // Block already exists locally - verify it matches the batch data + localHeader, err := d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(blockData.SafeL2Data.Number))) if err != nil { return nil, fmt.Errorf("query header by number error:%v", err) } - continue + + if err := d.verifyBlockContext(localHeader, blockData); err != nil { + d.logger.Error("block context mismatch with L1 batch data, rollback required", + "blockNumber", blockData.Number, "error", err) + d.metrics.IncBlockMismatchCount() + + rollbackTarget := blockData.SafeL2Data.Number - 1 + if err := d.rollbackLocalChain(rollbackTarget); err != nil { + d.halted = true + d.metrics.SetHalted() + return nil, fmt.Errorf("rollback to %d failed (derivation halted): %v", rollbackTarget, err) + } + } else { + d.logger.Info("block verified against L1 batch data", + "blockNumber", blockData.Number) + lastHeader = localHeader + continue + } } + + // Execute the block (either new block or re-execution after rollback) err = func() error { ctx, cancel := context.WithTimeout(context.Background(), time.Duration(60)*time.Second) defer cancel() diff --git a/node/derivation/dual_channel.go b/node/derivation/dual_channel.go new file mode 100644 index 000000000..61a38723e --- /dev/null +++ b/node/derivation/dual_channel.go @@ -0,0 +1,49 @@ +package derivation + +import ( + "context" + "fmt" + "math/big" + + "github.com/morph-l2/go-ethereum/rpc" +) + +// SPEC-005 §3.2 "L1 双通道驱动": +// +// The derivation pipeline must consume two independent L1 cursors: +// - "safe" drives safe_head and is allowed to roll back when L1 reorgs out a 
batch. +// - "finalized" drives finalized_head; it is monotonic and never rolls back. +// +// The current main loop in derivationBlock() still consumes a single +// `d.confirmations` cursor (rpc.FinalizedBlockNumber by default). The helpers +// below are intentionally not yet wired into the main loop — switching the +// main loop is gated on the SPEC-005 §8 blocking decisions (anchor-window +// depth, sequencer mutex granularity). They are exposed now so that +// downstream tasks #5 / #6 / #7 can build on them without re-establishing +// the L1 access pattern from scratch. + +// fetchLatestSafeHeight returns the L1 block number of the latest "safe" head. +// +// "safe" here is the consensus-layer "safe" tag exposed via L1 RPC, not a +// confirmations-derived height. Use this to drive safe_head. +func (d *Derivation) fetchLatestSafeHeight(ctx context.Context) (uint64, error) { + return d.fetchTaggedHeight(ctx, rpc.SafeBlockNumber, "safe") +} + +// fetchLatestFinalizedHeight returns the L1 block number of the latest +// "finalized" head. Use this to drive finalized_head; the result is +// expected to be monotonic across calls. +func (d *Derivation) fetchLatestFinalizedHeight(ctx context.Context) (uint64, error) { + return d.fetchTaggedHeight(ctx, rpc.FinalizedBlockNumber, "finalized") +} + +func (d *Derivation) fetchTaggedHeight(ctx context.Context, tag rpc.BlockNumber, label string) (uint64, error) { + header, err := d.l1Client.HeaderByNumber(ctx, big.NewInt(int64(tag))) + if err != nil { + return 0, fmt.Errorf("get L1 %s head: %w", label, err) + } + if header == nil || header.Number == nil { + return 0, fmt.Errorf("got nil header for L1 %s head", label) + } + return header.Number.Uint64(), nil +} diff --git a/node/derivation/head_anchor.go b/node/derivation/head_anchor.go new file mode 100644 index 000000000..c81625dbc --- /dev/null +++ b/node/derivation/head_anchor.go @@ -0,0 +1,64 @@ +package derivation + +import "morph-l2/node/db" + +// toDBAnchor converts the in-memory HeadAnchor to the persistent representation. +func (a HeadAnchor) toDBAnchor() *db.DerivationHeadAnchor { + return &db.DerivationHeadAnchor{ + L2Number: a.L2Number, + L2Hash: a.L2Hash, + L1Number: a.L1Number, + L1Hash: a.L1Hash, + } +} + +// headAnchorFromDB inflates a stored anchor back into the in-memory representation. +// Returns nil if the input is nil. +func headAnchorFromDB(a *db.DerivationHeadAnchor) *HeadAnchor { + if a == nil { + return nil + } + return &HeadAnchor{ + L2Number: a.L2Number, + L2Hash: a.L2Hash, + L1Number: a.L1Number, + L1Hash: a.L1Hash, + } +} + +// readSafeHead returns the persisted safe-stage anchor, or nil if unset. +func (d *Derivation) readSafeHead() *HeadAnchor { + return headAnchorFromDB(d.db.ReadDerivationSafeHead()) +} + +// readFinalizedHead returns the persisted finalized-stage anchor, or nil if unset. +func (d *Derivation) readFinalizedHead() *HeadAnchor { + return headAnchorFromDB(d.db.ReadDerivationFinalizedHead()) +} + +// writeSafeHead persists a new safe-stage anchor. +// +// Per SPEC-005 §3.5 ("Restart and consistency"), this should ideally be written +// atomically with the corresponding L1 anchor window updates so the node never +// observes a half-committed state across a restart. 
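+// A hypothetical batched form, assuming db.Store later exposes its internal
+// batch as a multi-key helper, would group the writes:
+//
+//	b := store.NewAtomicBatch()     // hypothetical API
+//	b.Put(safeHeadKey, anchorBytes) // both keys land together
+//	b.Put(l1BlockKey, blockBytes)
+//	err := b.Write()
+//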
The current implementation +// uses single-key Put because the underlying KV store does not yet expose a +// transactional API; this is acceptable for now because: +// - safe head ratchets forward inside a single derivation loop iteration; +// - L1 anchor window writes are append-only and idempotent; +// - on crash mid-write, the next loop will re-derive from the last persisted +// LatestDerivationL1Height and re-establish consistency before advancing. +// +// TODO(spec-005): expose a multi-key atomic write helper on db.Store and +// migrate this + WriteDerivationL1Block + WriteLatestDerivationL1Height onto +// it once the rollback executor (P3) lands. +func (d *Derivation) writeSafeHead(anchor HeadAnchor) { + d.db.WriteDerivationSafeHead(anchor.toDBAnchor()) +} + +// writeFinalizedHead persists a new finalized-stage anchor. +// +// Per SPEC-005 §3.1, finalized_head is monotonic and never rolls back; callers +// must enforce this invariant before calling. +func (d *Derivation) writeFinalizedHead(anchor HeadAnchor) { + d.db.WriteDerivationFinalizedHead(anchor.toDBAnchor()) +} diff --git a/node/derivation/metrics.go b/node/derivation/metrics.go index da5e8937d..00cfe8302 100644 --- a/node/derivation/metrics.go +++ b/node/derivation/metrics.go @@ -18,12 +18,28 @@ const ( ) type Metrics struct { - L1SyncHeight metrics.Gauge - RollupL2Height metrics.Gauge - DeriveL2Height metrics.Gauge - BatchStatus metrics.Gauge - LatestBatchIndex metrics.Gauge - SyncedBatchIndex metrics.Gauge + L1SyncHeight metrics.Gauge + RollupL2Height metrics.Gauge + DeriveL2Height metrics.Gauge + BatchStatus metrics.Gauge + LatestBatchIndex metrics.Gauge + SyncedBatchIndex metrics.Gauge + ReorgCount metrics.Counter + RollbackCount metrics.Counter + BlockMismatchCount metrics.Counter + Halted metrics.Gauge + + // SPEC-005 head stages. + SafeHeadL2Number metrics.Gauge + FinalizedHeadL2Number metrics.Gauge + + // SPEC-005 §3.3 path B (degraded) verification trigger counter. + PathBTriggeredCount metrics.Counter + + // SPEC-005 §4.2 batch-root mismatch counter (separate from generic rollback + // count to distinguish "first attempt failed and re-derive succeeded" from + // "second attempt failed and we entered halted"). 
+ BatchRootMismatchCount metrics.Counter } func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { @@ -68,6 +84,54 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "synced_batch_index", Help: "", }, labels).With(labelsAndValues...), + ReorgCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "l1_reorg_detected_total", + Help: "Total number of L1 reorgs detected", + }, labels).With(labelsAndValues...), + RollbackCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "l2_rollback_total", + Help: "Total number of L2 chain rollbacks triggered by batch mismatch", + }, labels).With(labelsAndValues...), + BlockMismatchCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "block_mismatch_total", + Help: "Total number of block context mismatches detected during verification", + }, labels).With(labelsAndValues...), + Halted: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "halted", + Help: "Set to 1 when derivation is halted due to unrecoverable batch mismatch requiring manual intervention", + }, labels).With(labelsAndValues...), + SafeHeadL2Number: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "safe_head_l2_number", + Help: "L2 block number of the latest safe-stage head (anchored to L1 safe)", + }, labels).With(labelsAndValues...), + FinalizedHeadL2Number: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "finalized_head_l2_number", + Help: "L2 block number of the latest finalized-stage head (anchored to L1 finalized; monotonic)", + }, labels).With(labelsAndValues...), + PathBTriggeredCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "path_b_triggered_total", + Help: "Total number of times batch-content verification fell back to local-rebuild path (SPEC-005 §3.3 path B)", + }, labels).With(labelsAndValues...), + BatchRootMismatchCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "batch_root_mismatch_total", + Help: "Total number of state-root or withdrawal-root mismatches against L1 batch calldata", + }, labels).With(labelsAndValues...), } } @@ -95,6 +159,38 @@ func (m *Metrics) SetSyncedBatchIndex(batchIndex uint64) { m.SyncedBatchIndex.Set(float64(batchIndex)) } +func (m *Metrics) IncReorgCount() { + m.ReorgCount.Add(1) +} + +func (m *Metrics) IncRollbackCount() { + m.RollbackCount.Add(1) +} + +func (m *Metrics) IncBlockMismatchCount() { + m.BlockMismatchCount.Add(1) +} + +func (m *Metrics) SetHalted() { + m.Halted.Set(1) +} + +func (m *Metrics) SetSafeHeadL2Number(n uint64) { + m.SafeHeadL2Number.Set(float64(n)) +} + +func (m *Metrics) SetFinalizedHeadL2Number(n uint64) { + m.FinalizedHeadL2Number.Set(float64(n)) +} + +func (m *Metrics) IncPathBTriggered() { + m.PathBTriggeredCount.Add(1) +} + +func (m *Metrics) IncBatchRootMismatchCount() { + m.BatchRootMismatchCount.Add(1) +} + func (m *Metrics) Serve(hostname string, port uint64) (*http.Server, error) { mux := http.NewServeMux() mux.Handle("/metrics", promhttp.Handler()) diff --git a/node/derivation/reorg.go b/node/derivation/reorg.go new file mode 
100644 index 000000000..8773e3117 --- /dev/null +++ b/node/derivation/reorg.go @@ -0,0 +1,114 @@ +package derivation + +import ( + "context" + "fmt" + "math/big" + + "github.com/morph-l2/go-ethereum/common" + + "morph-l2/node/db" +) + +// detectReorg checks recent L1 blocks for hash mismatches indicating a reorg. +// Returns the L1 height where reorg was first detected, or nil if no reorg. +// +// Optimization: checks the newest saved block first. If it matches, there is +// no reorg (1 RPC call in the common case). Only when the newest block +// mismatches does it do a full oldest-to-newest scan to find the earliest +// divergence point. +func (d *Derivation) detectReorg(ctx context.Context) (*uint64, error) { + latestDerivation := d.db.ReadLatestDerivationL1Height() + if latestDerivation == nil { + return nil, nil + } + + checkFrom := d.startHeight + if *latestDerivation > d.reorgCheckDepth && (*latestDerivation-d.reorgCheckDepth) > checkFrom { + checkFrom = *latestDerivation - d.reorgCheckDepth + } + + savedBlocks := d.db.ReadDerivationL1BlockRange(checkFrom, *latestDerivation) + if len(savedBlocks) == 0 { + return nil, nil + } + + // Fast path: check the newest block first. If it matches, no reorg occurred. + newest := savedBlocks[len(savedBlocks)-1] + newestHeader, err := d.l1Client.HeaderByNumber(ctx, big.NewInt(int64(newest.Number))) + if err != nil { + return nil, fmt.Errorf("failed to get L1 header at %d: %w", newest.Number, err) + } + if newestHeader.Hash() == common.BytesToHash(newest.Hash[:]) { + return nil, nil + } + + // Slow path: reorg detected. Scan oldest-to-newest to find the earliest divergence. + for i := 0; i < len(savedBlocks); i++ { + block := savedBlocks[i] + header, err := d.l1Client.HeaderByNumber(ctx, big.NewInt(int64(block.Number))) + if err != nil { + return nil, fmt.Errorf("failed to get L1 header at %d: %w", block.Number, err) + } + savedHash := common.BytesToHash(block.Hash[:]) + if header.Hash() != savedHash { + d.logger.Info("L1 block hash mismatch detected", + "height", block.Number, + "savedHash", savedHash.Hex(), + "currentHash", header.Hash().Hex(), + ) + return &block.Number, nil + } + } + return nil, nil +} + +// handleL1Reorg handles an L1 reorg detected at the given L1 height. +// It only cleans up derivation DB state and resets the derivation L1 height +// so the next derivation loop re-processes from the reorg point. +// +// L1 reorg does NOT directly trigger an L2 rollback — in most cases the same +// batch tx gets re-included in a new L1 block with identical content, so L2 +// blocks remain valid. The normal derivation loop will re-fetch batches and +// run verifyBlockContext / verifyBatchRoots; only if those comparisons fail +// will an L2 rollback be triggered through rollbackLocalChain. +func (d *Derivation) handleL1Reorg(reorgAtL1Height uint64) error { + d.logger.Info("L1 reorg detected, cleaning DB records and restarting derivation from reorg point", + "reorgAtL1Height", reorgAtL1Height) + + d.db.DeleteDerivationL1BlocksFrom(reorgAtL1Height) + + if reorgAtL1Height > d.startHeight { + d.db.WriteLatestDerivationL1Height(reorgAtL1Height - 1) + } else { + // Reorg at or before startHeight — reset so next loop starts from startHeight. + if d.startHeight > 0 { + d.db.WriteLatestDerivationL1Height(d.startHeight - 1) + } else { + d.db.WriteLatestDerivationL1Height(0) + } + } + + return nil +} + +// recordL1Blocks saves L1 block hashes for reorg detection. 
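+// Entries are written once per height and are only pruned when a reorg is
+// detected (DeleteDerivationL1BlocksFrom removes the divergent tail).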
+// Returns an error if any header fetch fails — the caller must not advance +// derivation height to avoid permanent gaps in L1 block hash tracking. +func (d *Derivation) recordL1Blocks(ctx context.Context, from, to uint64) error { + for h := from; h <= to; h++ { + header, err := d.l1Client.HeaderByNumber(ctx, big.NewInt(int64(h))) + if err != nil { + return fmt.Errorf("failed to get L1 header at %d: %w", h, err) + } + + var hashBytes [32]byte + copy(hashBytes[:], header.Hash().Bytes()) + + d.db.WriteDerivationL1Block(&db.DerivationL1Block{ + Number: h, + Hash: hashBytes, + }) + } + return nil +} diff --git a/node/derivation/sequencer_mutex.go b/node/derivation/sequencer_mutex.go new file mode 100644 index 000000000..3bd424e80 --- /dev/null +++ b/node/derivation/sequencer_mutex.go @@ -0,0 +1,64 @@ +package derivation + +import "sync" + +// SPEC-005 §3.6 / §4 sequencer ↔ derivation mutual exclusion. +// +// Per SPEC-005 §3.7 non-target "do not modify sequencer block production", +// the mutex is enforced **on the morph/node side of the L2Node interface +// (RequestBlockData / DeliverBlock)**. The tendermint consensus layer is +// *not* modified. +// +// This file provides the mutex primitive. Wiring on the sequencer entry +// points (morph/node/sequencer/...) is a separate task tracked in +// tech-design §6.2 task #11. +// +// Granularity (global stop-the-world vs interval lock) is a SPEC-005 §8 #5 +// open question. The default scaffold below is a single global RWMutex, +// which gives global exclusion; if interval locking is later chosen, the +// public API stays the same but the internal representation grows a per- +// range structure. Callers should therefore depend only on the methods, +// not on this being a single global lock. + +// SequencerMutex coordinates between block production and derivation +// rollback. Any path producing a new unsafe L2 block must acquire a +// production lock; the rollback executor takes an exclusive lock during +// the SetHead → metadata persistence sequence. +type SequencerMutex struct { + mu sync.RWMutex +} + +// NewSequencerMutex returns a fresh mutex. There is one such mutex per +// node process; sharing is established through the constructor wiring. +func NewSequencerMutex() *SequencerMutex { + return &SequencerMutex{} +} + +// AcquireProduction blocks until the rollback executor (if any) has +// released the exclusive lock, then reserves a slot for block production. +// Each call must be paired with a deferred ReleaseProduction. +// +// TODO(spec-005-mutex): once SPEC-005 §8 #5 picks interval locking, this +// signature gains a (from, to) range and the implementation switches to +// a per-range exclusion table. +func (m *SequencerMutex) AcquireProduction() { + m.mu.RLock() +} + +// ReleaseProduction releases a production reservation acquired via +// AcquireProduction. Safe to call from defer. +func (m *SequencerMutex) ReleaseProduction() { + m.mu.RUnlock() +} + +// AcquireRollback blocks until all in-flight production reservations have +// been released, then reserves the exclusive rollback slot. Each call must +// be paired with a deferred ReleaseRollback. +func (m *SequencerMutex) AcquireRollback() { + m.mu.Lock() +} + +// ReleaseRollback releases the exclusive rollback slot. Safe to call from defer. 
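+//
+// Illustrative pairing (both sides follow the same acquire / defer-release
+// shape):
+//
+//	mu := NewSequencerMutex()
+//	mu.AcquireProduction()
+//	defer mu.ReleaseProduction()
+//	// ... assemble and deliver one unsafe block ...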
+func (m *SequencerMutex) ReleaseRollback() { + m.mu.Unlock() +} diff --git a/node/derivation/state.go b/node/derivation/state.go new file mode 100644 index 000000000..15ce7e553 --- /dev/null +++ b/node/derivation/state.go @@ -0,0 +1,57 @@ +package derivation + +import "fmt" + +// L2HeadStage represents the public-facing safety level of an L2 head per SPEC-005. +// +// State semantics: +// - StageUnsafe: Block executed locally; not yet anchored to any L1 batch. +// - StageSafe: Anchored to an L1 batch found on L1 `safe`; subject to rollback +// if the L1 batch reorgs out or batch verification fails. +// - StageFinalized: Anchored to an L1 batch whose origin is on L1 `finalized`. +// Monotonic; never rolls back. +// - StageHalted: Unrecoverable inconsistency (e.g. second batch-root mismatch +// after rollback, or a rollback target below FinalizedHead). +// Derivation refuses to advance until manual intervention. +// +// A node always advertises a single stage per head (one each for safe / finalized); +// halted is global to the derivation pipeline. +type L2HeadStage uint8 + +const ( + StageUnsafe L2HeadStage = iota + StageSafe + StageFinalized + StageHalted +) + +func (s L2HeadStage) String() string { + switch s { + case StageUnsafe: + return "unsafe" + case StageSafe: + return "safe" + case StageFinalized: + return "finalized" + case StageHalted: + return "halted" + default: + return fmt.Sprintf("unknown(%d)", uint8(s)) + } +} + +// HeadAnchor pairs an L2 head with the L1 origin that justifies its current +// safety stage. Both safe_head and finalized_head are persisted as HeadAnchor +// to allow detecting L1 reorgs that invalidate previously recorded anchors. +type HeadAnchor struct { + L2Number uint64 + L2Hash [32]byte + L1Number uint64 + L1Hash [32]byte +} + +// IsZero reports whether the anchor is uninitialized (e.g. at first node start +// before the first derivation loop has succeeded). +func (a HeadAnchor) IsZero() bool { + return a.L2Number == 0 && a.L1Number == 0 +} diff --git a/node/derivation/verify.go b/node/derivation/verify.go new file mode 100644 index 000000000..b57002602 --- /dev/null +++ b/node/derivation/verify.go @@ -0,0 +1,130 @@ +package derivation + +import ( + "bytes" + "fmt" + + "github.com/morph-l2/go-ethereum/accounts/abi/bind" + "github.com/morph-l2/go-ethereum/common" + eth "github.com/morph-l2/go-ethereum/core/types" +) + +// rollbackLocalChain rolls back the local L2 chain to the specified block number. +// +// SPEC-005 §3.6 / §5: triggered on block-context mismatch or batch-root mismatch. +// After rollback the caller re-derives the offending batch from L1 calldata. +// +// SPEC-005 §4 (safety considerations) requires the rollback to be atomic w.r.t. +// the sequencer's block-production path: the sequencer must not be able to +// produce a new unsafe block while the rollback is in flight. The atomic +// ordering is: +// +// 1. Acquire the sequencer ↔ derivation mutex (P3 — sequencer_mutex.go). +// 2. Pause sequencer block production (mutex blocks RequestBlockData / +// DeliverBlock entry points on the L2Node interface; tendermint +// consensus layer is not modified — see tech-design §3.2.2). +// 3. Pause this derivation loop (already serialized; the caller is the loop). +// 4. Call go-ethereum's hash-matched SetHead (SPEC-005 §8 #4 blocking item). +// 5. Clear derivation cursor for the rolled-back range. +// 6. Clear L1 anchor records for the discarded segment. +// 7. Atomically persist the new safe_head metadata (head_anchor.go). +// 8. Release the mutex. 
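+//
+// Condensed into code, the guarded part of the sequence is expected to look
+// roughly like this (illustrative; `sequencerMutex` is not yet a field on
+// Derivation, and the hash-matched SetHead wrapper is the §8 #4 open item):
+//
+//	d.sequencerMutex.AcquireRollback()
+//	defer d.sequencerMutex.ReleaseRollback()
+//	// hash-matched SetHead via the engine API, then cleanup:
+//	d.db.DeleteDerivationL1BlocksFrom(discardedFrom)
+//	d.writeSafeHead(newAnchor)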
+//
+// Boundary: target < finalized_head → halted (SPEC-005 §3.6); enforced before
+// invoking the SetHead call. target < genesis → halted.
+func (d *Derivation) rollbackLocalChain(targetBlockNumber uint64) error {
+	if err := d.checkRollbackBoundary(targetBlockNumber); err != nil {
+		return err
+	}
+
+	d.logger.Error("L2 chain rollback not yet implemented",
+		"targetBlockNumber", targetBlockNumber)
+
+	// TODO(spec-005-rollback): implement steps 1-8 above. Blocked on:
+	//  - SPEC-005 §8 #5: sequencer mutex granularity (sequencer_mutex.go).
+	//  - SPEC-005 §8 #4: go-ethereum hash-matched SetHead interface (must
+	//    refuse to roll back if the supplied (number, hash) does not match
+	//    the local canonical chain — see tech-design §3.3).
+	//  - node/types/retryable_client.go SetHead wrapper once the upstream
+	//    EL method is finalised.
+	return fmt.Errorf("rollback not implemented yet, target=%d", targetBlockNumber)
+}
+
+// checkRollbackBoundary enforces the SPEC-005 §3.6 boundary: rolling back
+// past finalized_head is fatal, regardless of why the caller wanted to.
+func (d *Derivation) checkRollbackBoundary(targetBlockNumber uint64) error {
+	finalized := d.readFinalizedHead()
+	if finalized != nil && targetBlockNumber < finalized.L2Number {
+		// SPEC-005 §3.6 / §4.3: enter halted; no recovery short of manual
+		// intervention. The caller is expected to set d.halted in response.
+		return fmt.Errorf("rollback target %d below finalized_head %d — halted boundary",
+			targetBlockNumber, finalized.L2Number)
+	}
+	return nil
+}
+
+// verifyBatchRoots verifies that the local state root and withdrawal root match the L1 batch data.
+//
+// SPEC-005 §3.4 / §3.2 invariant: this check is **always executed and never
+// depends on blob data**. Both `batchInfo.root` (postStateRoot) and
+// `batchInfo.withdrawalRoot` are extracted from L1 calldata at parse time
+// (see batch_info.go); they reach this function regardless of whether the
+// beacon-side blob fetch (Path A) or the local rebuild fallback (Path B,
+// SPEC-005 §3.3) has succeeded. Code review must reject any change that
+// makes this verification conditional on blob availability.
+func (d *Derivation) verifyBatchRoots(batchInfo *BatchInfo, lastHeader *eth.Header) error {
+	withdrawalRoot, err := d.L2ToL1MessagePasser.MessageRoot(&bind.CallOpts{
+		BlockNumber: lastHeader.Number,
+	})
+	if err != nil {
+		return fmt.Errorf("get withdrawal root failed: %w", err)
+	}
+
+	rootMismatch := !bytes.Equal(lastHeader.Root.Bytes(), batchInfo.root.Bytes())
+	withdrawalMismatch := !bytes.Equal(withdrawalRoot[:], batchInfo.withdrawalRoot.Bytes())
+
+	if rootMismatch || withdrawalMismatch {
+		return fmt.Errorf("root mismatch: stateRoot(l1=%s, local=%s) withdrawalRoot(l1=%s, local=%s)",
+			batchInfo.root.Hex(), lastHeader.Root.Hex(),
+			batchInfo.withdrawalRoot.Hex(), common.BytesToHash(withdrawalRoot[:]).Hex())
+	}
+	return nil
+}
+
+// verifyBlockContext compares a local L2 block header against the batch block context from L1.
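+// Checked fields: Time, GasLimit, and BaseFee (nil-aware), plus the
+// batch-internal txsNum consistency check. Full transaction-hash comparison
+// is deferred until RetryableClient exposes BlockByNumber (see the refactor
+// notes); state-root verification in verifyBatchRoots covers transaction
+// divergence indirectly.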
+func (d *Derivation) verifyBlockContext(localHeader *eth.Header, blockData *BlockContext) error { + if localHeader.Time != blockData.Timestamp { + return fmt.Errorf("timestamp mismatch at block %d: local=%d, batch=%d", + blockData.Number, localHeader.Time, blockData.Timestamp) + } + if localHeader.GasLimit != blockData.GasLimit { + return fmt.Errorf("gasLimit mismatch at block %d: local=%d, batch=%d", + blockData.Number, localHeader.GasLimit, blockData.GasLimit) + } + switch { + case blockData.BaseFee != nil && localHeader.BaseFee != nil: + if localHeader.BaseFee.Cmp(blockData.BaseFee) != 0 { + return fmt.Errorf("baseFee mismatch at block %d: local=%s, batch=%s", + blockData.Number, localHeader.BaseFee.String(), blockData.BaseFee.String()) + } + case blockData.BaseFee == nil && localHeader.BaseFee == nil: + // Both nil — pre-EIP-1559 or legacy batch format, OK. + default: + // One side has BaseFee, the other doesn't — structural inconsistency. + return fmt.Errorf("baseFee nil mismatch at block %d: local=%v, batch=%v", + blockData.Number, localHeader.BaseFee, blockData.BaseFee) + } + // Batch internal consistency check: txsNum in the block context should match the + // actual number of transactions assembled in SafeL2Data (L1 messages + L2 txs). + // This catches batch parsing/corruption issues, not local-vs-L1 divergence. + // Local-vs-L1 transaction divergence is covered by state root verification + // in verifyBatchRoots (different txs → different state root). + if blockData.SafeL2Data != nil { + batchTxCount := len(blockData.SafeL2Data.Transactions) + if batchTxCount != int(blockData.txsNum) { + return fmt.Errorf("batch internal tx count inconsistency at block %d: blockContext.txsNum=%d, safeL2Data.Transactions=%d", + blockData.Number, blockData.txsNum, batchTxCount) + } + } + return nil +} diff --git a/node/derivation/verify_path_b.go b/node/derivation/verify_path_b.go new file mode 100644 index 000000000..48a6be0ea --- /dev/null +++ b/node/derivation/verify_path_b.go @@ -0,0 +1,94 @@ +package derivation + +import ( + "context" + "errors" +) + +// SPEC-005 §3.3 path B (degraded batch-content verification). +// +// When path A (online beacon blob) is unavailable, this path rebuilds the +// versioned blob hash from local L2 blocks and compares it against the +// blob hash recorded in L1 commitBatch calldata. State / withdrawal root +// verification (verify.go::verifyBatchRoots) runs independently and is +// never gated on either path; see SPEC-005 §3.4. +// +// Trigger conditions (must all hold per SPEC-005 §3.3): +// 1. Path A returned an empty / unavailable result for this batch. +// 2. The batch's last L2 block is at or below safe_head — i.e. the batch +// is in the historical tail, the only segment where blob retention +// can legitimately have lapsed. +// 3. The local node still holds every L2 block in the batch range. +// +// Default-on/off behaviour and whether to retry path A on success are the +// SPEC-005 §8 #3 open question. + +// errPathBUnavailable indicates the caller must fall back to the standard +// path-A failure handling (rollback / re-derive) — i.e. path B was either +// not eligible to run or failed to reproduce the blob hash. +var errPathBUnavailable = errors.New("path B unavailable") + +// verifyBatchContentPathB attempts the degraded path B verification for the +// given batch. Returns nil on success. 
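+//
+// The eventual rebuild is expected to lean on go-ethereum's kzg4844 helpers,
+// roughly as follows (hypothetical sketch; exact signatures depend on the
+// fork's geth version):
+//
+//	commitment, _ := kzg4844.BlobToCommitment(&blob)
+//	versionedHash := kzg4844.CalcBlobHashV1(sha256.New(), &commitment)
+//	// compare versionedHash against batchInfo.blobHashes[i]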
+// +// Eligibility check (returns errPathBUnavailable when not eligible) is +// kept inside this function so callers can blindly invoke it as a +// fallback after path A has failed — there is no separate "isEligible" +// query to keep two-stage races out of the main loop. +func (d *Derivation) verifyBatchContentPathB(ctx context.Context, batchInfo *BatchInfo) error { + if err := ctx.Err(); err != nil { + return err + } + if !d.pathBEnabled() { + return errPathBUnavailable + } + if !d.pathBEligible(batchInfo) { + return errPathBUnavailable + } + + // TODO(spec-005-path-b): rebuild versioned blob hash from local L2 blocks. + // + // Implementation sketch: + // 1. For each L2 block in [batchInfo.firstBlockNumber, batchInfo.lastBlockNumber]: + // - fetch local block (already on disk; geth eth_getBlockByNumber). + // - encode tx list using node/types.MaxBlobBytesSize / RetrieveBlobBytes + // inverse: see node/types/blob.go for the path-A decode helpers. + // 2. Compress with node/zstd, slice to blob-sized chunks (see SPEC-002 batching). + // 3. For each chunk, compute kzg4844 commitment + versioned hash. + // 4. Compare ordered versioned hashes against batchInfo.blobHashes. + // + // This is gated on confirming there's no double-implementation cost vs the + // existing tx-submitter blob construction path (open question per + // tech-design §8 / per-module §5 #3); production-grade code should reuse + // existing helpers rather than reimplementing the encoder. + + d.metrics.IncPathBTriggered() + d.logger.Info("path B verification triggered (skeleton — not yet implemented)", + "batchIndex", batchInfo.batchIndex) + return errPathBUnavailable +} + +// pathBEnabled reports whether the operator has opted into the degraded path. +// +// TODO(spec-005-path-b): wire this to a flag once SPEC-005 §8 #3 is decided +// (default-on vs default-off). Until then, path B is permanently disabled. +func (d *Derivation) pathBEnabled() bool { + return false +} + +// pathBEligible reports whether path B can run for the given batch. +// Per SPEC-005 §3.3: batch must be historical (lastBlock <= safe_head) AND +// every L2 block in the range must exist locally. +func (d *Derivation) pathBEligible(batchInfo *BatchInfo) bool { + safe := d.readSafeHead() + if safe == nil { + return false + } + if batchInfo.lastBlockNumber > safe.L2Number { + // Live segment, not eligible — Path A failure here is a real anomaly. + return false + } + // TODO(spec-005-path-b): walk [first, last] confirming local presence. + // Skipped for skeleton — pathBEnabled() is false anyway. + return true +} diff --git a/node/flags/flags.go b/node/flags/flags.go index 19325a4b0..573090f9f 100644 --- a/node/flags/flags.go +++ b/node/flags/flags.go @@ -168,19 +168,6 @@ var ( EnvVar: prefixEnvVar("VALIDATOR"), } - ChallengeEnable = cli.BoolFlag{ - Name: "validator.challengeEnable", - Usage: "Enable the validator challenge", - EnvVar: prefixEnvVar("VALIDATOR_CHALLENGE_ENABLE"), - } - - // validator - ValidatorPrivateKey = cli.StringFlag{ - Name: "validator.privateKey", - Usage: "Private Key corresponding to SUBSIDY Owner", - EnvVar: prefixEnvVar("VALIDATOR_PRIVATE_KEY"), - } - // derivation RollupContractAddress = cli.StringFlag{ Name: "derivation.rollupAddress", @@ -265,6 +252,13 @@ var ( Usage: "The number of confirmations needed on L1 for finalization. 
diff --git a/node/flags/flags.go b/node/flags/flags.go
index 19325a4b0..573090f9f 100644
--- a/node/flags/flags.go
+++ b/node/flags/flags.go
@@ -168,19 +168,6 @@ var (
 		EnvVar: prefixEnvVar("VALIDATOR"),
 	}
 
-	ChallengeEnable = cli.BoolFlag{
-		Name:   "validator.challengeEnable",
-		Usage:  "Enable the validator challenge",
-		EnvVar: prefixEnvVar("VALIDATOR_CHALLENGE_ENABLE"),
-	}
-
-	// validator
-	ValidatorPrivateKey = cli.StringFlag{
-		Name:   "validator.privateKey",
-		Usage:  "Private Key corresponding to SUBSIDY Owner",
-		EnvVar: prefixEnvVar("VALIDATOR_PRIVATE_KEY"),
-	}
-
 	// derivation
 	RollupContractAddress = cli.StringFlag{
 		Name:   "derivation.rollupAddress",
@@ -265,6 +252,13 @@ var (
 		Usage:  "The number of confirmations needed on L1 for finalization. If not set, the default value is l1.confirmations",
 		EnvVar: prefixEnvVar("DERIVATION_CONFIRMATIONS"),
 	}
+
+	DerivationReorgCheckDepth = cli.Uint64Flag{
+		Name:   "derivation.reorgCheckDepth",
+		Usage:  "Number of recent L1 blocks to check for reorgs (default: 64)",
+		EnvVar: prefixEnvVar("DERIVATION_REORG_CHECK_DEPTH"),
+		Value:  64,
+	}
 	// Logger
 	LogLevel = &cli.StringFlag{
 		Name:   "log.level",
@@ -351,10 +345,6 @@ var Flags = []cli.Flag{
 	TendermintConfigPath,
 	MockEnabled,
 	ValidatorEnable,
-	ChallengeEnable,
-
-	// validator
-	ValidatorPrivateKey,
 
 	// derivation
 	RollupContractAddress,
@@ -364,6 +354,7 @@ var Flags = []cli.Flag{
 	DerivationLogProgressInterval,
 	DerivationFetchBlockRange,
 	DerivationConfirmations,
+	DerivationReorgCheckDepth,
 	L1BeaconAddr,
 
 	// blocktag options
diff --git a/node/ops-morph/docker-compose-validator.yml b/node/ops-morph/docker-compose-validator.yml
index 09a1efa74..0b0bc4d63 100644
--- a/node/ops-morph/docker-compose-validator.yml
+++ b/node/ops-morph/docker-compose-validator.yml
@@ -21,7 +21,6 @@ services:
       ## todo need to replace it to a public network
       - MORPH_NODE_L1_ETH_RPC=http://host.docker.internal:9545
       - MORPH_NODE_L1_ETH_BEACON_RPC=http://host.docker.internal:3500
-      - MORPH_NODE_VALIDATOR_PRIVATE_KEY=0x0000000000000000000000000000000000000000000000000000000000000001
       - MORPH_NODE_ROLLUP_ADDRESS=0xa513e6e4b8f2a923d98304ec87f64353c4d5c853
       - MORPH_NODE_DERIVATION_START_HEIGHT=1
       - MORPH_NODE_DERIVATION_FETCH_BLOCK_RANGE=1000
diff --git a/node/validator/config.go b/node/validator/config.go
deleted file mode 100644
index 986fd16d5..000000000
--- a/node/validator/config.go
+++ /dev/null
@@ -1,46 +0,0 @@
-package validator
-
-import (
-	"crypto/ecdsa"
-	"math/big"
-	"strings"
-
-	"github.com/morph-l2/go-ethereum/common"
-	"github.com/morph-l2/go-ethereum/crypto"
-	"github.com/urfave/cli"
-
-	"morph-l2/node/flags"
-)
-
-type Config struct {
-	l1RPC           string
-	PrivateKey      *ecdsa.PrivateKey
-	L1ChainID       *big.Int
-	rollupContract  common.Address
-	challengeEnable bool
-}
-
-func NewConfig() *Config {
-	return &Config{}
-}
-
-func (c *Config) SetCliContext(ctx *cli.Context) error {
-	l1NodeAddr := ctx.GlobalString(flags.L1NodeAddr.Name)
-	l1ChainID := ctx.GlobalUint64(flags.L1ChainID.Name)
-	c.challengeEnable = ctx.GlobalBool(flags.ChallengeEnable.Name)
-	if c.challengeEnable {
-		hexPrvKey := ctx.GlobalString(flags.ValidatorPrivateKey.Name)
-		hex := strings.TrimPrefix(hexPrvKey, "0x")
-		privateKey, err := crypto.HexToECDSA(hex)
-		if err != nil {
-			return err
-		}
-		c.PrivateKey = privateKey
-	}
-	addrHex := ctx.GlobalString(flags.RollupContractAddress.Name)
-	rollupContract := common.HexToAddress(addrHex)
-	c.l1RPC = l1NodeAddr
-	c.L1ChainID = big.NewInt(int64(l1ChainID))
-	c.rollupContract = rollupContract
-	return nil
-}
diff --git a/node/validator/validator.go b/node/validator/validator.go
deleted file mode 100644
index 224c8c3d8..000000000
--- a/node/validator/validator.go
+++ /dev/null
@@ -1,118 +0,0 @@
-package validator
-
-import (
-	"context"
-	"crypto/ecdsa"
-	"errors"
-	"fmt"
-	"math/big"
-	"time"
-
-	"github.com/morph-l2/go-ethereum"
-	"github.com/morph-l2/go-ethereum/accounts/abi/bind"
-	ethtypes "github.com/morph-l2/go-ethereum/core/types"
-	"github.com/morph-l2/go-ethereum/ethclient"
-	"github.com/morph-l2/go-ethereum/log"
-	tmlog "github.com/tendermint/tendermint/libs/log"
-
-	"morph-l2/bindings/bindings"
-)
-
-type Validator struct {
-	cli             DeployContractBackend
-	privateKey      *ecdsa.PrivateKey
-	l1ChainID       *big.Int
-	contract        *bindings.Rollup
-	challengeEnable bool
-	logger          tmlog.Logger
-}
-
-type DeployContractBackend interface {
-	bind.DeployBackend
-	bind.ContractBackend
-}
-
-func NewValidator(cfg *Config, rollup *bindings.Rollup, logger tmlog.Logger) (*Validator, error) {
-	cli, err := ethclient.Dial(cfg.l1RPC)
-	if err != nil {
-		return nil, fmt.Errorf("dial l1 node error:%v", err)
-	}
-	return &Validator{
-		cli:             cli,
-		contract:        rollup,
-		privateKey:      cfg.PrivateKey,
-		l1ChainID:       cfg.L1ChainID,
-		challengeEnable: cfg.challengeEnable,
-		logger:          logger,
-	}, nil
-}
-
-func (v *Validator) SetLogger() {
-	v.logger = v.logger.With("module", "validator")
-}
-
-func (v *Validator) ChallengeEnable() bool {
-	return v.challengeEnable
-}
-
-func (v *Validator) ChallengeState(batchIndex uint64) error {
-	if !v.ChallengeEnable() {
-		return fmt.Errorf("the challenge is not enabled,please set challengeEnable is true")
-	}
-	opts, err := bind.NewKeyedTransactorWithChainID(v.privateKey, v.l1ChainID)
-	if err != nil {
-		return err
-	}
-	gasPrice, err := v.cli.SuggestGasPrice(opts.Context)
-	if err != nil {
-		return err
-	}
-	opts.GasPrice = gasPrice
-	opts.NoSend = true
-	batchHash, err := v.contract.CommittedBatches(
-		&bind.CallOpts{
-			Pending: false,
-			Context: opts.Context,
-		},
-		new(big.Int).SetUint64(batchIndex),
-	)
-	if err != nil {
-		return err
-	}
-	tx, err := v.contract.ChallengeState(opts, batchIndex, batchHash)
-	if err != nil {
-		return err
-	}
-	log.Info("send ChallengeState transaction ", "txHash", tx.Hash().Hex())
-	if err := v.cli.SendTransaction(context.Background(), tx); err != nil {
-		return err
-	}
-	// Wait for the receipt
-	receipt, err := waitForReceipt(v.cli, tx)
-	if err != nil {
-		return err
-	}
-	log.Info("Validator has already started the challenge", "hash", tx.Hash().Hex(),
-		"gas-used", receipt.GasUsed, "blocknumber", receipt.BlockNumber)
-	return nil
-}
-
-func waitForReceipt(backend DeployContractBackend, tx *ethtypes.Transaction) (*ethtypes.Receipt, error) {
-	t := time.NewTicker(300 * time.Millisecond)
-	receipt := new(ethtypes.Receipt)
-	var err error
-	for range t.C {
-		receipt, err = backend.TransactionReceipt(context.Background(), tx.Hash())
-		if errors.Is(err, ethereum.NotFound) {
-			continue
-		}
-		if err != nil {
-			return nil, err
-		}
-		if receipt != nil {
-			t.Stop()
-			break
-		}
-	}
-	return receipt, nil
-}
diff --git a/node/validator/validator_test.go b/node/validator/validator_test.go
deleted file mode 100644
index 038a6f978..000000000
--- a/node/validator/validator_test.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package validator
-
-import (
-	"crypto/ecdsa"
-	"math/big"
-	"testing"
-
-	"github.com/morph-l2/go-ethereum/accounts/abi/bind"
-	"github.com/morph-l2/go-ethereum/accounts/abi/bind/backends"
-	"github.com/morph-l2/go-ethereum/core"
-	"github.com/morph-l2/go-ethereum/core/rawdb"
-	"github.com/morph-l2/go-ethereum/crypto"
-	"github.com/morph-l2/go-ethereum/ethdb"
-	"github.com/morph-l2/go-ethereum/log"
-	"github.com/stretchr/testify/require"
-
-	"morph-l2/bindings/bindings"
-)
-
-func TestValidator_ChallengeState(t *testing.T) {
-	key, _ := crypto.GenerateKey()
-	sim, _ := newSimulatedBackend(key)
-	opts, err := bind.NewKeyedTransactorWithChainID(key, big.NewInt(1337))
-	require.NoError(t, err)
-	addr, _, rollup, err := bindings.DeployRollup(opts, sim, 1337)
-	require.NoError(t, err)
-	sim.Commit()
-	v := Validator{
-		cli:             sim,
-		privateKey:      key,
-		l1ChainID:       big.NewInt(1),
-		contract:        rollup,
-		challengeEnable: true,
-	}
-	err = v.ChallengeState(10)
-	log.Info("addr:", addr)
-	require.EqualError(t, err, "execution reverted: only challenger allowed")
-}
-
-func newSimulatedBackend(key *ecdsa.PrivateKey) (*backends.SimulatedBackend, ethdb.Database) {
-	var gasLimit uint64 = 9_000_000
-	auth, _ := bind.NewKeyedTransactorWithChainID(key, big.NewInt(1337))
-	genAlloc := make(core.GenesisAlloc)
-	genAlloc[auth.From] = core.GenesisAccount{Balance: big.NewInt(9223372036854775807)}
-	db := rawdb.NewMemoryDatabase()
-	sim := backends.NewSimulatedBackendWithDatabase(db, genAlloc, gasLimit)
-	return sim, db
-}
diff --git a/ops/docker/docker-compose-4nodes.yml b/ops/docker/docker-compose-4nodes.yml
index 32ea8b79b..370808b89 100644
--- a/ops/docker/docker-compose-4nodes.yml
+++ b/ops/docker/docker-compose-4nodes.yml
@@ -433,7 +433,6 @@ services:
       - MORPH_NODE_L1_ETH_RPC=${L1_ETH_RPC}
      - MORPH_NODE_L1_ETH_BEACON_RPC=${L1_BEACON_CHAIN_RPC}
       - MORPH_NODE_SYNC_DEPOSIT_CONTRACT_ADDRESS=${MORPH_PORTAL:-0x6900000000000000000000000000000000000001}
-      - MORPH_NODE_VALIDATOR_PRIVATE_KEY=ac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
       - MORPH_NODE_ROLLUP_ADDRESS=${MORPH_ROLLUP:-0x6900000000000000000000000000000000000010}
       - MORPH_NODE_DERIVATION_START_HEIGHT=1
       - MORPH_NODE_SYNC_START_HEIGHT=1