pruner: use teeWriter and HEAD as prune target to support unclean shutdown #307
@@ -63,6 +63,26 @@ var (
 	emptyKeccakCodeHash = codehash.EmptyKeccakCodeHash.Bytes()
 )
 
+// teeWriter writes to both the real database and the state bloom filter.
+// This ensures GenerateTrie persists trie nodes to disk (filling gaps from
+// unclean shutdowns) while also recording their hashes in the bloom filter
+// for pruning protection.
+type teeWriter struct {
+	db    ethdb.KeyValueWriter
+	bloom *stateBloom
+}
+
+func (w *teeWriter) Put(key, value []byte) error {
+	if err := w.db.Put(key, value); err != nil {
+		return err
+	}
+	return w.bloom.Put(key, value)
+}
+
+func (w *teeWriter) Delete(key []byte) error {
+	return w.db.Delete(key)
+}
+
 // Pruner is an offline tool to prune the stale state with the
 // help of the snapshot. The workflow of pruner is very simple:
 //
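To make the tee-writing behaviour concrete, here is a small, self-contained sketch of the same pattern using stand-in types (a toy `kvWriter` interface and map-backed sinks instead of the real `ethdb.KeyValueWriter`, database, and `stateBloom`): every `Put` is mirrored into both sinks, while `Delete` touches only the primary.

```go
package main

import "fmt"

// kvWriter is a local stand-in for ethdb.KeyValueWriter, just enough to show the pattern.
type kvWriter interface {
	Put(key, value []byte) error
	Delete(key []byte) error
}

// mapWriter is a toy sink backed by a map; it stands in for the real database and bloom filter.
type mapWriter struct{ data map[string][]byte }

func (m *mapWriter) Put(key, value []byte) error { m.data[string(key)] = value; return nil }
func (m *mapWriter) Delete(key []byte) error     { delete(m.data, string(key)); return nil }

// tee mirrors every Put into both sinks, like the teeWriter above: the primary
// (database) receives the node, and the secondary (bloom filter) records its key.
type tee struct{ primary, secondary kvWriter }

func (t *tee) Put(key, value []byte) error {
	if err := t.primary.Put(key, value); err != nil {
		return err
	}
	return t.secondary.Put(key, value)
}

// Delete only touches the primary; the secondary sink is add-only in this sketch.
func (t *tee) Delete(key []byte) error { return t.primary.Delete(key) }

func main() {
	db := &mapWriter{data: map[string][]byte{}}
	bloom := &mapWriter{data: map[string][]byte{}}
	w := &tee{primary: db, secondary: bloom}
	_ = w.Put([]byte("trie-node-hash"), []byte("trie-node-rlp"))
	fmt.Println(len(db.data), len(bloom.data)) // 1 1: the same write landed in both sinks
}
```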
@@ -81,10 +101,6 @@ type Pruner struct {
 	trieCachePath string
 	headHeader    *types.Header
 	snaptree      *snapshot.Tree
-	// snapDiskRoot is set when the snapshot journal was missing and we fell
-	// back to the persisted snapshot disk-layer root. Prune() uses it as the
-	// pruning target when no explicit root is provided.
-	snapDiskRoot common.Hash
 }
 
 // NewPruner creates the pruner instance.
@@ -94,28 +110,8 @@ func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint6
 		return nil, errors.New("Failed to load head block")
 	}
 	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, false)
-	var snapDiskRoot common.Hash
 	if err != nil {
-		// The snapshot journal may be missing because geth was not shut down
-		// cleanly (SIGKILL before BlockChain.Stop could write the journal).
-		// Fall back: initialise the snapshot tree with the persisted disk
-		// snapshot root so that Prune() can still target that state.
-		snapDiskRoot = rawdb.ReadSnapshotRoot(db)
-		if snapDiskRoot == (common.Hash{}) {
-			return nil, err // No snapshot at all — nothing we can do.
-		}
-		log.Warn("Snapshot journal missing, falling back to snapshot disk-layer root",
-			"snapDiskRoot", snapDiskRoot, "chainHead", headBlock.Root())
-		// If the snapshot was mid-generation when the node was killed, New will
-		// resume and wait for generation to finish (async=false). This can take
-		// a long time for large state; the log below makes that visible.
-		log.Info("Loading snapshot from disk-layer root (may wait for snapshot generation to finish)...",
-			"snapDiskRoot", snapDiskRoot)
-		snaptree, err = snapshot.New(db, trie.NewDatabase(db), 256, snapDiskRoot, false, false, false)
-		if err != nil {
-			return nil, err
-		}
-		log.Info("Snapshot ready", "snapDiskRoot", snapDiskRoot)
+		return nil, err // The relevant snapshot(s) might not exist
 	}
 	// Sanitize the bloom filter size if it's too small.
 	if bloomSize < 256 {
@@ -133,7 +129,6 @@ func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint6
 		trieCachePath: trieCachePath,
 		headHeader:    headBlock.Header(),
 		snaptree:      snaptree,
-		snapDiskRoot:  snapDiskRoot,
 	}, nil
 }
 
@@ -213,15 +208,8 @@ func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, sta
 	// Pruning is done, now drop the "useless" layers from the snapshot.
 	// Firstly, flushing the target layer into the disk. After that all
 	// diff layers below the target will all be merged into the disk.
-	//
-	// Skip Cap when the root is already the disk layer (no diff layers exist).
-	// This happens in the fallback path where the snapshot journal was missing
-	// and we initialised the tree directly from the persisted disk root — Cap
-	// would otherwise return "snapshot is disk layer" and abort needlessly.
-	if snaptree.DiskRoot() != root {
-		if err := snaptree.Cap(root, 0); err != nil {
-			return err
-		}
-	}
+	if err := snaptree.Cap(root, 0); err != nil {
+		return err
+	}
 	// Secondly, flushing the snapshot journal into the disk. All diff
 	// layers upon are dropped silently. Eventually the entire snapshot
@@ -262,7 +250,8 @@ func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, sta
 
 // Prune deletes all historical state nodes except the nodes belong to the
 // specified state version. If user doesn't specify the state version, use
-// the bottom-most snapshot diff layer as the target.
+// the HEAD state as the target so the node restarts at exactly the same
+// height it stopped at (zero block rewind).
 func (p *Pruner) Prune(root common.Hash) error {
 	// If the state bloom filter is already committed previously,
 	// reuse it for pruning instead of generating a new one. It's
@@ -275,91 +264,31 @@ func (p *Pruner) Prune(root common.Hash) error {
 	if stateBloomRoot != (common.Hash{}) {
 		return RecoverPruning(p.datadir, p.db, p.trieCachePath)
 	}
-	// If the target state root is not specified, use the HEAD-127 as the
-	// target. The reason for picking it is:
-	// - in most of the normal cases, the related state is available
-	// - the probability of this layer being reorg is very low
-	var layers []snapshot.Snapshot
 	if root == (common.Hash{}) {
-		// When the snapshot journal was missing (unclean shutdown), we fell
-		// back to the persisted disk snapshot root in NewPruner. Use that
-		// root directly as the pruning target instead of requiring 128 diff
-		// layers that don't exist.
-		if p.snapDiskRoot != (common.Hash{}) {
-			log.Info("Using snapshot disk-layer root as pruning target (journal was missing)",
-				"snapDiskRoot", p.snapDiskRoot)
-			root = p.snapDiskRoot
-		} else {
-			// Retrieve all snapshot layers from the current HEAD.
-			// In theory there are 128 difflayers + 1 disk layer present,
-			// so 128 diff layers are expected to be returned.
-			layers = p.snaptree.Snapshots(p.headHeader.Root, 128, true)
-			if len(layers) != 128 {
-				// Reject if the accumulated diff layers are less than 128. It
-				// means in most of normal cases, there is no associated state
-				// with bottom-most diff layer.
-				return fmt.Errorf("snapshot not old enough yet: need %d more blocks", 128-len(layers))
-			}
-			// Use the bottom-most diff layer as the target
-			root = layers[len(layers)-1].Root()
-		}
-	}
-	// Ensure the root is really present. The weak assumption
-	// is the presence of root can indicate the presence of the
-	// entire trie.
-	if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 {
-		// The special case is for clique based networks(rinkeby, goerli
-		// and some other private networks), it's possible that two
-		// consecutive blocks will have same root. In this case snapshot
-		// difflayer won't be created. So HEAD-127 may not paired with
-		// head-127 layer. Instead the paired layer is higher than the
-		// bottom-most diff layer. Try to find the bottom-most snapshot
-		// layer with state available.
-		//
-		// Note HEAD and HEAD-1 is ignored. Usually there is the associated
-		// state available, but we don't want to use the topmost state
-		// as the pruning target.
-		var found bool
-		for i := len(layers) - 2; i >= 2; i-- {
-			if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 {
-				root = layers[i].Root()
-				found = true
-				log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
-				break
-			}
-		}
-		if !found {
-			if len(layers) > 0 {
-				return errors.New("no snapshot paired state")
-			}
-			return fmt.Errorf("associated state[%x] is not present", root)
-		}
+		// Use HEAD as the pruning target. On L2 chains reorgs are
+		// essentially non-existent, so the HEAD-127 safety margin
+		// from L1 is unnecessary. Combined with the teeWriter that
+		// persists the trie from snapshot, this guarantees zero
+		// height drop after pruning regardless of shutdown method.
+		root = p.headHeader.Root
+		log.Info("Selecting HEAD as the pruning target", "root", root, "height", p.headHeader.Number.Uint64())
 	} else {
-		if len(layers) > 0 {
-			log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.headHeader.Number.Uint64()-127)
-		} else {
-			log.Info("Selecting user-specified state as the pruning target", "root", root)
-		}
+		log.Info("Selecting user-specified state as the pruning target", "root", root)
 	}
 	// Before start the pruning, delete the clean trie cache first.
 	// It's necessary otherwise in the next restart we will hit the
 	// deleted state root in the "clean cache" so that the incomplete
 	// state is picked for usage.
 	deleteCleanTrieCache(p.trieCachePath)
 
-	// All the state roots of the middle layer should be forcibly pruned,
-	// otherwise the dangling state will be left.
-	middleRoots := make(map[common.Hash]struct{})
-	for _, layer := range layers {
-		if layer.Root() == root {
-			break
-		}
-		middleRoots[layer.Root()] = struct{}{}
-	}
 	// Traverse the target state, re-construct the whole state trie and
-	// commit to the given bloom filter.
+	// commit to both the database and the bloom filter. Writing to the
+	// database ensures the target trie is complete on disk even after
+	// an unclean shutdown, so the post-pruning restart lands exactly
+	// at the target block instead of rewinding further.
 	start := time.Now()
-	if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
+	writer := &teeWriter{db: p.db, bloom: p.stateBloom}
+	if err := snapshot.GenerateTrie(p.snaptree, root, p.db, writer); err != nil {
 		return err
 	}
 	// Traverse the genesis, put all genesis state entries into the
@@ -374,7 +303,7 @@ func (p *Pruner) Prune(root common.Hash) error {
 		return err
 	}
 	log.Info("State bloom filter committed", "name", filterName)
-	return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start)
+	return prune(p.snaptree, root, p.db, p.stateBloom, filterName, nil, start)
 }
 
 // RecoverPruning will resume the pruning procedure during the system restart.
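With those pieces in place, driving an offline prune after an unclean shutdown reduces to constructing the pruner and calling `Prune` with the zero root. A rough sketch of a caller follows; the package qualifier and the bloom-filter size are illustrative assumptions, only the `NewPruner` and `Prune` signatures come from this diff.

```go
// Hedged sketch of a caller; values and package path are illustrative.
p, err := pruner.NewPruner(db, datadir, trieCachePath, 2048) // bloom filter size, illustrative
if err != nil {
	return err
}
// The zero root makes Prune select HEAD as the target, so the node restarts
// at the height it stopped at even after an unclean shutdown.
return p.Prune(common.Hash{})
```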
@@ -452,7 +381,14 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
 	if genesis == nil {
 		return errors.New("missing genesis block")
 	}
-	t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db))
+	// genesis.Root() may be a zkTrie root (overridden via GenesisStateRoot
+	// for block hash compatibility). Resolve it to the actual MPT disk root
+	// so trie.NewSecure can open the trie.
+	genesisRoot := genesis.Root()
+	if mptRoot, err := rawdb.ReadDiskStateRoot(db, genesisRoot); err == nil {
+		genesisRoot = mptRoot
+	}
+	t, err := trie.NewSecure(genesisRoot, trie.NewDatabase(db))
Comment on lines +384 to +391

Contributor
🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify ReadDiskStateRoot behavior for missing keys and check if there's additional validation
echo "=== Check ReadDiskStateRoot implementation ==="
rg -A 10 "func ReadDiskStateRoot" --type go
echo ""
echo "=== Check if diskStateRootKey is used elsewhere for validation ==="
rg "diskStateRootKey" --type go -C 3
echo ""
echo "=== Check how trie.NewSecure handles empty root ==="
rg -A 5 "func NewSecure" --type go | head -30

Repository: morph-l2/go-ethereum
Length of output: 3857

🏁 Script executed:

#!/bin/bash
echo "=== Check trie.New (called by NewSecure) with empty root ==="
rg -A 15 "^func New\(root common.Hash" trie/trie.go
echo ""
echo "=== Check if genesis root mapping is always written before read ==="
rg -B 5 -A 5 "WriteDiskStateRoot.*genesis" core/
echo ""
echo "=== Check genesis initialization in pruner or state package ==="
rg "WriteDiskStateRoot" core/state/ core/genesis.go -B 2 -A 2
echo ""
echo "=== Check if there's any nil root handling in trie operations ==="
rg "common.Hash\{\}" trie/ -C 2 | head -50

Repository: morph-l2/go-ethereum
Length of output: 6160

Missing validation for empty hash from `ReadDiskStateRoot`

Proposed fix: validate non-empty hash

 	genesisRoot := genesis.Root()
-	if mptRoot, err := rawdb.ReadDiskStateRoot(db, genesisRoot); err == nil {
+	if mptRoot, err := rawdb.ReadDiskStateRoot(db, genesisRoot); err == nil && mptRoot != (common.Hash{}) {
 		genesisRoot = mptRoot
 	}
 	if err != nil {
 		return err
 	}
🧩 Analysis chain
User-specified roots cannot be validated against disk layer before Cap() call.

The unconditional `snaptree.Cap(root, 0)` call assumes `root` is a diff layer. Per the `Cap()` implementation (core/state/snapshot/snapshot.go:370-378), it returns an error `"snapshot [%#x] is disk layer"` if passed a disk layer root.

For auto-selected roots (when `root == empty`), this is safe: `Snapshots()` with `nodisk=true` returns only diff layers. However, when `root` is user-specified (lines 290+), only existence is validated, not that it is a non-disk layer. Similarly in `RecoverPruning()`, the `stateBloomRoot` from `findBloomFilter()` is validated to exist in layers but could theoretically be the current disk root.

While this would fail with a clear error message rather than silently corrupt data, adding a defensive check (e.g., `if root == snaptree.DiskRoot() { return fmt.Errorf(...) }`) before the Cap() call would provide earlier validation and clearer error messaging for invalid user input.
if root == snaptree.DiskRoot() { return fmt.Errorf(...) }) before the Cap() call would provide earlier validation and clearer error messaging for invalid user input.🤖 Prompt for AI Agents
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Analyzed this and concluded the defensive check is unnecessary: all code paths are already safe.

1. Auto-selected root (`root == empty`): defaults to `p.headHeader.Root`, which is the chain HEAD. Since `NewPruner` creates the snapshot tree with the same `headBlock.Root()`, HEAD is always the topmost diff layer (if it were the disk layer, there'd be no diff layers and pruning would be meaningless). Safe.

2. User-specified root: before reaching `Cap(root, 0)`, the code already calls `GenerateTrie(snaptree, root, ...)` at L291. If a user somehow passes the disk layer root, `Cap()` returns a clear error: `"snapshot [%#x] is disk layer"`. No silent corruption or data loss.

3. `RecoverPruning`: `stateBloomRoot` originates from a previous `Prune()` call (saved in the bloom filter filename). Additionally, L359-368 explicitly validates `stateBloomRoot` against `Snapshots(nodisk=true)`, which only returns diff layers; a disk layer root would be filtered out and fail with `"non-existent target state"`.

Adding a pre-check would only shift the error a few lines earlier with a slightly friendlier message, but provides no functional benefit. Leaving as-is.
@curryxbo, your analysis is well-reasoned and correct on all three points. The existing error surfacing from `Cap()` is clear and non-corrupting, and the `Snapshots(nodisk=true)` guard in `RecoverPruning` already provides the necessary validation for the bloom-filter recovery path. The suggested pre-check would have been purely cosmetic; acknowledged, and happy to leave it as-is.

(˶• ᴗ •˶)ᵒᵏᵎᵎ