31 changes: 27 additions & 4 deletions core/blockchain.go
@@ -311,6 +311,13 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
var diskRoot common.Hash
if bc.cacheConfig.SnapshotLimit > 0 {
diskRoot = rawdb.ReadSnapshotRoot(bc.db)
// The stored snapshot root may be a zkStateRoot (Poseidon hash) written
// by an older code path. Translate it to the on-disk mptStateRoot so that
// the backward walk below can compare apples-to-apples with translated
// block roots.
if mptRoot, err := rawdb.ReadDiskStateRoot(bc.db, diskRoot); err == nil {
diskRoot = mptRoot
}
}
if diskRoot != (common.Hash{}) {
log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash(), "snaproot", diskRoot)
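For orientation, the ZK→MPT translation that this hunk (and several later ones) inlines can be read as one small helper. The sketch below is illustrative only and is not part of the diff: the helper name resolveDiskRoot is hypothetical, the ethdb.KeyValueReader parameter type is an assumption (the diff calls rawdb.ReadDiskStateRoot with both ethdb.Database and ethdb.KeyValueStore values), and the import paths are upstream go-ethereum ones that would need to match this fork's module path.

package zkroot // hypothetical package, for illustration only

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
)

// resolveDiskRoot maps a header state root (possibly a zkStateRoot, i.e. a
// Poseidon hash) to the locally-computed mptStateRoot under which tries and
// snapshots are actually stored on disk. If no mapping exists, the input root
// is returned unchanged, matching the "err == nil" guards used in this PR.
func resolveDiskRoot(db ethdb.KeyValueReader, root common.Hash) common.Hash {
	if mptRoot, err := rawdb.ReadDiskStateRoot(db, root); err == nil {
		return mptRoot
	}
	return root
}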
@@ -558,9 +565,17 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, root common.Hash, repair bo
beyondRoot := (root == common.Hash{}) // Flag whether we're beyond the requested root (no root, always true)

for {
// If a root threshold was requested but not yet crossed, check
if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root {
beyondRoot, rootNumber = true, newHeadBlock.NumberU64()
// If a root threshold was requested but not yet crossed, check.
// The block root may be a zkStateRoot while the target root is
// an mptStateRoot, so also compare via the on-disk mapping.
if root != (common.Hash{}) && !beyondRoot {
blockRoot := newHeadBlock.Root()
if mptRoot, err := rawdb.ReadDiskStateRoot(bc.db, blockRoot); err == nil {
blockRoot = mptRoot
}
if blockRoot == root {
beyondRoot, rootNumber = true, newHeadBlock.NumberU64()
}
}
if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil {
log.Trace("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
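With such a helper (again purely hypothetical, reusing the identifiers already in scope in this function), the rewind check above condenses to the following fragment, equivalent to the new lines in this hunk:

	// Inside the setHeadBeyondRoot rewind loop: compare the (possibly
	// translated) block root against the requested target root.
	if root != (common.Hash{}) && !beyondRoot && resolveDiskRoot(bc.db, newHeadBlock.Root()) == root {
		beyondRoot, rootNumber = true, newHeadBlock.NumberU64()
	}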
@@ -845,8 +860,16 @@ func (bc *BlockChain) Stop() {
// Ensure that the entirety of the state snapshot is journalled to disk.
var snapBase common.Hash
if bc.snaps != nil {
// The snapshot disk layer is keyed by mptStateRoot (after ZK→MPT
// translation in generateSnapshot/Rebuild), but CurrentBlock().Root()
// may be a zkStateRoot. Resolve to mptStateRoot so that Journal()
// can find the layer in the snapshot tree.
journalRoot := bc.CurrentBlock().Root()
if mptRoot, err := rawdb.ReadDiskStateRoot(bc.db, journalRoot); err == nil {
journalRoot = mptRoot
}
var err error
if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil {
if snapBase, err = bc.snaps.Journal(journalRoot); err != nil {
log.Error("Failed to journal state snapshot", "err", err)
}
}
134 changes: 86 additions & 48 deletions core/state/pruner/pruner.go
@@ -81,10 +81,6 @@ type Pruner struct {
trieCachePath string
headHeader *types.Header
snaptree *snapshot.Tree
// snapDiskRoot is set when the snapshot journal was missing and we fell
// back to the persisted snapshot disk-layer root. Prune() uses it as the
// pruning target when no explicit root is provided.
snapDiskRoot common.Hash
}

// NewPruner creates the pruner instance.
@@ -94,28 +90,22 @@ func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint6
return nil, errors.New("Failed to load head block")
}
snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, false)
var snapDiskRoot common.Hash
if err != nil {
// The snapshot journal may be missing because geth was not shut down
// cleanly (SIGKILL before BlockChain.Stop could write the journal).
// Fall back: initialise the snapshot tree with the persisted disk
// snapshot root so that Prune() can still target that state.
snapDiskRoot = rawdb.ReadSnapshotRoot(db)
if snapDiskRoot == (common.Hash{}) {
return nil, err // No snapshot at all — nothing we can do.
// The snapshot journal may be missing (unclean shutdown, or data copied
// from a running node). In that case in-memory diff layers are lost and
// the on-disk snapshot root lags behind the chain head by up to 128
// blocks. Retry with the persisted disk snapshot root so pruning can
// still target the available snapshot state.
diskRoot := rawdb.ReadSnapshotRoot(db)
if diskRoot == (common.Hash{}) {
return nil, err
}
log.Warn("Snapshot journal missing, falling back to snapshot disk-layer root",
"snapDiskRoot", snapDiskRoot, "chainHead", headBlock.Root())
// If the snapshot was mid-generation when the node was killed, New will
// resume and wait for generation to finish (async=false). This can take
// a long time for large state; the log below makes that visible.
log.Info("Loading snapshot from disk-layer root (may wait for snapshot generation to finish)...",
"snapDiskRoot", snapDiskRoot)
snaptree, err = snapshot.New(db, trie.NewDatabase(db), 256, snapDiskRoot, false, false, false)
log.Warn("Snapshot journal missing, falling back to disk snapshot root",
"diskRoot", diskRoot, "chainHead", headBlock.Root())
snaptree, err = snapshot.New(db, trie.NewDatabase(db), 256, diskRoot, false, false, false)
if err != nil {
return nil, err
}
log.Info("Snapshot ready", "snapDiskRoot", snapDiskRoot)
}
// Sanitize the bloom filter size if it's too small.
if bloomSize < 256 {
Expand All @@ -133,7 +123,6 @@ func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint6
trieCachePath: trieCachePath,
headHeader: headBlock.Header(),
snaptree: snaptree,
snapDiskRoot: snapDiskRoot,
}, nil
}
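The journal-missing fallback added here (and repeated in RecoverPruning further down) follows one pattern: try the chain-head root first, then retry with the persisted disk snapshot root. A condensed sketch under the same assumptions as above (hypothetical helper name, upstream import paths, and the snapshot.New argument order exactly as it appears in this diff):

package zkroot // hypothetical package, for illustration only

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/state/snapshot"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/trie"
)

// openSnapshotTree opens the snapshot tree at headRoot and, if the journal is
// missing (unclean shutdown, or data copied from a running node), retries at
// the persisted disk snapshot root so pruning or recovery can still proceed.
func openSnapshotTree(db ethdb.Database, headRoot common.Hash, recovery bool) (*snapshot.Tree, error) {
	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headRoot, false, false, recovery)
	if err == nil {
		return snaptree, nil
	}
	diskRoot := rawdb.ReadSnapshotRoot(db)
	if diskRoot == (common.Hash{}) {
		return nil, err // no snapshot at all, nothing to fall back to
	}
	log.Warn("Snapshot journal missing, falling back to disk snapshot root",
		"diskRoot", diskRoot, "chainHead", headRoot)
	return snapshot.New(db, trie.NewDatabase(db), 256, diskRoot, false, false, recovery)
}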

@@ -214,10 +203,9 @@ func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, sta
// Firstly, flushing the target layer into the disk. After that all
// diff layers below the target will all be merged into the disk.
//
// Skip Cap when the root is already the disk layer (no diff layers exist).
// This happens in the fallback path where the snapshot journal was missing
// and we initialised the tree directly from the persisted disk root — Cap
// would otherwise return "snapshot is disk layer" and abort needlessly.
// Skip Cap when the root is already the disk layer (no diff layers to
// flatten). This happens when we fell back to DiskRoot() above because
// fewer than 128 diff layers were available.
if snaptree.DiskRoot() != root {
if err := snaptree.Cap(root, 0); err != nil {
return err
@@ -280,26 +268,40 @@ func (p *Pruner) Prune(root common.Hash) error {
// - in most of the normal cases, the related state is available
// - the probability of this layer being reorg is very low
var layers []snapshot.Snapshot
// If an explicit root was provided and it is a zkStateRoot, translate it
// to the locally-computed mptStateRoot before using it as the pruning target.
if root != (common.Hash{}) {
if mptRoot, err := rawdb.ReadDiskStateRoot(p.db, root); err == nil {
log.Info("Translated explicit ZK root to MPT root for pruning", "zkRoot", root, "mptRoot", mptRoot)
root = mptRoot
}
}
if root == (common.Hash{}) {
// When the snapshot journal was missing (unclean shutdown), we fell
// back to the persisted disk snapshot root in NewPruner. Use that
// root directly as the pruning target instead of requiring 128 diff
// layers that don't exist.
if p.snapDiskRoot != (common.Hash{}) {
log.Info("Using snapshot disk-layer root as pruning target (journal was missing)",
"snapDiskRoot", p.snapDiskRoot)
root = p.snapDiskRoot
} else {
// Retrieve all snapshot layers from the current HEAD.
// In theory there are 128 difflayers + 1 disk layer present,
// so 128 diff layers are expected to be returned.
layers = p.snaptree.Snapshots(p.headHeader.Root, 128, true)
if len(layers) != 128 {
// Reject if the accumulated diff layers are less than 128. It
// means in most of normal cases, there is no associated state
// with bottom-most diff layer.
// Retrieve all snapshot layers from the current HEAD.
// In theory there are 128 difflayers + 1 disk layer present,
// so 128 diff layers are expected to be returned.
//
// The block header may carry a zkStateRoot (Poseidon hash) while
// snapshot diff layers are keyed by the locally-computed mptStateRoot.
// Translate before lookup so that Snapshots() can find the layer.
headRoot := p.headHeader.Root
if mptRoot, err := rawdb.ReadDiskStateRoot(p.db, headRoot); err == nil {
headRoot = mptRoot
}
layers = p.snaptree.Snapshots(headRoot, 128, true)
if len(layers) != 128 {
// Fewer than 128 diff layers available. This happens when the
// snapshot was recently rebuilt or the journal had no diff layers
// (e.g. clean shutdown right after a Rebuild). Fall back to the
// snapshot disk layer root so pruning can still proceed.
diskRoot := p.snaptree.DiskRoot()
if diskRoot == (common.Hash{}) {
return fmt.Errorf("snapshot not old enough yet: need %d more blocks", 128-len(layers))
}
log.Info("Fewer than 128 snapshot diff layers, using disk root as pruning target",
"layers", len(layers), "diskRoot", diskRoot)
root = diskRoot
} else {
// Use the bottom-most diff layer as the target
root = layers[len(layers)-1].Root()
}
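Putting the pieces of the new Prune() flow together, the target-selection logic reads roughly as follows. This is a sketch, not code from the PR: the method name selectPruneTarget is hypothetical, but the fields (p.db, p.snaptree, p.headHeader) and the calls (ReadDiskStateRoot, Snapshots, DiskRoot) are the ones used in this file, so it would sit in core/state/pruner/pruner.go and reuse that file's imports.

// selectPruneTarget condenses the new target selection in Prune().
func (p *Pruner) selectPruneTarget(root common.Hash) (common.Hash, error) {
	if root != (common.Hash{}) {
		// Explicit target: translate a possible zkStateRoot to the mptStateRoot.
		if mptRoot, err := rawdb.ReadDiskStateRoot(p.db, root); err == nil {
			root = mptRoot
		}
		return root, nil
	}
	// No explicit target: start from the head root, translated the same way,
	// because snapshot layers are keyed by mptStateRoot.
	headRoot := p.headHeader.Root
	if mptRoot, err := rawdb.ReadDiskStateRoot(p.db, headRoot); err == nil {
		headRoot = mptRoot
	}
	if layers := p.snaptree.Snapshots(headRoot, 128, true); len(layers) == 128 {
		// Bottom-most diff layer: old enough that a reorg across it is unlikely.
		return layers[len(layers)-1].Root(), nil
	}
	if diskRoot := p.snaptree.DiskRoot(); diskRoot != (common.Hash{}) {
		// Fewer than 128 diff layers (fresh rebuild, journal without diffs):
		// fall back to the snapshot disk layer.
		return diskRoot, nil
	}
	return common.Hash{}, errors.New("no usable snapshot root to prune against")
}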
@@ -406,7 +408,19 @@ func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) err
// still feasible to recover the pruning correctly.
snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, true)
if err != nil {
return err // The relevant snapshot(s) might not exist
// Same fallback as NewPruner: journal may be missing (unclean shutdown
// or data copied from a running node). Retry with the persisted disk
// snapshot root so recovery can proceed.
diskRoot := rawdb.ReadSnapshotRoot(db)
if diskRoot == (common.Hash{}) {
return err
}
log.Warn("Snapshot journal missing in RecoverPruning, falling back to disk snapshot root",
"diskRoot", diskRoot, "chainHead", headBlock.Root())
snaptree, err = snapshot.New(db, trie.NewDatabase(db), 256, diskRoot, false, false, true)
if err != nil {
return err
}
}
stateBloom, err := NewStateBloomFromDisk(stateBloomPath)
if err != nil {
@@ -422,9 +436,16 @@ func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) err

// All the state roots of the middle layers should be forcibly pruned,
// otherwise the dangling state will be left.
//
// Translate the head root: block headers carry zkStateRoot while snapshot
// layers are keyed by mptStateRoot.
headRoot := headBlock.Root()
if mptRoot, err := rawdb.ReadDiskStateRoot(db, headRoot); err == nil {
headRoot = mptRoot
}
var (
found bool
layers = snaptree.Snapshots(headBlock.Root(), 128, true)
layers = snaptree.Snapshots(headRoot, 128, true)
middleRoots = make(map[common.Hash]struct{})
)
for _, layer := range layers {
@@ -434,6 +455,17 @@ func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) err
}
middleRoots[layer.Root()] = struct{}{}
}
if !found {
// The stateBloomRoot may be the disk layer itself (when the original
// prune used DiskRoot() as target via the fallback path). Snapshots()
// with nodisk=true excludes the disk layer, so check explicitly.
if snaptree.DiskRoot() == stateBloomRoot {
found = true
// middleRoots stays empty — no diff layers between disk and target
log.Info("Pruning target is the snapshot disk layer, resuming recovery",
"stateBloomRoot", stateBloomRoot)
}
}
if !found {
log.Error("Pruning target state is not existent")
return errors.New("non-existent target state")
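The reason the extra check is needed, rather than relying on the loop alone: Snapshots() is called with nodisk=true, so the disk layer never appears in layers, and a prune that targeted DiskRoot() via the new fallback would otherwise be reported as a non-existent target. A condensed sketch of the combined check (fragment of RecoverPruning, reusing the identifiers already in scope):

	found := false
	for _, layer := range layers {
		if layer.Root() == stateBloomRoot {
			found = true
			break
		}
		middleRoots[layer.Root()] = struct{}{}
	}
	// Snapshots() excluded the disk layer (nodisk=true), so accept it explicitly
	// when the interrupted prune targeted it via the DiskRoot() fallback.
	if !found && snaptree.DiskRoot() == stateBloomRoot {
		found = true
	}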
Expand All @@ -452,7 +484,13 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
if genesis == nil {
return errors.New("missing genesis block")
}
t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db))
// The genesis block root may be a zkStateRoot; resolve to the
// on-disk mptStateRoot so trie.NewSecure can find the nodes.
genesisRoot := genesis.Root()
if mptRoot, err := rawdb.ReadDiskStateRoot(db, genesisRoot); err == nil {
genesisRoot = mptRoot
}
t, err := trie.NewSecure(genesisRoot, trie.NewDatabase(db))
if err != nil {
return err
}
Expand Down Expand Up @@ -531,11 +569,11 @@ const warningLog = `

WARNING!

The clean trie cache is not found. Please delete it by yourself after the
The clean trie cache is not found. Please delete it by yourself after the
pruning. Remember don't start the Geth without deleting the clean trie cache
otherwise the entire database may be damaged!

Check the command description "geth snapshot prune-zk-state --help" for more details.
Check the command description "geth snapshot prune-state --help" for more details.
`

func deleteCleanTrieCache(path string) {
17 changes: 17 additions & 0 deletions core/state/snapshot/generate.go
@@ -147,6 +147,23 @@ func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) {
// database and head block asynchronously. The snapshot is returned immediately
// and generation is continued in the background until done.
func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) *diskLayer {
// For MPT nodes that sync ZK-era blocks the block header carries a
// zkStateRoot (Poseidon hash) while the on-disk trie is keyed by the
// locally-computed mptStateRoot (Keccak256 hash). The snapshot trie
// walk uses trie.New(root, triedb) directly — it does NOT go through
// cachingDB.OpenTrie, so it never sees the ReadDiskStateRoot redirect.
// If we leave root as the zkStateRoot the trie lookup fails immediately
// with "missing trie node", the generator goroutine blocks on genAbort
// forever, and waitBuild() hangs the caller.
//
// Resolve the zkStateRoot → mptStateRoot mapping before doing anything
// else so that both WriteSnapshotRoot and the generator goroutine use
// the correct on-disk root.
if mptRoot, err := rawdb.ReadDiskStateRoot(diskdb, root); err == nil {
log.Info("Snapshot generation: resolved ZK state root to MPT root",
"zkRoot", root, "mptRoot", mptRoot)
root = mptRoot
}
// Create a new disk layer with an initialized state marker at zero
var (
stats = &generatorStats{start: time.Now()}
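The hang described in the comment above is easy to see in code. A sketch of the lookup the generator goroutine performs (trie.New as in upstream go-ethereum; the error text is indicative, not an exact quote):

	// What the generator would run into without the resolution above:
	if _, err := trie.New(root, triedb); err != nil {
		// With an untranslated zkStateRoot (Poseidon hash) the root node is not
		// present in the Keccak-keyed trie database, so this fails immediately
		// ("missing trie node ...") and generation could never make progress.
		log.Error("Cannot open account trie for snapshot generation", "root", root, "err", err)
	}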
28 changes: 19 additions & 9 deletions core/state/snapshot/journal.go
@@ -159,17 +159,27 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
// which is below the snapshot. In this case the snapshot can be recovered
// by re-executing blocks but right now it's unavailable.
if head := snapshot.Root(); head != root {
// If it's legacy snapshot, or it's new-format snapshot but
// it's not in recovery mode, returns the error here for
// rebuilding the entire snapshot forcibly.
if !recovery {
// Special case: MPT nodes syncing ZK-era blocks store the snapshot
// under the locally-computed mptStateRoot while the block header
// (and therefore the `root` argument) carries the zkStateRoot.
// If the on-disk snapshot root equals the MPT translation of the
// requested root, the snapshot is perfectly valid — accept it
// without triggering a costly rebuild.
if translated, err := rawdb.ReadDiskStateRoot(diskdb, root); err == nil && head == translated {
log.Info("Snapshot root is MPT translation of block root — accepting",
"blockRoot", root, "mptRoot", head)
} else if !recovery {
// If it's legacy snapshot, or it's new-format snapshot but
// it's not in recovery mode, returns the error here for
// rebuilding the entire snapshot forcibly.
return nil, false, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root)
} else {
// It's in snapshot recovery, the assumption is held that
// the disk layer is always higher than chain head. It can
// be eventually recovered when the chain head beyonds the
// disk layer.
log.Warn("Snapshot is not continuous with chain", "snaproot", head, "chainroot", root)
}
// It's in snapshot recovery, the assumption is held that
// the disk layer is always higher than chain head. It can
// be eventually recovered when the chain head beyonds the
// disk layer.
log.Warn("Snapshot is not continuous with chain", "snaproot", head, "chainroot", root)
}
// Everything loaded correctly, resume any suspended operations
if !generator.Done {
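The acceptance condition added in this hunk can be summarized as a small predicate. Sketch only: the function name is hypothetical, and it assumes ReadDiskStateRoot accepts an ethdb.KeyValueStore (it is called with one in this hunk); it would reuse the imports already present in core/state/snapshot/journal.go.

// snapshotMatchesChain reports whether a persisted snapshot root is consistent
// with the requested chain root, either directly or via the ZK→MPT mapping.
func snapshotMatchesChain(diskdb ethdb.KeyValueStore, snapRoot, chainRoot common.Hash) bool {
	if snapRoot == chainRoot {
		return true
	}
	translated, err := rawdb.ReadDiskStateRoot(diskdb, chainRoot)
	return err == nil && snapRoot == translated
}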
8 changes: 5 additions & 3 deletions core/state/snapshot/snapshot.go
@@ -730,9 +730,11 @@ func (t *Tree) Rebuild(root common.Hash) {
// Start generating a new snapshot from scratch on a background thread. The
// generator will run a wiper first if there's not one running right now.
log.Info("Rebuilding state snapshot")
t.layers = map[common.Hash]snapshot{
root: generateSnapshot(t.diskdb, t.triedb, t.cache, root),
}
base := generateSnapshot(t.diskdb, t.triedb, t.cache, root)
// generateSnapshot may have translated root (e.g. zkStateRoot → mptStateRoot
// for MPT nodes syncing ZK-era blocks). Use base.root as the map key so that
// all subsequent Snapshot()/Update() lookups find the layer correctly.
t.layers = map[common.Hash]snapshot{base.root: base}
}
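For completeness, a caller-side fragment showing the lookup that this keying keeps working. The identifiers snaps, db and blockRoot are hypothetical, and resolveDiskRoot is the sketch helper from the first note; Tree.Snapshot is assumed to behave as in upstream go-ethereum (a plain lookup in t.layers).

	// Callers such as the Journal() path changed earlier in this PR resolve the
	// header root to the mptStateRoot before touching the tree, so the rebuilt
	// disk layer must be registered under base.root (the translated root) for
	// this lookup to succeed rather than return nil.
	if layer := snaps.Snapshot(resolveDiskRoot(db, blockRoot)); layer == nil {
		log.Error("Snapshot layer missing after rebuild", "root", blockRoot)
	}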

// AccountIterator creates a new account iterator for the specified root hash and