diff --git a/core/state/pruner/pruner.go b/core/state/pruner/pruner.go
index f1f56b0fc..54c99adc8 100644
--- a/core/state/pruner/pruner.go
+++ b/core/state/pruner/pruner.go
@@ -63,6 +63,26 @@ var (
 	emptyKeccakCodeHash = codehash.EmptyKeccakCodeHash.Bytes()
 )
 
+// teeWriter writes to both the real database and the state bloom filter.
+// This ensures GenerateTrie persists trie nodes to disk (filling gaps from
+// unclean shutdowns) while also recording their hashes in the bloom filter
+// for pruning protection.
+type teeWriter struct {
+	db    ethdb.KeyValueWriter
+	bloom *stateBloom
+}
+
+func (w *teeWriter) Put(key, value []byte) error {
+	if err := w.db.Put(key, value); err != nil {
+		return err
+	}
+	return w.bloom.Put(key, value)
+}
+
+func (w *teeWriter) Delete(key []byte) error {
+	return w.db.Delete(key)
+}
+
 // Pruner is an offline tool to prune the stale state with the
 // help of the snapshot. The workflow of pruner is very simple:
 //
@@ -81,10 +101,6 @@ type Pruner struct {
 	trieCachePath string
 	headHeader    *types.Header
 	snaptree      *snapshot.Tree
-	// snapDiskRoot is set when the snapshot journal was missing and we fell
-	// back to the persisted snapshot disk-layer root. Prune() uses it as the
-	// pruning target when no explicit root is provided.
-	snapDiskRoot common.Hash
 }
 
 // NewPruner creates the pruner instance.
@@ -94,28 +110,8 @@ func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint6
 		return nil, errors.New("Failed to load head block")
 	}
 	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, false)
-	var snapDiskRoot common.Hash
 	if err != nil {
-		// The snapshot journal may be missing because geth was not shut down
-		// cleanly (SIGKILL before BlockChain.Stop could write the journal).
-		// Fall back: initialise the snapshot tree with the persisted disk
-		// snapshot root so that Prune() can still target that state.
-		snapDiskRoot = rawdb.ReadSnapshotRoot(db)
-		if snapDiskRoot == (common.Hash{}) {
-			return nil, err // No snapshot at all — nothing we can do.
-		}
-		log.Warn("Snapshot journal missing, falling back to snapshot disk-layer root",
-			"snapDiskRoot", snapDiskRoot, "chainHead", headBlock.Root())
-		// If the snapshot was mid-generation when the node was killed, New will
-		// resume and wait for generation to finish (async=false). This can take
-		// a long time for large state; the log below makes that visible.
-		log.Info("Loading snapshot from disk-layer root (may wait for snapshot generation to finish)...",
-			"snapDiskRoot", snapDiskRoot)
-		snaptree, err = snapshot.New(db, trie.NewDatabase(db), 256, snapDiskRoot, false, false, false)
-		if err != nil {
-			return nil, err
-		}
-		log.Info("Snapshot ready", "snapDiskRoot", snapDiskRoot)
+		return nil, err // The relevant snapshot(s) might not exist
 	}
 	// Sanitize the bloom filter size if it's too small.
 	if bloomSize < 256 {
@@ -133,7 +129,6 @@
 		trieCachePath: trieCachePath,
 		headHeader:    headBlock.Header(),
 		snaptree:      snaptree,
-		snapDiskRoot:  snapDiskRoot,
 	}, nil
 }
 
@@ -213,15 +208,8 @@ func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, sta
 	// Pruning is done, now drop the "useless" layers from the snapshot.
 	// Firstly, flushing the target layer into the disk. After that all
 	// diff layers below the target will all be merged into the disk.
-	//
-	// Skip Cap when the root is already the disk layer (no diff layers exist).
-	// This happens in the fallback path where the snapshot journal was missing
-	// and we initialised the tree directly from the persisted disk root — Cap
-	// would otherwise return "snapshot is disk layer" and abort needlessly.
-	if snaptree.DiskRoot() != root {
-		if err := snaptree.Cap(root, 0); err != nil {
-			return err
-		}
+	if err := snaptree.Cap(root, 0); err != nil {
+		return err
 	}
 	// Secondly, flushing the snapshot journal into the disk. All diff
 	// layers upon are dropped silently. Eventually the entire snapshot
@@ -262,7 +250,8 @@
 
 // Prune deletes all historical state nodes except the nodes belong to the
 // specified state version. If user doesn't specify the state version, use
-// the bottom-most snapshot diff layer as the target.
+// the HEAD state as the target so the node restarts at exactly the same
+// height it stopped at (zero block rewind).
 func (p *Pruner) Prune(root common.Hash) error {
 	// If the state bloom filter is already committed previously,
 	// reuse it for pruning instead of generating a new one. It's
@@ -275,71 +264,16 @@
 	if stateBloomRoot != (common.Hash{}) {
 		return RecoverPruning(p.datadir, p.db, p.trieCachePath)
 	}
-	// If the target state root is not specified, use the HEAD-127 as the
-	// target. The reason for picking it is:
-	// - in most of the normal cases, the related state is available
-	// - the probability of this layer being reorg is very low
-	var layers []snapshot.Snapshot
 	if root == (common.Hash{}) {
-		// When the snapshot journal was missing (unclean shutdown), we fell
-		// back to the persisted disk snapshot root in NewPruner. Use that
-		// root directly as the pruning target instead of requiring 128 diff
-		// layers that don't exist.
-		if p.snapDiskRoot != (common.Hash{}) {
-			log.Info("Using snapshot disk-layer root as pruning target (journal was missing)",
-				"snapDiskRoot", p.snapDiskRoot)
-			root = p.snapDiskRoot
-		} else {
-			// Retrieve all snapshot layers from the current HEAD.
-			// In theory there are 128 difflayers + 1 disk layer present,
-			// so 128 diff layers are expected to be returned.
-			layers = p.snaptree.Snapshots(p.headHeader.Root, 128, true)
-			if len(layers) != 128 {
-				// Reject if the accumulated diff layers are less than 128. It
-				// means in most of normal cases, there is no associated state
-				// with bottom-most diff layer.
-				return fmt.Errorf("snapshot not old enough yet: need %d more blocks", 128-len(layers))
-			}
-			// Use the bottom-most diff layer as the target
-			root = layers[len(layers)-1].Root()
-		}
-	}
-	// Ensure the root is really present. The weak assumption
-	// is the presence of root can indicate the presence of the
-	// entire trie.
-	if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 {
-		// The special case is for clique based networks(rinkeby, goerli
-		// and some other private networks), it's possible that two
-		// consecutive blocks will have same root. In this case snapshot
-		// difflayer won't be created. So HEAD-127 may not paired with
-		// head-127 layer. Instead the paired layer is higher than the
-		// bottom-most diff layer. Try to find the bottom-most snapshot
-		// layer with state available.
-		//
-		// Note HEAD and HEAD-1 is ignored. Usually there is the associated
-		// state available, but we don't want to use the topmost state
-		// as the pruning target.
-		var found bool
-		for i := len(layers) - 2; i >= 2; i-- {
-			if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 {
-				root = layers[i].Root()
-				found = true
-				log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
-				break
-			}
-		}
-		if !found {
-			if len(layers) > 0 {
-				return errors.New("no snapshot paired state")
-			}
-			return fmt.Errorf("associated state[%x] is not present", root)
-		}
+		// Use HEAD as the pruning target. On L2 chains reorgs are
+		// essentially non-existent, so the HEAD-127 safety margin
+		// from L1 is unnecessary. Combined with the teeWriter that
+		// persists the trie from snapshot, this guarantees zero
+		// height drop after pruning regardless of shutdown method.
+		root = p.headHeader.Root
+		log.Info("Selecting HEAD as the pruning target", "root", root, "height", p.headHeader.Number.Uint64())
 	} else {
-		if len(layers) > 0 {
-			log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.headHeader.Number.Uint64()-127)
-		} else {
-			log.Info("Selecting user-specified state as the pruning target", "root", root)
-		}
+		log.Info("Selecting user-specified state as the pruning target", "root", root)
 	}
 	// Before start the pruning, delete the clean trie cache first.
 	// It's necessary otherwise in the next restart we will hit the
@@ -347,19 +281,14 @@ func (p *Pruner) Prune(root common.Hash) error {
 	// state is picked for usage.
 	deleteCleanTrieCache(p.trieCachePath)
 
-	// All the state roots of the middle layer should be forcibly pruned,
-	// otherwise the dangling state will be left.
-	middleRoots := make(map[common.Hash]struct{})
-	for _, layer := range layers {
-		if layer.Root() == root {
-			break
-		}
-		middleRoots[layer.Root()] = struct{}{}
-	}
 	// Traverse the target state, re-construct the whole state trie and
-	// commit to the given bloom filter.
+	// commit to both the database and the bloom filter. Writing to the
+	// database ensures the target trie is complete on disk even after
+	// an unclean shutdown, so the post-pruning restart lands exactly
+	// at the target block instead of rewinding further.
 	start := time.Now()
-	if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
+	writer := &teeWriter{db: p.db, bloom: p.stateBloom}
+	if err := snapshot.GenerateTrie(p.snaptree, root, p.db, writer); err != nil {
 		return err
 	}
 	// Traverse the genesis, put all genesis state entries into the
@@ -374,7 +303,7 @@ func (p *Pruner) Prune(root common.Hash) error {
 		return err
 	}
 	log.Info("State bloom filter committed", "name", filterName)
-	return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start)
+	return prune(p.snaptree, root, p.db, p.stateBloom, filterName, nil, start)
 }
 
 // RecoverPruning will resume the pruning procedure during the system restart.
@@ -452,7 +381,14 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
 	if genesis == nil {
 		return errors.New("missing genesis block")
 	}
-	t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db))
+	// genesis.Root() may be a zkTrie root (overridden via GenesisStateRoot
+	// for block hash compatibility). Resolve it to the actual MPT disk root
+	// so trie.NewSecure can open the trie.
+	genesisRoot := genesis.Root()
+	if mptRoot, err := rawdb.ReadDiskStateRoot(db, genesisRoot); err == nil {
+		genesisRoot = mptRoot
+	}
+	t, err := trie.NewSecure(genesisRoot, trie.NewDatabase(db))
 	if err != nil {
 		return err
 	}
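
For reviewers who want to poke at the fan-out behaviour in isolation, below is a minimal, self-contained Go sketch of the teeWriter idea. It is illustrative only, not part of the patch: bloomStub is a hypothetical map-backed stand-in for the pruner's unexported stateBloom (the real filter hashes keys into a bloom filter rather than storing them), rawdb.NewMemoryDatabase replaces the node's real key-value store, and the import path assumes upstream go-ethereum (this fork's module path may differ).

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
)

// bloomStub is a hypothetical stand-in for the pruner's unexported
// stateBloom: it records marked keys in a map, whereas the real filter
// hashes them into a bloom filter.
type bloomStub struct{ marked map[string]bool }

func (b *bloomStub) Put(key, value []byte) error {
	b.marked[string(key)] = true
	return nil
}

// teeWriter mirrors the patch: every Put goes to the database first
// (persisting the trie node on disk) and then to the bloom filter
// (protecting that node from the sweep phase).
type teeWriter struct {
	db    ethdb.KeyValueWriter
	bloom *bloomStub
}

func (w *teeWriter) Put(key, value []byte) error {
	if err := w.db.Put(key, value); err != nil {
		return err
	}
	return w.bloom.Put(key, value)
}

// Delete deliberately touches only the database: marking a deleted key
// as live in the bloom filter would shield garbage from the sweep.
func (w *teeWriter) Delete(key []byte) error {
	return w.db.Delete(key)
}

func main() {
	db := rawdb.NewMemoryDatabase() // in-memory stand-in for the node database
	w := &teeWriter{db: db, bloom: &bloomStub{marked: make(map[string]bool)}}

	// A trie node written through the tee lands in both sinks.
	if err := w.Put([]byte("node-hash"), []byte{0x01}); err != nil {
		panic(err)
	}
	onDisk, _ := db.Has([]byte("node-hash"))
	fmt.Println("on disk:", onDisk, "in bloom:", w.bloom.marked["node-hash"])
	// Prints: on disk: true in bloom: true
}

Note the ordering in Put: the database write happens before the bloom write, so a key is only ever marked as live after the corresponding node has actually been persisted, which is the property the zero-rewind restart relies on.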