From 43fc208ae36bc875c5814c97606c1103f27159ec Mon Sep 17 00:00:00 2001 From: Jordan Schalm Date: Thu, 16 May 2024 14:13:38 -0400 Subject: [PATCH 1/3] remove unused file --- state/protocol/snapshots/dynamic_bootstrap.go | 122 ------------------ 1 file changed, 122 deletions(-) delete mode 100644 state/protocol/snapshots/dynamic_bootstrap.go diff --git a/state/protocol/snapshots/dynamic_bootstrap.go b/state/protocol/snapshots/dynamic_bootstrap.go deleted file mode 100644 index 6647fa101bf..00000000000 --- a/state/protocol/snapshots/dynamic_bootstrap.go +++ /dev/null @@ -1,122 +0,0 @@ -package snapshots - -import ( - "errors" - "fmt" - - "github.com/onflow/flow-go/model/flow" - "github.com/onflow/flow-go/state/protocol" -) - -var ErrSnapshotPhaseMismatch = errors.New("snapshot does not contain a valid sealing segment") -var ErrSnapshotHistoryLimit = errors.New("reached the snapshot history limit") - -// GetDynamicBootstrapSnapshot returns `refSnapshot` if it is valid for use in dynamic bootstrapping. -// Otherwise returns an error. (Effectively this validates that the input snapshot can be used in dynamic bootstrapping.) -// Expected error returns during normal operations: -// * ErrSnapshotPhaseMismatch - snapshot does not contain a valid sealing segment -// All other errors should be treated as exceptions. -func GetDynamicBootstrapSnapshot(state protocol.State, refSnapshot protocol.Snapshot) (protocol.Snapshot, error) { - return getValidSnapshot(state, refSnapshot, 0, false, 0) -} - -// GetClosestDynamicBootstrapSnapshot will return a valid snapshot for dynamic bootstrapping -// Expected error returns during normal operations: -// If a snapshot does contain an invalid sealing segment query the state -// by height of each block in the segment and return a snapshot at the point -// where the transition happens. -// * ErrSnapshotPhaseMismatch - snapshot does not contain a valid sealing segment -// * ErrSnapshotHistoryLimit - reached the snapshot history limit -// All other errors should be treated as exceptions. -func GetClosestDynamicBootstrapSnapshot(state protocol.State, refSnapshot protocol.Snapshot, snapshotHistoryLimit int) (protocol.Snapshot, error) { - return getValidSnapshot(state, refSnapshot, 0, true, snapshotHistoryLimit) -} - -// GetCounterAndPhase returns the current epoch counter and phase, at `height`. -// No errors are expected during normal operation. -func GetCounterAndPhase(state protocol.State, height uint64) (uint64, flow.EpochPhase, error) { - snapshot := state.AtHeight(height) - - counter, err := snapshot.Epochs().Current().Counter() - if err != nil { - return 0, 0, fmt.Errorf("failed to get counter for block (height=%d): %w", height, err) - } - - phase, err := snapshot.Phase() - if err != nil { - return 0, 0, fmt.Errorf("failed to get phase for block (height=%d): %w", height, err) - } - - return counter, phase, nil -} - -func IsEpochOrPhaseDifferent(counter1, counter2 uint64, phase1, phase2 flow.EpochPhase) bool { - return counter1 != counter2 || phase1 != phase2 -} - -// getValidSnapshot will return a valid snapshot that has a sealing segment which -// 1. does not contain any blocks that span an epoch transition -// 2. does not contain any blocks that span an epoch phase transition -// If a snapshot does contain an invalid sealing segment query the state -// by height of each block in the segment and return a snapshot at the point -// where the transition happens. -// Expected error returns during normal operations: -// * ErrSnapshotPhaseMismatch - snapshot does not contain a valid sealing segment -// * ErrSnapshotHistoryLimit - failed to find a valid snapshot after checking `snapshotHistoryLimit` blocks -// All other errors should be treated as exceptions. -func getValidSnapshot( - state protocol.State, - snapshot protocol.Snapshot, - blocksVisited int, - findNextValidSnapshot bool, - snapshotHistoryLimit int, -) (protocol.Snapshot, error) { - segment, err := snapshot.SealingSegment() - if err != nil { - return nil, fmt.Errorf("failed to get sealing segment: %w", err) - } - - counterAtHighest, phaseAtHighest, err := GetCounterAndPhase(state, segment.Highest().Header.Height) - if err != nil { - return nil, fmt.Errorf("failed to get counter and phase at highest block in the segment: %w", err) - } - - counterAtLowest, phaseAtLowest, err := GetCounterAndPhase(state, segment.Sealed().Header.Height) - if err != nil { - return nil, fmt.Errorf("failed to get counter and phase at lowest block in the segment: %w", err) - } - - // Check if the counters and phase are different this indicates that the sealing segment - // of the snapshot requested spans either an epoch transition or phase transition. - if IsEpochOrPhaseDifferent(counterAtHighest, counterAtLowest, phaseAtHighest, phaseAtLowest) { - if !findNextValidSnapshot { - return nil, ErrSnapshotPhaseMismatch - } - - // Visit each node in strict order of decreasing height starting at head - // to find the block that straddles the transition boundary. - for i := len(segment.Blocks) - 1; i >= 0; i-- { - blocksVisited++ - - // NOTE: Check if we have reached our history limit, in edge cases - // where the sealing segment is abnormally long we want to short circuit - // the recursive calls and return an error. The API caller can retry. - if blocksVisited > snapshotHistoryLimit { - return nil, fmt.Errorf("%w: (%d)", ErrSnapshotHistoryLimit, snapshotHistoryLimit) - } - - counterAtBlock, phaseAtBlock, err := GetCounterAndPhase(state, segment.Blocks[i].Header.Height) - if err != nil { - return nil, fmt.Errorf("failed to get epoch counter and phase for snapshot at block %s: %w", segment.Blocks[i].ID(), err) - } - - // Check if this block straddles the transition boundary, if it does return the snapshot - // at that block height. - if IsEpochOrPhaseDifferent(counterAtHighest, counterAtBlock, phaseAtHighest, phaseAtBlock) { - return getValidSnapshot(state, state.AtHeight(segment.Blocks[i].Header.Height), blocksVisited, true, snapshotHistoryLimit) - } - } - } - - return snapshot, nil -} From 1292ef9f28807d3728a7b5562d854d4a01e544a8 Mon Sep 17 00:00:00 2001 From: Jordan Schalm Date: Thu, 16 May 2024 14:15:39 -0400 Subject: [PATCH 2/3] modify dynamic startup logic - fail if both a snapshot file and dynamic startup flags are provided - log the root height and ID once dynamic startup completes --- cmd/dynamic_startup.go | 45 +++++++++++++++++++++++++-------- cmd/util/cmd/common/snapshot.go | 14 +++++++--- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/cmd/dynamic_startup.go b/cmd/dynamic_startup.go index 616773c1e00..97cb118eacc 100644 --- a/cmd/dynamic_startup.go +++ b/cmd/dynamic_startup.go @@ -47,45 +47,59 @@ func ValidateDynamicStartupFlags(accessPublicKey, accessAddress string, startPha // DynamicStartPreInit is the pre-init func that will check if a node has already bootstrapped // from a root protocol snapshot. If not attempt to get a protocol snapshot where the following // conditions are met. -// 1. Target epoch < current epoch (in the past), set root snapshot to current snapshot -// 2. Target epoch == "current", wait until target phase == current phase before setting root snapshot -// 3. Target epoch > current epoch (in future), wait until target epoch and target phase is reached before +// 1. Target epoch < current epoch (in the past), set root snapshot to current snapshot +// 2. Target epoch == "current", wait until target phase == current phase before setting root snapshot +// 3. Target epoch > current epoch (in future), wait until target epoch and target phase is reached before +// // setting root snapshot func DynamicStartPreInit(nodeConfig *NodeConfig) error { ctx := context.Background() log := nodeConfig.Logger.With().Str("component", "dynamic-startup").Logger() - // skip dynamic startup if the protocol state is bootstrapped + // CASE 1: The state is already bootstrapped - nothing to do isBootstrapped, err := badgerstate.IsBootstrapped(nodeConfig.DB) if err != nil { return fmt.Errorf("could not check if state is boostrapped: %w", err) } if isBootstrapped { - log.Info().Msg("protocol state already bootstrapped, skipping dynamic startup") + log.Debug().Msg("protocol state already bootstrapped, skipping dynamic startup") return nil } - // skip dynamic startup if a root snapshot file is specified - this takes priority + // CASE 2: The state is not already bootstrapped. + // We will either bootstrap from a file or using Dynamic Startup. rootSnapshotPath := filepath.Join(nodeConfig.BootstrapDir, bootstrap.PathRootProtocolStateSnapshot) - if utilsio.FileExists(rootSnapshotPath) { - log.Info(). + rootSnapshotFileExists := utilsio.FileExists(rootSnapshotPath) + dynamicStartupFlagsSet := anyDynamicStartupFlagsAreSet(nodeConfig) + + // If the user has provided both a root snapshot file AND dynamic startup specification, return an error. + // Previously, the snapshot file would take precedence over the Dynamic Startup flags. + // This caused operators to inadvertently bootstrap from an old snapshot file when attempting to use Dynamic Startup. + // Therefore, we instead require the operator to explicitly choose one option or the other. + if rootSnapshotFileExists && dynamicStartupFlagsSet { + return fmt.Errorf("must specify either a root snapshot file (%s) or Dynamic Startup flags (--dynamic-startup-*) but not both", rootSnapshotPath) + } + + // CASE 2.1: Use the root snapshot file to bootstrap. + if rootSnapshotFileExists { + log.Debug(). Str("root_snapshot_path", rootSnapshotPath). Msg("protocol state is not bootstrapped, will bootstrap using configured root snapshot file, skipping dynamic startup") return nil } + // CASE 2.2: Use Dynamic Startup to bootstrap. + // get flow client with secure client connection to download protocol snapshot from access node config, err := common.NewFlowClientConfig(nodeConfig.DynamicStartupANAddress, nodeConfig.DynamicStartupANPubkey, flow.ZeroID, false) if err != nil { return fmt.Errorf("failed to create flow client config for node dynamic startup pre-init: %w", err) } - flowClient, err := common.FlowClient(config) if err != nil { return fmt.Errorf("failed to create flow client for node dynamic startup pre-init: %w", err) } - getSnapshotFunc := func(ctx context.Context) (protocol.Snapshot, error) { return common.GetSnapshot(ctx, flowClient) } @@ -95,7 +109,6 @@ func DynamicStartPreInit(nodeConfig *NodeConfig) error { if err != nil { return fmt.Errorf("failed to validate flag --dynamic-start-epoch: %w", err) } - startupPhase := flow.GetEpochPhase(nodeConfig.DynamicStartupEpochPhase) // validate the rest of the dynamic startup flags @@ -121,6 +134,16 @@ func DynamicStartPreInit(nodeConfig *NodeConfig) error { return nil } +// anyDynamicStartupFlagsAreSet returns true if either the AN address or AN public key for Dynamic Startup are set. +// All other Dynamic Startup flags have default values (and aren't required) hence they aren't checked here. +// Both these flags must be set for Dynamic Startup to occur. +func anyDynamicStartupFlagsAreSet(config *NodeConfig) bool { + if len(config.DynamicStartupANAddress) > 0 || len(config.DynamicStartupANPubkey) > 0 { + return true + } + return false +} + // validateDynamicStartEpochFlags parse the start epoch flag and return the uin64 value, // if epoch = current return the current epoch counter func validateDynamicStartEpochFlags(ctx context.Context, getSnapshot common.GetProtocolSnapshot, flagEpoch string) (uint64, error) { diff --git a/cmd/util/cmd/common/snapshot.go b/cmd/util/cmd/common/snapshot.go index 5d73895d5ff..08ae0e6c524 100644 --- a/cmd/util/cmd/common/snapshot.go +++ b/cmd/util/cmd/common/snapshot.go @@ -9,6 +9,8 @@ import ( "github.com/rs/zerolog" "github.com/sethvargo/go-retry" + "github.com/onflow/flow-go/utils/logging" + "github.com/onflow/flow-go-sdk/access/grpc" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/state/protocol" @@ -88,10 +90,16 @@ func GetSnapshotAtEpochAndPhase(ctx context.Context, log zerolog.Logger, startup // check if we are in or past the target epoch and phase if currEpochCounter > startupEpoch || (currEpochCounter == startupEpoch && currEpochPhase >= startupEpochPhase) { + head, err := snapshot.Head() + if err != nil { + return fmt.Errorf("could not get Dynamic Startup snapshot header: %w", err) + } log.Info(). - Dur("time-waiting", time.Since(start)). - Uint64("current-epoch", currEpochCounter). - Str("current-epoch-phase", currEpochPhase.String()). + Dur("time_waiting", time.Since(start)). + Uint64("current_epoch", currEpochCounter). + Str("current_epoch_phase", currEpochPhase.String()). + Hex("finalized_root_block_id", logging.ID(head.ID())). + Uint64("finalized_block_height", head.Height). Msg("finished dynamic startup - reached desired epoch and phase") return nil From a0e67e79153eefa370a738a071abbc49d4d86915 Mon Sep 17 00:00:00 2001 From: Jordan Schalm Date: Fri, 17 May 2024 12:04:46 -0400 Subject: [PATCH 3/3] test mocks accomodate new log field --- cmd/dynamic_startup_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/dynamic_startup_test.go b/cmd/dynamic_startup_test.go index 27da13fca72..381a3712c6b 100644 --- a/cmd/dynamic_startup_test.go +++ b/cmd/dynamic_startup_test.go @@ -29,6 +29,7 @@ func getMockSnapshot(t *testing.T, epochCounter uint64, phase flow.EpochPhase) * snapshot := new(protocolmock.Snapshot) snapshot.On("Epochs").Return(epochQuery) snapshot.On("Phase").Return(phase, nil) + snapshot.On("Head").Return(unittest.BlockHeaderFixture(), nil) return snapshot }