diff --git a/Makefile b/Makefile index ec36a316f827..a25686247a18 100644 --- a/Makefile +++ b/Makefile @@ -505,13 +505,21 @@ localnet-debug: localnet-stop localnet-build-dlv localnet-build-nodes .PHONY: localnet-start localnet-stop localnet-debug localnet-build-env localnet-build-dlv localnet-build-nodes -test-system: build-v53 build +# build-system-test-current builds the binaries necessary for running system tests, but only those on the current branch +# this is useful if you are iterating on tests which rely on changes to the current branch only (which is most common in development) +build-system-test-current: build cosmovisor mkdir -p ./tests/systemtests/binaries/ cp $(BUILDDIR)/simd ./tests/systemtests/binaries/ + cp tools/cosmovisor/cosmovisor ./tests/systemtests/binaries/ + +# build-system-test builds the binaries necessary for running system tests and places them in the correct locations +build-system-test: build-system-test-current build-v53 mkdir -p ./tests/systemtests/binaries/v0.53 mv $(BUILDDIR)/simdv53 ./tests/systemtests/binaries/v0.53/simd + +test-system: build-system-test $(MAKE) -C tests/systemtests test -.PHONY: test-system +.PHONY: build-system-test-current build-system-test test-system # build-v53 checks out the v0.53.x branch, builds the binary, and renames it to simdv53. build-v53: diff --git a/simapp/upgrades.go b/simapp/upgrades.go index 1c8e2c9d8146..a0ed0fb2f214 100644 --- a/simapp/upgrades.go +++ b/simapp/upgrades.go @@ -1,7 +1,12 @@ package simapp import ( + "bytes" "context" + "fmt" + "os" + + "github.com/cosmos/gogoproto/jsonpb" storetypes "cosmossdk.io/store/types" @@ -16,21 +21,55 @@ import ( // NOTE: This upgrade defines a reference implementation of what an upgrade // could look like when an application is migrating from Cosmos SDK version // v0.53.x to v0.54.x. 
-const UpgradeName = "v053-to-v054" +const ( + UpgradeName = "v053-to-v054" + ManualUpgradeName = "manual1" +) func (app SimApp) RegisterUpgradeHandlers() { app.UpgradeKeeper.SetUpgradeHandler( UpgradeName, func(ctx context.Context, _ upgradetypes.Plan, fromVM module.VersionMap) (module.VersionMap, error) { - sdk.UnwrapSDKContext(ctx).Logger().Debug("this is a debug level message to test that verbose logging mode has properly been enabled during a chain upgrade") + logger := sdk.UnwrapSDKContext(ctx).Logger() + logger.Debug("this is a debug level message to test that verbose logging mode has properly been enabled during a chain upgrade") + logger.Debug(fmt.Sprintf("applying upgrade %s", UpgradeName)) return app.ModuleManager.RunMigrations(ctx, app.Configurator(), fromVM) }, ) + // we add another upgrade, to be performed manually which does some small state breakage + app.UpgradeKeeper.SetUpgradeHandler( + ManualUpgradeName, + func(ctx context.Context, plan upgradetypes.Plan, fromVM module.VersionMap) (module.VersionMap, error) { + logger := sdk.UnwrapSDKContext(ctx).Logger() + logger.Debug(fmt.Sprintf("applying upgrade %s", ManualUpgradeName)) + // do some minimal state breaking update + err := app.GovKeeper.Constitution.Set(ctx, + fmt.Sprintf("we have expected upgrade %q and that's now our constitution", plan.Name)) + return fromVM, err + }, + ) + // we check that we can read the upgrade info from disk, which is necessary for setting store key upgrades upgradeInfo, err := app.UpgradeKeeper.ReadUpgradeInfoFromDisk() if err != nil { panic(err) } + if upgradeInfo.Name != "" { + app.Logger().Info("read upgrade info from disk", "upgrade_info", upgradeInfo) + } + + // this allows to test stateful manual upgrades with Cosmovisor + if manualUpgradeVar, ok := os.LookupEnv("SIMAPP_MANUAL_UPGRADE"); ok { + var manualUpgrade upgradetypes.Plan + err := (&jsonpb.Unmarshaler{}).Unmarshal(bytes.NewBufferString(manualUpgradeVar), &manualUpgrade) + if err != nil { + panic("invalid 
SIMAPP_MANUAL_UPGRADE: " + err.Error()) + } + err = app.UpgradeKeeper.SetManualUpgrade(&manualUpgrade) + if err != nil { + panic("failed to set manual upgrade: " + err.Error()) + } + } if upgradeInfo.Name == UpgradeName && !app.UpgradeKeeper.IsSkipHeight(upgradeInfo.Height) { storeUpgrades := storetypes.StoreUpgrades{ diff --git a/systemtests/system.go b/systemtests/system.go index cd391cad8043..606d52879d6a 100644 --- a/systemtests/system.go +++ b/systemtests/system.go @@ -175,10 +175,24 @@ func (s *SystemUnderTest) SetupChain() { func (s *SystemUnderTest) StartChain(t *testing.T, xargs ...string) { t.Helper() + s.doStartChain(t, false, xargs...) +} + +// StartChainWithCosmovisor starts the chain wrapping its execution with Cosmovisor. +func (s *SystemUnderTest) StartChainWithCosmovisor(t *testing.T, xargs ...string) { + t.Helper() + s.doStartChain(t, true, xargs...) +} + +func (s *SystemUnderTest) doStartChain(t *testing.T, useCosmovisor bool, xargs ...string) { + t.Helper() + if useCosmovisor { + s.initCosmovisor(t) + } s.Log("Start chain\n") s.ChainStarted = true // HACK: force db_backend - s.startNodesAsync(t, append([]string{"start", "--log_level=info", "--log_no_color", "--db_backend=goleveldb"}, xargs...)...) + s.startNodesAsync(t, useCosmovisor, append([]string{"start", "--log_level=info", "--log_no_color", "--db_backend=goleveldb"}, xargs...)...) 
s.AwaitNodeUp(t, s.rpcAddr) @@ -195,6 +209,48 @@ func (s *SystemUnderTest) StartChain(t *testing.T, xargs ...string) { s.AwaitNextBlock(t, 10e9) } +func (s *SystemUnderTest) cosmovisorEnv(t *testing.T, home string) []string { + t.Helper() + absHome, err := filepath.Abs(home) + require.NoError(t, err) + return []string{ + fmt.Sprintf("DAEMON_HOME=%s", absHome), + fmt.Sprintf("DAEMON_NAME=%s", s.projectName), + } +} + +func (s *SystemUnderTest) cosmovisorPath() string { + return filepath.Join(WorkDir, "binaries", "cosmovisor") +} + +// ExecCosmovisor executes the Cosmovisor binary with the given arguments +// for each node in the network with the home directory set properly for each node. +func (s *SystemUnderTest) ExecCosmovisor(t *testing.T, async bool, args ...string) { + s.withEachNodeHome(func(i int, home string) { + env := s.cosmovisorEnv(t, home) + t.Logf("Calling Cosmovisor with args %+v and env %+v", args, env) + cmd := exec.Command( + s.cosmovisorPath(), + args..., + ) + cmd.Dir = WorkDir + env = append(env, "COSMOVISOR_COLOR_LOGS=false") + cmd.Env = env + if async { + require.NoError(t, cmd.Start(), "cosmovisor init %d", i) + s.awaitProcessCleanup(cmd) + } else { + require.NoError(t, cmd.Run(), "cosmovisor init %d", i) + } + }) +} + +func (s *SystemUnderTest) initCosmovisor(t *testing.T) { + t.Helper() + binary := locateExecutable(s.execBinary) + s.ExecCosmovisor(t, false, "init", binary) +} + // MarkDirty whole chain will be reset when marked dirty func (s *SystemUnderTest) MarkDirty() { s.dirty = true @@ -591,16 +647,32 @@ func RunShellCmd(cmd string, args ...string) (string, error) { } // startNodesAsync runs the given app cli command for all cluster nodes and returns without waiting -func (s *SystemUnderTest) startNodesAsync(t *testing.T, xargs ...string) { +func (s *SystemUnderTest) startNodesAsync(t *testing.T, useCosmovisor bool, xargs ...string) { t.Helper() s.withEachNodeHome(func(i int, home string) { - args := append(xargs, "--home="+home) + 
absHome, err := filepath.Abs(home) + require.NoError(t, err, "failed to get absolute home path") + args := append(xargs, "--home="+absHome) + var binary string + var env []string + if useCosmovisor { + binary = s.cosmovisorPath() + args = append([]string{"run"}, args...) // cosmovisor run + cfgPath := filepath.Join(absHome, "cosmovisor", "config.toml") + args = append(args, "--cosmovisor-config", cfgPath) + env = s.cosmovisorEnv(t, absHome) + } else { + binary = locateExecutable(s.execBinary) + } s.Logf("Execute `%s %s`\n", s.execBinary, strings.Join(args, " ")) - cmd := exec.Command( //nolint:gosec // used by tests only - locateExecutable(s.execBinary), + cmd := exec.Command( + binary, args..., ) cmd.Dir = WorkDir + if useCosmovisor { + cmd.Env = env + } s.watchLogs(i, cmd) require.NoError(t, cmd.Start(), "node %d", i) s.Logf("Node started: %d\n", cmd.Process.Pid) diff --git a/systemtests/test_runner.go b/systemtests/test_runner.go index e4b79483a120..b0c8066a203c 100644 --- a/systemtests/test_runner.go +++ b/systemtests/test_runner.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "os/exec" + "path/filepath" "strconv" "strings" "testing" @@ -17,6 +18,9 @@ var ( Sut *SystemUnderTest Verbose bool execBinaryName string + // Store original configuration for ResetSut + originalNodesCount int + originalBlockTime time.Duration ) func RunTests(m *testing.M) { @@ -47,6 +51,11 @@ func RunTests(m *testing.M) { } execBinaryName = *execBinary + // store original configuration for ResetSut, we do this with global variables for now since Sut is global + // and we want the same initial configuration when we do a complete reset + originalNodesCount = *nodesCount + originalBlockTime = *blockTime + Sut = NewSystemUnderTest(*execBinary, Verbose, *nodesCount, *blockTime) Sut.SetupChain() // setup chain and keyring @@ -120,6 +129,25 @@ const ( |_| \__,_|_|_|\___|\__,_|` ) +// ResetSut completely resets Sut by deleting all state and creating a fresh instance of Sut. 
+func ResetSut(t *testing.T) { + t.Helper() + // stop current instance if it exists + if Sut != nil { + Sut.StopChain() + } + + // delete entire testnet directory to remove all state + err := os.RemoveAll(filepath.Join(WorkDir, "testnet")) + if err != nil { + t.Fatalf("failed to remove testnet directory: %v", err) + } + + // create fresh Sut instance with original configuration + Sut = NewSystemUnderTest(execBinaryName, Verbose, originalNodesCount, originalBlockTime) + Sut.SetupChain() +} + func printResultFlag(ok bool) { if ok { fmt.Println(successFlag) diff --git a/tests/systemtests/cosmovisor_test.go b/tests/systemtests/cosmovisor_test.go new file mode 100644 index 000000000000..c5497c223754 --- /dev/null +++ b/tests/systemtests/cosmovisor_test.go @@ -0,0 +1,214 @@ +//go:build system_test + +package systemtests + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "testing" + "time" + + "github.com/cosmos/gogoproto/jsonpb" + "github.com/stretchr/testify/require" + "github.com/tidwall/gjson" + + systest "cosmossdk.io/systemtests" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/cosmos/cosmos-sdk/types/address" + upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" +) + +func TestCosmovisorUpgrade(t *testing.T) { + t.Run("gov upgrade, then manual upgrade", func(t *testing.T) { + // this test + // 1. starts a legacy v0.53 chain with Cosmovisor + // 2. submits a gov upgrade proposal to switch to v0.54 + // 3. adds a binary for the gov upgrade + // 4. waits for the upgrade to be applied and checks the symlink + // 5. adds a manual upgrade which simapp has configured to make a small state breaking update + // 6. 
waits for the manual upgrade to be applied and checks the symlink + const ( + upgrade1Height = 25 + upgrade1Name = "v053-to-v054" // must match UpgradeName in simapp/upgrades.go + upgrade2Height int64 = 30 + upgrade2Name = "manual1" + ) + + systest.ResetSut(t) + + currentBranchBinary := systest.Sut.ExecBinary() + + legacyBinary := systest.WorkDir + "/binaries/v0.53/simd" + systest.Sut.SetExecBinary(legacyBinary) + systest.Sut.SetupChain() + + votingPeriod := 5 * time.Second // enough time to vote + systest.Sut.ModifyGenesisJSON(t, systest.SetGovVotingPeriod(t, votingPeriod)) + + systest.Sut.StartChainWithCosmovisor(t) + + cli := systest.NewCLIWrapper(t, systest.Sut, systest.Verbose) + govAddr := sdk.AccAddress(address.Module("gov")).String() + // submit upgrade proposal + proposal := fmt.Sprintf(` + { + "messages": [ + { + "@type": "/cosmos.upgrade.v1beta1.MsgSoftwareUpgrade", + "authority": %q, + "plan": { + "name": %q, + "height": "%d" + } + } + ], + "metadata": "ipfs://CID", + "deposit": "100000000stake", + "title": "my upgrade", + "summary": "testing" + }`, govAddr, upgrade1Name, upgrade1Height) + proposalID := cli.SubmitAndVoteGovProposal(proposal) + + // add binary for gov upgrade + systest.Sut.ExecCosmovisor( + t, + true, + "add-upgrade", + upgrade1Name, + currentBranchBinary, + ) + + requireCurrentPointsTo(t, "genesis") + + systest.Sut.AwaitBlockHeight(t, 21, 60*time.Second) + + t.Logf("current_height: %d\n", systest.Sut.CurrentHeight()) + raw := cli.CustomQuery("q", "gov", "proposal", proposalID) + proposalStatus := gjson.Get(raw, "proposal.status").String() + require.Equal(t, "PROPOSAL_STATUS_PASSED", proposalStatus, raw) + + // we create a wrapper for the current branch binary which sets up the manual upgrade + wrapperPath := createWrapper(t, upgrade2Name, upgrade2Height, currentBranchBinary) + + // add manual upgrade + systest.Sut.ExecCosmovisor( + t, + true, + "add-upgrade", + upgrade2Name, + wrapperPath, + fmt.Sprintf("--upgrade-height=%d", 
upgrade2Height), + ) + + systest.Sut.AwaitBlockHeight(t, upgrade1Height+1) + + requireCurrentPointsTo(t, fmt.Sprintf("upgrades/%s", upgrade1Name)) + // make sure a gov upgrade was triggered + regex, err := regexp.Compile(fmt.Sprintf(`UPGRADE %q NEEDED at height: %d: module=x/upgrade`, + upgrade1Name, upgrade1Height)) + require.NoError(t, err) + require.Equal(t, systest.Sut.NodesCount(), systest.Sut.FindLogMessage(regex)) + // make sure the upgrade-info.json was readable by nodes when they restarted + regex, err = regexp.Compile("read upgrade info from disk") + require.NoError(t, err) + require.Equal(t, systest.Sut.NodesCount(), systest.Sut.FindLogMessage(regex)) + // make sure we ran the upgrade handler + + systest.Sut.AwaitBlockHeight(t, upgrade2Height+1) + require.Equal(t, systest.Sut.NodesCount(), systest.Sut.FindLogMessage( + regexp.MustCompile(fmt.Sprintf(`applying upgrade %q`, upgrade1Name))), + ) + + requireCurrentPointsTo(t, fmt.Sprintf("upgrades/%s", upgrade2Name)) + + // smoke test that new version runs + cli = systest.NewCLIWrapper(t, systest.Sut, systest.Verbose) + got := cli.Run("tx", "protocolpool", "fund-community-pool", "100stake", "--from=node0") + systest.RequireTxSuccess(t, got) + }) + + t.Run("manual upgrade", func(t *testing.T) { + // this test: + // 1. starts a legacy v0.53 chain with Cosmovisor + // 2. adds a manual upgrade to v0.54 which has an environment variable set to manually perform the migration + // 3. 
waits for the manual upgrade to be applied and checks the symlink + const ( + upgradeHeight = 10 + upgradeName = "v053-to-v054" // must match UpgradeName in simapp/upgrades.go + ) + // Scenario: + // start a legacy chain with some state + // when a chain upgrade proposal is executed + // then the chain upgrades successfully + systest.ResetSut(t) + + currentBranchBinary := systest.Sut.ExecBinary() + + legacyBinary := systest.WorkDir + "/binaries/v0.53/simd" + systest.Sut.SetExecBinary(legacyBinary) + systest.Sut.SetupChain() + + systest.Sut.StartChainWithCosmovisor(t) + requireCurrentPointsTo(t, "genesis") + + // we create a wrapper for the current branch binary which sets up the manual upgrade + wrapperPath := createWrapper(t, upgradeName, upgradeHeight, currentBranchBinary) + + // schedule manual upgrade to latest version + systest.Sut.ExecCosmovisor( + t, + true, + "add-upgrade", + upgradeName, + wrapperPath, + fmt.Sprintf("--upgrade-height=%d", upgradeHeight), + ) + + systest.Sut.AwaitBlockHeight(t, upgradeHeight+1, 60*time.Second) + + requireCurrentPointsTo(t, fmt.Sprintf("upgrades/%s", upgradeName)) + + // make sure the upgrade handler was called + require.Equal(t, systest.Sut.NodesCount(), systest.Sut.FindLogMessage( + regexp.MustCompile(fmt.Sprintf(`applying upgrade %s`, upgradeName))), + ) + + // smoke test that new version runs + cli := systest.NewCLIWrapper(t, systest.Sut, systest.Verbose) + got := cli.Run("tx", "protocolpool", "fund-community-pool", "100stake", "--from=node0") + systest.RequireTxSuccess(t, got) + }) +} + +func requireCurrentPointsTo(t *testing.T, expected string) { + t.Helper() + for i := 0; i < systest.Sut.NodesCount(); i++ { + curSymLink := filepath.Join(systest.Sut.NodeDir(i), "cosmovisor", "current") + resolved, err := os.Readlink(curSymLink) + require.NoError(t, err, "failed to read current symlink for node %d", i) + require.Equal(t, expected, resolved, "current symlink for node %d does not point to expected directory", i) + } +} + 
+func createWrapper(t *testing.T, upgradeName string, upgradeHeight int64, binary string) string { + t.Helper() + plan := upgradetypes.Plan{ + Name: upgradeName, + Height: upgradeHeight, + } + str, err := (&jsonpb.Marshaler{}).MarshalToString(&plan) + require.NoError(t, err, "failed to marshal upgrade plan to JSON") + + wrapperTxt := fmt.Sprintf(`#!/usr/bin/env bash +set -e +SIMAPP_MANUAL_UPGRADE='%s' exec %s "$@"`, str, binary) + wrapperPath := filepath.Join(systest.WorkDir, "testnet", fmt.Sprintf("%s.sh", upgradeName)) + wrapperPath, err = filepath.Abs(wrapperPath) + require.NoError(t, err, "failed to get absolute path for manual upgrade script") + err = os.WriteFile(wrapperPath, []byte(wrapperTxt), 0o755) + require.NoError(t, err, "failed to write manual upgrade script") + return wrapperPath +} diff --git a/tests/systemtests/go.mod b/tests/systemtests/go.mod index 76c8f06e2970..f54d4b0367db 100644 --- a/tests/systemtests/go.mod +++ b/tests/systemtests/go.mod @@ -12,6 +12,7 @@ require ( cosmossdk.io/math v1.5.3 cosmossdk.io/systemtests v1.2.1 github.com/cosmos/cosmos-sdk v0.54.0-rc.1 + github.com/cosmos/gogoproto v1.7.0 github.com/stretchr/testify v1.10.0 github.com/tidwall/gjson v1.18.0 github.com/tidwall/sjson v1.2.5 @@ -53,7 +54,6 @@ require ( github.com/cosmos/cosmos-proto v1.0.0-beta.5 // indirect github.com/cosmos/go-bip39 v1.0.0 // indirect github.com/cosmos/gogogateway v1.2.0 // indirect - github.com/cosmos/gogoproto v1.7.0 // indirect github.com/cosmos/iavl v1.2.6 // indirect github.com/cosmos/ics23/go v0.11.0 // indirect github.com/cosmos/ledger-cosmos-go v0.14.0 // indirect diff --git a/tests/systemtests/protocolpool_test.go b/tests/systemtests/protocolpool_test.go index 89df4ad8f43a..be6672fc9e9e 100644 --- a/tests/systemtests/protocolpool_test.go +++ b/tests/systemtests/protocolpool_test.go @@ -145,8 +145,8 @@ func TestQueryProtocolPool(t *testing.T) { // delegate tokens to validator // check distribution + systemtests.ResetSut(t) sut := 
systemtests.Sut - sut.ResetChain(t) // set up gov params so we can pass props quickly modifyGovParams(t) @@ -263,7 +263,7 @@ func TestQueryProtocolPool(t *testing.T) { // - submit prop and vote until passed // Check that funds are distributed and continuous fund is cleaned up once expired func TestContinuousFunds(t *testing.T) { - systemtests.Sut.ResetChain(t) + systemtests.ResetSut(t) cli := systemtests.NewCLIWrapper(t, systemtests.Sut, systemtests.Verbose) // set up gov params so we can pass props quickly @@ -366,7 +366,7 @@ func TestContinuousFunds(t *testing.T) { // // Check that some funds have been distributed and that the fund is canceled. func TestCancelContinuousFunds(t *testing.T) { - systemtests.Sut.ResetChain(t) + systemtests.ResetSut(t) cli := systemtests.NewCLIWrapper(t, systemtests.Sut, systemtests.Verbose) // set up gov params so we can pass props quickly diff --git a/tests/systemtests/staking_test.go b/tests/systemtests/staking_test.go index 4e07bf220fa3..9647ca0efc0c 100644 --- a/tests/systemtests/staking_test.go +++ b/tests/systemtests/staking_test.go @@ -15,8 +15,8 @@ func TestStakeUnstake(t *testing.T) { // Scenario: // delegate tokens to validator // undelegate some tokens + systemtests.ResetSut(t) sut := systemtests.Sut - sut.ResetChain(t) cli := systemtests.NewCLIWrapper(t, sut, systemtests.Verbose) diff --git a/tests/systemtests/unordered_tx_test.go b/tests/systemtests/unordered_tx_test.go index 93df6c5bde46..eab8503d17ea 100644 --- a/tests/systemtests/unordered_tx_test.go +++ b/tests/systemtests/unordered_tx_test.go @@ -20,7 +20,7 @@ func TestUnorderedTXDuplicate(t *testing.T) { // when a new tx with the same unordered nonce is broadcasted, // then the new tx should be rejected. 
- systest.Sut.ResetChain(t) + systest.ResetSut(t) cli := systest.NewCLIWrapper(t, systest.Sut, systest.Verbose) // add genesis account with some tokens account1Addr := cli.AddKey("account1") diff --git a/tests/systemtests/upgrade_test.go b/tests/systemtests/upgrade_test.go index c741df3c1dee..42bc5774ed11 100644 --- a/tests/systemtests/upgrade_test.go +++ b/tests/systemtests/upgrade_test.go @@ -17,18 +17,17 @@ import ( "github.com/cosmos/cosmos-sdk/types/address" ) -const ( - testSeed = "scene learn remember glide apple expand quality spawn property shoe lamp carry upset blossom draft reject aim file trash miss script joy only measure" - upgradeHeight int64 = 22 - upgradeName = "v053-to-v054" // must match UpgradeName in simapp/upgrades.go -) - func TestChainUpgrade(t *testing.T) { + const ( + upgradeHeight int64 = 22 + upgradeName = "v053-to-v054" // must match UpgradeName in simapp/upgrades.go + ) + // Scenario: // start a legacy chain with some state // when a chain upgrade proposal is executed // then the chain upgrades successfully - systest.Sut.StopChain() + systest.ResetSut(t) currentBranchBinary := systest.Sut.ExecBinary() currentInitializer := systest.Sut.TestnetInitializer() diff --git a/tools/cosmovisor/.gitignore b/tools/cosmovisor/.gitignore index fe1602e647d3..f9e533602faa 100644 --- a/tools/cosmovisor/.gitignore +++ b/tools/cosmovisor/.gitignore @@ -1 +1,2 @@ /cosmovisor +build/ diff --git a/tools/cosmovisor/CHANGELOG.md b/tools/cosmovisor/CHANGELOG.md index b2c1d038723a..2180a31370fc 100644 --- a/tools/cosmovisor/CHANGELOG.md +++ b/tools/cosmovisor/CHANGELOG.md @@ -36,13 +36,9 @@ Ref: https://keepachangelog.com/en/1.0.0/ ## [Unreleased] -### Improvements - -* [#23720](https://github.com/cosmos/cosmos-sdk/pull/23720) Get block height from db after node execution fails - -### Bug Fixes +### Breaking Changes -* [#23683](https://github.com/cosmos/cosmos-sdk/pull/23683) Replace `SigInt` with `SigTerm` to gracefully shutdown the process. 
+* [#24821](https://github.com/cosmos/cosmos-sdk/pull/24821) Reimplement core Cosmovisor logic for managing and observing governance and manual upgrades. ## v1.7.1 - 2025-01-12 diff --git a/tools/cosmovisor/README.md b/tools/cosmovisor/README.md index 3b5f722c5d1b..11ba40eb8141 100644 --- a/tools/cosmovisor/README.md +++ b/tools/cosmovisor/README.md @@ -4,8 +4,11 @@ sidebar_position: 1 # Cosmovisor -`cosmovisor` is a process manager for Cosmos SDK application binaries that automates application binary switch at chain upgrades. -It polls the `upgrade-info.json` file that is created by the x/upgrade module at upgrade height, and then can automatically download the new binary, stop the current binary, switch from the old binary to the new one, and finally restart the node with the new binary. +`cosmovisor` is a process manager for Cosmos SDK application binaries that automates application binary switch at chain +upgrades. +It polls the `upgrade-info.json` file that is created by the x/upgrade module at upgrade height, and then can +automatically download the new binary, stop the current binary, switch from the old binary to the new one, and finally +restart the node with the new binary. * [Design](#design) * [Contributing](#contributing) @@ -33,7 +36,9 @@ Cosmovisor is designed to be used as a wrapper for a `Cosmos SDK` app: * it will manage an app by restarting and upgrading if needed; * it is configured using environment variables, not positional arguments. -*Note: If new versions of the application are not set up to run in-place store migrations, migrations will need to be run manually before restarting `cosmovisor` with the new binary. For this reason, we recommend applications adopt in-place store migrations.* +*Note: If new versions of the application are not set up to run in-place store migrations, migrations will need to be +run manually before restarting `cosmovisor` with the new binary. 
For this reason, we recommend applications adopt +in-place store migrations.* :::tip Only the latest version of cosmovisor is actively developed/maintained. @@ -47,13 +52,15 @@ Versions prior to v1.0.0 have a vulnerability that could lead to a DOS. Please u Cosmovisor is part of the Cosmos SDK monorepo, but it's a separate module with it's own release schedule. -Release branches have the following format `release/cosmovisor/vA.B.x`, where A and B are a number (e.g. `release/cosmovisor/v1.3.x`). Releases are tagged using the following format: `cosmovisor/vA.B.C`. +Release branches have the following format `release/cosmovisor/vA.B.x`, where A and B are a number (e.g. +`release/cosmovisor/v1.3.x`). Releases are tagged using the following format: `cosmovisor/vA.B.C`. ## Setup ### Installation -You can download Cosmovisor from the [GitHub releases](https://github.com/cosmos/cosmos-sdk/releases/tag/cosmovisor%2Fv1.5.0). +You can download Cosmovisor from +the [GitHub releases](https://github.com/cosmos/cosmos-sdk/releases/tag/cosmovisor%2Fv1.5.0). To install the latest version of `cosmovisor`, run the following command: @@ -83,33 +90,69 @@ The first argument passed to `cosmovisor` is the action for `cosmovisor` to take * `help`, `--help`, or `-h` - Output `cosmovisor` help information and check your `cosmovisor` configuration. * `run` - Run the configured binary using the rest of the provided arguments. * `version` - Output the `cosmovisor` version and also run the binary with the `version` argument. -* `config` - Display the current `cosmovisor` configuration, that means displaying the environment variables value that `cosmovisor` is using. -* `add-upgrade` - Add an upgrade manually to `cosmovisor`. This command allow you to easily add the binary corresponding to an upgrade in cosmovisor. - -All arguments passed to `cosmovisor run` will be passed to the application binary (as a subprocess). 
`cosmovisor` will return `/dev/stdout` and `/dev/stderr` of the subprocess as its own. For this reason, `cosmovisor run` cannot accept any command-line arguments other than those available to the application binary. - -`cosmovisor` reads its configuration from environment variables, or its configuration file (use `--cosmovisor-config `): - -* `DAEMON_HOME` is the location where the `cosmovisor/` directory is kept that contains the genesis binary, the upgrade binaries, and any additional auxiliary files associated with each binary (e.g. `$HOME/.gaiad`, `$HOME/.regend`, `$HOME/.simd`, etc.). +* `config` - Display the current `cosmovisor` configuration, that means displaying the environment variables value that + `cosmovisor` is using. +* `init` - Initialize the `cosmovisor` folder structure and copy the provided executable to the appropriate location. +* `add-upgrade` - Add an upgrade manually to `cosmovisor`. This command allows you to easily add the binary corresponding + to an upgrade in cosmovisor or to schedule a manual upgrade that will set the node's `--halt-height` flag. +* `add-batch-upgrade` - Adds a batch of manually scheduled upgrades that will set the node's `--halt-height` flag. +* `prepare-upgrade` - Prepare for an upgrade by downloading the new binary and placing it in the appropriate directory. +* `show-manual-upgrades` - Show pending manual upgrades that set the `--halt-height` flag. + +All arguments passed to `cosmovisor run` will be passed to the application binary (as a subprocess). `cosmovisor` will +return `/dev/stdout` and `/dev/stderr` of the subprocess as its own. For this reason, `cosmovisor run` cannot accept any +command-line arguments other than those available to the application binary. 
+ +`cosmovisor` reads its configuration from environment variables, or its configuration file (use +`--cosmovisor-config `): + +* `DAEMON_HOME` is the location where the `cosmovisor/` directory is kept that contains the genesis binary, the upgrade + binaries, and any additional auxiliary files associated with each binary (e.g. `$HOME/.gaiad`, `$HOME/.regend`, + `$HOME/.simd`, etc.). * `DAEMON_NAME` is the name of the binary itself (e.g. `gaiad`, `regend`, `simd`, etc.). -* `DAEMON_ALLOW_DOWNLOAD_BINARIES` (*optional*), if set to `true`, will enable auto-downloading of new binaries (for security reasons, this is intended for full nodes rather than validators). By default, `cosmovisor` will not auto-download new binaries. -* `DAEMON_DOWNLOAD_MUST_HAVE_CHECKSUM` (*optional*, default = `false`), if `true` cosmovisor will require that a checksum is provided in the upgrade plan for the binary to be downloaded. If `false`, cosmovisor will not require a checksum to be provided, but still check the checksum if one is provided. -* `DAEMON_RESTART_AFTER_UPGRADE` (*optional*, default = `true`), if `true`, restarts the subprocess with the same command-line arguments and flags (but with the new binary) after a successful upgrade. Otherwise (`false`), `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note restart is only after the upgrade and does not auto-restart the subprocess after an error occurs. -* `DAEMON_RESTART_DELAY` (*optional*, default none), allow a node operator to define a delay between the node halt (for upgrade) and backup by the specified time. The value must be a duration (e.g. `1s`). -* `DAEMON_SHUTDOWN_GRACE` (*optional*, default none), if set, send interrupt to binary and wait the specified time to allow for cleanup/cache flush to disk before sending the kill signal. The value must be a duration (e.g. `1s`). 
-* `DAEMON_POLL_INTERVAL` (*optional*, default 300 milliseconds), is the interval length for polling the upgrade plan file. The value must be a duration (e.g. `1s`). +* `DAEMON_ALLOW_DOWNLOAD_BINARIES` (*optional*), if set to `true`, will enable auto-downloading of new binaries (for + security reasons, this is intended for full nodes rather than validators). By default, `cosmovisor` will not + auto-download new binaries. +* `DAEMON_DOWNLOAD_MUST_HAVE_CHECKSUM` (*optional*, default = `false`), if `true` cosmovisor will require that a + checksum is provided in the upgrade plan for the binary to be downloaded. If `false`, cosmovisor will not require a + checksum to be provided, but still check the checksum if one is provided. +* `DAEMON_RESTART_AFTER_UPGRADE` (*optional*, default = `true`), if `true`, restarts the subprocess with the same + command-line arguments and flags (but with the new binary) after a successful upgrade. Otherwise (`false`), + `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note restart + is only after the upgrade and does not auto-restart the subprocess after an error occurs. +* `DAEMON_RESTART_DELAY` (*optional*, default none), allow a node operator to define a delay between the node halt (for + upgrade) and backup by the specified time. The value must be a duration (e.g. `1s`). +* `DAEMON_SHUTDOWN_GRACE` (*optional*, default none), if set, send interrupt to binary and wait the specified time to + allow for cleanup/cache flush to disk before sending the kill signal. The value must be a duration (e.g. `1s`). +* `DAEMON_POLL_INTERVAL` (*optional*, default 300 milliseconds), is the interval length for polling the upgrade plan + file. The value must be a duration (e.g. `1s`). * `DAEMON_DATA_BACKUP_DIR` option to set a custom backup directory. If not set, `DAEMON_HOME` is used. -* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `true`, upgrades directly without performing a backup. 
Otherwise (`false`, default) backs up the data before trying the upgrade. The default value of false is useful and recommended in case of failures and when a backup needed to rollback. We recommend using the default backup option `UNSAFE_SKIP_BACKUP=false`. -* `DAEMON_PREUPGRADE_MAX_RETRIES` (defaults to `0`). The maximum number of times to call [`pre-upgrade`](https://docs.cosmos.network/main/build/building-apps/app-upgrade#pre-upgrade-handling) in the application after exit status of `31`. After the maximum number of retries, Cosmovisor fails the upgrade. -* `COSMOVISOR_DISABLE_LOGS` (defaults to `false`). If set to true, this will disable Cosmovisor logs (but not the underlying process) completely. This may be useful, for example, when a Cosmovisor subcommand you are executing returns a valid JSON you are then parsing, as logs added by Cosmovisor make this output not a valid JSON. -* `COSMOVISOR_COLOR_LOGS` (defaults to `true`). If set to true, this will colorise Cosmovisor logs (but not the underlying process). -* `COSMOVISOR_TIMEFORMAT_LOGS` (defaults to `kitchen`). If set to a value (`layout|ansic|unixdate|rubydate|rfc822|rfc822z|rfc850|rfc1123|rfc1123z|rfc3339|rfc3339nano|kitchen`), this will add timestamp prefix to Cosmovisor logs (but not the underlying process). -* `COSMOVISOR_CUSTOM_PREUPGRADE` (defaults to ``). If set, this will run $DAEMON_HOME/cosmovisor/$COSMOVISOR_CUSTOM_PREUPGRADE prior to upgrade with the arguments [ upgrade.Name, upgrade.Height ]. Executes a custom script (separate and prior to the chain daemon pre-upgrade command) -* `COSMOVISOR_DISABLE_RECASE` (defaults to `false`). If set to true, the upgrade directory will expected to match the upgrade plan name without any case changes +* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `true`, upgrades directly without performing a backup. + Otherwise (`false`, default) backs up the data before trying the upgrade. 
The default value of false is useful and + recommended in case of failures and when a backup needed to rollback. We recommend using the default backup option + `UNSAFE_SKIP_BACKUP=false`. +* `DAEMON_PREUPGRADE_MAX_RETRIES` (defaults to `0`). The maximum number of times to call [ + `pre-upgrade`](https://docs.cosmos.network/main/build/building-apps/app-upgrade#pre-upgrade-handling) in the + application after exit status of `31`. After the maximum number of retries, Cosmovisor fails the upgrade. +* `COSMOVISOR_DISABLE_LOGS` (defaults to `false`). If set to true, this will disable Cosmovisor logs (but not the + underlying process) completely. This may be useful, for example, when a Cosmovisor subcommand you are executing + returns a valid JSON you are then parsing, as logs added by Cosmovisor make this output not a valid JSON. +* `COSMOVISOR_COLOR_LOGS` (defaults to `true`). If set to true, this will colorise Cosmovisor logs (but not the + underlying process). +* `COSMOVISOR_TIMEFORMAT_LOGS` (defaults to `kitchen`). If set to a value ( + `layout|ansic|unixdate|rubydate|rfc822|rfc822z|rfc850|rfc1123|rfc1123z|rfc3339|rfc3339nano|kitchen`), this will add + timestamp prefix to Cosmovisor logs (but not the underlying process). +* `COSMOVISOR_CUSTOM_PREUPGRADE` (defaults to ``). If set, this will run + $DAEMON_HOME/cosmovisor/$COSMOVISOR_CUSTOM_PREUPGRADE prior to upgrade with the + arguments [ upgrade.Name, upgrade.Height ]. Executes a custom script (separate and prior to the chain daemon + pre-upgrade command) +* `COSMOVISOR_DISABLE_RECASE` (defaults to `false`). If set to true, the upgrade directory will expected to match the + upgrade plan name without any case changes ### Folder Layout -`$DAEMON_HOME/cosmovisor` is expected to belong completely to `cosmovisor` and the subprocesses that are controlled by it. 
The folder content is organized as follows: +`$DAEMON_HOME/cosmovisor` is expected to belong completely to `cosmovisor` and the subprocesses that are controlled by +it. The folder content is organized as follows: ```text . @@ -125,9 +168,18 @@ All arguments passed to `cosmovisor run` will be passed to the application binar └── preupgrade.sh (optional) ``` -The `cosmovisor/` directory includes a subdirectory for each version of the application (i.e. `genesis` or `upgrades/`). Within each subdirectory is the application binary (i.e. `bin/$DAEMON_NAME`) and any additional auxiliary files associated with each binary. `current` is a symbolic link to the currently active directory (i.e. `genesis` or `upgrades/`). The `name` variable in `upgrades/` is the lowercased URI-encoded name of the upgrade as specified in the upgrade module plan. Note that the upgrade name path are normalized to be lowercased: for instance, `MyUpgrade` is normalized to `myupgrade`, and its path is `upgrades/myupgrade`. +The `cosmovisor/` directory includes a subdirectory for each version of the application (i.e. `genesis` or +`upgrades/`). Within each subdirectory is the application binary (i.e. `bin/$DAEMON_NAME`) and any additional +auxiliary files associated with each binary. `current` is a symbolic link to the currently active directory (i.e. +`genesis` or `upgrades/`). The `name` variable in `upgrades/` is the lowercased URI-encoded name of the +upgrade as specified in the upgrade module plan. Note that the upgrade name path are normalized to be lowercased: for +instance, `MyUpgrade` is normalized to `myupgrade`, and its path is `upgrades/myupgrade`. -Please note that `$DAEMON_HOME/cosmovisor` only stores the *application binaries*. The `cosmovisor` binary itself can be stored in any typical location (e.g. `/usr/local/bin`). The application will continue to store its data in the default data directory (e.g. `$HOME/.simapp`) or the data directory specified with the `--home` flag. 
`$DAEMON_HOME` is dependent of the data directory and must be set to the same directory as the data directory, you will end up with a configuration like the following: +Please note that `$DAEMON_HOME/cosmovisor` only stores the *application binaries*. The `cosmovisor` binary itself can be +stored in any typical location (e.g. `/usr/local/bin`). The application will continue to store its data in the default +data directory (e.g. `$HOME/.simapp`) or the data directory specified with the `--home` flag. `$DAEMON_HOME` is +dependent of the data directory and must be set to the same directory as the data directory, you will end up with a +configuration like the following: ```text .simapp @@ -148,11 +200,18 @@ The system administrator is responsible for: * creating the `/cosmovisor/upgrades//bin` folders * placing the different versions of the `` executable in the appropriate `bin` folders. -`cosmovisor` will set the `current` link to point to `genesis` at first start (i.e. when no `current` link exists) and then handle switching binaries at the correct points in time so that the system administrator can prepare days in advance and relax at upgrade time. +`cosmovisor` will set the `current` link to point to `genesis` at first start (i.e. when no `current` link exists) and +then handle switching binaries at the correct points in time so that the system administrator can prepare days in +advance and relax at upgrade time. -In order to support downloadable binaries, a tarball for each upgrade binary will need to be packaged up and made available through a canonical URL. Additionally, a tarball that includes the genesis binary and all available upgrade binaries can be packaged up and made available so that all the necessary binaries required to sync a fullnode from start can be easily downloaded. +In order to support downloadable binaries, a tarball for each upgrade binary will need to be packaged up and made +available through a canonical URL. 
Additionally, a tarball that includes the genesis binary and all available upgrade +binaries can be packaged up and made available so that all the necessary binaries required to sync a fullnode from start +can be easily downloaded. -The `DAEMON` specific code and operations (e.g. cometBFT config, the application db, syncing blocks, etc.) all work as expected. The application binaries' directives such as command-line flags and environment variables also work as expected. +The `DAEMON` specific code and operations (e.g. cometBFT config, the application db, syncing blocks, etc.) all work as +expected. The application binaries' directives such as command-line flags and environment variables also work as +expected. ### Initialization @@ -167,44 +226,105 @@ It does the following: It uses the `DAEMON_HOME` and `DAEMON_NAME` environment variables for folder location and executable name. -The `cosmovisor init` command is specifically for initializing cosmovisor, and should not be confused with a chain's `init` command (e.g. `cosmovisor run init`). +The `cosmovisor init` command is specifically for initializing cosmovisor, and should not be confused with a chain's +`init` command (e.g. `cosmovisor run init`). ### Detecting Upgrades -`cosmovisor` is polling the `$DAEMON_HOME/data/upgrade-info.json` file for new upgrade instructions. The file is created by the x/upgrade module in `BeginBlocker` when an upgrade is detected and the blockchain reaches the upgrade height. +`cosmovisor` is polling the `$DAEMON_HOME/data/upgrade-info.json` file for new upgrade instructions. The file is created +by the x/upgrade module in `BeginBlocker` when an upgrade is detected and the blockchain reaches the upgrade height. The following heuristic is applied to detect the upgrade: -* When starting, `cosmovisor` doesn't know much about currently running upgrade, except the binary which is `current/bin/`. 
It tries to read the `current/update-info.json` file to get information about the current upgrade name. -* If neither `cosmovisor/current/upgrade-info.json` nor `data/upgrade-info.json` exist, then `cosmovisor` will wait for `data/upgrade-info.json` file to trigger an upgrade. -* If `cosmovisor/current/upgrade-info.json` doesn't exist but `data/upgrade-info.json` exists, then `cosmovisor` assumes that whatever is in `data/upgrade-info.json` is a valid upgrade request. In this case `cosmovisor` tries immediately to make an upgrade according to the `name` attribute in `data/upgrade-info.json`. -* Otherwise, `cosmovisor` waits for changes in `upgrade-info.json`. As soon as a new upgrade name is recorded in the file, `cosmovisor` will trigger an upgrade mechanism. +* When starting, `cosmovisor` doesn't know much about the currently running upgrade, except the binary which is + `current/bin/`. It tries to read the `current/upgrade-info.json` file to get information about the current upgrade + name. +* If neither `cosmovisor/current/upgrade-info.json` nor `data/upgrade-info.json` exist, then `cosmovisor` will wait for + `data/upgrade-info.json` file to trigger an upgrade. +* If `cosmovisor/current/upgrade-info.json` doesn't exist but `data/upgrade-info.json` exists, then `cosmovisor` assumes + that whatever is in `data/upgrade-info.json` is a valid upgrade request. In this case `cosmovisor` tries immediately + to make an upgrade according to the `name` attribute in `data/upgrade-info.json`. +* If `cosmovisor/current/upgrade-info.json` exists, `cosmovisor` waits for changes in `upgrade-info.json`. As soon as a new upgrade name different from the current one is recorded in the file, `cosmovisor` will trigger an upgrade mechanism. When the upgrade mechanism is triggered, `cosmovisor` will: -1. if `DAEMON_ALLOW_DOWNLOAD_BINARIES` is enabled, start by auto-downloading a new binary into `cosmovisor//bin` (where `` is the `upgrade-info.json:name` attribute); -2.
update the `current` symbolic link to point to the new directory and save `data/upgrade-info.json` to `cosmovisor/current/upgrade-info.json`. +1. if `DAEMON_ALLOW_DOWNLOAD_BINARIES` is enabled, start by auto-downloading a new binary into `cosmovisor//bin` ( + where `` is the `upgrade-info.json:name` attribute); +2. update the `current` symbolic link to point to the new directory and save `data/upgrade-info.json` to + `cosmovisor/current/upgrade-info.json`. + +### Scheduling Manual Upgrades + +The `add-upgrade` and `add-batch-upgrade` commands can be used to schedule manual upgrades which will +set the node's `--halt-height` flag. + +Manually scheduled upgrades will be stored in the `data/upgrade-info.json.batch` file as an array of upgrade plans. +A running Cosmovisor process will pick up updates to this file and restart the node with its `--halt-height` flag +set to the height of the earliest scheduled manual upgrade. + +To detect when a node has reached a manual upgrade height, Cosmovisor will continuously poll the node's +`/block` RPC endpoint to determine the current block height. +When Cosmovisor observes that the current height is equal to the desired halt height, it will shut down +the node and perform the upgrade. +After the upgrade is completed, the manual upgrade will be removed from the `data/upgrade-info.json.batch` file. +If Cosmovisor detects that there is a manual upgrade scheduled for before the chain's current height, it will +assume that this is an error condition requiring manual intervention and it will shut down the node. + +#### Expected `/block` RPC Endpoint + +The RPC endpoint to use for polling the node's current height can be configured using the `DAEMON_RPC_ADDRESS` +environment variable and the `daemon_rpc_address` config variable. +The default address is `http://localhost:26657`.
+ +Cosmovisor will check for a valid response under either the `/block` or `/v1/block` path and expects that +the response conforms to the following JSON format: + +```json +{ + "result": { + "block": { + "header": { + "height": "" + } + } + } +} +``` ### Adding Upgrade Binary -`cosmovisor` has an `add-upgrade` command that allows to easily link a binary to an upgrade. It creates a new folder in `cosmovisor/upgrades/` and copies the provided executable file to `cosmovisor/upgrades//bin/`. +`cosmovisor` has an `add-upgrade` command that allows to easily link a binary to an upgrade. It creates a new folder in +`cosmovisor/upgrades/` and copies the provided executable file to `cosmovisor/upgrades//bin/`. -Using the `--upgrade-height` flag allows to specify at which height the binary should be switched, without going via a gorvernance proposal. -This enables support for an emergency coordinated upgrades where the binary must be switched at a specific height, but there is no time to go through a governance proposal. +Using the `--upgrade-height` flag allows to specify at which height the binary should be switched, without going via a +governance proposal. +This enables support for emergency coordinated upgrades where the binary must be switched at a specific height, but +there is no time to go through a governance proposal. :::warning -`--upgrade-height` creates an `upgrade-info.json` file. This means if a chain upgrade via governance proposal is executed before the specified height with `--upgrade-height`, the governance proposal will overwrite the `upgrade-info.json` plan created by `add-upgrade --upgrade-height `. +`--upgrade-height` creates an `upgrade-info.json` file. This means if a chain upgrade via governance proposal is +executed before the specified height with `--upgrade-height`, the governance proposal will overwrite the +`upgrade-info.json` plan created by `add-upgrade --upgrade-height `. Take this into consideration when using `--upgrade-height`.
::: ### Auto-Download -Generally, `cosmovisor` requires that the system administrator place all relevant binaries on disk before the upgrade happens. However, for people who don't need such control and want an automated setup (maybe they are syncing a non-validating fullnode and want to do little maintenance), there is another option. +Generally, `cosmovisor` requires that the system administrator place all relevant binaries on disk before the upgrade +happens. However, for people who don't need such control and want an automated setup (maybe they are syncing a +non-validating fullnode and want to do little maintenance), there is another option. -**NOTE: we don't recommend using auto-download** because it doesn't verify in advance if a binary is available. If there will be any issue with downloading a binary, the cosmovisor will stop and won't restart an App (which could lead to a chain halt). +**NOTE: we don't recommend using auto-download** because it doesn't verify in advance if a binary is available. If there +will be any issue with downloading a binary, the cosmovisor will stop and won't restart an App (which could lead to a +chain halt). -If `DAEMON_ALLOW_DOWNLOAD_BINARIES` is set to `true`, and no local binary can be found when an upgrade is triggered, `cosmovisor` will attempt to download and install the binary itself based on the instructions in the `info` attribute in the `data/upgrade-info.json` file. The files is constructed by the x/upgrade module and contains data from the upgrade `Plan` object. The `Plan` has an info field that is expected to have one of the following two valid formats to specify a download: +If `DAEMON_ALLOW_DOWNLOAD_BINARIES` is set to `true`, and no local binary can be found when an upgrade is triggered, +`cosmovisor` will attempt to download and install the binary itself based on the instructions in the `info` attribute in +the `data/upgrade-info.json` file. 
The file is constructed by the x/upgrade module and contains data from the upgrade +`Plan` object. The `Plan` has an info field that is expected to have one of the following two valid formats to specify a +download: -1. Store an os/architecture -> binary URI map in the upgrade plan info field as JSON under the `"binaries"` key. For example: +1. Store an os/architecture -> binary URI map in the upgrade plan info field as JSON under the `"binaries"` key. For + example: ```json { @@ -214,7 +334,7 @@ If `DAEMON_ALLOW_DOWNLOAD_BINARIES` is set to `true`, and no local binary can be } ``` - You can include multiple binaries at once to ensure more than one environment will receive the correct binaries: + You can include multiple binaries at once to ensure more than one environment will receive the correct binaries: ```json { @@ -226,7 +346,7 @@ If `DAEMON_ALLOW_DOWNLOAD_BINARIES` is set to `true`, and no local binary can be } ``` - When submitting this as a proposal ensure there are no spaces. An example command using `gaiad` could look like: + When submitting this as a proposal ensure there are no spaces. An example command using `gaiad` could look like: ```shell > gaiad tx upgrade software-upgrade Vega \ @@ -243,15 +363,21 @@ If `DAEMON_ALLOW_DOWNLOAD_BINARIES` is set to `true`, and no local binary can be --yes ``` -2. Store a link to a file that contains all information in the above format (e.g. if you want to specify lots of binaries, changelog info, etc. without filling up the blockchain). For example: +2. Store a link to a file that contains all information in the above format (e.g. if you want to specify lots of + binaries, changelog info, etc. without filling up the blockchain).
For example: ```text https://example.com/testnet-1001-info.json?checksum=sha256:deaaa99fda9407c4dbe1d04bd49bab0cc3c1dd76fa392cd55a9425be074af01e ``` -When `cosmovisor` is triggered to download the new binary, `cosmovisor` will parse the `"binaries"` field, download the new binary with [go-getter](https://github.com/hashicorp/go-getter), and unpack the new binary in the `upgrades/` folder so that it can be run as if it was installed manually. +When `cosmovisor` is triggered to download the new binary, `cosmovisor` will parse the `"binaries"` field, download the +new binary with [go-getter](https://github.com/hashicorp/go-getter), and unpack the new binary in the `upgrades/` +folder so that it can be run as if it was installed manually. -Note that for this mechanism to provide strong security guarantees, all URLs should include a SHA 256/512 checksum. This ensures that no false binary is run, even if someone hacks the server or hijacks the DNS. `go-getter` will always ensure the downloaded file matches the checksum if it is provided. `go-getter` will also handle unpacking archives into directories (in this case the download link should point to a `zip` file of all data in the `bin` directory). +Note that for this mechanism to provide strong security guarantees, all URLs should include a SHA 256/512 checksum. This +ensures that no false binary is run, even if someone hacks the server or hijacks the DNS. `go-getter` will always ensure +the downloaded file matches the checksum if it is provided. `go-getter` will also handle unpacking archives into +directories (in this case the download link should point to a `zip` file of all data in the `bin` directory). To properly create a sha256 checksum on linux, you can use the `sha256sum` utility. For example: @@ -261,7 +387,8 @@ sha256sum ./testdata/repo/zip_directory/autod.zip The result will look something like the following: `29139e1381b8177aec909fab9a75d11381cab5adf7d3af0c05ff1c9c117743a7`. 
-You can also use `sha512sum` if you would prefer to use longer hashes, or `md5sum` if you would prefer to use broken hashes. Whichever you choose, make sure to set the hash algorithm properly in the checksum argument to the URL. +You can also use `sha512sum` if you would prefer to use longer hashes, or `md5sum` if you would prefer to use broken +hashes. Whichever you choose, make sure to set the hash algorithm properly in the checksum argument to the URL. ### Preparing for an Upgrade @@ -297,7 +424,8 @@ INFO Upgrade preparation complete name=v1.0.0 height=1000000 ## Example: SimApp Upgrade -The following instructions provide a demonstration of `cosmovisor` using the simulation application (`simapp`) shipped with the Cosmos SDK's source code. The following commands are to be run from within the `cosmos-sdk` repository. +The following instructions provide a demonstration of `cosmovisor` using the simulation application (`simapp`) shipped +with the Cosmos SDK's source code. The following commands are to be run from within the `cosmos-sdk` repository. ### Chain Setup @@ -376,7 +504,9 @@ Update app to the latest version (e.g. v0.50.0). :::note -Migration plans are defined using the `x/upgrade` module and described in [In-Place Store Migrations](https://github.com/cosmos/cosmos-sdk/blob/main/docs/learn/advanced/15-upgrade.md). Migrations can perform any deterministic state change. +Migration plans are defined using the `x/upgrade` module and described +in [In-Place Store Migrations](https://github.com/cosmos/cosmos-sdk/blob/main/docs/learn/advanced/15-upgrade.md). +Migrations can perform any deterministic state change. The migration plan to upgrade the simapp from v0.47 to v0.50 is defined in `simapp/upgrade.go`. @@ -400,7 +530,8 @@ The migration name must match the one defined in the migration plan. 
cosmovisor add-upgrade v047-to-v050 ./build/simd ``` -Open a new terminal window and submit an upgrade proposal along with a deposit and a vote (these commands must be run within 20 seconds of each other): +Open a new terminal window and submit an upgrade proposal along with a deposit and a vote (these commands must be run +within 20 seconds of each other): ```shell ./build/simd tx upgrade software-upgrade v047-to-v050 --title upgrade --summary upgrade --upgrade-height 200 --upgrade-info "{}" --no-validate --from validator --yes @@ -408,4 +539,5 @@ Open a new terminal window and submit an upgrade proposal along with a deposit a ./build/simd tx gov vote 1 yes --from validator --yes ``` -The upgrade will occur automatically at height 200. Note: you may need to change the upgrade height in the snippet above if your test play takes more time. +The upgrade will occur automatically at height 200. Note: you may need to change the upgrade height in the snippet above +if your test play takes more time. diff --git a/tools/cosmovisor/RELEASE_NOTES.md b/tools/cosmovisor/RELEASE_NOTES.md deleted file mode 100644 index e840af68b7cb..000000000000 --- a/tools/cosmovisor/RELEASE_NOTES.md +++ /dev/null @@ -1,9 +0,0 @@ -# Cosmovisor v1.5.0 Release Notes - -See the [CHANGELOG](https://github.com/cosmos/cosmos-sdk/blob/tools/cosmovisor/v1.5.0/tools/cosmovisor/CHANGELOG.md) for details on the changes in v1.5.0. 
- -## Installation instructions - -```go -go install cosmossdk.io/tools/cosmovisor/cmd/cosmovisor@latest -``` diff --git a/tools/cosmovisor/args.go b/tools/cosmovisor/args.go index f5418a5b2cbf..3292a85d413d 100644 --- a/tools/cosmovisor/args.go +++ b/tools/cosmovisor/args.go @@ -1,7 +1,7 @@ package cosmovisor import ( - "encoding/json" + "bytes" "errors" "fmt" "io" @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/cosmos/gogoproto/jsonpb" "github.com/pelletier/go-toml/v2" "github.com/spf13/viper" @@ -35,6 +36,7 @@ const ( EnvInterval = "DAEMON_POLL_INTERVAL" EnvPreupgradeMaxRetries = "DAEMON_PREUPGRADE_MAX_RETRIES" EnvGRPCAddress = "DAEMON_GRPC_ADDRESS" + EnvRPCAddress = "DAEMON_RPC_ADDRESS" EnvDisableLogs = "COSMOVISOR_DISABLE_LOGS" EnvColorLogs = "COSMOVISOR_COLOR_LOGS" EnvTimeFormatLogs = "COSMOVISOR_TIMEFORMAT_LOGS" @@ -65,15 +67,18 @@ type Config struct { UnsafeSkipBackup bool `toml:"unsafe_skip_backup" mapstructure:"unsafe_skip_backup" default:"false"` DataBackupPath string `toml:"daemon_data_backup_dir" mapstructure:"daemon_data_backup_dir"` PreUpgradeMaxRetries int `toml:"daemon_preupgrade_max_retries" mapstructure:"daemon_preupgrade_max_retries" default:"0"` + RPCAddress string `toml:"daemon_rpc_address" mapstructure:"daemon_rpc_address" default:"http://localhost:26657"` GRPCAddress string `toml:"daemon_grpc_address" mapstructure:"daemon_grpc_address"` DisableLogs bool `toml:"cosmovisor_disable_logs" mapstructure:"cosmovisor_disable_logs" default:"false"` ColorLogs bool `toml:"cosmovisor_color_logs" mapstructure:"cosmovisor_color_logs" default:"true"` TimeFormatLogs string `toml:"cosmovisor_timeformat_logs" mapstructure:"cosmovisor_timeformat_logs" default:"kitchen"` CustomPreUpgrade string `toml:"cosmovisor_custom_preupgrade" mapstructure:"cosmovisor_custom_preupgrade" default:""` DisableRecase bool `toml:"cosmovisor_disable_recase" mapstructure:"cosmovisor_disable_recase" default:"false"` - - // currently running upgrade - currentUpgrade 
upgradetypes.Plan + // MaxRestartRetries is the maximum number of times + // to restart the binary after spurious shutdowns, + // (those not due to valid upgrades or halt heights changes). + // A value of 0 means no limit. + MaxRestartRetries int `toml:"max_restart_retries" mapstructure:"max_restart_retries" default:"5"` } // Root returns the root directory where all info lives @@ -107,9 +112,14 @@ func (cfg *Config) BaseUpgradeDir() string { return filepath.Join(cfg.Root(), upgradesDir) } +// UpgradeInfoDir is the directory where upgrade-info.json is expected to be created by `x/upgrade/keeper`. +func (cfg *Config) UpgradeInfoDir() string { + return filepath.Join(cfg.Home, "data") +} + // UpgradeInfoFilePath is the expected upgrade-info filename created by `x/upgrade/keeper`. func (cfg *Config) UpgradeInfoFilePath() string { - return filepath.Join(cfg.Home, "data", upgradetypes.UpgradeInfoFilename) + return filepath.Join(cfg.UpgradeInfoDir(), upgradetypes.UpgradeInfoFilename) } // UpgradeInfoBatchFilePath is the same as UpgradeInfoFilePath but with a batch suffix. @@ -297,6 +307,11 @@ func GetConfigFromEnv(skipValidate bool) (*Config, error) { cfg.GRPCAddress = "localhost:9090" } + cfg.RPCAddress = os.Getenv(EnvRPCAddress) + if cfg.RPCAddress == "" { + cfg.RPCAddress = "http://localhost:26657" + } + if !skipValidate { errs = append(errs, cfg.validate()...) 
if len(errs) > 0 { @@ -408,7 +423,6 @@ func (cfg *Config) SetCurrentUpgrade(u upgradetypes.Plan) (rerr error) { return fmt.Errorf("creating current symlink: %w", err) } - cfg.currentUpgrade = u f, err := os.Create(filepath.Join(cfg.Root(), upgrade, upgradetypes.UpgradeInfoFilename)) if err != nil { return err @@ -420,39 +434,88 @@ func (cfg *Config) SetCurrentUpgrade(u upgradetypes.Plan) (rerr error) { } }() - bz, err := json.Marshal(u) + out, err := (&jsonpb.Marshaler{}).MarshalToString(&u) if err != nil { return err } - _, err = f.Write(bz) + _, err = f.Write([]byte(out)) return err } -// UpgradeInfo returns the current upgrade info -func (cfg *Config) UpgradeInfo() (upgradetypes.Plan, error) { - if cfg.currentUpgrade.Name != "" { - return cfg.currentUpgrade, nil - } - - filename := filepath.Join(cfg.Root(), currentLink, upgradetypes.UpgradeInfoFilename) +// PendingUpgradeInfo returns pending upgrade info written by x/upgrade. +func (cfg *Config) PendingUpgradeInfo() (*upgradetypes.Plan, error) { + filename := cfg.UpgradeInfoFilePath() _, err := os.Lstat(filename) - var u upgradetypes.Plan var bz []byte if err != nil { // no current directory - goto returnError + return nil, fmt.Errorf("failed to read %q: %w", filename, err) } if bz, err = os.ReadFile(filename); err != nil { - goto returnError + return nil, fmt.Errorf("failed to read %q: %w", filename, err) + } + return cfg.ParseUpgradeInfo(bz) +} + +// CurrentBinaryUpgradeInfo returns the upgrade info for the current active binary, if any. 
+func (cfg *Config) CurrentBinaryUpgradeInfo() (*upgradetypes.Plan, error) { + filename := filepath.Join(cfg.Root(), currentLink, upgradetypes.UpgradeInfoFilename) + bz, err := os.ReadFile(filename) + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("failed to read %q: %w", filename, err) } - if err = json.Unmarshal(bz, &u); err != nil { - goto returnError + return cfg.ParseUpgradeInfo(bz) +} + +// CurrentBinaryUpgradeName returns the upgrade info for the current active binary, if any. +func (cfg *Config) CurrentBinaryUpgradeName() string { + upgradeInfo, err := cfg.CurrentBinaryUpgradeInfo() + if err != nil { + return "" } - cfg.currentUpgrade = u - return cfg.currentUpgrade, nil + if upgradeInfo == nil { + return "" + } + return upgradeInfo.Name +} + +// ParseUpgradeInfo parses the upgrade info from the given byte slice. +func (cfg *Config) ParseUpgradeInfo(bz []byte) (*upgradetypes.Plan, error) { + var upgradePlan upgradetypes.Plan + if err := jsonpb.Unmarshal(bytes.NewReader(bz), &upgradePlan); err != nil { + return nil, fmt.Errorf("error unmarshalling upgrade info: %w", err) + } + if err := upgradePlan.ValidateBasic(); err != nil { + return nil, fmt.Errorf("upgrade info failed validation upgrade info: %w", err) + } + if !cfg.DisableRecase { + upgradePlan.Name = strings.ToLower(upgradePlan.Name) + } + return &upgradePlan, nil +} + +const LastKnownHeightFile = ".last_known_height" + +func (cfg Config) ReadLastKnownHeight() uint64 { + filename := filepath.Join(cfg.UpgradeInfoDir(), LastKnownHeightFile) + bz, err := os.ReadFile(filename) + if err != nil { + return 0 + } + + h, err := strconv.ParseUint(string(bz), 10, 64) + if err != nil { + return 0 + } + + return h +} -returnError: - cfg.currentUpgrade.Name = "_" - return cfg.currentUpgrade, fmt.Errorf("failed to read %q: %w", filename, err) +func (cfg Config) WriteLastKnownHeight(height uint64) error { + filename := filepath.Join(cfg.UpgradeInfoDir(), 
LastKnownHeightFile) + return os.WriteFile(filename, []byte(strconv.FormatUint(height, 10)), 0o644) } // BooleanOption checks and validate env option @@ -575,7 +638,7 @@ func (cfg Config) DetailString() string { var sb strings.Builder sb.WriteString("Configurable Values:\n") for _, kv := range configEntries { - fmt.Fprintf(&sb, " %s: %s\n", kv.name, kv.value) + _, _ = fmt.Fprintf(&sb, " %s: %s\n", kv.name, kv.value) } sb.WriteString("Derived Values:\n") dnl := 0 @@ -586,7 +649,7 @@ func (cfg Config) DetailString() string { } dFmt := fmt.Sprintf(" %%%ds: %%s\n", dnl) for _, kv := range derivedEntries { - fmt.Fprintf(&sb, dFmt, kv.name, kv.value) + _, _ = fmt.Fprintf(&sb, dFmt, kv.name, kv.value) } return sb.String() } @@ -614,7 +677,9 @@ func (cfg Config) Export() (string, error) { // convert the time value to its format option cfg.TimeFormatLogs = ValueToTimeFormatOption(cfg.TimeFormatLogs) - defer file.Close() + defer func(file *os.File) { + _ = file.Close() + }(file) // write the configuration to the file err = toml.NewEncoder(file).Encode(cfg) diff --git a/tools/cosmovisor/args_test.go b/tools/cosmovisor/args_test.go index 6394bfa03400..ae2c83636de7 100644 --- a/tools/cosmovisor/args_test.go +++ b/tools/cosmovisor/args_test.go @@ -472,6 +472,7 @@ var newConfig = func( UnsafeSkipBackup: skipBackup, DataBackupPath: dataBackupPath, GRPCAddress: grpcAddress, + RPCAddress: "http://localhost:26657", PreUpgradeMaxRetries: preupgradeMaxRetries, DisableLogs: disableLogs, ColorLogs: colorLogs, diff --git a/tools/cosmovisor/cmd/cosmovisor/add_upgrade.go b/tools/cosmovisor/cmd/cosmovisor/add_upgrade.go index 742b53ca5ca4..350d5eb17b48 100644 --- a/tools/cosmovisor/cmd/cosmovisor/add_upgrade.go +++ b/tools/cosmovisor/cmd/cosmovisor/add_upgrade.go @@ -1,7 +1,6 @@ package main import ( - "encoding/json" "fmt" "os" "path" @@ -9,7 +8,7 @@ import ( "github.com/spf13/cobra" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" upgradetypes 
"github.com/cosmos/cosmos-sdk/x/upgrade/types" ) @@ -23,14 +22,14 @@ func NewAddUpgradeCmd() *cobra.Command { RunE: addUpgradeCmd, } - addUpgrade.Flags().Bool(cosmovisor.FlagForce, false, "overwrite existing upgrade binary / upgrade-info.json file") + addUpgrade.Flags().Bool(cosmovisor.FlagForce, false, "overwrite existing upgrade binary and plan with the same name") addUpgrade.Flags().Int64(cosmovisor.FlagUpgradeHeight, 0, "define a height at which to upgrade the binary automatically (without governance proposal)") return addUpgrade } // addUpgrade adds upgrade info to manifest -func addUpgrade(cfg *cosmovisor.Config, force bool, upgradeHeight int64, upgradeName, executablePath, upgradeInfoPath string) error { +func addUpgrade(cfg *cosmovisor.Config, force bool, upgradeHeight int64, upgradeName, executablePath string) (*upgradetypes.Plan, error) { logger := cfg.Logger(os.Stdout) if !cfg.DisableRecase { @@ -39,65 +38,40 @@ func addUpgrade(cfg *cosmovisor.Config, force bool, upgradeHeight int64, upgrade if _, err := os.Stat(executablePath); err != nil { if os.IsNotExist(err) { - return fmt.Errorf("invalid executable path: %w", err) + return nil, fmt.Errorf("invalid executable path: %w", err) } - return fmt.Errorf("failed to load executable path: %w", err) + return nil, fmt.Errorf("failed to load executable path: %w", err) } // create upgrade dir upgradeLocation := cfg.UpgradeDir(upgradeName) if err := os.MkdirAll(path.Join(upgradeLocation, "bin"), 0o755); err != nil { - return fmt.Errorf("failed to create upgrade directory: %w", err) + return nil, fmt.Errorf("failed to create upgrade directory: %w", err) } // copy binary to upgrade dir executableData, err := os.ReadFile(executablePath) if err != nil { - return fmt.Errorf("failed to read binary: %w", err) + return nil, fmt.Errorf("failed to read binary: %w", err) } if err := saveOrAbort(cfg.UpgradeBin(upgradeName), executableData, force); err != nil { - return err + return nil, err } logger.Info(fmt.Sprintf("Using %s 
for %s upgrade", executablePath, upgradeName)) logger.Info(fmt.Sprintf("Upgrade binary located at %s", cfg.UpgradeBin(upgradeName))) + var plan *upgradetypes.Plan if upgradeHeight > 0 { - plan := upgradetypes.Plan{Name: upgradeName, Height: upgradeHeight} - if err := plan.ValidateBasic(); err != nil { - panic(fmt.Errorf("something is wrong with cosmovisor: %w", err)) - } - - // create upgrade-info.json file - planData, err := json.Marshal(plan) - if err != nil { - return fmt.Errorf("failed to marshal upgrade plan: %w", err) - } - - if err := saveOrAbort(upgradeInfoPath, planData, force); err != nil { - return err + plan = &upgradetypes.Plan{ + Name: upgradeName, + Height: upgradeHeight, } - - logger.Info(fmt.Sprintf("%s created, %s upgrade binary will switch at height %d", upgradeInfoPath, upgradeName, upgradeHeight)) - } - - return nil -} - -// GetConfig returns a Config using passed-in flag -func getConfigFromCmd(cmd *cobra.Command) (*cosmovisor.Config, error) { - configPath, err := cmd.Flags().GetString(cosmovisor.FlagCosmovisorConfig) - if err != nil { - return nil, fmt.Errorf("failed to get config flag: %w", err) } - cfg, err := cosmovisor.GetConfigFromFile(configPath) - if err != nil { - return nil, err - } - return cfg, nil + return plan, nil } // addUpgradeCmd parses input flags and adds upgrade info to manifest @@ -119,7 +93,14 @@ func addUpgradeCmd(cmd *cobra.Command, args []string) error { return fmt.Errorf("failed to get upgrade-height flag: %w", err) } - return addUpgrade(cfg, force, upgradeHeight, upgradeName, executablePath, cfg.UpgradeInfoFilePath()) + plan, err := addUpgrade(cfg, force, upgradeHeight, upgradeName, executablePath) + if err != nil { + return err + } + if plan == nil { + return nil // No plan to add + } + return cfg.AddManualUpgrades(force, plan) } // saveOrAbort saves data to path or aborts if file exists and force is false diff --git a/tools/cosmovisor/cmd/cosmovisor/batch_upgrade.go 
b/tools/cosmovisor/cmd/cosmovisor/batch_upgrade.go index a66f65b406ab..99dd6ffa8c15 100644 --- a/tools/cosmovisor/cmd/cosmovisor/batch_upgrade.go +++ b/tools/cosmovisor/cmd/cosmovisor/batch_upgrade.go @@ -2,7 +2,6 @@ package main import ( "encoding/csv" - "encoding/json" "fmt" "os" "path/filepath" @@ -11,7 +10,9 @@ import ( "github.com/spf13/cobra" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" + + upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" ) func NewBatchAddUpgradeCmd() *cobra.Command { @@ -49,7 +50,7 @@ cosmovisor add-batch-upgrade --upgrade-file /path/to/batch_upgrade.csv`, // addBatchUpgrade takes in multiple specified upgrades and creates a single // batch upgrade file out of them -func addBatchUpgrade(cmd *cobra.Command, args []string) error { +func addBatchUpgrade(cmd *cobra.Command, _ []string) error { cfg, err := getConfigFromCmd(cmd) if err != nil { return err @@ -71,7 +72,7 @@ func addBatchUpgrade(cmd *cobra.Command, args []string) error { // processUpgradeList takes in a list of upgrades and creates a batch upgrade file func processUpgradeList(cfg *cosmovisor.Config, upgradeList [][]string) error { - upgradeInfoPaths := []string{} + var upgradePlans []*upgradetypes.Plan for i, upgrade := range upgradeList { if len(upgrade) != 3 { return fmt.Errorf("argument at position %d (%s) is invalid", i, upgrade) @@ -82,43 +83,18 @@ func processUpgradeList(cfg *cosmovisor.Config, upgradeList [][]string) error { if err != nil { return fmt.Errorf("upgrade height at position %d (%s) is invalid", i, upgrade[2]) } - upgradeInfoPath := cfg.UpgradeInfoFilePath() + "." 
+ upgradeName - upgradeInfoPaths = append(upgradeInfoPaths, upgradeInfoPath) - if err := addUpgrade(cfg, true, upgradeHeight, upgradeName, upgradePath, upgradeInfoPath); err != nil { - return err - } - } - var allData []json.RawMessage - for _, uip := range upgradeInfoPaths { - fileData, err := os.ReadFile(uip) + // we use the same logic as the add-upgrade command here, appending to any existing manual upgrade data + plan, err := addUpgrade(cfg, true, upgradeHeight, upgradeName, upgradePath) if err != nil { - return fmt.Errorf("error reading file %s: %w", uip, err) + return err } - - // Verify it's valid JSON - var jsonData json.RawMessage - if err := json.Unmarshal(fileData, &jsonData); err != nil { - return fmt.Errorf("error parsing JSON from file %s: %w", uip, err) + if plan != nil { + upgradePlans = append(upgradePlans, plan) } - - // Add to our slice - allData = append(allData, jsonData) } - // Marshal the combined data - batchData, err := json.MarshalIndent(allData, "", " ") - if err != nil { - return fmt.Errorf("error marshaling combined JSON: %w", err) - } - - // Write to output file - err = os.WriteFile(cfg.UpgradeInfoBatchFilePath(), batchData, 0o600) - if err != nil { - return fmt.Errorf("error writing combined JSON to file: %w", err) - } - - return nil + return cfg.AddManualUpgrades(true, upgradePlans...) 
} // processUpgradeFile takes in a CSV batch upgrade file, parses it and calls processUpgradeList @@ -127,7 +103,9 @@ func processUpgradeFile(cfg *cosmovisor.Config, upgradeFile string) error { if err != nil { return fmt.Errorf("error opening upgrade CSV file %s: %w", upgradeFile, err) } - defer file.Close() + defer func(file *os.File) { + _ = file.Close() + }(file) r := csv.NewReader(file) r.FieldsPerRecord = 3 diff --git a/tools/cosmovisor/cmd/cosmovisor/config.go b/tools/cosmovisor/cmd/cosmovisor/config.go index d651e5fb3839..cbdeaa2b661f 100644 --- a/tools/cosmovisor/cmd/cosmovisor/config.go +++ b/tools/cosmovisor/cmd/cosmovisor/config.go @@ -1,9 +1,11 @@ package main import ( + "fmt" + "github.com/spf13/cobra" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" ) var configCmd = &cobra.Command{ @@ -13,7 +15,7 @@ var configCmd = &cobra.Command{ otherwise it will display the config from the environment variables.`, SilenceUsage: true, RunE: func(cmd *cobra.Command, args []string) error { - cfg, err := cosmovisor.GetConfigFromFile(cmd.Flag(cosmovisor.FlagCosmovisorConfig).Value.String()) + cfg, err := getConfigFromCmd(cmd) if err != nil { return err } @@ -22,3 +24,17 @@ otherwise it will display the config from the environment variables.`, return nil }, } + +// getConfigFromCmd retrieves the cosmovisor configuration from the command flags. 
+func getConfigFromCmd(cmd *cobra.Command) (*cosmovisor.Config, error) { + configPath, err := cmd.Flags().GetString(cosmovisor.FlagCosmovisorConfig) + if err != nil { + return nil, fmt.Errorf("failed to get config flag: %w", err) + } + + cfg, err := cosmovisor.GetConfigFromFile(configPath) + if err != nil { + return nil, err + } + return cfg, nil +} diff --git a/tools/cosmovisor/cmd/cosmovisor/help.go b/tools/cosmovisor/cmd/cosmovisor/help.go index 6d6df59cef7b..10d8f29b65f9 100644 --- a/tools/cosmovisor/cmd/cosmovisor/help.go +++ b/tools/cosmovisor/cmd/cosmovisor/help.go @@ -3,7 +3,7 @@ package main import ( "fmt" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" ) // GetHelpText creates the help text multi-line string. diff --git a/tools/cosmovisor/cmd/cosmovisor/help_test.go b/tools/cosmovisor/cmd/cosmovisor/help_test.go index 48d7aa148436..b73b48f04c19 100644 --- a/tools/cosmovisor/cmd/cosmovisor/help_test.go +++ b/tools/cosmovisor/cmd/cosmovisor/help_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" ) func TestGetHelpText(t *testing.T) { diff --git a/tools/cosmovisor/cmd/cosmovisor/init.go b/tools/cosmovisor/cmd/cosmovisor/init.go index 56717ec28a6f..06f6408aa7a7 100644 --- a/tools/cosmovisor/cmd/cosmovisor/init.go +++ b/tools/cosmovisor/cmd/cosmovisor/init.go @@ -10,7 +10,7 @@ import ( "github.com/spf13/cobra" "cosmossdk.io/log" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" "github.com/cosmos/cosmos-sdk/x/upgrade/plan" ) @@ -20,7 +20,9 @@ func NewInitCmd() *cobra.Command { Use: "init ", Short: "Initialize a cosmovisor daemon home directory.", Long: `Initialize a cosmovisor daemon home directory with the provided executable. -Configuration file is initialized at the default path (<-home->/cosmovisor/config.toml).`, +Configuration file is initialized at the default path (<-home->/cosmovisor/config.toml). 
+ +The DAEMON_HOME and DAEMON_NAME environment variables must be set for this command to work.`, Args: cobra.ExactArgs(1), SilenceUsage: true, RunE: func(cmd *cobra.Command, args []string) error { diff --git a/tools/cosmovisor/cmd/cosmovisor/init_test.go b/tools/cosmovisor/cmd/cosmovisor/init_test.go index 35a73348cc39..6862bc076fe4 100644 --- a/tools/cosmovisor/cmd/cosmovisor/init_test.go +++ b/tools/cosmovisor/cmd/cosmovisor/init_test.go @@ -16,7 +16,7 @@ import ( "github.com/stretchr/testify/suite" "cosmossdk.io/log" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" ) const ( diff --git a/tools/cosmovisor/cmd/cosmovisor/main.go b/tools/cosmovisor/cmd/cosmovisor/main.go index 294dd66f7165..d5d54a7c7203 100644 --- a/tools/cosmovisor/cmd/cosmovisor/main.go +++ b/tools/cosmovisor/cmd/cosmovisor/main.go @@ -1,12 +1,11 @@ package main import ( - "context" "os" ) func main() { - if err := NewRootCmd().ExecuteContext(context.Background()); err != nil { + if err := NewRootCmd().Execute(); err != nil { os.Exit(1) } } diff --git a/tools/cosmovisor/cmd/cosmovisor/mockchain_test.go b/tools/cosmovisor/cmd/cosmovisor/mockchain_test.go new file mode 100644 index 000000000000..78e610fa3fd0 --- /dev/null +++ b/tools/cosmovisor/cmd/cosmovisor/mockchain_test.go @@ -0,0 +1,244 @@ +package main + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "cosmossdk.io/tools/cosmovisor/v2" + "cosmossdk.io/tools/cosmovisor/v2/internal" +) + +var mockNodeBinPath string + +func TestMain(m *testing.M) { + // build mock_node binary for tests + if err := buildMockNode(); err != nil { + fmt.Printf("Failed to build mock_node: %v\n", err) + os.Exit(1) + } + + // run tests + os.Exit(m.Run()) +} + +func buildMockNode() error { + wd, err := os.Getwd() + if err != nil { + return err + } + + // create build directory if it doesn't exist + buildDir := filepath.Join(wd, "build") + if err := 
os.MkdirAll(buildDir, 0o755); err != nil { + return err + } + + mockNodeDir := filepath.Join(wd, "..", "mock_node") + binPath := filepath.Join(buildDir, "mock_node") + + // store the absolute path for use in mockNodeWrapper + mockNodeBinPath, err = filepath.Abs(binPath) + if err != nil { + return err + } + + cmd := exec.Command("go", "build", "-o", binPath, ".") + cmd.Dir = mockNodeDir + return cmd.Run() +} + +type MockChainSetup struct { + Genesis string + GovUpgrades map[string]string + ManualUpgrades map[string]string // to be added with the add-upgrade command + Config *cosmovisor.Config +} + +func mockNodeWrapper(args string) string { + return fmt.Sprintf( + `#!/usr/bin/env bash +set -e + +echo "$@" +exec %s %s "$@" +`, mockNodeBinPath, args) +} + +func (m MockChainSetup) Setup(t *testing.T) (string, string) { + t.Helper() + dir, err := os.MkdirTemp("", "mockchain") + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, os.RemoveAll(dir)) + }) + // create data directory + require.NoError(t, os.MkdirAll(filepath.Join(dir, "data"), 0o755)) + cosmovisorDir := filepath.Join(dir, "cosmovisor") + // create genesis wrapper + genDir := filepath.Join(cosmovisorDir, "genesis", "bin") + require.NoError(t, os.MkdirAll(genDir, 0o755)) + mockdPath := filepath.Join(genDir, "mockd") + require.NoError(t, + os.WriteFile(mockdPath, []byte(mockNodeWrapper(m.Genesis)), 0o755), + ) + // create gov upgrade wrappers + for name, args := range m.GovUpgrades { + upgradeDir := filepath.Join(cosmovisorDir, "upgrades", name, "bin") + require.NoError(t, os.MkdirAll(upgradeDir, 0o755)) + require.NoError(t, + os.WriteFile(filepath.Join(upgradeDir, "mockd"), + []byte(mockNodeWrapper(args)), 0o755), + ) + } + // create manual upgrade wrappers + manualUpgradeDir := filepath.Join(dir, "manual-upgrades") + require.NoError(t, os.MkdirAll(manualUpgradeDir, 0o755)) + for name, args := range m.ManualUpgrades { + filename := filepath.Join(manualUpgradeDir, name) + require.NoError(t, 
os.WriteFile(filename, []byte(mockNodeWrapper(args)), 0o755)) + } + + // update config and save it + if m.Config == nil { + m.Config = &cosmovisor.Config{} + } + m.Config.Name = "mockd" + m.Config.Home = dir + m.Config.DataBackupPath = dir + cfgFile, err := m.Config.Export() + require.NoError(t, err) + t.Logf("Cosmovisor config: %s", cfgFile) + + return dir, cfgFile +} + +func TestMockChain(t *testing.T) { + pollInterval := time.Second + cfg := &cosmovisor.Config{ + PollInterval: pollInterval, + RestartAfterUpgrade: true, + RPCAddress: "http://localhost:26657", + } + mockchainDir, cfgFile := MockChainSetup{ + Genesis: "--block-time 1s --upgrade-plan '{\"name\":\"gov1\",\"height\":30}'", + GovUpgrades: map[string]string{ + "gov1": "--block-time 1s --upgrade-plan '{\"name\":\"gov2\",\"height\":50}'", + "gov2": "--block-time 1s --upgrade-plan '{\"name\":\"gov3\",\"height\":70}'", + }, + ManualUpgrades: map[string]string{ + "manual10": "--block-time 1s --upgrade-plan '{\"name\":\"gov1\",\"height\":30}'", + "manual20": `--block-time 1s --upgrade-plan '{"name":"gov1","height":30}' --block-url "/v1/block" --shutdown-on-upgrade`, + "manual40": "--block-time 1s --upgrade-plan '{\"name\":\"gov2\",\"height\":50}' --upgrade-info-encoding-json", + }, + Config: cfg, + }.Setup(t) + + addManualUpgrade1 := func() { + time.Sleep(pollInterval * 3) // wait a bit + rootCmd := NewRootCmd() + rootCmd.SetArgs([]string{ + "add-upgrade", + "manual20", + filepath.Join(mockchainDir, "manual-upgrades", "manual20"), + "--upgrade-height", + "20", + "--cosmovisor-config", + cfgFile, + }) + rootCmd.SetOut(os.Stdout) + rootCmd.SetErr(os.Stderr) + require.NoError(t, rootCmd.Execute()) + } + + addManualUpgrade2 := func() { + batchInfo := fmt.Sprintf(`manual10:%s:10,manual40:%s:40`, + filepath.Join(mockchainDir, "manual-upgrades", "manual10"), + filepath.Join(mockchainDir, "manual-upgrades", "manual40"), + ) + time.Sleep(2 * time.Second) // wait for startup + rootCmd := NewRootCmd() + 
rootCmd.SetArgs([]string{ + "add-batch-upgrade", + "--upgrade-list", + batchInfo, + "--cosmovisor-config", + cfgFile, + }) + rootCmd.SetOut(os.Stdout) + rootCmd.SetErr(os.Stderr) + require.NoError(t, rootCmd.Execute()) + } + + execCtx, cancel := context.WithCancel(context.Background()) + defer cancel() // always cancel the context to make sure the sub-process shuts down + + var callbackCount int + testCallback := func() { + callbackCount++ + t.Logf("Test callback called for the %dth time", callbackCount) + currentBin, err := cfg.CurrentBin() + require.NoError(t, err) + switch callbackCount { + case 1: + // first startup + // we should be starting with the genesis binary + require.Contains(t, currentBin, "genesis") + // add one manual upgrade + go addManualUpgrade1() + case 2: + // first restart once we've add the first manual upgrade + // ensure that the binary is still the genesis binary + require.Contains(t, currentBin, "genesis") + // add a second batch of manual upgrades + go addManualUpgrade2() + case 3: + // next restart after adding more manual upgrades + // ensure that the binary is still the genesis binary + require.Contains(t, currentBin, "genesis") + case 4: + // should have upgraded to manual10 + require.Contains(t, currentBin, "manual10") + case 5: + // should have upgraded to manual20 + require.Contains(t, currentBin, "manual20") + case 6: + // should have upgraded to gov1 + require.Contains(t, currentBin, "gov1") + case 7: + // should have upgraded to manual40 + require.Contains(t, currentBin, "manual40") + case 8: + // should have upgraded to gov2 + require.Contains(t, currentBin, "gov2") + // this is the end of our test so we shutdown after a bit here + go func() { + time.Sleep(pollInterval * 2) + cancel() + }() + default: + t.Errorf("Unexpected callback count: %d", callbackCount) + } + } + var wg sync.WaitGroup + wg.Add(1) + go func() { + rootCmd := NewRootCmd() + rootCmd.SetArgs([]string{"run", "--home", mockchainDir, "--cosmovisor-config", 
cfgFile}) + rootCmd.SetOut(os.Stdout) + rootCmd.SetErr(os.Stderr) + execCtx = internal.WithTestCallback(execCtx, testCallback) + require.NoError(t, rootCmd.ExecuteContext(execCtx)) + wg.Done() + }() + wg.Wait() + + require.Equal(t, 8, callbackCount) +} diff --git a/tools/cosmovisor/cmd/cosmovisor/prepare_upgrade.go b/tools/cosmovisor/cmd/cosmovisor/prepare_upgrade.go index b138fb271a50..00f85f06a387 100644 --- a/tools/cosmovisor/cmd/cosmovisor/prepare_upgrade.go +++ b/tools/cosmovisor/cmd/cosmovisor/prepare_upgrade.go @@ -12,7 +12,7 @@ import ( "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2/internal" "github.com/cosmos/cosmos-sdk/x/upgrade/plan" upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" @@ -34,14 +34,9 @@ gRPC must be enabled on the node for this command to work.`, } func prepareUpgradeHandler(cmd *cobra.Command, _ []string) error { - configPath, err := cmd.Flags().GetString(cosmovisor.FlagCosmovisorConfig) + cfg, err := getConfigFromCmd(cmd) if err != nil { - return fmt.Errorf("failed to get config flag: %w", err) - } - - cfg, err := cosmovisor.GetConfigFromFile(configPath) - if err != nil { - return fmt.Errorf("failed to get config: %w", err) + return err } logger := cfg.Logger(cmd.OutOrStdout()) @@ -66,7 +61,7 @@ func prepareUpgradeHandler(cmd *cobra.Command, _ []string) error { return fmt.Errorf("failed to parse upgrade info: %w", err) } - binaryURL, err := cosmovisor.GetBinaryURL(upgradeInfoParsed.Binaries) + binaryURL, err := internal.GetBinaryURL(upgradeInfoParsed.Binaries) if err != nil { return fmt.Errorf("binary URL not found in upgrade plan. 
Cannot prepare for upgrade: %w", err) } diff --git a/tools/cosmovisor/cmd/cosmovisor/root.go b/tools/cosmovisor/cmd/cosmovisor/root.go index 92b1af2e11cf..3481af5ce139 100644 --- a/tools/cosmovisor/cmd/cosmovisor/root.go +++ b/tools/cosmovisor/cmd/cosmovisor/root.go @@ -3,7 +3,7 @@ package main import ( "github.com/spf13/cobra" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" ) func NewRootCmd() *cobra.Command { @@ -19,7 +19,7 @@ func NewRootCmd() *cobra.Command { configCmd, NewVersionCmd(), NewAddUpgradeCmd(), - NewShowUpgradeInfoCmd(), + NewShowManualUpgradesCmd(), NewBatchAddUpgradeCmd(), NewPrepareUpgradeCmd(), ) diff --git a/tools/cosmovisor/cmd/cosmovisor/run.go b/tools/cosmovisor/cmd/cosmovisor/run.go index 4da2ac24d846..a7850881539a 100644 --- a/tools/cosmovisor/cmd/cosmovisor/run.go +++ b/tools/cosmovisor/cmd/cosmovisor/run.go @@ -1,13 +1,18 @@ package main import ( + "context" "fmt" "os" + "os/signal" "strings" + "syscall" + "time" "github.com/spf13/cobra" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" + "cosmossdk.io/tools/cosmovisor/v2/internal" ) var runCmd = &cobra.Command{ @@ -18,23 +23,35 @@ Provide '--cosmovisor-config' file path in command args or set env variables to `, SilenceUsage: true, DisableFlagParsing: true, - RunE: func(_ *cobra.Command, args []string) error { + RunE: func(cmd *cobra.Command, args []string) error { cfgPath, args, err := parseCosmovisorConfig(args) if err != nil { return fmt.Errorf("failed to parse cosmovisor config: %w", err) } - return run(cfgPath, args) + return run(cmd.Context(), cfgPath, args) }, } // run runs the configured program with the given args and monitors it for upgrades. 
-func run(cfgPath string, args []string, options ...RunOption) error { +func run(ctx context.Context, cfgPath string, args []string, options ...RunOption) error { cfg, err := cosmovisor.GetConfigFromFile(cfgPath) if err != nil { return err } + ctx, _ = signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM) + shutdownChan := make(chan os.Signal, 1) + signal.Notify(shutdownChan, syscall.SIGINT, syscall.SIGTERM) + // ensure we shutdown if the process is killed and context cancellation doesn't cause an exit on its own + go func() { + <-shutdownChan + fmt.Println("Received shutdown signal, exiting gracefully...") + time.Sleep(cfg.ShutdownGrace) + fmt.Println("Forcing process shutdown") + os.Exit(0) + }() + runCfg := DefaultRunConfig for _, opt := range options { opt(&runCfg) @@ -47,23 +64,8 @@ func run(cfgPath string, args []string, options ...RunOption) error { } logger := cfg.Logger(runCfg.StdOut) - launcher, err := cosmovisor.NewLauncher(logger, cfg) - if err != nil { - return err - } - - doUpgrade, err := launcher.Run(args, runCfg.StdIn, runCfg.StdOut, runCfg.StdErr) - // if RestartAfterUpgrade, we launch after a successful upgrade (given that condition launcher.Run returns nil) - for cfg.RestartAfterUpgrade && err == nil && doUpgrade { - logger.Info("upgrade detected, relaunching", "app", cfg.Name) - doUpgrade, err = launcher.Run(args, runCfg.StdIn, runCfg.StdOut, runCfg.StdErr) - } - - if doUpgrade && err == nil { - logger.Info("upgrade detected, DAEMON_RESTART_AFTER_UPGRADE is off. 
Verify new upgrade and start cosmovisor again.") - } - - return err + runner := internal.NewRunner(cfg, runCfg, logger) + return runner.Start(ctx, args) } func parseCosmovisorConfig(args []string) (string, []string, error) { diff --git a/tools/cosmovisor/cmd/cosmovisor/run_config.go b/tools/cosmovisor/cmd/cosmovisor/run_config.go index f025b06eb619..91a1f60bfaf6 100644 --- a/tools/cosmovisor/cmd/cosmovisor/run_config.go +++ b/tools/cosmovisor/cmd/cosmovisor/run_config.go @@ -3,6 +3,8 @@ package main import ( "io" "os" + + "cosmossdk.io/tools/cosmovisor/v2/internal" ) // DefaultRunConfig defintes a default RunConfig that writes to os.Stdout and os.Stderr @@ -12,12 +14,7 @@ var DefaultRunConfig = RunConfig{ StdErr: os.Stderr, } -// RunConfig defines the configuration for running a command -type RunConfig struct { - StdIn io.Reader - StdOut io.Writer - StdErr io.Writer -} +type RunConfig = internal.RunConfig type RunOption func(*RunConfig) diff --git a/tools/cosmovisor/cmd/cosmovisor/show_upgrade.go b/tools/cosmovisor/cmd/cosmovisor/show_upgrade.go index aa37fa36d4e7..b84d3d77ba8e 100644 --- a/tools/cosmovisor/cmd/cosmovisor/show_upgrade.go +++ b/tools/cosmovisor/cmd/cosmovisor/show_upgrade.go @@ -1,41 +1,35 @@ package main import ( + "encoding/json" "fmt" - "os" "github.com/spf13/cobra" - - "cosmossdk.io/tools/cosmovisor" ) -func NewShowUpgradeInfoCmd() *cobra.Command { +func NewShowManualUpgradesCmd() *cobra.Command { return &cobra.Command{ - Use: "show-upgrade-info", - Short: "Display current upgrade-info.json from data directory", + Use: "show-manual-upgrades", + Short: "Display planned manual upgrades", SilenceUsage: false, Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { - configPath, err := cmd.Flags().GetString(cosmovisor.FlagCosmovisorConfig) + cfg, err := getConfigFromCmd(cmd) if err != nil { - return fmt.Errorf("failed to get config flag: %w", err) + return err } - cfg, err := cosmovisor.GetConfigFromFile(configPath) + data, err := 
cfg.ReadManualUpgrades() if err != nil { - return err + return fmt.Errorf("failed to read upgrade-info.json.batch: %w", err) } - data, err := os.ReadFile(cfg.UpgradeInfoFilePath()) + bz, err := json.MarshalIndent(data, "", " ") if err != nil { - if os.IsNotExist(err) { - cmd.Printf("No upgrade info found at %s\n", cfg.UpgradeInfoFilePath()) - return nil - } - return fmt.Errorf("failed to read upgrade-info.json: %w", err) + return fmt.Errorf("failed to marshal manual upgrade info as json: %w", err) } - cmd.Println(string(data)) + cmd.Println(string(bz)) return nil }, } diff --git a/tools/cosmovisor/cmd/cosmovisor/version.go b/tools/cosmovisor/cmd/cosmovisor/version.go index a51b376355af..501aeb469562 100644 --- a/tools/cosmovisor/cmd/cosmovisor/version.go +++ b/tools/cosmovisor/cmd/cosmovisor/version.go @@ -8,7 +8,7 @@ import ( "github.com/spf13/cobra" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" ) func NewVersionCmd() *cobra.Command { @@ -47,7 +47,7 @@ func printVersion(cmd *cobra.Command, args []string, noAppVersion bool) error { return nil } - if err := run("", append([]string{"version"}, args...)); err != nil { + if err := run(cmd.Context(), "", append([]string{"version"}, args...)); err != nil { return fmt.Errorf("failed to run version command: %w", err) } @@ -62,6 +62,7 @@ func printVersionJSON(cmd *cobra.Command, args []string, noAppVersion bool) erro buf := new(strings.Builder) if err := run( + cmd.Context(), "", []string{"version", "--long", "--output", "json"}, StdOutRunOption(buf), diff --git a/tools/cosmovisor/cmd/mock_node/main.go b/tools/cosmovisor/cmd/mock_node/main.go new file mode 100644 index 000000000000..b183e2e98076 --- /dev/null +++ b/tools/cosmovisor/cmd/mock_node/main.go @@ -0,0 +1,224 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + "os" + "os/signal" + "path" + "strconv" + "syscall" + "time" + + "github.com/cosmos/gogoproto/jsonpb" + "github.com/spf13/cobra" + + 
"cosmossdk.io/log" + "cosmossdk.io/tools/cosmovisor/v2/internal/watchers" + + "github.com/cosmos/cosmos-sdk/server" + upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" +) + +func main() { + cmd := &cobra.Command{ + Use: "mock_node", + Short: "A mock node for testing cosmovisor.", + Long: `The --halt-interval flag is required and must be specified in order to halt the node. +The --upgrade-plan and --halt-height flags are mutually exclusive. It is an error to specify both. +Based on which flag is specified the node will either exhibit --halt-height before or +x/upgrade upgrade-info.json behavior.`, + } + var blockTime time.Duration + var upgradePlan string + var haltHeight uint64 + var homePath string + var httpAddr string + var blockUrl string + var shutdownDelay time.Duration + var shutdownOnUpgrade bool + var upgradeInfoEncodingJson bool + cmd.Flags().DurationVar(&blockTime, "block-time", 0, "Duration of time between blocks. This is required to simulate a progression of blocks over time.") + cmd.Flags().StringVar(&upgradePlan, "upgrade-plan", "", "upgrade-info.json to create after the halt duration is reached. Either this flag or --halt-height must be specified but not both.") + cmd.Flags().Uint64Var(&haltHeight, server.FlagHaltHeight, 0, "Block height at which to gracefully halt the chain and shutdown the node. E") + cmd.Flags().StringVar(&homePath, "home", "", "Home directory for the mock node. upgrade-info.json will be written to the data sub-directory of this directory. Defaults to the current directory.") + cmd.Flags().StringVar(&httpAddr, "http-addr", ":26657", "HTTP server address to serve block information. Defaults to :26657.") + cmd.Flags().StringVar(&blockUrl, "block-url", "/block", "URL at which the latest block information is served. Defaults to /block.") + cmd.Flags().DurationVar(&shutdownDelay, "shutdown-delay", 0, "Duration to wait before shutting down the node upon receiving a shutdown signal. 
Defaults to 0 (no delay).") + cmd.Flags().BoolVar(&shutdownOnUpgrade, "shutdown-on-upgrade", false, "If true, the node will shutdown immediately after reaching the upgrade height. If false, it will continue running until a shutdown signal is received. Defaults to false.") + cmd.Flags().BoolVar(&upgradeInfoEncodingJson, "upgrade-info-encoding-json", false, "If true, the upgrade-info.json will be encoded using encoding/json instead of jsonpb. This is useful for testing compatibility with different JSON decoders. Defaults to false (uses jsonpb).") + cmd.RunE = func(cmd *cobra.Command, args []string) error { + if upgradePlan == "" && haltHeight == 0 { + return fmt.Errorf("must specify either --upgrade-plan or --halt-height") + } + if blockTime == 0 { + return fmt.Errorf("must specify --block-time") + } + if homePath == "" { + var err error + homePath, err = os.Getwd() // Default to current working directory if not specified + if err != nil { + return fmt.Errorf("unable to determine current working directory: %w", err) + } + } + node := &MockNode{ + height: 0, + blockTime: blockTime, + haltHeight: haltHeight, + homePath: homePath, + httpAddr: httpAddr, + blockUrl: blockUrl, + shutdownDelay: shutdownDelay, + shutdownOnUpgrade: shutdownOnUpgrade, + upgradeInfoEncodingJson: upgradeInfoEncodingJson, + logger: log.NewLogger(os.Stdout), + } + if upgradePlan != "" { + node.upgradePlan = &upgradetypes.Plan{} + err := jsonpb.Unmarshal(bytes.NewBufferString(upgradePlan), node.upgradePlan) + if err != nil { + return fmt.Errorf("unable to parse upgrade plan: %w", err) + } + if err := node.upgradePlan.ValidateBasic(); err != nil { + return fmt.Errorf("invalid upgrade plan: %w", err) + } + } + return node.Run(cmd.Context()) + } + if err := cmd.Execute(); err != nil { + panic(err) + } +} + +type MockNode struct { + height uint64 + blockTime time.Duration + upgradePlan *upgradetypes.Plan + haltHeight uint64 + homePath string + httpAddr string + blockUrl string + logger log.Logger + 
shutdownDelay time.Duration + shutdownOnUpgrade bool + upgradeInfoEncodingJson bool +} + +func (n *MockNode) Run(ctx context.Context) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + ctx, _ = signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM) + upgradeHeight := n.haltHeight + if n.upgradePlan != nil { + upgradePlanHeight := uint64(n.upgradePlan.Height) + if upgradeHeight == 0 || upgradePlanHeight < upgradeHeight { + upgradeHeight = upgradePlanHeight + } + } + + actualHeightFile := path.Join(n.homePath, "data", "actual-height") + // try to read the actual-height file if it exists + if bz, err := os.ReadFile(actualHeightFile); err == nil { + n.logger.Info("Reading existing height", "height", string(bz)) + n.height, err = strconv.ParseUint(string(bz), 10, 64) + if err != nil { + return fmt.Errorf("failed to parse actual height from file: %w", err) + } + } + + n.logger.Info("Starting mock node", "start_height", n.height, "block_time", n.blockTime, "upgrade_plan", n.upgradePlan, "halt_height", n.haltHeight) + srv := n.startHTTPServer() + ticker := time.NewTicker(n.blockTime) + defer ticker.Stop() + for n.height < upgradeHeight { + n.logger.Info("Processed mock block", "height", n.height) + select { + case <-ctx.Done(): + n.logger.Info("Received shutdown signal, stopping node") + if err := srv.Shutdown(ctx); err != nil { + n.logger.Error("Error shutting down HTTP server", "err", err) + } + if n.shutdownDelay > 0 { + n.logger.Info("Waiting for shutdown delay", "delay", n.shutdownDelay) + time.Sleep(n.shutdownDelay) + } + return nil + case <-ticker.C: + n.height++ + // Write the current height to the actual-height file + err := os.WriteFile(actualHeightFile, []byte(fmt.Sprintf("%d", n.height)), 0o644) + if err != nil { + return fmt.Errorf("failed to write actual height to file: %w", err) + } + } + } + if n.haltHeight == upgradeHeight { // if we have a halt height and we've reached it - there could be an earlier gov upgrade + // this log line 
matches what BaseApp does when it reaches the halt height + n.logger.Error(fmt.Sprintf("halt per configuration height %d", n.height)) + } else if n.upgradePlan != nil { + n.logger.Info("Mock node reached upgrade height, writing upgrade-info.json", "upgrade_plan", n.upgradePlan) + upgradeInfoPath := path.Join(n.homePath, "data", upgradetypes.UpgradeInfoFilename) + var out string + var err error + if n.upgradeInfoEncodingJson { + var bz []byte + bz, err = json.Marshal(n.upgradePlan) + out = string(bz) + } else { + out, err = (&jsonpb.Marshaler{ + EmitDefaults: false, + }).MarshalToString(n.upgradePlan) + } + if err != nil { + return fmt.Errorf("failed to marshal upgrade plan: %w", err) + } + err = os.MkdirAll(path.Dir(upgradeInfoPath), 0o755) + if err != nil { + return fmt.Errorf("failed to create directory for upgrade-info.json: %w", err) + } + err = os.WriteFile(upgradeInfoPath, []byte(out), 0o644) + if err != nil { + return fmt.Errorf("failed to write upgrade-info.json: %w", err) + } + } + if n.shutdownOnUpgrade { + n.logger.Info("Mock node reached upgrade height, configured to shut down immediately") + return nil + } + // Don't exit until we receive a shutdown signal + n.logger.Info("Mock node reached upgrade height, waiting for shutdown signal") + <-ctx.Done() + return nil +} + +func (n *MockNode) startHTTPServer() *http.Server { + http.HandleFunc(n.blockUrl, func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + err := json.NewEncoder(w).Encode(watchers.Response{ + Result: watchers.Result{ + Block: watchers.Block{ + Header: watchers.Header{ + Height: fmt.Sprintf("%d", n.height), + }, + }, + }, + }) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + }) + srv := &http.Server{ + Addr: n.httpAddr, + } + go func() { + if err := srv.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) { + n.logger.Error("HTTP server error", "err", err) + } + }() + return srv +} diff --git 
a/tools/cosmovisor/go.mod b/tools/cosmovisor/go.mod index f259dbd8971f..0605669e3064 100644 --- a/tools/cosmovisor/go.mod +++ b/tools/cosmovisor/go.mod @@ -1,12 +1,12 @@ -module cosmossdk.io/tools/cosmovisor +module cosmossdk.io/tools/cosmovisor/v2 go 1.23.5 require ( cosmossdk.io/log v1.6.0 - github.com/cometbft/cometbft-db v1.0.4 - github.com/cometbft/cometbft/v2 v2.0.0-rc1 - github.com/cosmos/cosmos-sdk v0.54.0-rc.1 + github.com/cenkalti/backoff/v5 v5.0.2 + github.com/cosmos/cosmos-sdk v0.53.0 + github.com/cosmos/gogoproto v1.7.0 github.com/fsnotify/fsnotify v1.9.0 github.com/otiai10/copy v1.14.1 github.com/pelletier/go-toml/v2 v2.2.4 @@ -59,13 +59,14 @@ require ( github.com/cockroachdb/pebble v1.1.5 // indirect github.com/cockroachdb/redact v1.1.6 // indirect github.com/cockroachdb/tokenbucket v0.0.0-20250429170803-42689b6311bb // indirect + github.com/cometbft/cometbft-db v1.0.4 // indirect github.com/cometbft/cometbft/api v1.1.0-rc1 // indirect + github.com/cometbft/cometbft/v2 v2.0.0-rc1 // indirect github.com/cosmos/btcutil v1.0.5 // indirect github.com/cosmos/cosmos-db v1.1.3 // indirect github.com/cosmos/cosmos-proto v1.0.0-beta.5 // indirect github.com/cosmos/go-bip39 v1.0.0 // indirect github.com/cosmos/gogogateway v1.2.0 // indirect - github.com/cosmos/gogoproto v1.7.0 // indirect github.com/cosmos/iavl v1.2.6 // indirect github.com/cosmos/ics23/go v0.11.0 // indirect github.com/cosmos/ledger-cosmos-go v0.14.0 // indirect @@ -130,10 +131,12 @@ require ( github.com/klauspost/cpuid/v2 v2.2.10 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect + github.com/lib/pq v1.10.9 // indirect github.com/linxGnu/grocksdb v1.10.1 // indirect github.com/lmittmann/tint v1.0.7 // indirect github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/minio/highwayhash v1.0.3 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/go-testing-interface v1.14.1 
// indirect github.com/mtibben/percent v0.2.1 // indirect @@ -147,7 +150,7 @@ require ( github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_golang v1.22.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.64.0 // indirect + github.com/prometheus/common v0.65.0 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect @@ -181,6 +184,7 @@ require ( go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect go.opentelemetry.io/otel/trace v1.35.0 // indirect go.uber.org/multierr v1.11.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect golang.org/x/arch v0.17.0 // indirect golang.org/x/crypto v0.39.0 // indirect golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect @@ -200,5 +204,16 @@ require ( gotest.tools/v3 v3.5.2 // indirect nhooyr.io/websocket v1.8.11 // indirect pgregory.net/rapid v1.2.0 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect + sigs.k8s.io/yaml v1.5.0 // indirect +) + +// Replace all unreleased direct deps upgraded to comet v1 +replace github.com/cosmos/cosmos-sdk => ../.. 
+ +// Replace all unreleased indirect deps upgraded to comet v1 +replace ( + cosmossdk.io/api => ../../api + cosmossdk.io/core => ../../core + cosmossdk.io/store => ../../store + cosmossdk.io/x/tx => ../../x/tx ) diff --git a/tools/cosmovisor/go.sum b/tools/cosmovisor/go.sum index da97e776b56e..cb607477795b 100644 --- a/tools/cosmovisor/go.sum +++ b/tools/cosmovisor/go.sum @@ -614,12 +614,8 @@ cloud.google.com/go/workflows v1.7.0/go.mod h1:JhSrZuVZWuiDfKEFxU0/F1PQjmpnpcoIS cloud.google.com/go/workflows v1.8.0/go.mod h1:ysGhmEajwZxGn1OhGOGKsTXc5PyxOc0vfKf5Af+to4M= cloud.google.com/go/workflows v1.9.0/go.mod h1:ZGkj1aFIOd9c8Gerkjjq7OW7I5+l6cSvT3ujaO/WwSA= cloud.google.com/go/workflows v1.10.0/go.mod h1:fZ8LmRmZQWacon9UCX1r/g/DfAXx5VcPALq2CxzdePw= -cosmossdk.io/api v1.0.0-rc.1 h1:KwZHIMveoeg6YVwvKZxJLp7be5uk6qmnqNAar2tPxVU= -cosmossdk.io/api v1.0.0-rc.1/go.mod h1:8YOT+XjVFb9eZJk62YqjFILOm8MlLhbnkC9/jxIYri8= cosmossdk.io/collections v1.3.1 h1:09e+DUId2brWsNOQ4nrk+bprVmMUaDH9xvtZkeqIjVw= cosmossdk.io/collections v1.3.1/go.mod h1:ynvkP0r5ruAjbmedE+vQ07MT6OtJ0ZIDKrtJHK7Q/4c= -cosmossdk.io/core v1.1.0-rc.1 h1:VhF5xd4uJZt/lQzbl8qT1W3Pcrklp4RSnugcWQZyf5M= -cosmossdk.io/core v1.1.0-rc.1/go.mod h1:fKHIWVYfPCC4tto9eoYFZC/yAWFlWw2mz8YgSYvjnUs= cosmossdk.io/depinject v1.2.1 h1:eD6FxkIjlVaNZT+dXTQuwQTKZrFZ4UrfCq1RKgzyhMw= cosmossdk.io/depinject v1.2.1/go.mod h1:lqQEycz0H2JXqvOgVwTsjEdMI0plswI7p6KX+MVqFOM= cosmossdk.io/errors v1.0.2 h1:wcYiJz08HThbWxd/L4jObeLaLySopyyuUFB5w4AGpCo= @@ -630,10 +626,6 @@ cosmossdk.io/math v1.5.3 h1:WH6tu6Z3AUCeHbeOSHg2mt9rnoiUWVWaQ2t6Gkll96U= cosmossdk.io/math v1.5.3/go.mod h1:uqcZv7vexnhMFJF+6zh9EWdm/+Ylyln34IvPnBauPCQ= cosmossdk.io/schema v1.1.0 h1:mmpuz3dzouCoyjjcMcA/xHBEmMChN+EHh8EHxHRHhzE= cosmossdk.io/schema v1.1.0/go.mod h1:Gb7pqO+tpR+jLW5qDcNOSv0KtppYs7881kfzakguhhI= -cosmossdk.io/store v1.10.0-rc.2 h1:7ze2UoheVTVMK4ElHtoRhYv8nlUImj34e4yp1yy1bgE= -cosmossdk.io/store v1.10.0-rc.2/go.mod h1:3p1IV4EGsULFfeyAcfj7/DBcDsy8d3VlYIEJnhhbP3U= 
-cosmossdk.io/x/tx v1.2.0-rc.1 h1:AartiA6eiTD9KHmnlj3uG3H8FjyjI0qNkmvmU+p6cJ8= -cosmossdk.io/x/tx v1.2.0-rc.1/go.mod h1:UzpMTUmQEFfz+m0E+lhzFIiEhtZCHjScU/NC652DBHI= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= @@ -643,6 +635,8 @@ github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 h1:/vQbFIOMb github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4/go.mod h1:hN7oaIRCjzsZ2dE+yG5k+rsdt3qcwykqK6HVGcKwsw4= github.com/99designs/keyring v1.2.2 h1:pZd3neh/EmUzWONb35LxQfvuY7kiSXAq3HQd97+XBn0= github.com/99designs/keyring v1.2.2/go.mod h1:wes/FrByc8j7lFOAGLGSNEg8f/PaI3cgTBqhFkHUrPk= +github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= +github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= @@ -662,10 +656,14 @@ github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0 github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= +github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= +github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod 
h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= +github.com/adlio/schema v1.3.6 h1:k1/zc2jNfeiZBA5aFTRy37jlBIuCkXCm0XmvpzCKI9I= +github.com/adlio/schema v1.3.6/go.mod h1:qkxwLgPBd1FgLRHYVCmQT/rrBr3JH38J9LjmVzWNudg= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= github.com/ajstarks/deck/generate v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk= @@ -720,10 +718,13 @@ github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4 github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCNan80NzY= github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= +github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8= +github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= 
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= @@ -760,6 +761,8 @@ github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f h1:C5bqEmzEPLsHm9Mv73lSE9e9bKV23aB1vxOsmZrkl3k= github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cockroachdb/apd/v2 v2.0.2 h1:weh8u7Cneje73dDh+2tEVLUvyBc89iwepWCD8b8034E= +github.com/cockroachdb/apd/v2 v2.0.2/go.mod h1:DDxRlzC2lo3/vSlmSoS7JkqbbrARPuFOGr0B9pvN3Gw= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4= github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= @@ -782,6 +785,8 @@ github.com/cometbft/cometbft/api v1.1.0-rc1 h1:NdlXfp4wialMwJ+1ds1DBtfysdxErUxg8 github.com/cometbft/cometbft/api v1.1.0-rc1/go.mod h1:Ivh6nSCTJPQOyfQo8dgnyu/T88it092sEqSrZSmTQN8= github.com/cometbft/cometbft/v2 v2.0.0-rc1 h1:3QyDHTFzH3a1N6c2jt03kFDCxM/hgUvhzDYBVnPVXY8= github.com/cometbft/cometbft/v2 v2.0.0-rc1/go.mod h1:/ze08eO171CqUqTqAE7FW7ydUJIVkgp6e2svpYvIR3c= +github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg= +github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 
github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= @@ -792,8 +797,6 @@ github.com/cosmos/cosmos-db v1.1.3 h1:7QNT77+vkefostcKkhrzDK9uoIEryzFrU9eoMeaQOP github.com/cosmos/cosmos-db v1.1.3/go.mod h1:kN+wGsnwUJZYn8Sy5Q2O0vCYA99MJllkKASbs6Unb9U= github.com/cosmos/cosmos-proto v1.0.0-beta.5 h1:eNcayDLpip+zVLRLYafhzLvQlSmyab+RC5W7ZfmxJLA= github.com/cosmos/cosmos-proto v1.0.0-beta.5/go.mod h1:hQGLpiIUloJBMdQMMWb/4wRApmI9hjHH05nefC0Ojec= -github.com/cosmos/cosmos-sdk v0.54.0-rc.1 h1:hCsF4kJK/iqNDNIXF+bNP72G+Ik2K77TUluWRmsuGSU= -github.com/cosmos/cosmos-sdk v0.54.0-rc.1/go.mod h1:8jH0Q4+2IhIu3kXsGDtIvUZfmeydcvNJxYlOE80ukz4= github.com/cosmos/go-bip39 v1.0.0 h1:pcomnQdrdH22njcAatO0yWojsUnCO3y2tNoV1cb6hHY= github.com/cosmos/go-bip39 v1.0.0/go.mod h1:RNJv0H/pOIVgxw6KS7QeX2a0Uo0aKUlfhZ4xuwvCdJw= github.com/cosmos/gogogateway v1.2.0 h1:Ae/OivNhp8DqBi/sh2A8a1D0y638GpL3tkmLQAiKxTE= @@ -830,6 +833,10 @@ github.com/dgraph-io/ristretto/v2 v2.1.0/go.mod h1:uejeqfYXpUomfse0+lO+13ATz4Typ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 h1:fAjc9m62+UWV/WAFKLNi6ZS0675eEUC9y3AlwSbQu1Y= github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= +github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= +github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize 
v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -1224,6 +1231,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= @@ -1321,6 +1330,12 @@ github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9 github.com/onsi/gomega v1.26.0 h1:03cDLK28U6hWvCAns6NeydX3zIm4SF3ci69ulidS32Q= github.com/onsi/gomega v1.26.0/go.mod h1:r+zV744Re+DiYCIPRlYOTxn0YkOLcAnW8k1xXdMPGhM= github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= +github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= +github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= +github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod 
h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= @@ -1329,6 +1344,8 @@ github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxS github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= +github.com/ory/dockertest v3.3.5+incompatible h1:iLLK6SQwIhcbrG783Dghaaa3WPzGc+4Emza6EbVUUGA= +github.com/ory/dockertest v3.3.5+incompatible/go.mod h1:1vX4m9wsvi00u5bseYwXaSnhNrne+V0E6LAcBILJdPs= github.com/otiai10/copy v1.14.1 h1:5/7E6qsUMBaH5AnQ0sSLzzTg1oTECmcCmT6lvF45Na8= github.com/otiai10/copy v1.14.1/go.mod h1:oQwrEDDOci3IM8dJF0d8+jnbfPDllW6vUjNc3DoZm9I= github.com/otiai10/mint v1.6.3 h1:87qsV/aw1F5as1eH1zS/yqHY85ANKVMgkDrf9rcxbQs= @@ -1390,8 +1407,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.15.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= -github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQPGO4= -github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= +github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= +github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs 
v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= @@ -1434,6 +1451,8 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= @@ -1574,6 +1593,10 @@ go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9E go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= +go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= golang.org/x/arch v0.17.0 h1:4O3dfLzd+lQewptAHqjewQZQDyEdejz3VwgeYwkZneU= golang.org/x/arch v0.17.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -1893,6 +1916,7 @@ golang.org/x/sys 
v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= @@ -2400,6 +2424,6 @@ rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/yaml v1.5.0 h1:M10b2U7aEUY6hRtU870n2VTPgR5RZiL/I6Lcc2F4NUQ= +sigs.k8s.io/yaml v1.5.0/go.mod h1:wZs27Rbxoai4C0f8/9urLZtZtF3avA3gKvGyPdDqTO4= sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= diff --git a/tools/cosmovisor/internal/backoff.go b/tools/cosmovisor/internal/backoff.go new file mode 100644 index 000000000000..8490023b9f34 --- /dev/null +++ b/tools/cosmovisor/internal/backoff.go @@ -0,0 +1,64 @@ +package internal + +import ( + "fmt" + "time" + + "github.com/cenkalti/backoff/v5" + + "cosmossdk.io/log" +) + +type RetryBackoffManager struct { + lastCmd string + lastArgs []string + backoff backoff.BackOff + retryCount int + maxRestarts int + logger log.Logger +} + +// NewRetryBackoffManager creates a new RetryBackoffManager instance. 
+func NewRetryBackoffManager(logger log.Logger, maxRestarts int) *RetryBackoffManager { + backoffAlg := backoff.NewExponentialBackOff() + return &RetryBackoffManager{ + backoff: backoffAlg, + maxRestarts: maxRestarts, + logger: logger, + } +} + +func (r *RetryBackoffManager) BeforeRun(cmd string, args []string) error { + reset := false + // we reset the backoff if the command or its arguments have changed + if r.lastCmd != cmd || len(r.lastArgs) != len(args) { + reset = true + } else { + n := min(len(r.lastArgs), len(args)) + for i := 0; i < n; i++ { + if r.lastArgs[i] != args[i] { + reset = true + break + } + } + } + if reset { + // if the command or arguments have changed, we reset the backoff and store the new command and arguments + r.backoff.Reset() + r.retryCount = 0 + r.lastCmd = cmd + r.lastArgs = args + } else { + r.retryCount++ + if r.maxRestarts > 0 && r.retryCount >= r.maxRestarts { + return backoff.Permanent(fmt.Errorf("maximum number of restarts reached: %d", r.maxRestarts)) + } + // if the command and arguments are the same, we wait for the next backoff interval + duration := r.backoff.NextBackOff() + r.logger.Info("Applying backoff before restarting command", + "backoff_duration", duration.String()) + time.Sleep(duration) + r.logger.Info("Backoff time elapsed, restarting ") + } + return nil +} diff --git a/tools/cosmovisor/internal/process.go b/tools/cosmovisor/internal/process.go new file mode 100644 index 000000000000..687dfbe88561 --- /dev/null +++ b/tools/cosmovisor/internal/process.go @@ -0,0 +1,68 @@ +package internal + +import ( + "os/exec" + "syscall" + "time" +) + +type ProcessRunner struct { + cmd *exec.Cmd + done chan error // closed exactly once, after Wait returns +} + +func RunProcess(cmd *exec.Cmd) (*ProcessRunner, error) { + // start the process before returning a ProcessRunner + if err := cmd.Start(); err != nil { + return nil, err + } + + done := make(chan error, 1) + go func() { + // wait on the process to complete in a separate 
go routine + done <- cmd.Wait() + close(done) + }() + return &ProcessRunner{cmd: cmd, done: done}, nil +} + +// Done returns the error that the process returned when it exited. +func (pr *ProcessRunner) Done() <-chan error { + return pr.done +} + +// Shutdown attempts to gracefully shut down the process by sending a SIGTERM signal. +// If the process does not exit within the specified grace period, it will be forcefully killed. +// An error will only be returned if there was an error shutting down the process. +// To get the error that the process itself returned, use Done(). +func (pr *ProcessRunner) Shutdown(grace time.Duration) error { + // check if already finished + select { + case <-pr.done: + // already finished, nothing to do + return nil + default: + // not finished yet, proceed with shutdown + } + + proc := pr.cmd.Process + if proc == nil { + // this should only be true if the process has already exited + <-pr.done // make sure Wait() has returned + return nil + } + + // signal shutdown + if err := proc.Signal(syscall.SIGTERM); err != nil { + return err + } + + // wait for graceful exit or force-kill after timeout + select { + case <-pr.done: + case <-time.After(grace): + _ = proc.Kill() + <-pr.done + } + return nil +} diff --git a/tools/cosmovisor/process_test.go b/tools/cosmovisor/internal/process_test.go similarity index 53% rename from tools/cosmovisor/process_test.go rename to tools/cosmovisor/internal/process_test.go index 7d91c90e17b6..a03d771bef58 100644 --- a/tools/cosmovisor/process_test.go +++ b/tools/cosmovisor/internal/process_test.go @@ -1,9 +1,10 @@ //go:build linux || darwin -package cosmovisor_test +package internal import ( "bytes" + "context" "fmt" "io/fs" "os" @@ -15,7 +16,8 @@ import ( "github.com/stretchr/testify/require" "cosmossdk.io/log" - "cosmossdk.io/tools/cosmovisor" + + "cosmossdk.io/tools/cosmovisor/v2" upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" ) @@ -23,68 +25,66 @@ import ( var workDir string func init() { 
- workDir, _ = os.Getwd() + dir, err := os.Getwd() + if err != nil { + panic(err) + } + workDir = filepath.Join(dir, "..") +} + +type launchProcessFixture struct { + cfg *cosmovisor.Config + stdin *os.File + stdout *buffer + stderr *buffer + logger log.Logger + runner *Runner } // TestLaunchProcess will try running the script a few times and watch upgrades work properly // and args are passed through func TestLaunchProcess(t *testing.T) { - // binaries from testdata/validate directory - cfg := prepareConfig( - t, - fmt.Sprintf("%s/%s", workDir, "testdata/validate"), - cosmovisor.Config{ - Name: "dummyd", - PollInterval: 15, - UnsafeSkipBackup: true, - }, - ) - - logger := log.NewTestLogger(t).With(log.ModuleKey, "cosmosvisor") + f := setupTestLaunchProcessFixture(t, "validate", cosmovisor.Config{ + Name: "dummyd", + PollInterval: 15, + UnsafeSkipBackup: true, + MaxRestartRetries: 1, + }) - // should run the genesis binary and produce expected output - stdin, _ := os.Open(os.DevNull) - stdout, stderr := newBuffer(), newBuffer() - currentBin, err := cfg.CurrentBin() + currentBin, err := f.cfg.CurrentBin() require.NoError(t, err) - rPath, err := filepath.EvalSymlinks(cfg.GenesisBin()) + rPath, err := filepath.EvalSymlinks(f.cfg.GenesisBin()) require.NoError(t, err) require.Equal(t, rPath, currentBin) - launcher, err := cosmovisor.NewLauncher(logger, cfg) - require.NoError(t, err) - - upgradeFile := cfg.UpgradeInfoFilePath() + upgradeFile := f.cfg.UpgradeInfoFilePath() args := []string{"foo", "bar", "1234", upgradeFile} - doUpgrade, err := launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.True(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, fmt.Sprintf("Genesis foo bar 1234 %s\nUPGRADE \"chain2\" NEEDED at height: 49: {}\n", upgradeFile), stdout.String()) + err = f.runner.Start(context.Background(), args) + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) + require.Empty(t, f.stderr.String()) + require.Equal(t, 
fmt.Sprintf("Genesis foo bar 1234 %s\nUPGRADE \"chain2\" NEEDED at height: 49: {}\n", upgradeFile), f.stdout.String()) // ensure this is upgraded now and produces new output - currentBin, err = cfg.CurrentBin() + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain2")) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain2")) require.NoError(t, err) - require.Equal(t, rPath, currentBin) args = []string{"second", "run", "--verbose"} - stdout.Reset() - stderr.Reset() + f.stdout.Reset() + f.stderr.Reset() - doUpgrade, err = launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.False(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, "Chain 2 is live!\nArgs: second run --verbose\nFinished successfully\n", stdout.String()) + err = f.runner.Start(context.Background(), args) + require.ErrorContains(t, err, "maximum number of restarts reached") + require.Empty(t, f.stderr.String()) + require.Equal(t, "Chain 2 is live!\nArgs: second run --verbose\nFinished successfully\n", f.stdout.String()) // ended without other upgrade - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain2")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain2")) require.NoError(t, err) require.Equal(t, rPath, currentBin) @@ -92,164 +92,124 @@ func TestLaunchProcess(t *testing.T) { // TestPlanDisableRecase will test upgrades without lower case plan names func TestPlanDisableRecase(t *testing.T) { - // binaries from testdata/validate directory - cfg := prepareConfig( - t, - fmt.Sprintf("%s/%s", workDir, "testdata/norecase"), - cosmovisor.Config{ - Name: "dummyd", - PollInterval: 20, - UnsafeSkipBackup: true, - DisableRecase: true, - }, - ) + f := setupTestLaunchProcessFixture(t, "norecase", cosmovisor.Config{ + Name: "dummyd", + PollInterval: 20, + UnsafeSkipBackup: true, + DisableRecase: true, + MaxRestartRetries: 1, + }) - logger := log.NewTestLogger(t).With(log.ModuleKey, 
"cosmosvisor") - - // should run the genesis binary and produce expected output - stdin, _ := os.Open(os.DevNull) - stdout, stderr := newBuffer(), newBuffer() - currentBin, err := cfg.CurrentBin() + currentBin, err := f.cfg.CurrentBin() require.NoError(t, err) - rPath, err := filepath.EvalSymlinks(cfg.GenesisBin()) + rPath, err := filepath.EvalSymlinks(f.cfg.GenesisBin()) require.NoError(t, err) require.Equal(t, rPath, currentBin) - launcher, err := cosmovisor.NewLauncher(logger, cfg) - require.NoError(t, err) - - upgradeFile := cfg.UpgradeInfoFilePath() + upgradeFile := f.cfg.UpgradeInfoFilePath() args := []string{"foo", "bar", "1234", upgradeFile} - doUpgrade, err := launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.True(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, fmt.Sprintf("Genesis foo bar 1234 %s\nUPGRADE \"Chain2\" NEEDED at height: 49: {}\n", upgradeFile), stdout.String()) + err = f.runner.Start(context.Background(), args) + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) + require.Empty(t, f.stderr.String()) + require.Equal(t, fmt.Sprintf("Genesis foo bar 1234 %s\nUPGRADE \"Chain2\" NEEDED at height: 49: {}\n", upgradeFile), f.stdout.String()) // ensure this is upgraded now and produces new output - currentBin, err = cfg.CurrentBin() + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("Chain2")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("Chain2")) require.NoError(t, err) require.Equal(t, rPath, currentBin) args = []string{"second", "run", "--verbose"} - stdout.Reset() - stderr.Reset() + f.stdout.Reset() + f.stderr.Reset() - doUpgrade, err = launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.False(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, "Chain 2 is live!\nArgs: second run --verbose\nFinished successfully\n", stdout.String()) + err = f.runner.Start(context.Background(), 
args) + require.ErrorContains(t, err, "maximum number of restarts reached") + require.Empty(t, f.stderr.String()) + require.Equal(t, "Chain 2 is live!\nArgs: second run --verbose\nFinished successfully\n", f.stdout.String()) // ended without other upgrade - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("Chain2")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("Chain2")) require.NoError(t, err) require.Equal(t, rPath, currentBin) } func TestLaunchProcessWithRestartDelay(t *testing.T) { - // binaries from testdata/validate directory - cfg := prepareConfig( - t, - fmt.Sprintf("%s/%s", workDir, "testdata/validate"), - cosmovisor.Config{ - Name: "dummyd", - RestartDelay: 5 * time.Second, - PollInterval: 20, - UnsafeSkipBackup: true, - }, - ) - - logger := log.NewTestLogger(t).With(log.ModuleKey, "cosmosvisor") + f := setupTestLaunchProcessFixture(t, "validate", cosmovisor.Config{ + Name: "dummyd", + RestartDelay: 5 * time.Second, + PollInterval: 20, + UnsafeSkipBackup: true, + }) // should run the genesis binary and produce expected output - stdin, _ := os.Open(os.DevNull) - stdout, stderr := newBuffer(), newBuffer() - currentBin, err := cfg.CurrentBin() + currentBin, err := f.cfg.CurrentBin() require.NoError(t, err) - rPath, err := filepath.EvalSymlinks(cfg.GenesisBin()) + rPath, err := filepath.EvalSymlinks(f.cfg.GenesisBin()) require.NoError(t, err) require.Equal(t, rPath, currentBin) - launcher, err := cosmovisor.NewLauncher(logger, cfg) - require.NoError(t, err) - - upgradeFile := cfg.UpgradeInfoFilePath() + upgradeFile := f.cfg.UpgradeInfoFilePath() start := time.Now() - doUpgrade, err := launcher.Run([]string{"foo", "bar", "1234", upgradeFile}, stdin, stdout, stderr) - require.NoError(t, err) - require.True(t, doUpgrade) + err = f.runner.Start(context.Background(), []string{"foo", "bar", "1234", upgradeFile}) + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) // may not be the best way but the fastest way to check we meet the delay // in addition to 
comparing both the runtime of this test and TestLaunchProcess in addition - if time.Since(start) < cfg.RestartDelay { + if time.Since(start) < f.cfg.RestartDelay { require.FailNow(t, "restart delay not met") } } // TestPlanShutdownGrace will test upgrades without lower case plan names func TestPlanShutdownGrace(t *testing.T) { - // binaries from testdata/validate directory - cfg := prepareConfig( - t, - fmt.Sprintf("%s/%s", workDir, "testdata/dontdie"), - cosmovisor.Config{ - Name: "dummyd", - PollInterval: 15, - UnsafeSkipBackup: true, - ShutdownGrace: 2 * time.Second, - }, - ) - - logger := log.NewTestLogger(t).With(log.ModuleKey, "cosmosvisor") + f := setupTestLaunchProcessFixture(t, "dontdie", cosmovisor.Config{ + Name: "dummyd", + PollInterval: 15, + UnsafeSkipBackup: true, + ShutdownGrace: 2 * time.Second, + MaxRestartRetries: 1, + }) // should run the genesis binary and produce expected output - stdin, _ := os.Open(os.DevNull) - stdout, stderr := newBuffer(), newBuffer() - currentBin, err := cfg.CurrentBin() + currentBin, err := f.cfg.CurrentBin() require.NoError(t, err) - rPath, err := filepath.EvalSymlinks(cfg.GenesisBin()) + rPath, err := filepath.EvalSymlinks(f.cfg.GenesisBin()) require.NoError(t, err) require.Equal(t, rPath, currentBin) - launcher, err := cosmovisor.NewLauncher(logger, cfg) - require.NoError(t, err) - - upgradeFile := cfg.UpgradeInfoFilePath() + upgradeFile := f.cfg.UpgradeInfoFilePath() args := []string{"foo", "bar", "1234", upgradeFile} - doUpgrade, err := launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.True(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, fmt.Sprintf("Genesis foo bar 1234 %s\nUPGRADE \"Chain2\" NEEDED at height: 49: {}\nWARN Need Flush\nFlushed\n", upgradeFile), stdout.String()) + err = f.runner.Start(context.Background(), args) + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) + require.Empty(t, f.stderr.String()) + require.Equal(t, fmt.Sprintf("Genesis foo bar 
1234 %s\nUPGRADE \"Chain2\" NEEDED at height: 49: {}\nWARN Need Flush\nFlushed\n", upgradeFile), f.stdout.String()) // ensure this is upgraded now and produces new output - currentBin, err = cfg.CurrentBin() + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain2")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain2")) require.NoError(t, err) require.Equal(t, rPath, currentBin) args = []string{"second", "run", "--verbose"} - stdout.Reset() - stderr.Reset() + f.stdout.Reset() + f.stderr.Reset() - doUpgrade, err = launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.False(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, "Chain 2 is live!\nArgs: second run --verbose\nFinished successfully\n", stdout.String()) + err = f.runner.Start(context.Background(), args) + require.ErrorContains(t, err, "maximum number of restarts reached") + require.Empty(t, f.stderr.String()) + require.Equal(t, "Chain 2 is live!\nArgs: second run --verbose\nFinished successfully\n", f.stdout.String()) // ended without other upgrade - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain2")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain2")) require.NoError(t, err) require.Equal(t, rPath, currentBin) } @@ -257,80 +217,68 @@ func TestPlanShutdownGrace(t *testing.T) { // TestLaunchProcess will try running the script a few times and watch upgrades work properly // and args are passed through func TestLaunchProcessWithDownloads(t *testing.T) { + f := setupTestLaunchProcessFixture(t, "download", cosmovisor.Config{ + Name: "autod", + AllowDownloadBinaries: true, + PollInterval: 100, + UnsafeSkipBackup: true, + MaxRestartRetries: 1, + }) + // test case upgrade path (binaries from testdata/download directory): // genesis -> chain2-zip_bin // chain2-zip_bin -> ref_to_chain3-zip_dir.json = (json for the next download instructions) -> chain3-zip_dir // chain3-zip_dir - 
doesn't upgrade - cfg := prepareConfig( - t, - fmt.Sprintf("%s/%s", workDir, "testdata/download"), - cosmovisor.Config{ - Name: "autod", - AllowDownloadBinaries: true, - PollInterval: 100, - UnsafeSkipBackup: true, - }, - ) - - logger := log.NewTestLogger(t).With(log.ModuleKey, "cosmovisor") - upgradeFilename := cfg.UpgradeInfoFilePath() + upgradeFilename := f.cfg.UpgradeInfoFilePath() // should run the genesis binary and produce expected output - currentBin, err := cfg.CurrentBin() + currentBin, err := f.cfg.CurrentBin() require.NoError(t, err) - rPath, err := filepath.EvalSymlinks(cfg.GenesisBin()) - require.NoError(t, err) - require.Equal(t, rPath, currentBin) + rPath, err := filepath.EvalSymlinks(f.cfg.GenesisBin()) - launcher, err := cosmovisor.NewLauncher(logger, cfg) require.NoError(t, err) + require.Equal(t, rPath, currentBin) - stdin, _ := os.Open(os.DevNull) - stdout, stderr := newBuffer(), newBuffer() args := []string{"some", "args", upgradeFilename} - doUpgrade, err := launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.True(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, "Genesis autod. Args: some args "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain2" NEEDED at height: 49: zip_binary`+"\n", stdout.String()) - currentBin, err = cfg.CurrentBin() + err = f.runner.Start(context.Background(), args) + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) + require.Empty(t, f.stderr.String()) + require.Equal(t, "Genesis autod. 
Args: some args "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain2" NEEDED at height: 49: zip_binary`+"\n", f.stdout.String()) + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain2")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain2")) require.NoError(t, err) require.Equal(t, rPath, currentBin) // start chain2 - stdout.Reset() - stderr.Reset() + f.stdout.Reset() + f.stderr.Reset() args = []string{"run", "--fast", upgradeFilename} - doUpgrade, err = launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - - require.Empty(t, stderr.String()) - require.Equal(t, "Chain 2 from zipped binary\nArgs: run --fast "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain3" NEEDED at height: 936: ref_to_chain3-zip_dir.json module=main`+"\n", stdout.String()) + err = f.runner.Start(context.Background(), args) // ended with one more upgrade - require.True(t, doUpgrade) - currentBin, err = cfg.CurrentBin() + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) + require.Empty(t, f.stderr.String()) + require.Equal(t, "Chain 2 from zipped binary\nArgs: run --fast "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain3" NEEDED at height: 936: ref_to_chain3-zip_dir.json module=main`+"\n", f.stdout.String()) + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain3")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain3")) require.NoError(t, err) require.Equal(t, rPath, currentBin) // run the last chain args = []string{"end", "--halt", upgradeFilename} - stdout.Reset() - stderr.Reset() - doUpgrade, err = launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.False(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, "Chain 3 from zipped directory\nArgs: end --halt "+upgradeFilename+"\n", stdout.String()) + f.stdout.Reset() + f.stderr.Reset() + err = f.runner.Start(context.Background(), args) + 
require.ErrorContains(t, err, "maximum number of restarts reached") + require.Empty(t, f.stderr.String()) + require.Equal(t, "Chain 3 from zipped directory\nArgs: end --halt "+upgradeFilename+"\n", f.stdout.String()) // and this doesn't upgrade - currentBin, err = cfg.CurrentBin() + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain3")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain3")) require.NoError(t, err) require.Equal(t, rPath, currentBin) } @@ -342,36 +290,28 @@ func TestLaunchProcessWithDownloadsAndMissingPreupgrade(t *testing.T) { // genesis -> chain2-zip_bin // chain2-zip_bin -> ref_to_chain3-zip_dir.json = (json for the next download instructions) -> chain3-zip_dir // chain3-zip_dir - doesn't upgrade - cfg := prepareConfig( - t, - fmt.Sprintf("%s/%s", workDir, "testdata/download"), - cosmovisor.Config{ - Name: "autod", - AllowDownloadBinaries: true, - PollInterval: 100, - UnsafeSkipBackup: true, - CustomPreUpgrade: "missing.sh", - }, - ) + f := setupTestLaunchProcessFixture(t, "download", cosmovisor.Config{ + Name: "autod", + AllowDownloadBinaries: true, + PollInterval: 100, + UnsafeSkipBackup: true, + CustomPreUpgrade: "missing.sh", + }) - logger := log.NewTestLogger(t).With(log.ModuleKey, "cosmovisor") - upgradeFilename := cfg.UpgradeInfoFilePath() + upgradeFilename := f.cfg.UpgradeInfoFilePath() // should run the genesis binary and produce expected output - currentBin, err := cfg.CurrentBin() + currentBin, err := f.cfg.CurrentBin() require.NoError(t, err) - rPath, err := filepath.EvalSymlinks(cfg.GenesisBin()) + rPath, err := filepath.EvalSymlinks(f.cfg.GenesisBin()) require.NoError(t, err) require.Equal(t, rPath, currentBin) - launcher, err := cosmovisor.NewLauncher(logger, cfg) require.NoError(t, err) // Missing Preupgrade Script - stdin, _ := os.Open(os.DevNull) - stdout, stderr := newBuffer(), newBuffer() args := []string{"some", "args", upgradeFilename} - _, err = 
launcher.Run(args, stdin, stdout, stderr) + err = f.runner.Start(context.Background(), args) require.ErrorContains(t, err, "missing.sh") require.ErrorIs(t, err, fs.ErrNotExist) @@ -384,84 +324,71 @@ func TestLaunchProcessWithDownloadsAndPreupgrade(t *testing.T) { // genesis -> chain2-zip_bin // chain2-zip_bin -> ref_to_chain3-zip_dir.json = (json for the next download instructions) -> chain3-zip_dir // chain3-zip_dir - doesn't upgrade - cfg := prepareConfig( - t, - fmt.Sprintf("%s/%s", workDir, "testdata/download"), - cosmovisor.Config{ - Name: "autod", - AllowDownloadBinaries: true, - PollInterval: 100, - UnsafeSkipBackup: true, - CustomPreUpgrade: "preupgrade.sh", - }, - ) + f := setupTestLaunchProcessFixture(t, "download", cosmovisor.Config{ + Name: "autod", + AllowDownloadBinaries: true, + PollInterval: 100, + UnsafeSkipBackup: true, + CustomPreUpgrade: "preupgrade.sh", + MaxRestartRetries: 1, + }) - buf := newBuffer() // inspect output using buf.String() - logger := log.NewLogger(buf).With(log.ModuleKey, "cosmovisor") - upgradeFilename := cfg.UpgradeInfoFilePath() + upgradeFilename := f.cfg.UpgradeInfoFilePath() // should run the genesis binary and produce expected output - currentBin, err := cfg.CurrentBin() + currentBin, err := f.cfg.CurrentBin() require.NoError(t, err) - rPath, err := filepath.EvalSymlinks(cfg.GenesisBin()) + rPath, err := filepath.EvalSymlinks(f.cfg.GenesisBin()) require.NoError(t, err) require.Equal(t, rPath, currentBin) - launcher, err := cosmovisor.NewLauncher(logger, cfg) - require.NoError(t, err) - stdin, _ := os.Open(os.DevNull) - stdout, stderr := newBuffer(), newBuffer() args := []string{"some", "args", upgradeFilename} - doUpgrade, err := launcher.Run(args, stdin, stdout, stderr) + err = f.runner.Start(context.Background(), args) - require.NoError(t, err) - require.True(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, "Genesis autod. 
Args: some args "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain2" NEEDED at height: 49: zip_binary`+"\n", stdout.String()) - currentBin, err = cfg.CurrentBin() + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) + require.Empty(t, f.stderr.String()) + require.Equal(t, "Genesis autod. Args: some args "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain2" NEEDED at height: 49: zip_binary`+"\n", f.stdout.String()) + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain2")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain2")) require.NoError(t, err) require.Equal(t, rPath, currentBin) // should have preupgrade.sh results - require.FileExists(t, filepath.Join(cfg.Home, "upgrade_name_chain2_height_49")) + require.FileExists(t, filepath.Join(f.cfg.Home, "upgrade_name_chain2_height_49")) // start chain2 - stdout.Reset() - stderr.Reset() + f.stdout.Reset() + f.stderr.Reset() args = []string{"run", "--fast", upgradeFilename} - doUpgrade, err = launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - - require.Empty(t, stderr.String()) - require.Equal(t, "Chain 2 from zipped binary\nArgs: run --fast "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain3" NEEDED at height: 936: ref_to_chain3-zip_dir.json module=main`+"\n", stdout.String()) + err = f.runner.Start(context.Background(), args) // ended with one more upgrade - require.True(t, doUpgrade) - currentBin, err = cfg.CurrentBin() + require.ErrorIs(t, err, ErrUpgradeNoDaemonRestart) + require.Empty(t, f.stderr.String()) + require.Equal(t, "Chain 2 from zipped binary\nArgs: run --fast "+upgradeFilename+"\n"+`ERROR: UPGRADE "chain3" NEEDED at height: 936: ref_to_chain3-zip_dir.json module=main`+"\n", f.stdout.String()) + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain3")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain3")) require.NoError(t, err) require.Equal(t, 
rPath, currentBin) // should have preupgrade.sh results - require.FileExists(t, filepath.Join(cfg.Home, "upgrade_name_chain3_height_936")) + require.FileExists(t, filepath.Join(f.cfg.Home, "upgrade_name_chain3_height_936")) // run the last chain args = []string{"end", "--halt", upgradeFilename} - stdout.Reset() - stderr.Reset() - doUpgrade, err = launcher.Run(args, stdin, stdout, stderr) - require.NoError(t, err) - require.False(t, doUpgrade) - require.Empty(t, stderr.String()) - require.Equal(t, "Chain 3 from zipped directory\nArgs: end --halt "+upgradeFilename+"\n", stdout.String()) + f.stdout.Reset() + f.stderr.Reset() + err = f.runner.Start(context.Background(), args) + require.ErrorContains(t, err, "maximum number of restarts reached") + require.Empty(t, f.stderr.String()) + require.Equal(t, "Chain 3 from zipped directory\nArgs: end --halt "+upgradeFilename+"\n", f.stdout.String()) // and this doesn't upgrade - currentBin, err = cfg.CurrentBin() + currentBin, err = f.cfg.CurrentBin() require.NoError(t, err) - rPath, err = filepath.EvalSymlinks(cfg.UpgradeBin("chain3")) + rPath, err = filepath.EvalSymlinks(f.cfg.UpgradeBin("chain3")) require.NoError(t, err) require.Equal(t, rPath, currentBin) } @@ -497,7 +424,7 @@ func TestSkipUpgrade(t *testing.T) { for i := range cases { tc := cases[i] require := require.New(t) - h := cosmovisor.IsSkipUpgradeHeight(tc.args, tc.upgradeInfo) + h := IsSkipUpgradeHeight(tc.args, tc.upgradeInfo) require.Equal(h, tc.expectRes) } } @@ -536,7 +463,7 @@ func TestUpgradeSkipHeights(t *testing.T) { for i := range cases { tc := cases[i] require := require.New(t) - h := cosmovisor.UpgradeSkipHeights(tc.args) + h := UpgradeSkipHeights(tc.args) require.Equal(h, tc.expectRes) } } @@ -547,6 +474,35 @@ type buffer struct { m sync.Mutex } +func setupTestLaunchProcessFixture(t *testing.T, testdataDir string, cfg cosmovisor.Config) *launchProcessFixture { + t.Helper() + // binaries from testdata/validate directory + preppedCfg := prepareConfig( + 
t, + fmt.Sprintf("%s/testdata/%s", workDir, testdataDir), + cfg, + ) + + logger := log.NewTestLogger(t).With(log.ModuleKey, "cosmosvisor") + + // should run the genesis binary and produce expected output + stdin, _ := os.Open(os.DevNull) + stdout, stderr := newBuffer(), newBuffer() + + return &launchProcessFixture{ + cfg: preppedCfg, + stdin: stdin, + stdout: stdout, + stderr: stderr, + logger: logger, + runner: NewRunner(preppedCfg, RunConfig{ + StdIn: stdin, + StdOut: stdout, + StdErr: stderr, + }, logger), + } +} + func newBuffer() *buffer { return &buffer{} } diff --git a/tools/cosmovisor/internal/runner.go b/tools/cosmovisor/internal/runner.go new file mode 100644 index 000000000000..8f6305652112 --- /dev/null +++ b/tools/cosmovisor/internal/runner.go @@ -0,0 +1,266 @@ +package internal + +import ( + "context" + "errors" + "fmt" + "io" + "os/exec" + + "cosmossdk.io/log" + + "cosmossdk.io/tools/cosmovisor/v2" + "cosmossdk.io/tools/cosmovisor/v2/internal/watchers" + + "github.com/cosmos/cosmos-sdk/x/upgrade/plan" + upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" +) + +type Runner struct { + runCfg RunConfig + cfg *cosmovisor.Config + logger log.Logger + lastSeenHeight uint64 +} + +// NewRunner creates a new Runner instance with the provided configuration and logger. 
+func NewRunner(cfg *cosmovisor.Config, runCfg RunConfig, logger log.Logger) *Runner { + return &Runner{ + runCfg: runCfg, + cfg: cfg, + logger: logger, + } +} + +func (r *Runner) Start(ctx context.Context, args []string) error { + retryMgr := NewRetryBackoffManager(r.logger, r.cfg.MaxRestartRetries) + for { + // First we check if we need to upgrade and if we do we perform the upgrade + upgraded, err := UpgradeIfNeeded(r.cfg, r.logger, r.lastSeenHeight) + if err != nil { + return err + } + // If we upgraded, we need to restart the process, but some configurations do not allow automatic restarts + if upgraded { + r.logger.Info("Upgrade completed, restarting process") + if !r.cfg.RestartAfterUpgrade { + r.logger.Info("DAEMON_RESTART_AFTER_UPGRADE is disabled, exiting process") + return ErrUpgradeNoDaemonRestart + } + } + // Now we compute the command to run and figure out the halt height if needed + cmd, haltHeight, err := r.ComputeRunPlan(args) + if err != nil { + return err + } + + // Usually restarts should be due to either: + // 1. an upgrade that requires a restart + // 2. a change in the halt height due to a new manual upgrade plan + // There are also cases where an app could just shut down due to some error. + // If we're in that sort of situation, we want to retry running the command, but + // we apply a backoff strategy to avoid hammering the process in case of repeated failures. + // We pass the current command and args to the retry manager so it can check whether + // the command or its arguments have changed (e.g. if the binary was updated or the halt height changed), + // or if we're just in some sort of error restart loop. + if err := retryMgr.BeforeRun(cmd.Path, cmd.Args); err != nil { + return err + } + + // In order to make in process testing feasible, we allow a test callback to be set + // and we call it here right before running the process. 
+ // Without this it would be much harder to test the cosmovisor runner in a controlled but realistic scenario. + if testCallback := GetTestCallback(ctx); testCallback != nil { + testCallback() + } + + // Now we actually run the process + err = r.RunProcess(ctx, cmd, haltHeight) + // There are three types of cases we're checking for here: + // 1. errRestartNeeded: this is a custom error that is returned whenever the run loop detects that a restart is needed. + // 2. errDone: this is a sentinel error that indicates that the cosmovisor process itself should be stopped gracefully. + // 3. Any other error, or no error at all: this is an unexpected exit that should trigger a restart of the process with a backoff strategy. + if ok := errors.Is(err, errRestartNeeded); ok { + r.logger.Info("Process shutdown complete, restart needed") + } else if errors.Is(err, errDone) { + r.logger.Info("Shutting down Cosmovisor process gracefully") + return nil + } else if err != nil { + r.logger.Error("Process exited with error, attempting to restart", "error", err) + } else { + r.logger.Info("Process exited without error, restarting") + } + } +} + +var errDone = errors.New("done") + +var ErrUpgradeNoDaemonRestart = errors.New("upgrade completed, but DAEMON_RESTART_AFTER_UPGRADE is disabled") + +// ComputeRunPlan computes the command to run based on the current configuration and arguments +// as well as determining the halt height if a manual upgrade is present. +// This is called to determine run arguments first and allows us to observe whether +// run arguments have changed or if the process is in a restart loop because of some error, +// which is important for the retry backoff manager. 
+func (r *Runner) ComputeRunPlan(args []string) (cmd *exec.Cmd, haltHeight uint64, err error) { + bin, err := r.cfg.CurrentBin() + if err != nil { + return nil, 0, fmt.Errorf("error creating symlink to genesis: %w", err) + } + + if err := plan.EnsureBinary(bin); err != nil { + return nil, 0, fmt.Errorf("current binary is invalid: %w", err) + } + + cmd = exec.Command(bin, args...) + cmd.Stdin = r.runCfg.StdIn + cmd.Stdout = r.runCfg.StdOut + cmd.Stderr = r.runCfg.StdErr + r.logger.Info("Checking for upgrade-info.json.batch") + manualUpgradeBatch, err := r.cfg.ReadManualUpgrades() + if err != nil { + return nil, 0, err + } + manualUpgrade := manualUpgradeBatch.FirstUpgrade() + if manualUpgrade != nil { + haltHeight = uint64(manualUpgrade.Height) + r.logger.Info("Setting --halt-height flag for manual upgrade", "halt_height", haltHeight) + cmd.Args = append(cmd.Args, fmt.Sprintf("--halt-height=%d", haltHeight)) + } + return +} + +// RunProcess runs the given command until either an upgrade is detected or the process exits. 
+func (r *Runner) RunProcess(ctx context.Context, cmd *exec.Cmd, haltHeight uint64) error { + currentBinaryUpgradeName := r.cfg.CurrentBinaryUpgradeName() + // start the fsnotify watcher to watch for changes in the upgrade info directory + dirWatcher, err := watchers.NewFSNotifyWatcher(ctx, r.logger, r.cfg.UpgradeInfoDir(), []string{ + r.cfg.UpgradeInfoFilePath(), + r.cfg.UpgradeInfoBatchFilePath(), + }) + if err != nil { + // if fsnotify is not available, we fall back to polling so we don't return an error here + r.logger.Warn("failed to initialize fsnotify, it's probably not available on this platform, using polling only", "error", err) + } + + // keep the original context for cancellation detection + parentCtx := ctx + // create child context for controlling watchers + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // start watchers for upgrade plans, manual upgrades and height updates + eh := watchers.DebugLoggerErrorHandler(r.logger) + upgradePlanWatcher := watchers.InitFileWatcher[*upgradetypes.Plan](ctx, eh, r.cfg.PollInterval, dirWatcher, r.cfg.UpgradeInfoFilePath(), r.cfg.ParseUpgradeInfo) + manualUpgradesWatcher := watchers.InitFileWatcher[cosmovisor.ManualUpgradeBatch](ctx, eh, r.cfg.PollInterval, dirWatcher, r.cfg.UpgradeInfoBatchFilePath(), r.cfg.ParseManualUpgrades) + heightChecker := watchers.NewHTTPRPCBLockChecker(r.cfg.RPCAddress, r.logger) + heightWatcher := watchers.NewHeightWatcher(eh, heightChecker, r.cfg.PollInterval, func(height uint64) error { + r.lastSeenHeight = height + return r.cfg.WriteLastKnownHeight(height) + }) + + if haltHeight > 0 { + // only watch for height updates if we have a halt height set + r.logger.Info("Starting height watcher", "halt_height", haltHeight) + heightWatcher.Start(ctx) + } + + r.logger.Info("Starting process", "path", cmd.Path, "args", cmd.Args) + processRunner, err := RunProcess(cmd) + if err != nil { + return fmt.Errorf("failed to start process: %w", err) + } + defer func() { + // always check 
for the latest block height before shutting down so that we have it in the last known height file + _, _ = heightChecker.GetLatestBlockHeight() + _ = processRunner.Shutdown(r.cfg.ShutdownGrace) + }() + + correctHeightConfirmed := false + for { + select { + // listen to the parent context's cancellation + case <-parentCtx.Done(): + r.logger.Info("Parent context canceled, shutting down") + return errDone + case upgradePlan, ok := <-upgradePlanWatcher.Updated(): + // TODO check skip upgrade heights?? (although not sure why we need this as the node should not emit an upgrade plan if skip heights is enabled) + if !ok { + return nil + } + r.logger.Info("Received upgrade-info.json") + if upgradePlan.Name != currentBinaryUpgradeName { + // only restart if we have a different upgrade name than the current binary's upgrade name + return errRestartNeeded + } + case manualUpgrades, ok := <-manualUpgradesWatcher.Updated(): + if !ok { + return nil + } + r.logger.Info("Received updates to upgrade-info.json.batch") + if haltHeight == 0 && len(manualUpgrades) > 0 { + // shutdown, no halt height set + r.logger.Info("No halt height set, but manual upgrades found, restarting process") + return errRestartNeeded + } else { + // restart if we need to change the halt height based on the upgrade + firstUpgrade := manualUpgrades.FirstUpgrade() + if firstUpgrade == nil { + // if we no longer have an upgrade then we need to remove halt height + r.logger.Info("No upgrade found, removing halt height") + return errRestartNeeded + } + if uint64(firstUpgrade.Height) < haltHeight { + // if we have an earlier halt height then we need to change the halt height + r.logger.Info("Earlier manual upgrade found, changing halt height", "current_halt_height", haltHeight, "needed_halt_height", firstUpgrade.Height) + return errRestartNeeded + } + } + case err := <-processRunner.Done(): + // we just return the error or absence of an error here, which will cause the process to restart with a backoff retry 
algorithm + return err + case actualHeight := <-heightWatcher.Updated(): + r.logger.Debug("Got height update from watcher", "height", actualHeight) + if haltHeight == 0 { + // we don't have a halt height, so we don't care to check anything about the actual height + continue + } + if !correctHeightConfirmed { + // read manual upgrade batch and check if we'd still be at the correct halt height + manualUpgrades, err := r.cfg.ReadManualUpgrades() + if err != nil { + r.logger.Warn("Failed to read manual upgrades", "error", err) + continue + } + firstUpgrade := manualUpgrades.FirstUpgrade() + if firstUpgrade == nil { + // no upgrade found, so we shouldn't have a halt height + r.logger.Warn("No upgrade found, but halt height is set, removing halt height. This is unexpected because we didn't receive an update to upgrade-info.json.batch") + return errRestartNeeded + } + if uint64(firstUpgrade.Height) == haltHeight { + correctHeightConfirmed = true + } else { + // we're at the wrong halt height so we need to restart + r.logger.Info("We're at a different height expected, so we need to set a different halt height", "current_halt_height", haltHeight, "needed_halt_height", firstUpgrade.Height) + return errRestartNeeded + } + } + // signal a restart if we're at or past the halt height + if actualHeight >= haltHeight { + r.logger.Info("Reached halt height, restarting process for upgrade") + return errRestartNeeded + } + } + } +} + +// RunConfig defines the configuration for running a command, +// essentially mapping its standard input, output, and error streams. 
+type RunConfig struct { + StdIn io.Reader + StdOut io.Writer + StdErr io.Writer +} + +var errRestartNeeded = errors.New("restart needed") diff --git a/tools/cosmovisor/internal/skip.go b/tools/cosmovisor/internal/skip.go new file mode 100644 index 000000000000..fe8cc3e203d9 --- /dev/null +++ b/tools/cosmovisor/internal/skip.go @@ -0,0 +1,51 @@ +package internal + +import ( + "fmt" + "strconv" + "strings" + + "cosmossdk.io/tools/cosmovisor/v2" + + upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" +) + +// TODO do we need this functionality or should it be deleted? + +// IsSkipUpgradeHeight checks if pre-upgrade script must be run. +// If the height in the upgrade plan matches any of the heights provided in --unsafe-skip-upgrades, the script is not run. +func IsSkipUpgradeHeight(args []string, upgradeInfo upgradetypes.Plan) bool { + skipUpgradeHeights := UpgradeSkipHeights(args) + for _, h := range skipUpgradeHeights { + if h == int(upgradeInfo.Height) { + return true + } + } + return false +} + +// UpgradeSkipHeights gets all the heights provided when +// simd start --unsafe-skip-upgrades ... 
+func UpgradeSkipHeights(args []string) []int { + var heights []int + for i, arg := range args { + if arg == fmt.Sprintf("--%s", cosmovisor.FlagSkipUpgradeHeight) { + j := i + 1 + + for j < len(args) { + tArg := args[j] + if strings.HasPrefix(tArg, "-") { + break + } + h, err := strconv.Atoi(tArg) + if err == nil { + heights = append(heights, h) + } + j++ + } + + break + } + } + return heights +} diff --git a/tools/cosmovisor/internal/testing.go b/tools/cosmovisor/internal/testing.go new file mode 100644 index 000000000000..e671f5706b6d --- /dev/null +++ b/tools/cosmovisor/internal/testing.go @@ -0,0 +1,19 @@ +package internal + +import "context" + +type TestCallback func() + +type testCallbackKey struct{} + +func WithTestCallback(ctx context.Context, cb TestCallback) context.Context { + return context.WithValue(ctx, testCallbackKey{}, cb) +} + +func GetTestCallback(ctx context.Context) TestCallback { + cb, ok := ctx.Value(testCallbackKey{}).(TestCallback) + if !ok { + return nil + } + return cb +} diff --git a/tools/cosmovisor/upgrade.go b/tools/cosmovisor/internal/upgrade.go similarity index 83% rename from tools/cosmovisor/upgrade.go rename to tools/cosmovisor/internal/upgrade.go index c2bba046d8de..8318a421187e 100644 --- a/tools/cosmovisor/upgrade.go +++ b/tools/cosmovisor/internal/upgrade.go @@ -1,4 +1,4 @@ -package cosmovisor +package internal import ( "errors" @@ -7,18 +7,21 @@ import ( "runtime" "cosmossdk.io/log" + "cosmossdk.io/tools/cosmovisor/v2" "github.com/cosmos/cosmos-sdk/x/upgrade/plan" upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" ) -// UpgradeBinary will be called after the log message has been parsed and the process has terminated. +// UpgradeBinary will be called after an upgrade has been confirmed and the process has terminated. 
// We can now make any changes to the underlying directory without interference and leave it -// in a state, so we can make a proper restart -func UpgradeBinary(logger log.Logger, cfg *Config, p upgradetypes.Plan) error { +// in the upgraded state so that the app can restart with the new binary. +func UpgradeBinary(logger log.Logger, cfg *cosmovisor.Config, p upgradetypes.Plan) error { + logger.Info("Upgrading binary", "name", p.Name) // simplest case is to switch the link err := plan.EnsureBinary(cfg.UpgradeBin(p.Name)) if err == nil { + logger.Info("Upgrade binary already present, setting as current", "name", p.Name) // we have the binary - do it return cfg.SetCurrentUpgrade(p) } diff --git a/tools/cosmovisor/upgrade_test.go b/tools/cosmovisor/internal/upgrade_test.go similarity index 95% rename from tools/cosmovisor/upgrade_test.go rename to tools/cosmovisor/internal/upgrade_test.go index 8a3953c46fe0..c9f2404ba744 100644 --- a/tools/cosmovisor/upgrade_test.go +++ b/tools/cosmovisor/internal/upgrade_test.go @@ -1,6 +1,6 @@ //go:build darwin || linux -package cosmovisor_test +package internal import ( "fmt" @@ -15,7 +15,7 @@ import ( "github.com/stretchr/testify/suite" "cosmossdk.io/log" - "cosmossdk.io/tools/cosmovisor" + "cosmossdk.io/tools/cosmovisor/v2" upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" ) @@ -141,7 +141,7 @@ func (s *upgradeTestSuite) TestUpgradeBinaryNoDownloadUrl() { // do upgrade ignores bad files for _, name := range []string{"missing", "nobin"} { info := upgradetypes.Plan{Name: name} - err = cosmovisor.UpgradeBinary(logger, cfg, info) + err = UpgradeBinary(logger, cfg, info) s.Require().Error(err, name) currentBin, err := cfg.CurrentBin() s.Require().NoError(err) @@ -156,7 +156,7 @@ func (s *upgradeTestSuite) TestUpgradeBinaryNoDownloadUrl() { for _, upgrade := range []string{"chain2", "chain3"} { // now set it to a valid upgrade and make sure CurrentBin is now set properly info := upgradetypes.Plan{Name: upgrade} - err = 
cosmovisor.UpgradeBinary(logger, cfg, info) + err = UpgradeBinary(logger, cfg, info) s.Require().NoError(err) // we should see current point to the new upgrade dir upgradeBin := cfg.UpgradeBin(upgrade) @@ -228,10 +228,10 @@ func (s *upgradeTestSuite) TestUpgradeBinary() { plan := upgradetypes.Plan{ Name: "amazonas", - Info: fmt.Sprintf(`{"binaries":{"%s": "%s"}}`, cosmovisor.OSArch(), url), + Info: fmt.Sprintf(`{"binaries":{"%s": "%s"}}`, OSArch(), url), } - err = cosmovisor.UpgradeBinary(logger, cfg, plan) + err = UpgradeBinary(logger, cfg, plan) if !tc.canDownload { s.Require().Error(err) } else { @@ -250,7 +250,7 @@ func (s *upgradeTestSuite) TestOsArch() { "darwin/arm64", } - s.Require().True(slices.Contains(hosts, cosmovisor.OSArch())) + s.Require().True(slices.Contains(hosts, OSArch())) } // copyTestData will make a tempdir and then diff --git a/tools/cosmovisor/internal/upgrader.go b/tools/cosmovisor/internal/upgrader.go new file mode 100644 index 000000000000..778b120527ba --- /dev/null +++ b/tools/cosmovisor/internal/upgrader.go @@ -0,0 +1,240 @@ +package internal + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "time" + + "github.com/otiai10/copy" + + "cosmossdk.io/log" + "cosmossdk.io/tools/cosmovisor/v2" + + upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" +) + +type UpgradeCheckResult struct { + Upgraded bool + HaltHeight uint64 +} + +func UpgradeIfNeeded(cfg *cosmovisor.Config, logger log.Logger, knownHeight uint64) (upgraded bool, err error) { + // if we see upgrade-info.json, assume we are at the right height and upgrade + logger.Info("Checking for upgrade-info.json") + currentBinaryUpgradeName := cfg.CurrentBinaryUpgradeName() + logger.Debug("read current binary's upgrade info", "name", currentBinaryUpgradeName) + // only upgrade if we have a pending upgrade plan with a different name from the current binary's upgrade plan + if upgradePlan, err := cfg.PendingUpgradeInfo(); err == nil && + upgradePlan != nil && + 
upgradePlan.Name != currentBinaryUpgradeName { + err := DoUpgrade(cfg, logger, upgradePlan) + if err != nil { + return false, err + } + return true, nil + } + logger.Info("Checking for upgrade-info.json.batch") + manualUpgradeBatch, err := cfg.ReadManualUpgrades() + if err != nil { + return false, err + } + logger.Info("Checking last known height") + lastKnownHeight := knownHeight + if lastKnownHeight == 0 { + lastKnownHeight = cfg.ReadLastKnownHeight() + } + if manualUpgrade := manualUpgradeBatch.FirstUpgrade(); manualUpgrade != nil { + haltHeight := uint64(manualUpgrade.Height) + if lastKnownHeight == haltHeight { + logger.Info("At manual upgrade", "upgrade", manualUpgrade, "halt_height", haltHeight) + err := DoUpgrade(cfg, logger, manualUpgrade) + if err != nil { + return false, err + } + // remove the manual upgrade plan after a successful upgrade, otherwise we will keep trying to upgrade + logger.Info("Removing completed manual upgrade plan", "height", manualUpgrade.Height, "name", manualUpgrade.Name) + err = cfg.RemoveManualUpgrade(manualUpgrade.Height) + if err != nil { + return true, fmt.Errorf("failed to remove manual upgrade at height %d: %w", manualUpgrade.Height, err) + } + return true, err + } else if lastKnownHeight > haltHeight { + // if the last known height is past the halt height, we assume that we are in an error condition and have missed the halt height! + return false, fmt.Errorf("last known height is %d, but we have manual upgrade %s scheduled for height %d which is before the current height! For safety, Cosmovisor assumes that this is an error condition that requires operator intervention. 
If the manual upgrade was scheduled by error, please remove it from upgrade-info.json.batch and restart Cosmovisor", + lastKnownHeight, manualUpgrade.Name, manualUpgrade.Height) + } + } + return false, nil +} + +type Upgrader struct { + cfg *cosmovisor.Config + logger log.Logger + upgradePlan *upgradetypes.Plan +} + +func DoUpgrade(cfg *cosmovisor.Config, logger log.Logger, upgradePlan *upgradetypes.Plan) error { + upgrader := &Upgrader{ + cfg: cfg, + logger: logger, + upgradePlan: upgradePlan, + } + return upgrader.DoUpgrade() +} + +func (u *Upgrader) DoUpgrade() error { + u.logger.Info("Starting upgrade process") + u.cfg.WaitRestartDelay() + + currentBin, err := u.cfg.CurrentBin() + if err != nil { + return err + } + upgradeBin := u.cfg.UpgradeBin(u.upgradePlan.Name) + u.logger.Info("Current binary", "current_bin", currentBin, "upgrade_bin", upgradeBin) + if currentBin == upgradeBin { + return fmt.Errorf("current binary %s is already the upgrade binary %s, fatal error", currentBin, upgradeBin) + } + + if err := u.doBackup(); err != nil { + return err + } + + if err := u.doCustomPreUpgrade(); err != nil { + return err + } + + if err := UpgradeBinary(u.logger, u.cfg, *u.upgradePlan); err != nil { + return err + } + + if err := u.doPreUpgrade(); err != nil { + return err + } + + return nil +} + +// doCustomPreUpgrade executes the custom preupgrade script if provided. 
+func (u *Upgrader) doCustomPreUpgrade() error { + if u.cfg.CustomPreUpgrade == "" { + return nil + } + + u.logger.Info("Running custom pre-upgrade script", "script", u.cfg.CustomPreUpgrade) + + // check if preupgradeFile is executable file + preupgradeFile := filepath.Join(u.cfg.Home, "cosmovisor", u.cfg.CustomPreUpgrade) + u.logger.Info("looking for COSMOVISOR_CUSTOM_PREUPGRADE file", "file", preupgradeFile) + info, err := os.Stat(preupgradeFile) + if err != nil { + u.logger.Error("COSMOVISOR_CUSTOM_PREUPGRADE file missing", "file", preupgradeFile) + return err + } + if !info.Mode().IsRegular() { + _, f := filepath.Split(preupgradeFile) + return fmt.Errorf("COSMOVISOR_CUSTOM_PREUPGRADE: %s is not a regular file", f) + } + + // Set the execute bit for only the current user + // Given: Current user - Group - Everyone + // 0o RWX - RWX - RWX + oldMode := info.Mode().Perm() + newMode := oldMode | 0o100 + if oldMode != newMode { + if err := os.Chmod(preupgradeFile, newMode); err != nil { + u.logger.Info("COSMOVISOR_CUSTOM_PREUPGRADE could not add execute permission") + return errors.New("COSMOVISOR_CUSTOM_PREUPGRADE could not add execute permission") + } + } + + // Run preupgradeFile + cmd := exec.Command(preupgradeFile, u.upgradePlan.Name, fmt.Sprintf("%d", u.upgradePlan.Height)) + cmd.Dir = u.cfg.Home + result, err := cmd.Output() + if err != nil { + return err + } + + u.logger.Info("COSMOVISOR_CUSTOM_PREUPGRADE result", "command", preupgradeFile, "argv1", u.upgradePlan.Name, "argv2", fmt.Sprintf("%d", u.upgradePlan.Height), "result", result) + + return nil +} + +// doPreUpgrade runs the pre-upgrade command defined by the application and handles respective error codes. +// cfg contains the cosmovisor config from env var. +// doPreUpgrade runs the new APP binary in order to process the upgrade (post-upgrade for cosmovisor). 
+func (u *Upgrader) doPreUpgrade() error { + counter := 0 + for { + if counter > u.cfg.PreUpgradeMaxRetries { + return fmt.Errorf("pre-upgrade command failed. reached max attempt of retries - %d", u.cfg.PreUpgradeMaxRetries) + } + + if err := u.executePreUpgradeCmd(); err != nil { + counter++ + + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + switch exitErr.ExitCode() { + case 1: + u.logger.Info("pre-upgrade command does not exist. continuing the upgrade.") + return nil + case 30: + return fmt.Errorf("pre-upgrade command failed : %w", err) + case 31: + u.logger.Error("pre-upgrade command failed. retrying", "error", err, "attempt", counter) + continue + } + } + } + + u.logger.Info("pre-upgrade successful. continuing the upgrade.") + return nil + } +} + +// executePreUpgradeCmd runs the pre-upgrade command defined by the application +// cfg contains the cosmovisor config from the env vars +func (u *Upgrader) executePreUpgradeCmd() error { + bin, err := u.cfg.CurrentBin() + if err != nil { + return fmt.Errorf("error while getting current binary path: %w", err) + } + + result, err := exec.Command(bin, "pre-upgrade").Output() + if err != nil { + return err + } + + u.logger.Info("pre-upgrade result", "result", result) + return nil +} + +func (u *Upgrader) doBackup() error { + // take backup if `UNSAFE_SKIP_BACKUP` is not set. 
+ if u.cfg.UnsafeSkipBackup { + return nil + } + + // a destination directory, Format YYYY-MM-DD + st := time.Now() + ymd := fmt.Sprintf("%d-%d-%d", st.Year(), st.Month(), st.Day()) + dst := filepath.Join(u.cfg.DataBackupPath, fmt.Sprintf("data"+"-backup-%s", ymd)) + + u.logger.Info("Taking backup of data directory", "backup_path", dst) + + // copy the $DAEMON_HOME/data to a backup dir + if err := copy.Copy(filepath.Join(u.cfg.Home, "data"), dst); err != nil { + return fmt.Errorf("error while taking data backup: %w", err) + } + + // backup is done, lets check endtime to calculate total time taken for backup process + et := time.Now() + u.logger.Info("Backup completed", "backup_path", dst, "completion_time", et, "duration", et.Sub(st)) + + return nil +} diff --git a/tools/cosmovisor/internal/watchers/data_watcher.go b/tools/cosmovisor/internal/watchers/data_watcher.go new file mode 100644 index 000000000000..17be5b4512e1 --- /dev/null +++ b/tools/cosmovisor/internal/watchers/data_watcher.go @@ -0,0 +1,40 @@ +package watchers + +import ( + "context" +) + +type DataWatcher[T any] struct { + outChan chan T +} + +func NewDataWatcher[T, I any](ctx context.Context, errorHandler ErrorHandler, watcher Watcher[I], unmarshal func(I) (T, error)) *DataWatcher[T] { + outChan := make(chan T, 1) + go func() { + defer close(outChan) + for { + select { + case <-ctx.Done(): + return + case contents, ok := <-watcher.Updated(): + if !ok { + return + } + var data T + data, err := unmarshal(contents) + if err == nil { + outChan <- data + } else { + errorHandler.Warn("failed to unmarshal data", err) + } + } + } + }() + return &DataWatcher[T]{ + outChan: outChan, + } +} + +func (d DataWatcher[T]) Updated() <-chan T { + return d.outChan +} diff --git a/tools/cosmovisor/internal/watchers/data_watcher_test.go b/tools/cosmovisor/internal/watchers/data_watcher_test.go new file mode 100644 index 000000000000..addb8f4ec67d --- /dev/null +++ 
b/tools/cosmovisor/internal/watchers/data_watcher_test.go @@ -0,0 +1,82 @@ +package watchers + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "cosmossdk.io/log" +) + +type TestData struct { + X int `json:"x"` + Y string `json:"y"` +} + +func TestDataWatcher(t *testing.T) { + dir, err := os.MkdirTemp("", "watcher") + require.NoError(t, err) + filename := filepath.Join(dir, "testfile.json") + + ctx, cancel := context.WithCancel(context.Background()) + eh := DebugLoggerErrorHandler(log.NewTestLogger(t)) + pollWatcher := NewFilePollWatcher(ctx, eh, filename, time.Millisecond*100) + dataWatcher := NewDataWatcher[TestData](ctx, eh, pollWatcher, func(contents []byte) (TestData, error) { + var data TestData + err := json.Unmarshal(contents, &data) + return data, err + }) + + expectedContent := TestData{ + X: 10, + Y: "testtesttest", + } + go func() { + // write some dummy data to the file + time.Sleep(time.Second) + err = os.WriteFile(filename, []byte("unexpected content - should be ignored"), 0o644) + require.NoError(t, err) + + // write the expected content to the file + time.Sleep(time.Second) + bz, err := json.Marshal(expectedContent) + require.NoError(t, err) + err = os.WriteFile(filename, bz, 0o644) + require.NoError(t, err) + + // wait a bit to ensure the watcher has time to pick up the change + // then cancel the context + time.Sleep(time.Second) + cancel() + }() + + var actualContext *TestData + + // we check all the channels in a function which we'll return from whenever + // a channel is closed or we get the done signal + func() { + for { + select { + case content, ok := <-dataWatcher.Updated(): + if !ok { + return + } + actualContext = &content + case <-ctx.Done(): + return + } + } + }() + + // check we have the expected context + require.Equal(t, expectedContent, *actualContext) + + // check that all the channels are closed + _, open := <-dataWatcher.Updated() + 
require.False(t, open) +} diff --git a/tools/cosmovisor/internal/watchers/file_poll_watcher.go b/tools/cosmovisor/internal/watchers/file_poll_watcher.go new file mode 100644 index 000000000000..753815811a48 --- /dev/null +++ b/tools/cosmovisor/internal/watchers/file_poll_watcher.go @@ -0,0 +1,39 @@ +package watchers + +import ( + "context" + "fmt" + "os" + "time" +) + +func NewFilePollWatcher(ctx context.Context, errorHandler ErrorHandler, filename string, pollInterval time.Duration) Watcher[[]byte] { + stat, err := os.Stat(filename) + var lastModTime time.Time + if err == nil { + lastModTime = stat.ModTime() + } + check := func() ([]byte, error) { + stat, err := os.Stat(filename) + if err != nil { + if !os.IsNotExist(err) { + return nil, fmt.Errorf("failed to stat file %s: %w", filename, err) + } + } else { + modTime := stat.ModTime() + if stat.Size() > 0 && !modTime.Equal(lastModTime) { + lastModTime = modTime + bz, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("failed to read file %s: %w", filename, err) + } else { + return bz, nil + } + } + } + return nil, os.ErrNotExist + } + watcher := NewPollWatcher[[]byte](errorHandler, check, pollInterval) + watcher.Start(ctx) + return watcher +} diff --git a/tools/cosmovisor/internal/watchers/file_poll_watcher_test.go b/tools/cosmovisor/internal/watchers/file_poll_watcher_test.go new file mode 100644 index 000000000000..a27822b53463 --- /dev/null +++ b/tools/cosmovisor/internal/watchers/file_poll_watcher_test.go @@ -0,0 +1,64 @@ +package watchers + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "cosmossdk.io/log" +) + +func TestPollWatcher(t *testing.T) { + dir, err := os.MkdirTemp("", "watcher") + require.NoError(t, err) + filename := filepath.Join(dir, "testfile") + + ctx, cancel := context.WithCancel(context.Background()) + eh := DebugLoggerErrorHandler(log.NewTestLogger(t)) + watcher := NewFilePollWatcher(ctx, eh, filename, 
time.Millisecond*100) + expectedContent := []byte("test") + go func() { + // write some dummy data to the file + time.Sleep(time.Second) + err = os.WriteFile(filename, []byte("unexpected content - should be updated later"), 0o644) + require.NoError(t, err) + + // write the expected content to the file + time.Sleep(time.Second) + err := os.WriteFile(filename, expectedContent, 0o644) + require.NoError(t, err) + + // wait a bit to ensure the watcher has time to pick up the change + // then cancel the context + time.Sleep(time.Second) + cancel() + }() + + var actualContent []byte + // we check all the channels in a function which we'll return from whenever + // a channel is closed or we get the done signal + func() { + for { + select { + case bz, ok := <-watcher.Updated(): + if !ok { + return + } + actualContent = bz + case <-ctx.Done(): + return + } + } + }() + + // check we have the expected context + require.Equal(t, expectedContent, actualContent) + + // check that all the channels are closed + _, open := <-watcher.Updated() + require.False(t, open) +} diff --git a/tools/cosmovisor/internal/watchers/fsnotify_watcher.go b/tools/cosmovisor/internal/watchers/fsnotify_watcher.go new file mode 100644 index 000000000000..e6d419f48527 --- /dev/null +++ b/tools/cosmovisor/internal/watchers/fsnotify_watcher.go @@ -0,0 +1,98 @@ +package watchers + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/fsnotify/fsnotify" + + "cosmossdk.io/log" +) + +type FSNotifyWatcher struct { + watcher *fsnotify.Watcher + outChan chan FileUpdate +} + +var _ Watcher[FileUpdate] = (*FSNotifyWatcher)(nil) + +func NewFSNotifyWatcher(ctx context.Context, logger log.Logger, dir string, filenames []string) (*FSNotifyWatcher, error) { + watcher, err := fsnotify.NewWatcher() + if err != nil { + return nil, err + } + + err = watcher.Add(dir) + if err != nil { + return nil, fmt.Errorf("failed to watch directory %s: %w", dir, err) + } + + // validate filenames are absolute 
paths within the watched directory + filenameSet := make(map[string]struct{}) + for _, filename := range filenames { + if !filepath.IsAbs(filename) { + return nil, fmt.Errorf("filename must be absolute path: %s", filename) + } + if !strings.HasPrefix(filename, dir) { + return nil, fmt.Errorf("filename must be within watched directory: %s", filename) + } + filenameSet[filename] = struct{}{} + } + + outChan := make(chan FileUpdate, 1) + errChan := make(chan error, 1) + go func() { + // close the watcher and channels + // when the goroutines exits via return's + defer func(watcher *fsnotify.Watcher) { + _ = watcher.Close() + }(watcher) + defer close(outChan) + defer close(errChan) + + for { + select { + case <-ctx.Done(): + return + case event, ok := <-watcher.Events: + if !ok { // channel closed + return + } + if event.Has(fsnotify.Write) { + if _, ok := filenameSet[event.Name]; !ok { + continue + } + filename := event.Name + bz, err := os.ReadFile(filename) + if err != nil { + errChan <- fmt.Errorf("failed to read file %s: %w", filename, err) + } else { + outChan <- FileUpdate{Filename: filename, Contents: bz} + } + } + case err, ok := <-watcher.Errors: + if !ok { // channel closed + return + } + logger.Error("fsnotify error", "error", err) + } + } + }() + + return &FSNotifyWatcher{ + watcher: watcher, + outChan: outChan, + }, nil +} + +type FileUpdate struct { + Filename string + Contents []byte +} + +func (w *FSNotifyWatcher) Updated() <-chan FileUpdate { + return w.outChan +} diff --git a/tools/cosmovisor/internal/watchers/height_watcher.go b/tools/cosmovisor/internal/watchers/height_watcher.go new file mode 100644 index 000000000000..8af8769e8803 --- /dev/null +++ b/tools/cosmovisor/internal/watchers/height_watcher.go @@ -0,0 +1,40 @@ +package watchers + +import ( + "context" + "time" +) + +type HeightChecker interface { + GetLatestBlockHeight() (uint64, error) +} + +type HeightWatcher struct { + *PollWatcher[uint64] + checker HeightChecker + onGetHeight 
func(uint64) error +} + +func NewHeightWatcher(errorHandler ErrorHandler, checker HeightChecker, pollInterval time.Duration, onGetHeight func(uint64) error) *HeightWatcher { + watcher := &HeightWatcher{ + checker: checker, + onGetHeight: onGetHeight, + } + watcher.PollWatcher = NewPollWatcher[uint64](errorHandler, func() (uint64, error) { + return watcher.ReadNow() + }, pollInterval) + return watcher +} + +func (h HeightWatcher) Start(ctx context.Context) { + h.PollWatcher.Start(ctx) +} + +func (h HeightWatcher) ReadNow() (uint64, error) { + height, err := h.checker.GetLatestBlockHeight() + if err != nil { + return 0, err + } + err = h.onGetHeight(height) + return height, err +} diff --git a/tools/cosmovisor/internal/watchers/http_block_checker.go b/tools/cosmovisor/internal/watchers/http_block_checker.go new file mode 100644 index 000000000000..aac83d14eb71 --- /dev/null +++ b/tools/cosmovisor/internal/watchers/http_block_checker.go @@ -0,0 +1,91 @@ +package watchers + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strconv" + + "cosmossdk.io/log" +) + +func NewHTTPRPCBLockChecker(baseUrl string, logger log.Logger) HeightChecker { + return &httpRPCBlockChecker{ + baseUrl: baseUrl, + logger: logger, + } +} + +type httpRPCBlockChecker struct { + baseUrl string + subUrl string + logger log.Logger +} + +func (j *httpRPCBlockChecker) GetLatestBlockHeight() (uint64, error) { + if j.subUrl != "" { + return j.getLatestBlockHeight(j.subUrl) + } + + height, err1 := j.getLatestBlockHeight("/v1/block") + if err1 == nil { + j.logger.Info("Successfully resolved latest block height from /v1/block", "url", j.baseUrl+"/v1/block") + // If we successfully got the height from /v1/block, we can cache the subUrl + j.subUrl = "/v1/block" + return height, nil + } + + height, err2 := j.getLatestBlockHeight("/block") + if err2 == nil { + j.logger.Info("Successfully resolved latest block height from /block", "url", j.baseUrl+"/block") + // If we successfully got the 
// Header is the part of a block header read from an RPC block response.
type Header struct {
	// Height is the block height, encoded as a decimal string.
	Height string `json:"height"`
}

// Block is the block object of an RPC block response.
type Block struct {
	Header Header `json:"header"`
}

// Result is the result object of an RPC block response.
type Result struct {
	Block Block `json:"block"`
}

// Response is the envelope of an RPC block response.
type Response struct {
	Result Result `json:"result"`
}

// getHeightFromRPCBlockResponse extracts result.block.header.height from the
// JSON body bz of a block response and parses it as an unsigned decimal.
//
// An error is returned when bz is not valid JSON, when it contains no height
// at that path (for instance because the body is an error response), or when
// the height is not a valid unsigned 64-bit integer.
func getHeightFromRPCBlockResponse(bz []byte) (uint64, error) {
	var response Response
	if err := json.Unmarshal(bz, &response); err != nil {
		return 0, fmt.Errorf("failed to unmarshal block response: %w", err)
	}

	raw := response.Result.Block.Header.Height
	if raw == "" {
		// report the real problem instead of an opaque `parsing ""` error
		return 0, fmt.Errorf("block response does not contain result.block.header.height")
	}

	height, err := strconv.ParseUint(raw, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("failed to parse block height %q: %w", raw, err)
	}
	return height, nil
}
:= make(chan error, 1) + + go func() { + defer close(outChan) + defer close(errChan) + for { + select { + case <-ctx.Done(): + return + case update, ok := <-dirWatcher.Updated(): + if !ok { + return + } + if update.Filename == filename { + outChan <- update.Contents + } + case update, ok := <-pollWatcher.Updated(): + if !ok { + return + } + outChan <- update + } + } + }() + + return &HybridWatcher{ + outChan: outChan, + errChan: errChan, + } +} + +func (h HybridWatcher) Updated() <-chan []byte { + return h.outChan +} + +func (h HybridWatcher) Errors() <-chan error { + return h.errChan +} diff --git a/tools/cosmovisor/internal/watchers/poll_watcher.go b/tools/cosmovisor/internal/watchers/poll_watcher.go new file mode 100644 index 000000000000..8c8ac8905437 --- /dev/null +++ b/tools/cosmovisor/internal/watchers/poll_watcher.go @@ -0,0 +1,57 @@ +package watchers + +import ( + "context" + "os" + "reflect" + "time" +) + +type PollWatcher[T any] struct { + outChan chan T + errorHandler ErrorHandler + checker func() (T, error) + pollInterval time.Duration +} + +func NewPollWatcher[T any](errorHandler ErrorHandler, checker func() (T, error), pollInterval time.Duration) *PollWatcher[T] { + outChan := make(chan T, 1) + return &PollWatcher[T]{ + errorHandler: errorHandler, + checker: checker, + pollInterval: pollInterval, + outChan: outChan, + } +} + +func (w *PollWatcher[T]) Start(ctx context.Context) { + ticker := time.NewTicker(w.pollInterval) + go func() { + defer ticker.Stop() + defer close(w.outChan) + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + x, err := w.checker() + if err != nil { + if !os.IsNotExist(err) { + w.errorHandler.Error("failed to check for updates", err) + } + } else { + // to make PollWatcher generic on any type T (including []byte), we use reflect.DeepEqual and the default zero value of T + var zero T + if !reflect.DeepEqual(x, zero) { + w.outChan <- x + } + } + } + } + }() +} + +func (w *PollWatcher[T]) Updated() <-chan T { + 
// debugLoggerErrorHandler is an ErrorHandler that forwards everything to a
// logger, one severity level lower than the method name suggests.
type debugLoggerErrorHandler struct {
	logger log.Logger
}

// Error logs msg and err at warn level (downgraded from error).
func (h *debugLoggerErrorHandler) Error(msg string, err error) {
	h.logger.Warn(msg, "error", err)
}

// Warn logs msg and err at debug level (downgraded from warning).
func (h *debugLoggerErrorHandler) Warn(msg string, err error) {
	h.logger.Debug(msg, "error", err)
}

// DebugLoggerErrorHandler returns an ErrorHandler that logs errors and warnings using the provided logger,
// but downgrades errors to warnings and warnings to debug logs.
func DebugLoggerErrorHandler(logger log.Logger) ErrorHandler {
	return &debugLoggerErrorHandler{logger: logger}
}
+func InitFileWatcher[T any](ctx context.Context, errorHandler ErrorHandler, pollInterval time.Duration, dirWatcher *FSNotifyWatcher, filename string, unmarshal func([]byte) (T, error)) Watcher[T] { + if dirWatcher != nil { + hybridWatcher := NewHybridWatcher(ctx, errorHandler, dirWatcher, filename, pollInterval) + return NewDataWatcher[T](ctx, errorHandler, hybridWatcher, unmarshal) + } else { + pollWatcher := NewFilePollWatcher(ctx, errorHandler, filename, pollInterval) + return NewDataWatcher[T](ctx, errorHandler, pollWatcher, unmarshal) + } +} diff --git a/tools/cosmovisor/manual.go b/tools/cosmovisor/manual.go new file mode 100644 index 000000000000..d8d0045c533e --- /dev/null +++ b/tools/cosmovisor/manual.go @@ -0,0 +1,130 @@ +package cosmovisor + +import ( + "encoding/json" + "fmt" + "os" + "sort" + + upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" +) + +// ReadManualUpgrades reads the manual upgrade data. +func (cfg *Config) ReadManualUpgrades() (ManualUpgradeBatch, error) { + bz, err := os.ReadFile(cfg.UpgradeInfoBatchFilePath()) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + return cfg.ParseManualUpgrades(bz) +} + +func (cfg *Config) ParseManualUpgrades(bz []byte) (ManualUpgradeBatch, error) { + var manualUpgrades ManualUpgradeBatch + if err := json.Unmarshal(bz, &manualUpgrades); err != nil { + return nil, err + } + + sortUpgrades(manualUpgrades) + + if err := manualUpgrades.ValidateBasic(); err != nil { + return nil, fmt.Errorf("invalid manual upgrade batch: %w", err) + } + + return manualUpgrades, nil +} + +// AddManualUpgrades adds a manual upgrade plan. +// If an upgrade with the same name already exists, it will only be overwritten if forceOverwrite is true, +// otherwise an error will be returned. 
+func (cfg *Config) AddManualUpgrades(forceOverwrite bool, plans ...*upgradetypes.Plan) error { + if len(plans) == 0 { + return nil + } + existing, err := cfg.ReadManualUpgrades() + if err != nil { + return err + } + + planMap := map[string]*upgradetypes.Plan{} + for _, existingPlan := range existing { + planMap[existingPlan.Name] = existingPlan + } + for _, plan := range plans { + if err := plan.ValidateBasic(); err != nil { + return fmt.Errorf("invalid upgrade plan %s: %w", plan.Name, err) + } + if _, ok := planMap[plan.Name]; ok { + if !forceOverwrite { + return fmt.Errorf("upgrade with name %s already exists", plan.Name) + } + } + planMap[plan.Name] = plan + } + + var newUpgrades ManualUpgradeBatch + for _, plan := range planMap { + newUpgrades = append(newUpgrades, plan) + } + + return cfg.saveManualUpgrades(newUpgrades) +} + +func (cfg *Config) RemoveManualUpgrade(height int64) error { + manualUpgrades, err := cfg.ReadManualUpgrades() + if err != nil { + return err + } + + var newUpgrades ManualUpgradeBatch + for _, existing := range manualUpgrades { + if existing.Height == height { + continue + } else { + newUpgrades = append(newUpgrades, existing) + } + } + if len(newUpgrades) == len(manualUpgrades) { + return nil + } + return cfg.saveManualUpgrades(newUpgrades) +} + +func (cfg *Config) saveManualUpgrades(manualUpgrades ManualUpgradeBatch) error { + sortUpgrades(manualUpgrades) + + manualUpgradesData, err := json.MarshalIndent(manualUpgrades, "", " ") + if err != nil { + return err + } + + return os.WriteFile(cfg.UpgradeInfoBatchFilePath(), manualUpgradesData, 0o644) +} + +func sortUpgrades(upgrades ManualUpgradeBatch) { + sort.Slice(upgrades, func(i, j int) bool { + return upgrades[i].Height < upgrades[j].Height + }) +} + +type ManualUpgradeBatch []*upgradetypes.Plan + +func (m ManualUpgradeBatch) ValidateBasic() error { + for _, upgrade := range m { + if err := upgrade.ValidateBasic(); err != nil { + return fmt.Errorf("invalid upgrade plan %s: %w", 
upgrade.Name, err) + } + } + return nil +} + +func (m ManualUpgradeBatch) FirstUpgrade() *upgradetypes.Plan { + // ensure the upgrades are sorted before searching + sortUpgrades(m) + if len(m) == 0 { + return nil + } + return m[0] +} diff --git a/tools/cosmovisor/process.go b/tools/cosmovisor/process.go deleted file mode 100644 index e2bb122d8be6..000000000000 --- a/tools/cosmovisor/process.go +++ /dev/null @@ -1,507 +0,0 @@ -package cosmovisor - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "os" - "os/exec" - "os/signal" - "path/filepath" - "sort" - "strconv" - "strings" - "sync" - "syscall" - "time" - - "github.com/fsnotify/fsnotify" - "github.com/otiai10/copy" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" - - "cosmossdk.io/log" - - "github.com/cosmos/cosmos-sdk/client/grpc/cmtservice" - "github.com/cosmos/cosmos-sdk/x/upgrade/plan" - upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" -) - -type Launcher struct { - logger log.Logger - cfg *Config - fw *fileWatcher -} - -func NewLauncher(logger log.Logger, cfg *Config) (Launcher, error) { - fw, err := newUpgradeFileWatcher(cfg) - if err != nil { - return Launcher{}, err - } - - return Launcher{logger: logger, cfg: cfg, fw: fw}, nil -} - -// loadBatchUpgradeFile loads the batch upgrade file into memory, sorted by -// their upgrade heights -func loadBatchUpgradeFile(cfg *Config) ([]upgradetypes.Plan, error) { - var uInfos []upgradetypes.Plan - upgradeInfoFile, err := os.ReadFile(cfg.UpgradeInfoBatchFilePath()) - if os.IsNotExist(err) { - return uInfos, nil - } else if err != nil { - return nil, fmt.Errorf("error while reading %s: %w", cfg.UpgradeInfoBatchFilePath(), err) - } - - if err = json.Unmarshal(upgradeInfoFile, &uInfos); err != nil { - return nil, err - } - sort.Slice(uInfos, func(i, j int) bool { - return uInfos[i].Height < uInfos[j].Height - }) - return uInfos, nil -} - -// BatchUpgradeWatcher starts a watcher loop that swaps upgrade manifests at 
the correct -// height, given the batch upgrade file. It watches the current state of the chain -// via the websocket API. -func BatchUpgradeWatcher(ctx context.Context, cfg *Config, logger log.Logger) { - // load batch file in memory - uInfos, err := loadBatchUpgradeFile(cfg) - if err != nil { - logger.Warn("failed to load batch upgrade file", "error", err) - uInfos = []upgradetypes.Plan{} - } - - watcher, err := fsnotify.NewWatcher() - if err != nil { - logger.Warn("failed to init watcher", "error", err) - return - } - defer watcher.Close() - err = watcher.Add(filepath.Dir(cfg.UpgradeInfoBatchFilePath())) - if err != nil { - logger.Warn("watcher failed to add upgrade directory", "error", err) - return - } - - var conn *grpc.ClientConn - var grpcErr error - - defer func() { - if conn != nil { - if err := conn.Close(); err != nil { - logger.Warn("couldn't stop gRPC client", "error", err) - } - } - }() - - // Wait for the chain process to be ready -pollLoop: - for { - select { - case <-ctx.Done(): - return - default: - conn, grpcErr = grpc.NewClient(cfg.GRPCAddress, grpc.WithTransportCredentials(insecure.NewCredentials())) - if grpcErr == nil { - break pollLoop - } - time.Sleep(time.Second) - } - } - - client := cmtservice.NewServiceClient(conn) - - var prevUpgradeHeight int64 = -1 - - logger.Info("starting the batch watcher loop") - for { - select { - case event := <-watcher.Events: - if event.Op&(fsnotify.Write|fsnotify.Create) != 0 { - uInfos, err = loadBatchUpgradeFile(cfg) - if err != nil { - logger.Warn("failed to load batch upgrade file", "error", err) - continue - } - } - case <-ctx.Done(): - return - default: - if len(uInfos) == 0 { - // prevent spending extra CPU cycles - time.Sleep(time.Second) - continue - } - resp, err := client.GetLatestBlock(ctx, &cmtservice.GetLatestBlockRequest{}) - if err != nil { - logger.Warn("error getting latest block", "error", err) - time.Sleep(time.Second) - continue - } - - h := resp.SdkBlock.Header.Height - upcomingUpgrade 
:= uInfos[0].Height - // replace upgrade-info and upgrade-info batch file - if h > prevUpgradeHeight && h < upcomingUpgrade { - jsonBytes, err := json.Marshal(uInfos[0]) - if err != nil { - logger.Warn("error marshaling JSON for upgrade-info.json", "error", err, "upgrade", uInfos[0]) - continue - } - if err := os.WriteFile(cfg.UpgradeInfoFilePath(), jsonBytes, 0o600); err != nil { - logger.Warn("error writing upgrade-info.json", "error", err) - continue - } - uInfos = uInfos[1:] - - jsonBytes, err = json.Marshal(uInfos) - if err != nil { - logger.Warn("error marshaling JSON for upgrade-info.json.batch", "error", err, "upgrades", uInfos) - continue - } - if err := os.WriteFile(cfg.UpgradeInfoBatchFilePath(), jsonBytes, 0o600); err != nil { - logger.Warn("error writing upgrade-info.json.batch", "error", err) - // remove the upgrade-info.json.batch file to avoid non-deterministic behavior - err := os.Remove(cfg.UpgradeInfoBatchFilePath()) - if err != nil && !os.IsNotExist(err) { - logger.Warn("error removing upgrade-info.json.batch", "error", err) - return - } - continue - } - prevUpgradeHeight = upcomingUpgrade - } - - // Add a small delay to avoid hammering the gRPC endpoint - time.Sleep(time.Second) - } - } -} - -// Run launches the app in a subprocess and returns when the subprocess (app) -// exits (either when it dies, or *after* a successful upgrade.) and upgrade finished. -// Returns true if the upgrade request was detected and the upgrade process started. -func (l Launcher) Run(args []string, stdin io.Reader, stdout, stderr io.Writer) (bool, error) { - bin, err := l.cfg.CurrentBin() - if err != nil { - return false, fmt.Errorf("error creating symlink to genesis: %w", err) - } - - if err := plan.EnsureBinary(bin); err != nil { - return false, fmt.Errorf("current binary is invalid: %w", err) - } - - l.logger.Info("running app", "path", bin, "args", args) - cmd := exec.Command(bin, args...) 
- cmd.Stdin = stdin - cmd.Stdout = stdout - cmd.Stderr = stderr - if err := cmd.Start(); err != nil { - return false, fmt.Errorf("launching process %s %s failed: %w", bin, strings.Join(args, " "), err) - } - - ctx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - BatchUpgradeWatcher(ctx, l.cfg, l.logger) - }() - - sigs := make(chan os.Signal, 1) - signal.Notify(sigs, syscall.SIGQUIT, syscall.SIGTERM) - go func() { - sig := <-sigs - cancel() - wg.Wait() - if err := cmd.Process.Signal(sig); err != nil { - l.logger.Error("terminated", "error", err, "bin", bin) - os.Exit(1) - } - }() - - if needsUpdate, err := l.WaitForUpgradeOrExit(cmd); err != nil || !needsUpdate { - return false, err - } - - if !IsSkipUpgradeHeight(args, l.fw.currentInfo) { - l.cfg.WaitRestartDelay() - - if err := l.doBackup(); err != nil { - return false, err - } - - if err := l.doCustomPreUpgrade(); err != nil { - return false, err - } - - if err := UpgradeBinary(l.logger, l.cfg, l.fw.currentInfo); err != nil { - return false, err - } - - if err = l.doPreUpgrade(); err != nil { - return false, err - } - - return true, nil - } - - cancel() - wg.Wait() - - return false, nil -} - -// WaitForUpgradeOrExit checks upgrade plan file created by the app. -// When it returns, the process (app) is finished. -// -// It returns (true, nil) if an upgrade should be initiated (and we killed the process) -// It returns (false, err) if the process died by itself -// It returns (false, nil) if the process exited normally without triggering an upgrade. 
This is very unlikely -// to happen with "start" but may happen with short-lived commands like `simd genesis export ...` -func (l Launcher) WaitForUpgradeOrExit(cmd *exec.Cmd) (bool, error) { - currentUpgrade, err := l.cfg.UpgradeInfo() - if err != nil { - // upgrade info not found do nothing - currentUpgrade = upgradetypes.Plan{} - } - - cmdDone := make(chan error) - go func() { - cmdDone <- cmd.Wait() - }() - - select { - case <-l.fw.MonitorUpdate(currentUpgrade): - // upgrade - kill the process and restart - l.logger.Info("daemon shutting down in an attempt to restart") - - if l.cfg.ShutdownGrace > 0 { - // Interrupt signal - l.logger.Info("sent interrupt to app, waiting for exit") - _ = cmd.Process.Signal(syscall.SIGTERM) - - // Wait app exit - psChan := make(chan *os.ProcessState) - go func() { - pstate, _ := cmd.Process.Wait() - psChan <- pstate - }() - - // Timeout and kill - select { - case <-psChan: - // Normal Exit - l.logger.Info("app exited normally") - case <-time.After(l.cfg.ShutdownGrace): - l.logger.Info("DAEMON_SHUTDOWN_GRACE exceeded, killing app") - // Kill after grace period - _ = cmd.Process.Kill() - } - } else { - // Default: Immediate app kill - _ = cmd.Process.Kill() - } - case err := <-cmdDone: - l.fw.Stop() - // no error -> command exits normally (eg. short command like `gaiad version`) - if err == nil { - return false, nil - } - // the app x/upgrade causes a panic and the app can die before the filwatcher finds the - // update, so we need to recheck update-info file. - if !l.fw.CheckUpdate(currentUpgrade) { - return false, err - } - } - return true, nil -} - -func (l Launcher) doBackup() error { - // take backup if `UNSAFE_SKIP_BACKUP` is not set. - if !l.cfg.UnsafeSkipBackup { - // check if upgrade-info.json is not empty. 
- var uInfo upgradetypes.Plan - upgradeInfoFile, err := os.ReadFile(l.cfg.UpgradeInfoFilePath()) - if err != nil { - return fmt.Errorf("error while reading upgrade-info.json: %w", err) - } - - if err = json.Unmarshal(upgradeInfoFile, &uInfo); err != nil { - return err - } - - if uInfo.Name == "" { - return errors.New("upgrade-info.json is empty") - } - - // a destination directory, Format YYYY-MM-DD - st := time.Now() - ymd := fmt.Sprintf("%d-%d-%d", st.Year(), st.Month(), st.Day()) - dst := filepath.Join(l.cfg.DataBackupPath, fmt.Sprintf("data"+"-backup-%s", ymd)) - - l.logger.Info("starting to take backup of data directory", "backup start time", st) - - // copy the $DAEMON_HOME/data to a backup dir - if err = copy.Copy(filepath.Join(l.cfg.Home, "data"), dst); err != nil { - return fmt.Errorf("error while taking data backup: %w", err) - } - - // backup is done, lets check endtime to calculate total time taken for backup process - et := time.Now() - l.logger.Info("backup completed", "backup saved at", dst, "backup completion time", et, "time taken to complete backup", et.Sub(st)) - } - - return nil -} - -// doCustomPreUpgrade executes the custom preupgrade script if provided. -func (l Launcher) doCustomPreUpgrade() error { - if l.cfg.CustomPreUpgrade == "" { - return nil - } - - // check if upgrade-info.json is not empty. 
- var upgradePlan upgradetypes.Plan - upgradeInfoFile, err := os.ReadFile(l.cfg.UpgradeInfoFilePath()) - if err != nil { - return fmt.Errorf("error while reading upgrade-info.json: %w", err) - } - - if err = json.Unmarshal(upgradeInfoFile, &upgradePlan); err != nil { - return err - } - - if err = upgradePlan.ValidateBasic(); err != nil { - return fmt.Errorf("invalid upgrade plan: %w", err) - } - - // check if preupgradeFile is executable file - preupgradeFile := filepath.Join(l.cfg.Home, "cosmovisor", l.cfg.CustomPreUpgrade) - l.logger.Info("looking for COSMOVISOR_CUSTOM_PREUPGRADE file", "file", preupgradeFile) - info, err := os.Stat(preupgradeFile) - if err != nil { - l.logger.Error("COSMOVISOR_CUSTOM_PREUPGRADE file missing", "file", preupgradeFile) - return err - } - if !info.Mode().IsRegular() { - _, f := filepath.Split(preupgradeFile) - return fmt.Errorf("COSMOVISOR_CUSTOM_PREUPGRADE: %s is not a regular file", f) - } - - // Set the execute bit for only the current user - // Given: Current user - Group - Everyone - // 0o RWX - RWX - RWX - oldMode := info.Mode().Perm() - newMode := oldMode | 0o100 - if oldMode != newMode { - if err := os.Chmod(preupgradeFile, newMode); err != nil { - l.logger.Info("COSMOVISOR_CUSTOM_PREUPGRADE could not add execute permission") - return errors.New("COSMOVISOR_CUSTOM_PREUPGRADE could not add execute permission") - } - } - - // Run preupgradeFile - cmd := exec.Command(preupgradeFile, upgradePlan.Name, fmt.Sprintf("%d", upgradePlan.Height)) - cmd.Dir = l.cfg.Home - result, err := cmd.Output() - if err != nil { - return err - } - - l.logger.Info("COSMOVISOR_CUSTOM_PREUPGRADE result", "command", preupgradeFile, "argv1", upgradePlan.Name, "argv2", fmt.Sprintf("%d", upgradePlan.Height), "result", result) - - return nil -} - -// doPreUpgrade runs the pre-upgrade command defined by the application and handles respective error codes. -// cfg contains the cosmovisor config from env var. 
-// doPreUpgrade runs the new APP binary in order to process the upgrade (post-upgrade for cosmovisor). -func (l *Launcher) doPreUpgrade() error { - counter := 0 - for { - if counter > l.cfg.PreUpgradeMaxRetries { - return fmt.Errorf("pre-upgrade command failed. reached max attempt of retries - %d", l.cfg.PreUpgradeMaxRetries) - } - - if err := l.executePreUpgradeCmd(); err != nil { - counter++ - - var exitErr *exec.ExitError - if errors.As(err, &exitErr) { - switch exitErr.ExitCode() { - case 1: - l.logger.Info("pre-upgrade command does not exist. continuing the upgrade.") - return nil - case 30: - return fmt.Errorf("pre-upgrade command failed : %w", err) - case 31: - l.logger.Error("pre-upgrade command failed. retrying", "error", err, "attempt", counter) - continue - } - } - } - - l.logger.Info("pre-upgrade successful. continuing the upgrade.") - return nil - } -} - -// executePreUpgradeCmd runs the pre-upgrade command defined by the application -// cfg contains the cosmovisor config from the env vars -func (l *Launcher) executePreUpgradeCmd() error { - bin, err := l.cfg.CurrentBin() - if err != nil { - return fmt.Errorf("error while getting current binary path: %w", err) - } - - result, err := exec.Command(bin, "pre-upgrade").Output() - if err != nil { - return err - } - - l.logger.Info("pre-upgrade result", "result", result) - return nil -} - -// IsSkipUpgradeHeight checks if pre-upgrade script must be run. -// If the height in the upgrade plan matches any of the heights provided in --unsafe-skip-upgrades, the script is not run. -func IsSkipUpgradeHeight(args []string, upgradeInfo upgradetypes.Plan) bool { - skipUpgradeHeights := UpgradeSkipHeights(args) - for _, h := range skipUpgradeHeights { - if h == int(upgradeInfo.Height) { - return true - } - } - return false -} - -// UpgradeSkipHeights gets all the heights provided when -// simd start --unsafe-skip-upgrades ... 
-func UpgradeSkipHeights(args []string) []int { - var heights []int - for i, arg := range args { - if arg == fmt.Sprintf("--%s", FlagSkipUpgradeHeight) { - j := i + 1 - - for j < len(args) { - tArg := args[j] - if strings.HasPrefix(tArg, "-") { - break - } - h, err := strconv.Atoi(tArg) - if err == nil { - heights = append(heights, h) - } - j++ - } - - break - } - } - return heights -} diff --git a/tools/cosmovisor/scanner.go b/tools/cosmovisor/scanner.go deleted file mode 100644 index 85caaeeef844..000000000000 --- a/tools/cosmovisor/scanner.go +++ /dev/null @@ -1,269 +0,0 @@ -package cosmovisor - -import ( - "encoding/json" - "errors" - "fmt" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - "testing" - "time" - - dbm "github.com/cometbft/cometbft-db" - "github.com/cometbft/cometbft/v2/store" - - upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types" -) - -var errUntestAble = errors.New("untestable") - -type fileWatcher struct { - daemonHome string - filename string // full path to a watched file - interval time.Duration - - currentBin string - currentInfo upgradetypes.Plan - lastModTime time.Time - cancel chan bool - ticker *time.Ticker - - needsUpdate bool - initialized bool - disableRecase bool -} - -func newUpgradeFileWatcher(cfg *Config) (*fileWatcher, error) { - filename := cfg.UpgradeInfoFilePath() - if filename == "" { - return nil, errors.New("filename undefined") - } - - filenameAbs, err := filepath.Abs(filename) - if err != nil { - return nil, fmt.Errorf("invalid path: %s must be a valid file path: %w", filename, err) - } - - dirname := filepath.Dir(filename) - if info, err := os.Stat(dirname); err != nil || !info.IsDir() { - return nil, fmt.Errorf("invalid path: %s must be an existing directory: %w", dirname, err) - } - - bin, err := cfg.CurrentBin() - if err != nil { - return nil, fmt.Errorf("error creating symlink to genesis: %w", err) - } - - return &fileWatcher{ - daemonHome: cfg.Home, - currentBin: bin, - filename: filenameAbs, 
- interval: cfg.PollInterval, - currentInfo: upgradetypes.Plan{}, - lastModTime: time.Time{}, - cancel: make(chan bool), - ticker: time.NewTicker(cfg.PollInterval), - needsUpdate: false, - initialized: false, - disableRecase: cfg.DisableRecase, - }, nil -} - -func (fw *fileWatcher) Stop() { - close(fw.cancel) - fw.ticker.Stop() -} - -func (fw *fileWatcher) IsStop() bool { - select { - case <-fw.cancel: - return true - default: - return false - } -} - -// MonitorUpdate pools the filesystem to check for new upgrade currentInfo. -// currentName is the name of currently running upgrade. The check is rejected if it finds -// an upgrade with the same name. -func (fw *fileWatcher) MonitorUpdate(currentUpgrade upgradetypes.Plan) <-chan struct{} { - fw.ticker.Reset(fw.interval) - done := make(chan struct{}) - fw.cancel = make(chan bool) - fw.needsUpdate = false - - go func() { - for { - select { - case <-fw.ticker.C: - if fw.CheckUpdate(currentUpgrade) { - done <- struct{}{} - return - } - - case <-fw.cancel: - return - } - } - }() - - return done -} - -// CheckUpdate reads update plan from file and checks if there is a new update request -// currentName is the name of currently running upgrade. The check is rejected if it finds -// an upgrade with the same name. 
-func (fw *fileWatcher) CheckUpdate(currentUpgrade upgradetypes.Plan) bool { - if fw.needsUpdate { - return true - } - - stat, err := os.Stat(fw.filename) - if err != nil { - if os.IsNotExist(err) { - return false - } else { - panic(fmt.Errorf("failed to stat upgrade info file: %w", err)) - } - } - - // check https://github.com/cosmos/cosmos-sdk/issues/21086 - // If new file is still empty, wait a small amount of time for write to complete - if stat.Size() == 0 { - for range 10 { - time.Sleep(2 * time.Millisecond) - stat, err = os.Stat(fw.filename) - if err != nil { - if os.IsNotExist(err) { - return false - } else { - panic(fmt.Errorf("failed to stat upgrade info file: %w", err)) - } - } - if stat.Size() == 0 { - break - } - } - } - if stat.Size() == 0 { - return false - } - - // no update if the file already exists and has not been modified - if !stat.ModTime().After(fw.lastModTime) { - return false - } - - info, err := parseUpgradeInfoFile(fw.filename, fw.disableRecase) - if err != nil { - panic(fmt.Errorf("failed to parse upgrade info file: %w", err)) - } - - // file exist but too early in height - currentHeight, err := fw.checkHeight() - if (err != nil || currentHeight < info.Height) && !errors.Is(err, errUntestAble) { // ignore this check for tests - return false - } - - if !fw.initialized { - // daemon has restarted - fw.initialized = true - fw.currentInfo = info - fw.lastModTime = stat.ModTime() - - // Heuristic: Daemon has restarted, so we don't know if we successfully - // downloaded the upgrade or not. So we try to compare the running upgrade - // name (read from the cosmovisor file) with the upgrade info. 
- if !strings.EqualFold(currentUpgrade.Name, fw.currentInfo.Name) { - fw.needsUpdate = true - return true - } - } - - if info.Height > fw.currentInfo.Height { - fw.currentInfo = info - fw.lastModTime = stat.ModTime() - fw.needsUpdate = true - return true - } - - return false -} - -// checkHeight checks if the current block height -func (fw *fileWatcher) checkHeight() (int64, error) { - if testing.Testing() { // we cannot test the command in the test environment - return 0, errUntestAble - } - - if fw.IsStop() { - result, err := exec.Command(fw.currentBin, "config", "get", "config", "db_backend", "--home", fw.daemonHome).CombinedOutput() //nolint:gosec // we want to execute the config command - if err != nil { - result = []byte("goleveldb") // set default value, old version may not have config command - } - blockStoreDB, err := dbm.NewDB("blockstore", dbm.BackendType(result), filepath.Join(fw.daemonHome, "data")) - if err != nil { - return 0, err - } - defer blockStoreDB.Close() - return store.NewBlockStore(blockStoreDB).Height(), nil - } - - result, err := exec.Command(fw.currentBin, "status", "--home", fw.daemonHome).CombinedOutput() //nolint:gosec // we want to execute the status command - if err != nil { - return 0, err - } - - type response struct { - SyncInfo struct { - LatestBlockHeight string `json:"latest_block_height"` - } `json:"sync_info"` - AnotherCasingSyncInfo struct { - LatestBlockHeight string `json:"latest_block_height"` - } `json:"SyncInfo"` - } - - var resp response - if err := json.Unmarshal(result, &resp); err != nil { - return 0, err - } - - if resp.SyncInfo.LatestBlockHeight != "" { - return strconv.ParseInt(resp.SyncInfo.LatestBlockHeight, 10, 64) - } else if resp.AnotherCasingSyncInfo.LatestBlockHeight != "" { - return strconv.ParseInt(resp.AnotherCasingSyncInfo.LatestBlockHeight, 10, 64) - } - - return 0, errors.New("latest block height is empty") -} - -func parseUpgradeInfoFile(filename string, disableRecase bool) (upgradetypes.Plan, 
error) { - f, err := os.ReadFile(filename) - if err != nil { - return upgradetypes.Plan{}, err - } - - if len(f) == 0 { - return upgradetypes.Plan{}, fmt.Errorf("empty upgrade-info.json in %q", filename) - } - - var upgradePlan upgradetypes.Plan - if err := json.Unmarshal(f, &upgradePlan); err != nil { - return upgradetypes.Plan{}, err - } - - // required values must be set - if err := upgradePlan.ValidateBasic(); err != nil { - return upgradetypes.Plan{}, fmt.Errorf("invalid upgrade-info.json content: %w, got: %v", err, upgradePlan) - } - - // normalize name to prevent operator error in upgrade name case sensitivity errors. - if !disableRecase { - upgradePlan.Name = strings.ToLower(upgradePlan.Name) - } - - return upgradePlan, nil -} diff --git a/tools/cosmovisor/scanner_test.go b/tools/cosmovisor/scanner_test.go index 36de3b68074e..f725270e072f 100644 --- a/tools/cosmovisor/scanner_test.go +++ b/tools/cosmovisor/scanner_test.go @@ -1,6 +1,7 @@ package cosmovisor import ( + "os" "path/filepath" "testing" @@ -12,65 +13,65 @@ import ( func TestParseUpgradeInfoFile(t *testing.T) { cases := []struct { filename string - expectUpgrade upgradetypes.Plan + expectUpgrade *upgradetypes.Plan disableRecase bool expectErr string }{ { filename: "f1-good.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{Name: "upgrade1", Info: "some info", Height: 123}, + expectUpgrade: &upgradetypes.Plan{Name: "upgrade1", Info: "some info", Height: 123}, }, { filename: "f2-normalized-name.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{Name: "upgrade2", Info: "some info", Height: 125}, + expectUpgrade: &upgradetypes.Plan{Name: "upgrade2", Info: "some info", Height: 125}, }, { filename: "f2-normalized-name.json", disableRecase: true, - expectUpgrade: upgradetypes.Plan{Name: "Upgrade2", Info: "some info", Height: 125}, + expectUpgrade: &upgradetypes.Plan{Name: "Upgrade2", Info: "some info", Height: 125}, }, { filename: "f2-bad-type.json", disableRecase: false, - 
expectUpgrade: upgradetypes.Plan{}, - expectErr: "cannot unmarshal number into Go struct", + expectUpgrade: nil, + expectErr: "cannot unmarshal number into Go value", }, { filename: "f2-bad-type-2.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{}, - expectErr: "height must be greater than 0: invalid request", + expectUpgrade: nil, + expectErr: `unknown field "heigh"`, }, { filename: "f3-empty.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{}, - expectErr: "empty upgrade-info.json in", + expectUpgrade: nil, + expectErr: "EOF", }, { filename: "f4-empty-obj.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{}, - expectErr: "invalid upgrade-info.json content: name cannot be empty", + expectUpgrade: nil, + expectErr: "name cannot be empty", }, { filename: "f5-partial-obj-1.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{}, + expectUpgrade: nil, expectErr: "height must be greater than 0", }, { filename: "f5-partial-obj-2.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{}, + expectUpgrade: nil, expectErr: "name cannot be empty: invalid request", }, { filename: "non-existent.json", disableRecase: false, - expectUpgrade: upgradetypes.Plan{}, + expectUpgrade: nil, expectErr: "no such file or directory", }, } @@ -90,3 +91,14 @@ func TestParseUpgradeInfoFile(t *testing.T) { }) } } + +func parseUpgradeInfoFile(filename string, disableRecase bool) (*upgradetypes.Plan, error) { + cfg := &Config{ + DisableRecase: disableRecase, + } + bz, err := os.ReadFile(filename) + if err != nil { + return nil, err + } + return cfg.ParseUpgradeInfo(bz) +} diff --git a/x/upgrade/CHANGELOG.md b/x/upgrade/CHANGELOG.md index 094f8cd2b546..4b47d65b868b 100644 --- a/x/upgrade/CHANGELOG.md +++ b/x/upgrade/CHANGELOG.md @@ -29,6 +29,10 @@ Ref: https://keepachangelog.com/en/1.0.0/ * [#24543](https://github.com/cosmos/cosmos-sdk/issues/24543) Use `telemetry.MetricKeyPreBlocker` metric key instead of 
`telemetry.MetricKeyBeginBlocker` in `PreBlocker`. * [#24720](https://github.com/cosmos/cosmos-sdk/pull/24720) switch to verbose mode logging when calling upgrading handlers. +* [#24821](https://github.com/cosmos/cosmos-sdk/pull/24821) Add support for executing upgrade logic for manual upgrades via the `SetManualUpgrade` and `GetManualUpgrade` methods in the `Keeper`. + +### Breaking Changes +* [#24821](https://github.com/cosmos/cosmos-sdk/pull/24821) Upgrade plans are now saved to disk using the app's JSON codec rather than encoding/json which is the correct behavior for emitting JSON for protobuf generated types. This will likely result in the height field being rendered as a JSON string rather than an integer. ## [v0.2.0](https://github.com/cosmos/cosmos-sdk/releases/tag/x/upgrade/v0.2.0) - 2025-04-24 diff --git a/x/upgrade/abci.go b/x/upgrade/abci.go index e4cec38b0c22..bacc5d912e5e 100644 --- a/x/upgrade/abci.go +++ b/x/upgrade/abci.go @@ -62,6 +62,28 @@ func PreBlocker(ctx context.Context, k *keeper.Keeper) (appmodule.ResponsePreBlo } if !found { + // check for manual upgrade + manualPlan := k.GetManualUpgrade() + if manualPlan != nil && manualPlan.Height == blockHeight { + // if we have a manual upgrade, we execute it + logger := k.Logger(ctx) + logger.Info(fmt.Sprintf("applying manual upgrade \"%s\" at %d", manualPlan.Name, blockHeight)) + sdkCtx = sdkCtx.WithBlockGasMeter(storetypes.NewInfiniteGasMeter()) + if err := k.ApplyUpgrade(sdkCtx, *manualPlan); err != nil { + return nil, err + } + + // clear the manual upgrade plan + err = k.SetManualUpgrade(nil) + if err != nil { + return nil, fmt.Errorf("failed to clear manual upgrade plan: %w", err) + } + + return &sdk.ResponsePreBlock{ + ConsensusParamsChanged: true, + }, nil + } + return &sdk.ResponsePreBlock{ ConsensusParamsChanged: false, }, nil diff --git a/x/upgrade/keeper/keeper.go b/x/upgrade/keeper/keeper.go index 1b996c1a18d4..b09f3e85f40d 100644 --- a/x/upgrade/keeper/keeper.go +++ 
b/x/upgrade/keeper/keeper.go @@ -3,7 +3,6 @@ package keeper import ( "context" "encoding/binary" - "encoding/json" "errors" "fmt" "os" @@ -40,12 +39,13 @@ type Keeper struct { homePath string // root directory of app config skipUpgradeHeights map[int64]bool // map of heights to skip for an upgrade storeService corestore.KVStoreService // key to access x/upgrade store - cdc codec.BinaryCodec // App-wide binary codec + cdc codec.Codec // App-wide binary codec upgradeHandlers map[string]types.UpgradeHandler // map of plan name to upgrade handler versionSetter xp.ProtocolVersionSetter // implements setting the protocol version field on BaseApp downgradeVerified bool // tells if we've already sanity checked that this binary version isn't being used against an old state. authority string // the address capable of executing and canceling an upgrade. Usually the gov module account initVersionMap module.VersionMap // the module version map at init genesis + manualUpgradeInfo *types.Plan } // NewKeeper constructs an upgrade Keeper which requires the following arguments: @@ -54,7 +54,7 @@ type Keeper struct { // cdc - the app-wide binary codec // homePath - root directory of the application's config // vs - the interface implemented by baseapp which allows setting baseapp's protocol version field -func NewKeeper(skipUpgradeHeights map[int64]bool, storeService corestore.KVStoreService, cdc codec.BinaryCodec, homePath string, vs xp.ProtocolVersionSetter, authority string) *Keeper { +func NewKeeper(skipUpgradeHeights map[int64]bool, storeService corestore.KVStoreService, cdc codec.Codec, homePath string, vs xp.ProtocolVersionSetter, authority string) *Keeper { k := &Keeper{ homePath: homePath, skipUpgradeHeights: skipUpgradeHeights, @@ -535,7 +535,7 @@ func (k Keeper) DumpUpgradeInfoToDisk(height int64, p types.Plan) error { Height: height, Info: p.Info, } - info, err := json.Marshal(upgradeInfo) + info, err := k.cdc.MarshalJSON(&upgradeInfo) if err != nil { return err } @@ 
-575,7 +575,7 @@ func (k Keeper) ReadUpgradeInfoFromDisk() (types.Plan, error) { return upgradeInfo, err } - if err := json.Unmarshal(data, &upgradeInfo); err != nil { + if err := k.cdc.UnmarshalJSON(data, &upgradeInfo); err != nil { return upgradeInfo, err } @@ -599,3 +599,26 @@ func (k *Keeper) SetDowngradeVerified(v bool) { func (k Keeper) DowngradeVerified() bool { return k.downgradeVerified } + +// SetManualUpgrade sets the manual upgrade plan. +// If the plan is nil, it clears the existing manual upgrade info. +// This allows manual upgrades to be executed using handlers registered with SetUpgradeHandler. +// Currently, only one manual upgrade can be set. +// It will be applied and cleared when the specified upgrade height has been reached. +func (k *Keeper) SetManualUpgrade(plan *types.Plan) error { + if plan == nil { + k.manualUpgradeInfo = nil + return nil + } + + if err := plan.ValidateBasic(); err != nil { + return err + } + + k.manualUpgradeInfo = plan + return nil +} + +func (k *Keeper) GetManualUpgrade() *types.Plan { + return k.manualUpgradeInfo +}