Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: v3 e2e upgrade #3910

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f917483
attempt to add a major upgrade 3 test
cmwaters Sep 25, 2024
e0ed8c2
add support to specify the share version of txsim
cmwaters Sep 26, 2024
2e88e7d
use int64 instead
cmwaters Sep 26, 2024
3be733f
update txsim
cmwaters Sep 26, 2024
d22d7db
Merge branch 'main' into cal/v3-e2e-test
rootulp Oct 3, 2024
f3b91cb
comments for HACKHACKs
rootulp Oct 3, 2024
b3d47e2
instruct txSim to not send blobs
rootulp Oct 3, 2024
30eb9f5
improve comments + names
rootulp Oct 3, 2024
0433ddc
Revert "instruct txSim to not send blobs"
rootulp Oct 3, 2024
e518b95
use last commit for txsim binary
rootulp Oct 3, 2024
9e9a659
debug
rootulp Oct 3, 2024
465c1a1
debug: use pr version for Docker images
rootulp Oct 4, 2024
15ce3dc
fix: specify share version zero
rootulp Oct 4, 2024
f2396b5
refactor: extract key_generator
rootulp Oct 4, 2024
e88b6d7
debug: log keys available to txSim
rootulp Oct 4, 2024
937d57f
attempt to use validator volume for tx sim nodes
rootulp Oct 4, 2024
0d9aceb
refactor: renames for clarity
rootulp Oct 4, 2024
acbf9cc
attempt to plumb priv keys into tx sim
rootulp Oct 4, 2024
0a17009
comment out broken code
rootulp Oct 4, 2024
f5d6b44
add todo
rootulp Oct 4, 2024
135e5e3
try to copy validator keys into txSim keyring
rootulp Oct 6, 2024
c015c07
no accounts found in keyring
rootulp Oct 6, 2024
8990155
add keys to txsim works
rootulp Oct 6, 2024
57648aa
add comments for upgrade sequence b/c it is currently broken
rootulp Oct 6, 2024
762978e
cant see logs for txsim
rootulp Oct 7, 2024
713bd50
fix: upgrade sequence
rootulp Oct 7, 2024
acc95b0
cleanup revert as much as possible
rootulp Oct 7, 2024
9641129
cleanup
rootulp Oct 7, 2024
6e4fa05
convert print to log
rootulp Oct 7, 2024
11a01b0
fix: CreateTxClient comment
rootulp Oct 7, 2024
ad88e29
Merge branch 'main' into cal/v3-e2e-test
rootulp Oct 9, 2024
eaa98a1
fix: copy keyring dir to correct destination
rootulp Oct 9, 2024
ae45da4
fix: remove DefaultUpgradeHeightDelay override
rootulp Oct 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pkg/appconsts/global_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ const (
// DefaultUpgradeHeightDelay is the number of blocks after a quorum has been
// reached that the chain should upgrade to the new version. Assuming a block
// interval of 12 seconds, this is 7 days.
DefaultUpgradeHeightDelay = int64(7 * 24 * 60 * 60 / 12) // 7 days * 24 hours * 60 minutes * 60 seconds / 12 seconds per block = 50,400 blocks.
// DefaultUpgradeHeightDelay = int64(7 * 24 * 60 * 60 / 12) // 7 days * 24 hours * 60 minutes * 60 seconds / 12 seconds per block = 50,400 blocks.

// HACKHACK: Override the default for the e2e test. Do not merge this as-is.
DefaultUpgradeHeightDelay = int64(1)
Copy link
Collaborator

@rootulp rootulp Oct 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the only remaining thing on this PR is to revert this change and plumb an override for the upgrade height via build flags (or something else).

cc: @cmwaters b/c I think you already have ideas for that via

OVERRIDE_UPGRADE_HEIGHT_DELAY ?=

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can fix this after the PR merges in #3947

)

var (
Expand Down
13 changes: 10 additions & 3 deletions test/cmd/txsim/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var (
stake, stakeValue, blob int
useFeegrant, suppressLogs bool
upgradeSchedule string
blobShareVersion int
)

func main() {
Expand Down Expand Up @@ -102,8 +103,8 @@ well funded account that can act as the master account. The command runs until a
masterAccName = os.Getenv(TxsimMasterAccName)
}

if stake == 0 && send == 0 && blob == 0 {
return errors.New("no sequences specified. Use --stake, --send or --blob")
if stake == 0 && send == 0 && blob == 0 && upgradeSchedule == "" {
return errors.New("no sequences specified. Use --stake, --send, --upgrade-schedule or --blob")
}

// setup the sequences
Expand All @@ -128,7 +129,12 @@ well funded account that can act as the master account. The command runs until a
return fmt.Errorf("invalid blob amounts: %w", err)
}

sequences = append(sequences, txsim.NewBlobSequence(sizes, blobsPerPFB).Clone(blob)...)
sequence := txsim.NewBlobSequence(sizes, blobsPerPFB)
if blobShareVersion >= 0 {
sequence.WithShareVersion(uint8(blobShareVersion))
}

sequences = append(sequences, sequence.Clone(blob)...)
}

upgradeScheduleMap, err := parseUpgradeSchedule(upgradeSchedule)
Expand Down Expand Up @@ -210,6 +216,7 @@ func flags() *flag.FlagSet {
flags.StringVar(&blobAmounts, "blob-amounts", "1", "range of blobs per PFB specified as a single value or a min-max range (e.g., 10 or 5-10). A single value indicates the exact number of blobs to be created.")
flags.BoolVar(&useFeegrant, "feegrant", false, "use the feegrant module to pay for fees")
flags.BoolVar(&suppressLogs, "suppressLogs", false, "disable logging")
flags.IntVar(&blobShareVersion, "blob-share-version", -1, "optionally specify a share version to use for the blob sequences")
return flags
}

Expand Down
9 changes: 7 additions & 2 deletions test/e2e/benchmark/benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,16 @@ func (b *BenchmarkTest) SetupNodes() error {
// create tx clients and point them to the validators
log.Println("Creating tx clients")

err = b.CreateTxClients(ctx, b.manifest.TxClientVersion,
err = b.CreateTxClients(
ctx,
b.manifest.TxClientVersion,
b.manifest.BlobSequences,
b.manifest.BlobSizes,
b.manifest.BlobsPerSeq,
b.manifest.TxClientsResource, gRPCEndpoints)
b.manifest.TxClientsResource,
gRPCEndpoints,
map[int64]uint64{}, // upgrade schedule
)
testnet.NoError("failed to create tx clients", err)

log.Println("Setting up testnet")
Expand Down
1 change: 1 addition & 0 deletions test/e2e/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ func main() {
tests := []Test{
{"MinorVersionCompatibility", MinorVersionCompatibility},
{"MajorUpgradeToV2", MajorUpgradeToV2},
{"MajorUpgradeToV3", MajorUpgradeToV3},
{"E2ESimple", E2ESimple},
}

Expand Down
3 changes: 2 additions & 1 deletion test/e2e/major_upgrade_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ func MajorUpgradeToV2(logger *log.Logger) error {
logger.Println("Creating txsim")
endpoints, err := testNet.RemoteGRPCEndpoints(ctx)
testnet.NoError("failed to get remote gRPC endpoints", err)
err = testNet.CreateTxClient(ctx, "txsim", testnet.TxsimVersion, 1, "100-2000", 100, testnet.DefaultResources, endpoints[0])
upgradeSchedule := map[int64]uint64{}
err = testNet.CreateTxClient(ctx, "txsim", testnet.TxsimVersion, 1, "100-2000", 100, testnet.DefaultResources, endpoints[0], upgradeSchedule)
testnet.NoError("failed to create tx client", err)

logger.Println("Setting up testnet")
Expand Down
94 changes: 94 additions & 0 deletions test/e2e/major_upgrade_v3.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package main

import (
"context"
"fmt"
"log"
"time"

"github.com/celestiaorg/celestia-app/v3/app"
v2 "github.com/celestiaorg/celestia-app/v3/pkg/appconsts/v2"
v3 "github.com/celestiaorg/celestia-app/v3/pkg/appconsts/v3"
"github.com/celestiaorg/celestia-app/v3/test/e2e/testnet"
)

func MajorUpgradeToV3(logger *log.Logger) error {
numNodes := 4
upgradeHeightV3 := int64(20)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

logger.Println("Creating testnet")
testNet, err := testnet.New(ctx, "MajorUpgradeToV3", seed, nil, "test")
rootulp marked this conversation as resolved.
Show resolved Hide resolved
testnet.NoError("failed to create testnet", err)

defer testNet.Cleanup(ctx)

// HACKHACK: use a version of celestia-app built from a commit on this PR.
// This can be removed after the PR is merged to main.
version := "pr-3910"
rootulp marked this conversation as resolved.
Show resolved Hide resolved

logger.Println("Running major upgrade to v3 test", "version", version)

consensusParams := app.DefaultConsensusParams()
consensusParams.Version.AppVersion = v2.Version // Start the test on v2
rootulp marked this conversation as resolved.
Show resolved Hide resolved
testNet.SetConsensusParams(consensusParams)

preloader, err := testNet.NewPreloader()
testnet.NoError("failed to create preloader", err)

err = preloader.AddImage(ctx, testnet.DockerImageName(version))
testnet.NoError("failed to add image", err)
defer func() { _ = preloader.EmptyImages(ctx) }()

logger.Println("Creating genesis nodes")
for i := 0; i < numNodes; i++ {
err := testNet.CreateGenesisNode(ctx, version, 10000000, 0, testnet.DefaultResources, true)
testnet.NoError("failed to create genesis node", err)
}

logger.Println("Creating txsim")
endpoints, err := testNet.RemoteGRPCEndpoints(ctx)
testnet.NoError("failed to get remote gRPC endpoints", err)
upgradeSchedule := map[int64]uint64{
upgradeHeightV3: v3.Version,
}

err = testNet.CreateTxClient(ctx, "txsim", version, 1, "100-2000", 100, testnet.DefaultResources, endpoints[0], upgradeSchedule)
testnet.NoError("failed to create tx client", err)

logger.Println("Setting up testnet")
testnet.NoError("Failed to setup testnet", testNet.Setup(ctx))
logger.Println("Starting testnet")
testnet.NoError("Failed to start testnet", testNet.Start(ctx))

timer := time.NewTimer(10 * time.Minute)
defer timer.Stop()
ticker := time.NewTicker(3 * time.Second)
defer ticker.Stop()

logger.Println("waiting for upgrade")
for _, node := range testNet.Nodes() {
client, err := node.Client()
testnet.NoError("failed to get client", err)

upgradeComplete := false
lastHeight := int64(0)
for !upgradeComplete {
select {
case <-timer.C:
return fmt.Errorf("failed to upgrade to v3, last height: %d", lastHeight)
case <-ticker.C:
resp, err := client.Header(ctx, nil)
testnet.NoError("failed to get header", err)
if resp.Header.Version.App == v3.Version {
Comment on lines +84 to +85
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Ensure error handling after retrieving the header.

When calling client.Header(ctx, nil), if an error occurs, accessing resp.Header may result in a nil pointer dereference. This can cause the program to panic.

Modify the code to handle the error appropriately:

resp, err := client.Header(ctx, nil)
if err != nil {
    testnet.NoError("failed to get header", err)
    continue
}

Make sure that resp is not nil before accessing its fields.

upgradeComplete = true
}
fmt.Println("height", resp.Header.Height)
rootulp marked this conversation as resolved.
Show resolved Hide resolved
rootulp marked this conversation as resolved.
Show resolved Hide resolved
lastHeight = resp.Header.Height
}
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Handle timeouts and errors for each node individually.

The current implementation shares the same timer across all nodes. If one node delays the upgrade, it could cause the entire test to fail, even if others have succeeded.

Consider refactoring the code to handle timeouts and errors on a per-node basis to isolate issues and provide more detailed feedback.


return nil
}
3 changes: 2 additions & 1 deletion test/e2e/minor_version_compatibility.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ func MinorVersionCompatibility(logger *log.Logger) error {
logger.Println("Creating txsim")
endpoints, err := testNet.RemoteGRPCEndpoints(ctx)
testnet.NoError("failed to get remote gRPC endpoints", err)
err = testNet.CreateTxClient(ctx, "txsim", testnet.TxsimVersion, 1, "100-2000", 100, testnet.DefaultResources, endpoints[0])
upgradeSchedule := map[int64]uint64{}
staheri14 marked this conversation as resolved.
Show resolved Hide resolved
err = testNet.CreateTxClient(ctx, "txsim", testnet.TxsimVersion, 1, "100-2000", 100, testnet.DefaultResources, endpoints[0], upgradeSchedule)
testnet.NoError("failed to create tx client", err)

// start the testnet
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/simple.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ func E2ESimple(logger *log.Logger) error {
logger.Println("Creating txsim")
endpoints, err := testNet.RemoteGRPCEndpoints(ctx)
testnet.NoError("failed to get remote gRPC endpoints", err)
err = testNet.CreateTxClient(ctx, "txsim", testnet.TxsimVersion, 10,
"100-2000", 100, testnet.DefaultResources, endpoints[0])
upgradeSchedule := map[int64]uint64{}
err = testNet.CreateTxClient(ctx, "txsim", testnet.TxsimVersion, 10, "100-2000", 100, testnet.DefaultResources, endpoints[0], upgradeSchedule)
testnet.NoError("failed to create tx client", err)

logger.Println("Setting up testnets")
Expand Down
37 changes: 37 additions & 0 deletions test/e2e/testnet/key_generator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package testnet

import (
"io"
"math/rand"

"github.com/tendermint/tendermint/crypto"
"github.com/tendermint/tendermint/crypto/ed25519"
"github.com/tendermint/tendermint/crypto/secp256k1"
)

// keyGenerator produces private keys from a pseudo-random byte stream. It is
// constructed by newKeyGenerator, which seeds the underlying math/rand source.
type keyGenerator struct {
// random is the math/rand generator that Generate reads key seed bytes from.
random *rand.Rand
}

// newKeyGenerator returns a keyGenerator whose byte stream comes from a
// math/rand generator initialised with the given seed.
func newKeyGenerator(seed int64) *keyGenerator {
	source := rand.NewSource(seed)
	generator := rand.New(source) //nolint:gosec
	return &keyGenerator{random: generator}
}

// Generate reads ed25519.SeedSize bytes from the generator's random stream and
// turns them into a private key of the requested type.
//
// Supported values for keyType are "secp256k1" and "ed25519"; the empty string
// is treated as "ed25519". Any other value panics, as does a failure to read
// from the random stream. The bytes are consumed before keyType is inspected.
func (g *keyGenerator) Generate(keyType string) crypto.PrivKey {
	secret := make([]byte, ed25519.SeedSize)
	if _, err := io.ReadFull(g.random, secret); err != nil {
		panic(err) // this shouldn't happen
	}

	if keyType == "secp256k1" {
		return secp256k1.GenPrivKeySecp256k1(secret)
	}
	if keyType == "" || keyType == "ed25519" {
		return ed25519.GenPrivKeyFromSecret(secret)
	}
	panic("KeyType not supported") // should not make it this far
}
21 changes: 11 additions & 10 deletions test/e2e/testnet/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ type Node struct {

// PullRoundStateTraces retrieves the round state traces from a node.
// It will save them to the provided path.
func (n *Node) PullRoundStateTraces(path string) ([]trace.Event[schema.RoundState], error,
) {
func (n *Node) PullRoundStateTraces(path string) ([]trace.Event[schema.RoundState], error) {
addr := n.AddressTracing()
log.Info().Str("Address", addr).Msg("Pulling round state traces")

Expand All @@ -73,8 +72,7 @@ func (n *Node) PullRoundStateTraces(path string) ([]trace.Event[schema.RoundStat

// PullBlockSummaryTraces retrieves the block summary traces from a node.
// It will save them to the provided path.
func (n *Node) PullBlockSummaryTraces(path string) ([]trace.Event[schema.BlockSummary], error,
) {
func (n *Node) PullBlockSummaryTraces(path string) ([]trace.Event[schema.BlockSummary], error) {
addr := n.AddressTracing()
log.Info().Str("Address", addr).Msg("Pulling block summary traces")

Expand All @@ -99,11 +97,14 @@ type Resources struct {

func NewNode(
ctx context.Context,
name, version string,
startHeight, selfDelegation int64,
name string,
version string,
startHeight int64,
selfDelegation int64,
peers []string,
signerKey, networkKey crypto.PrivKey,
upgradeHeight int64,
signerKey crypto.PrivKey,
networkKey crypto.PrivKey,
upgradeHeightV2 int64,
resources Resources,
grafana *GrafanaInfo,
kn *knuu.Knuu,
Expand Down Expand Up @@ -159,8 +160,8 @@ func NewNode(
if disableBBR {
args = append(args, "--force-no-bbr")
}
if upgradeHeight != 0 {
args = append(args, fmt.Sprintf("--v2-upgrade-height=%d", upgradeHeight))
if upgradeHeightV2 != 0 {
rootulp marked this conversation as resolved.
Show resolved Hide resolved
args = append(args, fmt.Sprintf("--v2-upgrade-height=%d", upgradeHeightV2))
}

if err := knInstance.Build().SetArgs(args...); err != nil {
Expand Down
Loading
Loading