9 changes: 8 additions & 1 deletion pkg/base/test_server_args.go
@@ -122,7 +122,14 @@ type TestServerArgs struct {
UseDatabase string

// If set, this will be configured in the test server to check connections
// from other test servers and to report in the SQL introspection.
// from other test servers and to report in the SQL introspection. It is
// advised to make the name sufficiently unique, in order to prevent a
// TestCluster from accidentally getting messages from unrelated clusters in
// the same environment that used the same TCP ports recently (e.g. see
// https://github.com/cockroachdb/cockroach/issues/157838).
//
// If empty (most cases), a unique ClusterName is generated automatically, or
// a higher-level default is used (e.g. taken from TestClusterArgs).
ClusterName string

// Stopper can be used to stop the server. If not set, a stopper will be
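For illustration, a minimal sketch of setting an explicit name instead of relying on the auto-generated default; the StartTestCluster/TestClusterArgs wiring follows the tests elsewhere in this diff, while the node count and the literal name are assumptions:

    tc := testcluster.StartTestCluster(t, 3,
        base.TestClusterArgs{ServerArgs: base.TestServerArgs{
            // Hypothetical name; it only needs to be unique within the CI
            // environment so that stray RPCs from recently stopped clusters
            // reusing the same TCP ports are rejected.
            ClusterName: "loq-recovery-smoke",
        }})
    defer tc.Stopper().Stop(context.Background())
    // tc.ClusterName() returns the same string and can be forwarded to CLI
    // invocations via --cluster-name, as the updated tests below do.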
8 changes: 6 additions & 2 deletions pkg/cli/debug_recover_loss_of_quorum_test.go
@@ -135,8 +135,8 @@ func TestCollectInfoFromOnlineCluster(t *testing.T) {
"recover",
"collect-info",
"--insecure",
"--host",
tc.Server(2).AdvRPCAddr(),
"--host", tc.Server(2).AdvRPCAddr(),
"--cluster-name", tc.ClusterName(),
replicaInfoFileName,
})

@@ -548,6 +548,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
"--confirm=y",
"--certs-dir=test_certs",
"--host=" + tc.Server(0).AdvRPCAddr(),
"--cluster-name=" + tc.ClusterName(),
"--plan=" + planFile,
})
require.NoError(t, err, "failed to run make-plan")
@@ -571,6 +572,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
"debug", "recover", "apply-plan",
"--certs-dir=test_certs",
"--host=" + tc.Server(0).AdvRPCAddr(),
"--cluster-name=" + tc.ClusterName(),
"--confirm=y", planFile,
})
require.NoError(t, err, "failed to run apply plan")
@@ -586,6 +588,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
"debug", "recover", "verify",
"--certs-dir=test_certs",
"--host=" + tc.Server(0).AdvRPCAddr(),
"--cluster-name=" + tc.ClusterName(),
planFile,
})
require.NoError(t, err, "failed to run verify plan")
@@ -635,6 +638,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
"debug", "recover", "verify",
"--certs-dir=test_certs",
"--host=" + tc.Server(0).AdvRPCAddr(),
"--cluster-name=" + tc.ClusterName(),
planFile,
})
require.NoError(t, err, "failed to run verify plan")
2 changes: 1 addition & 1 deletion pkg/cli/testdata/zip/partial1
@@ -1,6 +1,6 @@
zip
----
debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false /dev/null
debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=<cluster-name> /dev/null
[cluster] discovering virtual clusters... done
[cluster] creating output file /dev/null... done
[cluster] establishing RPC connection to ...
2 changes: 1 addition & 1 deletion pkg/cli/testdata/zip/partial1_excluded
@@ -1,6 +1,6 @@
zip
----
debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 --validate-zip-file=false
debug zip --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=<cluster-name> /dev/null
[cluster] discovering virtual clusters... done
[cluster] creating output file /dev/null... done
[cluster] establishing RPC connection to ...
2 changes: 1 addition & 1 deletion pkg/cli/testdata/zip/partial2
@@ -1,6 +1,6 @@
zip
----
debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false /dev/null
debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=<cluster-name> /dev/null
[cluster] discovering virtual clusters... done
[cluster] creating output file /dev/null... done
[cluster] establishing RPC connection to ...
2 changes: 1 addition & 1 deletion pkg/cli/testdata/zip/testzip_concurrent
@@ -227,4 +227,4 @@ zip
[node ?] ? log files found
[node ?] ? log files found
[node ?] ? log files found
debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false /dev/null
debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=<cluster-name> /dev/null
70 changes: 40 additions & 30 deletions pkg/cli/zip_test.go
@@ -421,10 +421,11 @@ func TestConcurrentZip(t *testing.T) {
defer func(prevStderr *os.File) { stderr = prevStderr }(stderr)
stderr = os.Stdout

out, err := c.RunWithCapture("debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false " + os.DevNull)
if err != nil {
t.Fatal(err)
}
out, err := c.RunWithCapture(fmt.Sprintf(
"debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=%s %s",
tc.ClusterName(), os.DevNull,
))
require.NoError(t, err)

// Strip any non-deterministic messages.
out = eraseNonDeterministicZipOutput(out)
@@ -437,6 +438,8 @@ func TestConcurrentZip(t *testing.T) {
// which the original messages interleave with other messages mean the number
// of them after each series is collapsed is also non-deterministic.
out = regexp.MustCompile(`<dumping SQL tables>\n`).ReplaceAllString(out, "")
// Replace the non-deterministic cluster name with a placeholder.
out = eraseClusterName(out, tc.ClusterName())

// We use datadriven simply to read the golden output file; we don't actually
// run any commands. Using datadriven allows TESTFLAGS=-rewrite.
@@ -541,9 +544,8 @@ func TestUnavailableZip(t *testing.T) {
tc := testcluster.StartTestCluster(t, 3,
base.TestClusterArgs{ServerArgs: base.TestServerArgs{
DefaultTestTenant: base.TestIsSpecificToStorageLayerAndNeedsASystemTenant,

Insecure: true,
Knobs: base.TestingKnobs{Store: knobs},
Insecure: true,
Knobs: base.TestingKnobs{Store: knobs},
}})
defer tc.Stopper().Stop(context.Background())

@@ -559,9 +561,10 @@ func TestUnavailableZip(t *testing.T) {
defer close(ch)

// Run debug zip against node 1.
debugZipCommand :=
"debug zip --concurrency=1 --cpu-profile-duration=0 " + os.
DevNull + " --timeout=.5s"
debugZipCommand := fmt.Sprintf(
"debug zip --concurrency=1 --cpu-profile-duration=0 --timeout=.5s --cluster-name=%s %s",
tc.ClusterName(), os.DevNull,
)

t.Run("server 1", func(t *testing.T) {
c := TestCLI{
@@ -651,6 +654,10 @@ func baseZipOutput(nodeId int) []string {
return output
}

func eraseClusterName(str, name string) string {
return strings.ReplaceAll(str, name, "<cluster-name>")
}

func eraseNonDeterministicZipOutput(out string) string {
re := regexp.MustCompile(`(?m)postgresql://.*$`)
out = re.ReplaceAllString(out, `postgresql://...`)
@@ -736,13 +743,15 @@ func TestPartialZip(t *testing.T) {
defer func(prevStderr *os.File) { stderr = prevStderr }(stderr)
stderr = os.Stdout

out, err := c.RunWithCapture("debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false " + os.DevNull)
if err != nil {
t.Fatal(err)
}
out, err := c.RunWithCapture(fmt.Sprintf(
"debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=%s %s",
tc.ClusterName(), os.DevNull,
))
require.NoError(t, err)

// Strip any non-deterministic messages.
t.Log(out)
out = eraseClusterName(out, tc.ClusterName())
out = eraseNonDeterministicZipOutput(out)

datadriven.RunTest(t, datapathutils.TestDataPath(t, "zip", "partial1"),
@@ -751,12 +760,13 @@ func TestPartialZip(t *testing.T) {
})

// Now do it again and exclude the down node explicitly.
out, err = c.RunWithCapture("debug zip " + os.DevNull + " --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0" +
" --validate-zip-file=false")
if err != nil {
t.Fatal(err)
}
out, err = c.RunWithCapture(fmt.Sprintf(
"debug zip --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=%s %s",
tc.ClusterName(), os.DevNull,
))
require.NoError(t, err)

out = eraseClusterName(out, tc.ClusterName())
out = eraseNonDeterministicZipOutput(out)
datadriven.RunTest(t, datapathutils.TestDataPath(t, "zip", "partial1_excluded"),
func(t *testing.T, td *datadriven.TestData) string {
@@ -767,12 +777,11 @@ func TestPartialZip(t *testing.T) {
// skips over it automatically. We specifically use --wait=none because
// we're decommissioning a node in a 3-node cluster, so there's no node to
// up-replicate the under-replicated ranges to.
{
_, err := c.RunWithCapture(fmt.Sprintf("node decommission --checks=skip --wait=none %d", 2))
if err != nil {
t.Fatal(err)
}
}
_, err = c.RunWithCapture(fmt.Sprintf(
"node decommission --checks=skip --wait=none --cluster-name=%s %d",
tc.ClusterName(), 2,
))
require.NoError(t, err)

// We use .Override() here instead of SET CLUSTER SETTING in SQL to
// override the 1m15s minimum placed on the cluster setting. There
@@ -787,12 +796,13 @@ func TestPartialZip(t *testing.T) {
datadriven.RunTest(t, datapathutils.TestDataPath(t, "zip", "partial2"),
func(t *testing.T, td *datadriven.TestData) string {
f := func() string {
out, err := c.RunWithCapture("debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false " + os.DevNull)
if err != nil {
t.Fatal(err)
}

out, err := c.RunWithCapture(fmt.Sprintf(
"debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=%s %s",
tc.ClusterName(), os.DevNull,
))
require.NoError(t, err)
// Strip any non-deterministic messages.
out = eraseClusterName(out, tc.ClusterName())
return eraseNonDeterministicZipOutput(out)
}

14 changes: 14 additions & 0 deletions pkg/testutils/testcluster/testcluster.go
@@ -9,6 +9,7 @@ import (
"context"
gosql "database/sql"
"fmt"
"math/rand/v2"
"net"
"os"
"reflect"
@@ -86,6 +87,11 @@ type TestCluster struct {

var _ serverutils.TestClusterInterface = &TestCluster{}

// ClusterName returns the configured or auto-generated cluster name.
func (tc *TestCluster) ClusterName() string {
return tc.clusterArgs.ServerArgs.ClusterName
}

// NumServers is part of TestClusterInterface.
func (tc *TestCluster) NumServers() int {
return len(tc.Servers)
@@ -317,6 +323,11 @@ func NewTestCluster(
if clusterArgs.StartSingleNode && nodes > 1 {
t.Fatal("StartSingleNode implies 1 node only, but asked to create", nodes)
}
if clusterArgs.ServerArgs.ClusterName == "" {
// Use a cluster name that is sufficiently unique (within the CI env) but is
// concise and recognizable.
clusterArgs.ServerArgs.ClusterName = fmt.Sprintf("TestCluster-%d", rand.Uint32())
}

if err := checkServerArgsForCluster(
clusterArgs.ServerArgs, clusterArgs.ReplicationMode, disallowJoinAddr,
@@ -634,6 +645,9 @@ func (tc *TestCluster) AddServer(
if serverArgs.JoinAddr != "" {
serverArgs.NoAutoInitializeCluster = true
}
if serverArgs.ClusterName == "" {
serverArgs.ClusterName = tc.clusterArgs.ServerArgs.ClusterName
}

// Check args even though we have called checkServerArgsForCluster()
// already in NewTestCluster(). AddServer might be called for servers
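A short usage sketch of how the pieces added in this file fit together, relying on the auto-generated name, the new ClusterName() accessor, and the --cluster-name flag exercised by the CLI tests above; the node count and exact flag set are assumptions:

    tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{})
    defer tc.Stopper().Stop(context.Background())

    // With no explicit ClusterName, NewTestCluster generates something like
    // "TestCluster-1234567890" and AddServer copies it to every server's args.
    debugZipCommand := fmt.Sprintf(
        "debug zip --concurrency=1 --cpu-profile-duration=0 --cluster-name=%s %s",
        tc.ClusterName(), os.DevNull,
    )
    // Run via TestCLI.RunWithCapture as in zip_test.go above.
    _ = debugZipCommand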