From 3bb57f2ac357a4cb76a25453c6d9e27be811fd97 Mon Sep 17 00:00:00 2001
From: Ricky Stewart <ricky@cockroachlabs.com>
Date: Fri, 14 Nov 2025 12:57:39 -0600
Subject: [PATCH] bench/rttanalysis: shard TestBenchmarkExpectation to avoid
 timeouts

Re-apply `9fecc53b0b3cde307b5379ee5b88fae0fc8f34e2`, but add a `skip` if
the test is running under `s390x`.

Release note: none
Epic: none
---
 pkg/bench/rttanalysis/BUILD.bazel             |  2 +
 pkg/bench/rttanalysis/registry.go             | 66 +++++++++++++++++--
 .../validate_benchmark_data_test.go           | 43 +++++++++++-
 3 files changed, 103 insertions(+), 8 deletions(-)

diff --git a/pkg/bench/rttanalysis/BUILD.bazel b/pkg/bench/rttanalysis/BUILD.bazel
index b08968a6ecf3..4242434d7f56 100644
--- a/pkg/bench/rttanalysis/BUILD.bazel
+++ b/pkg/bench/rttanalysis/BUILD.bazel
@@ -55,6 +55,7 @@ go_test(
     data = glob(["testdata/**"]),
     embed = [":rttanalysis"],
     exec_properties = {"test.Pool": "large"},
+    shard_count = 4,
     deps = [
         "//pkg/base",
         "//pkg/jobs/jobspb",
@@ -66,6 +67,7 @@ go_test(
         "//pkg/testutils/serverutils",
         "//pkg/testutils/sqlutils",
         "//pkg/testutils/testcluster",
+        "//pkg/util/envutil",
         "//pkg/util/protoutil",
         "//pkg/util/randutil",
     ],
diff --git a/pkg/bench/rttanalysis/registry.go b/pkg/bench/rttanalysis/registry.go
index 6733b586d970..47ffb4613885 100644
--- a/pkg/bench/rttanalysis/registry.go
+++ b/pkg/bench/rttanalysis/registry.go
@@ -6,6 +6,7 @@
 package rttanalysis
 
 import (
+	"runtime"
 	"strings"
 	"testing"
 
@@ -51,15 +52,68 @@ func (r *Registry) Run(b *testing.B) {
 // benchmarks can be filtered by passing the usual test filters underneath
 // this test's name.
 //
-// It takes a long time and thus is skipped under stress, race
-// and short.
+// It takes a long time and thus is skipped under duress and short.
 func (r *Registry) RunExpectations(t *testing.T) {
-	skip.UnderStress(t)
-	skip.UnderRace(t)
+	r.RunExpectationsSharded(t, 1, 1)
+}
+
+// RunExpectationsSharded runs all the benchmarks for one iteration
+// and validates that the number of RPCs meets the expectation. If run
+// with the --rewrite flag, it will rewrite the run benchmarks. The
+// benchmarks can be filtered by passing the usual test filters underneath
+// this test's name.
+//
+// It takes a long time and thus is skipped under duress and short.
+//
+// When shard and totalShards are provided (> 1), only a subset of benchmarks
+// assigned to the specific shard will be run, enabling parallel execution.
+// Test groups are distributed across shards using round-robin assignment.
+func (r *Registry) RunExpectationsSharded(t *testing.T, shard, totalShards int) {
+	skip.UnderDuress(t)
 	skip.UnderShort(t)
-	skip.UnderDeadlock(t)
+	if runtime.GOARCH == "s390x" {
+		skip.IgnoreLint(t, "test prone to crashing under s390x (see #154317)")
+	}
+
+	// If totalShards is 1, run all tests; otherwise shard them
+	var registryToUse *Registry
+	if totalShards <= 1 {
+		// Run all test groups
+		registryToUse = r
+	} else {
+		// Create a registry with only the test groups assigned to this shard
+		shardRegistry := &Registry{
+			numNodes: r.numNodes,
+			cc:       r.cc,
+			r:        make(map[string][]RoundTripBenchTestCase),
+		}
+
+		// Distribute test groups across shards using round-robin assignment
+		// First, get all group names and sort them for consistent ordering
+		groupNames := make([]string, 0, len(r.r))
+		for groupName := range r.r {
+			groupNames = append(groupNames, groupName)
+		}
+		// Sort for deterministic assignment across runs
+		for i := 0; i < len(groupNames); i++ {
+			for j := i + 1; j < len(groupNames); j++ {
+				if groupNames[i] > groupNames[j] {
+					groupNames[i], groupNames[j] = groupNames[j], groupNames[i]
+				}
+			}
+		}
+
+		// Assign groups to shards using round-robin
+		for i, groupName := range groupNames {
+			assignedShard := (i % totalShards) + 1
+			if assignedShard == shard {
+				shardRegistry.r[groupName] = r.r[groupName]
+			}
+		}
+		registryToUse = shardRegistry
+	}
 
-	runBenchmarkExpectationTests(t, r)
+	runBenchmarkExpectationTests(t, registryToUse)
 }
 
 // Register registers a set of test cases to a given benchmark name. It is
diff --git a/pkg/bench/rttanalysis/validate_benchmark_data_test.go b/pkg/bench/rttanalysis/validate_benchmark_data_test.go
index 7c3750857471..08fc6f559df8 100644
--- a/pkg/bench/rttanalysis/validate_benchmark_data_test.go
+++ b/pkg/bench/rttanalysis/validate_benchmark_data_test.go
@@ -5,6 +5,45 @@
 
 package rttanalysis
 
-import "testing"
+import (
+	"strconv"
+	"testing"
 
-func TestBenchmarkExpectation(t *testing.T) { reg.RunExpectations(t) }
+	"github.com/cockroachdb/cockroach/pkg/util/envutil"
+)
+
+// NOTE: If you change the number of shards, you must also update the
+// shard_count in BUILD.bazel to match.
+const shardCount = 4
+
+// Validate that shardCount matches TEST_TOTAL_SHARDS environment variable at init time
+var _ = func() int {
+	totalShardsStr, found := envutil.ExternalEnvString("TEST_TOTAL_SHARDS", 1)
+	if totalShardsStr == "" || !found {
+		return 0
+	}
+	totalShards, err := strconv.Atoi(totalShardsStr)
+	if err != nil {
+		return 0
+	}
+	if totalShards != shardCount {
+		panic("shardCount mismatch: update shard_count in pkg/bench/rttanalysis/BUILD.bazel to match shardCount constant")
+	}
+	return 0
+}()
+
+func TestBenchmarkExpectationShard1(t *testing.T) {
+	reg.RunExpectationsSharded(t, 1, shardCount)
+}
+
+func TestBenchmarkExpectationShard2(t *testing.T) {
+	reg.RunExpectationsSharded(t, 2, shardCount)
+}
+
+func TestBenchmarkExpectationShard3(t *testing.T) {
+	reg.RunExpectationsSharded(t, 3, shardCount)
+}
+
+func TestBenchmarkExpectationShard4(t *testing.T) {
+	reg.RunExpectationsSharded(t, 4, shardCount)
+}