From 5a79414a192d36886d6ab116917a5136375fc3a9 Mon Sep 17 00:00:00 2001 From: Junqi Zhao Date: Thu, 25 Sep 2025 18:03:26 +0800 Subject: [PATCH 1/5] bump telemetry series limit to 1000 --- test/extended/prometheus/prometheus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go index 70f667cef79e..50575dd4c1cd 100644 --- a/test/extended/prometheus/prometheus.go +++ b/test/extended/prometheus/prometheus.go @@ -541,7 +541,7 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { case isManagedServiceCluster: averageSeriesLimit = 850 default: - averageSeriesLimit = 780 + averageSeriesLimit = 1000 } tests := map[string]bool{ From ee539f3722b38b02e743a4d9cde2f3fcb9c6e2ac Mon Sep 17 00:00:00 2001 From: Junqi Zhao Date: Thu, 25 Sep 2025 20:20:04 +0800 Subject: [PATCH 2/5] revert change in 52c9e9ad5c428a17f59f8828184dcb0018d66ec0 commit --- test/extended/prometheus/prometheus.go | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go index 50575dd4c1cd..55f467a697e9 100644 --- a/test/extended/prometheus/prometheus.go +++ b/test/extended/prometheus/prometheus.go @@ -529,22 +529,15 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { // we only consider series sent since the beginning of the test testDuration := exutil.DurationSinceStartInSeconds().String() - isManagedServiceCluster, err := exutil.IsManagedServiceCluster(ctx, oc.AdminKubeClient()) - o.Expect(err).NotTo(o.HaveOccurred()) - - // We want to limit the number of total series sent, the cluster:telemetry_selected_series:count - // rule contains the count of the all the series that are sent via telemetry. It is permissible - // for some scenarios to generate more series than 760, we just want the basic state to be below - // a threshold. var averageSeriesLimit int - switch { - case isManagedServiceCluster: - averageSeriesLimit = 850 - default: - averageSeriesLimit = 1000 - } + averageSeriesLimit = 1000 tests := map[string]bool{ + // We want to limit the number of total series sent, the cluster:telemetry_selected_series:count + // rule contains the count of the all the series that are sent via telemetry. It is permissible + // for some scenarios to generate more series than 760, we just want the basic state to be below + // a threshold. + // The following query can be executed against the telemetry server // to reevaluate the threshold value (replace the matcher on the version label accordingly): // From 0bfdec2a0c107f0114dfb51082f1a8f164f4b23b Mon Sep 17 00:00:00 2001 From: Junqi Zhao Date: Thu, 25 Sep 2025 20:28:12 +0800 Subject: [PATCH 3/5] update comment --- test/extended/prometheus/prometheus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go index 55f467a697e9..e7760ec269e5 100644 --- a/test/extended/prometheus/prometheus.go +++ b/test/extended/prometheus/prometheus.go @@ -535,7 +535,7 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { tests := map[string]bool{ // We want to limit the number of total series sent, the cluster:telemetry_selected_series:count // rule contains the count of the all the series that are sent via telemetry. It is permissible - // for some scenarios to generate more series than 760, we just want the basic state to be below + // for some scenarios to generate more series than 1000, we just want the basic state to be below // a threshold. // The following query can be executed against the telemetry server From f993b858370009fbd90125df61f1269ad7adfdad Mon Sep 17 00:00:00 2001 From: Junqi Zhao Date: Thu, 25 Sep 2025 21:19:59 +0800 Subject: [PATCH 4/5] fix error --- test/extended/prometheus/prometheus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go index e7760ec269e5..1cc0e145a303 100644 --- a/test/extended/prometheus/prometheus.go +++ b/test/extended/prometheus/prometheus.go @@ -553,7 +553,7 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { fmt.Sprintf(`avg_over_time(cluster:telemetry_selected_series:count[%s]) >= %d`, testDuration, averageSeriesLimit): false, fmt.Sprintf(`max_over_time(cluster:telemetry_selected_series:count[%s]) >= 1200`, testDuration): false, } - err = helper.RunQueries(context.TODO(), oc.NewPrometheusClient(context.TODO()), tests, oc) + err := helper.RunQueries(context.TODO(), oc.NewPrometheusClient(context.TODO()), tests, oc) o.Expect(err).NotTo(o.HaveOccurred()) e2e.Logf("Total number of series sent via telemetry is below the limit") From 7c506204a94e9dfd61350b8a9f2437ab7886a786 Mon Sep 17 00:00:00 2001 From: Junqi Zhao Date: Fri, 26 Sep 2025 16:09:19 +0800 Subject: [PATCH 5/5] update based on comments --- test/extended/prometheus/prometheus.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go index 1cc0e145a303..962af33133f9 100644 --- a/test/extended/prometheus/prometheus.go +++ b/test/extended/prometheus/prometheus.go @@ -529,13 +529,10 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { // we only consider series sent since the beginning of the test testDuration := exutil.DurationSinceStartInSeconds().String() - var averageSeriesLimit int - averageSeriesLimit = 1000 - tests := map[string]bool{ // We want to limit the number of total series sent, the cluster:telemetry_selected_series:count // rule contains the count of the all the series that are sent via telemetry. It is permissible - // for some scenarios to generate more series than 1000, we just want the basic state to be below + // for some scenarios to generate more series than the limit, we just want the basic state to be below // a threshold. // The following query can be executed against the telemetry server @@ -550,8 +547,8 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { // )[30m:1m] // ) // ) - fmt.Sprintf(`avg_over_time(cluster:telemetry_selected_series:count[%s]) >= %d`, testDuration, averageSeriesLimit): false, - fmt.Sprintf(`max_over_time(cluster:telemetry_selected_series:count[%s]) >= 1200`, testDuration): false, + fmt.Sprintf(`avg_over_time(cluster:telemetry_selected_series:count[%s]) >= 1000`, testDuration): false, + fmt.Sprintf(`max_over_time(cluster:telemetry_selected_series:count[%s]) >= 1200`, testDuration): false, } err := helper.RunQueries(context.TODO(), oc.NewPrometheusClient(context.TODO()), tests, oc) o.Expect(err).NotTo(o.HaveOccurred())