From d08b7e83652486f2799cc1f60e305ced5f4ef88f Mon Sep 17 00:00:00 2001 From: Lili Cosic Date: Mon, 15 Jun 2020 16:32:54 +0200 Subject: [PATCH 1/2] test/extended/prometheus: Add test for no alerts in pending state Because our tests run for only a short amount of time, they might not catch alerts that would start firing later: some components get started later in the cluster run, so their alerts might only have reached the pending state. This test hopes to catch those alerts that would potentially end up in the firing state. Ideally, out of the box, we should also not have any alerts in pending or firing state at all. --- test/extended/prometheus/prometheus.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go index 4867b063ac22..0b8460d474f4 100644 --- a/test/extended/prometheus/prometheus.go +++ b/test/extended/prometheus/prometheus.go @@ -88,6 +88,20 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() { e2e.Logf("Watchdog alert is firing") }) + g.It("should not have any alerts in pending state the entire cluster run", func() { + oc.SetupProject() + ns := oc.Namespace() + execPod := exutil.CreateCentosExecPodOrFail(oc.AdminKubeClient(), ns, "execpod", nil) + defer func() { + oc.AdminKubeClient().CoreV1().Pods(ns).Delete(context.Background(), execPod.Name, *metav1.NewDeleteOptions(1)) + }() + + tests := map[string]bool{ + `count_over_time(ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured|KubeAPILatencyHigh", alertstate="pending"}[2h])`: false, + } + helper.RunQueries(tests, oc, ns, execPod.Name, url, bearerToken) + }) + g.It("shouldn't exceed the 500 series limit of total series sent via telemetry from each cluster", func() { if !hasPullSecret(oc.AdminKubeClient(), "cloud.openshift.com") { e2eskipper.Skipf("Telemetry is disabled") From 46e18b846fcc6e9cbf2baa6c7d1e1bace9774d1e Mon Sep 17 00:00:00 2001 From: Lili Cosic Date: Mon, 15 Jun 2020 17:07:41 +0200 Subject: [PATCH 2/2] 
test/extended/util/annotate/generated: Regenerate file --- .../extended/util/annotate/generated/zz_generated.annotations.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go index 12ac908ebd92..88e3c74ab09b 100644 --- a/test/extended/util/annotate/generated/zz_generated.annotations.go +++ b/test/extended/util/annotate/generated/zz_generated.annotations.go @@ -892,6 +892,7 @@ var annotations = map[string]string{ "[Top Level] [sig-instrumentation] Stackdriver Monitoring should run Custom Metrics - Stackdriver Adapter for old resource model [Feature:StackdriverCustomMetrics]": "should run Custom Metrics - Stackdriver Adapter for old resource model [Feature:StackdriverCustomMetrics] [Disabled:Unimplemented] [Suite:k8s]", "[Top Level] [sig-instrumentation] Stackdriver Monitoring should run Stackdriver Metadata Agent [Feature:StackdriverMetadataAgent]": "should run Stackdriver Metadata Agent [Feature:StackdriverMetadataAgent] [Disabled:Unimplemented] [Suite:k8s]", "[Top Level] [sig-instrumentation][Late] Alerts should have a Watchdog alert in firing state the entire cluster run": "should have a Watchdog alert in firing state the entire cluster run [Suite:openshift/conformance/parallel]", + "[Top Level] [sig-instrumentation][Late] Alerts should not have any alerts in pending state the entire cluster run": "should not have any alerts in pending state the entire cluster run [Suite:openshift/conformance/parallel]", "[Top Level] [sig-instrumentation][Late] Alerts shouldn't exceed the 500 series limit of total series sent via telemetry from each cluster": "shouldn't exceed the 500 series limit of total series sent via telemetry from each cluster [Suite:openshift/conformance/parallel]", "[Top Level] [sig-instrumentation][Late] Alerts shouldn't report any alerts in firing state apart from Watchdog and AlertmanagerReceiversNotConfigured": 
"shouldn't report any alerts in firing state apart from Watchdog and AlertmanagerReceiversNotConfigured [Suite:openshift/conformance/parallel]", "[Top Level] [sig-instrumentation][sig-builds][Feature:Builds] Prometheus when installed on the cluster should start and expose a secured proxy and verify build metrics": "should start and expose a secured proxy and verify build metrics [Suite:openshift/conformance/parallel]",