diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go
index 2d6bda4966d7..425f964925a5 100644
--- a/test/extended/prometheus/prometheus.go
+++ b/test/extended/prometheus/prometheus.go
@@ -65,10 +65,9 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() {
 		testDuration := exutil.DurationSinceStartInSeconds().String()
 
 		tests := map[string]bool{
-			// Checking Watchdog alert state is done in "should have a Watchdog alert in firing state".
-			// TODO: remove KubePodCrashLooping subtraction logic once https://bugzilla.redhat.com/show_bug.cgi?id=1842002
-			// is fixed, but for now we are ignoring KubePodCrashLooping alerts in the openshift-kube-controller-manager namespace.
-			fmt.Sprintf(`count_over_time(ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured|KubeAPILatencyHigh",alertstate="firing",severity!="info"}[%[1]s]) - count_over_time(ALERTS{alertname="KubePodCrashLooping",namespace="openshift-kube-controller-manager",alertstate="firing",severity!="info"}[%[1]s]) >= 1`, testDuration): false,
+			// Invariant: no alerts should have fired during the test run except the known exceptions.
+			// Returns the number of seconds each alert was firing.
+			fmt.Sprintf(`sort_desc(count_over_time(ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured",alertstate="firing",severity!="info"}[%[1]s:1s]) > 0)`, testDuration): false,
 		}
 		err := helper.RunQueries(tests, oc, ns, execPod.Name, url, bearerToken)
 		o.Expect(err).NotTo(o.HaveOccurred())
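
Note: a minimal standalone sketch of what the new query string expands to, assuming a
hypothetical 1800s test window in place of exutil.DurationSinceStartInSeconds().String().
The [<window>:1s] subquery re-evaluates the ALERTS selector at a 1-second resolution
across the window, so count_over_time yields the number of seconds each alert spent
firing; "> 0" keeps only alerts that fired at all, and sort_desc puts the worst
offenders first.

	package main

	import "fmt"

	func main() {
		// Hypothetical test window; in the suite this value comes from
		// exutil.DurationSinceStartInSeconds().String().
		testDuration := "1800s"

		// %[1]s is Go's indexed format verb, letting the same duration be
		// reused if the query ever needs it in more than one place.
		query := fmt.Sprintf(`sort_desc(count_over_time(ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured",alertstate="firing",severity!="info"}[%[1]s:1s]) > 0)`, testDuration)
		fmt.Println(query)
	}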