From e38745d3c57537f5f7542e7d952dee799c2503c8 Mon Sep 17 00:00:00 2001
From: Devan Goodwin
Date: Wed, 28 Feb 2024 08:28:52 -0400
Subject: [PATCH] Do not let loki alerts fail tests

Yesterday, with loki in full outage, we saw this single test fail,
reporting an alert firing in openshift-e2e-loki due to a stuck daemon
set rollout. Promtail pods would run but never went ready because they
couldn't communicate with loki.

Filter out any alerts from the openshift-e2e-loki namespace in this
test so we can pass all tests even when loki is down. We were very
close otherwise, as no other problems popped up.
---
 test/extended/prometheus/prometheus.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go
index 8f6d721b5545..f67449b1e1b6 100644
--- a/test/extended/prometheus/prometheus.go
+++ b/test/extended/prometheus/prometheus.go
@@ -551,7 +551,8 @@ var _ = g.Describe("[sig-instrumentation] Prometheus [apigroup:image.openshift.i
 	}
 
 	tests := map[string]bool{
-		fmt.Sprintf(`ALERTS{alertname!~"%s",alertstate="firing",severity!="info"} >= 1`, strings.Join(allowedAlertNames, "|")): false,
+		// openshift-e2e-loki alerts should never fail this test; we've seen this happen when a daemon set rollout got stuck because CI loki was down.
+		fmt.Sprintf(`ALERTS{alertname!~"%s",alertstate="firing",severity!="info",namespace!="openshift-e2e-loki"} >= 1`, strings.Join(allowedAlertNames, "|")): false,
 	}
 	err := helper.RunQueries(context.TODO(), oc.NewPrometheusClient(context.TODO()), tests, oc)
 	o.Expect(err).NotTo(o.HaveOccurred())
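
For illustration, assuming a hypothetical allowedAlertNames slice
containing only "Watchdog" and "AlertmanagerReceiversNotConfigured",
the fmt.Sprintf above would render the following PromQL query, which
now excludes every alert originating in the openshift-e2e-loki
namespace:

    ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured",alertstate="firing",severity!="info",namespace!="openshift-e2e-loki"} >= 1

The false value in the tests map marks this query as one that is
expected to return no data, i.e. no unexpected alerts firing; with the
added namespace matcher, a loki-side alert such as the stuck daemon set
rollout no longer matches the selector and cannot fail the run.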