diff --git a/test/e2e/upgrade/alert/alert.go b/test/e2e/upgrade/alert/alert.go
index 800feb9505ce..cb0a4e8d1273 100644
--- a/test/e2e/upgrade/alert/alert.go
+++ b/test/e2e/upgrade/alert/alert.go
@@ -3,46 +3,42 @@ package alert
 import (
 	"context"
 	"fmt"
+	"strings"
 	"time"
 
 	g "github.com/onsi/ginkgo"
 	o "github.com/onsi/gomega"
+	"github.com/prometheus/common/model"
 
 	exutil "github.com/openshift/origin/test/extended/util"
+	"github.com/openshift/origin/test/extended/util/disruption"
 	helper "github.com/openshift/origin/test/extended/util/prometheus"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/kubernetes/test/e2e/framework"
-	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	"k8s.io/kubernetes/test/e2e/upgrades"
 )
 
-const (
-	// Delay after upgrade is complete before checking for critical alerts
-	alertCheckSleepMinutes = 5
-	alertCheckSleep        = alertCheckSleepMinutes * time.Minute
-
-	// Previous period in which to check for critical alerts
-	alertPeriodCheckMinutes = 1
-)
-
-// UpgradeTest runs post-upgrade after alertCheckSleep delay and tests if any critical alerts are firing.
+// UpgradeTest verifies invariants regarding what alerts are allowed to fire
+// during the upgrade process.
 type UpgradeTest struct {
 	url         string
 	bearerToken string
 	oc          *exutil.CLI
 }
 
-func (UpgradeTest) Name() string { return "check-for-critical-alerts" }
+func (UpgradeTest) Name() string { return "check-for-alerts" }
 func (UpgradeTest) DisplayName() string {
-	return "[sig-arch] Check if critical alerts are firing after upgrade success"
+	return "[sig-arch] Check if alerts are firing during or after upgrade success"
 }
 
 // Setup creates parameters to query Prometheus
 func (t *UpgradeTest) Setup(f *framework.Framework) {
-	g.By("Setting up post-upgrade alert test")
+	g.By("Setting up upgrade alert test")
 
-	url, bearerToken, oc, ok := helper.ExpectPrometheus(f)
+	oc := exutil.NewCLIWithFramework(f)
+	url, _, bearerToken, ok := helper.LocatePrometheus(oc)
 	if !ok {
 		framework.Failf("Prometheus could not be located on this cluster, failing test %s", t.Name())
 	}
@@ -52,53 +48,183 @@ func (t *UpgradeTest) Setup(f *framework.Framework) {
 	framework.Logf("Post-upgrade alert test setup complete")
 }
 
-// Test checks if any critical alerts are firing.
+type MetricCondition struct {
+	Selector map[string]string
+	Text     string
+}
+
+type MetricConditions []MetricCondition
+
+func (c MetricConditions) Matches(sample *model.Sample) *MetricCondition {
+	for i, condition := range c {
+		matches := true
+		for name, value := range condition.Selector {
+			if sample.Metric[model.LabelName(name)] != model.LabelValue(value) {
+				matches = false
+				break
+			}
+		}
+		if matches {
+			return &c[i]
+		}
+	}
+	return nil
+}
+
+func stripLabels(m model.Metric, names ...string) model.LabelSet {
+	labels := make(model.LabelSet)
+	for k := range m {
+		labels[k] = m[k]
+	}
+	for _, name := range names {
+		delete(labels, model.LabelName(name))
+	}
+	return labels
+}
+
+// Test checks if alerts are firing at various points during upgrade.
+// An alert firing during an upgrade is a high severity bug - it either points to a real issue in
+// a dependency, or a failure of the component, and therefore must be fixed.
 func (t *UpgradeTest) Test(f *framework.Framework, done <-chan struct{}, upgrade upgrades.UpgradeType) {
-	g.By("Checking for critical alerts")
+	tolerateDuringSkew := exutil.TolerateVersionSkewInTests()
+	firingAlertsWithBugs := MetricConditions{
+		{
+			Selector: map[string]string{"alertname": "KubePodNotReady", "namespace": "openshift-kube-apiserver-operator"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1939580",
+		},
+		{
+			Selector: map[string]string{"alertname": "ClusterOperatorDegraded", "name": "openshift-apiserver"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1939580",
+		},
+		{
+			Selector: map[string]string{"alertname": "ClusterOperatorDown", "name": "authentication"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1939580",
+		},
+		{
+			Selector: map[string]string{"alertname": "ClusterOperatorDegraded", "name": "authentication"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1939580",
+		},
+
+		{
+			Selector: map[string]string{"alertname": "FailingOperator", "name": "packageserver", "job": "olm-operator-metrics"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1932626",
+		},
+		{
+			Selector: map[string]string{"alertname": "ThanosSidecarUnhealthy", "job": "prometheus-k8s-thanos-sidecar"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1940262",
+		},
+	}
+
+	pendingAlertsWithBugs := MetricConditions{
+		{
+			Selector: map[string]string{"alertname": "ClusterMonitoringOperatorReconciliationErrors"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1932624",
+		},
+		{
+			Selector: map[string]string{"alertname": "KubeClientErrors"},
+			Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1925698",
+		},
+	}
+	allowedPendingAlerts := MetricConditions{
+		{
+			Selector: map[string]string{"alertname": "etcdMemberCommunicationSlow"},
+			Text:     "Excluded because it triggers during upgrade (detects ~5m of high latency immediately preceding the end of the test), and we don't want to change the alert because it is correct",
+		},
+	}
+
+	knownViolations := sets.NewString()
+	unexpectedViolations := sets.NewString()
+	unexpectedViolationsAsFlakes := sets.NewString()
+	debug := sets.NewString()
+
+	g.By("Checking for alerts")
 
-	// Recover current test if it fails so test suite can complete
-	defer g.GinkgoRecover()
+	start := time.Now()
 
 	// Block until upgrade is done
-	g.By("Waiting for upgrade to finish before checking for critical alerts")
+	g.By("Waiting for upgrade to finish before checking for alerts")
 	<-done
 
-	ctx, cancel := context.WithCancel(context.Background())
+	// Additional delay after upgrade completion to allow pending alerts to settle
+	g.By("Waiting before checking for alerts")
+	time.Sleep(1 * time.Minute)
 
-	// Additonal delay after upgrade completion
-	g.By("Waiting before checking for critical alerts")
-	time.Sleep(alertCheckSleep)
-	cancel()
-
-	if exutil.TolerateVersionSkewInTests() {
-		e2eskipper.Skipf("Test is disabled to allow cluster components to have different versions, and skewed versions trigger multiple other alerts")
-	}
-	t.oc.SetupProject()
-	ns := t.oc.Namespace()
+	ns := t.oc.SetupNamespace()
 	execPod := exutil.CreateExecPodOrFail(t.oc.AdminKubeClient(), ns, "execpod")
 	defer func() {
-		t.oc.AdminKubeClient().CoreV1().Pods(ns).Delete(ctx, execPod.Name, *metav1.NewDeleteOptions(1))
+		t.oc.AdminKubeClient().CoreV1().Pods(ns).Delete(context.Background(), execPod.Name, *metav1.NewDeleteOptions(1))
 	}()
 
-	// Query to check if Prometheus has been up and running for entire post-upgrade
-	// period by verifying Watchdog alert has been in firing state
-	watchdogQuery := fmt.Sprintf(`count_over_time(ALERTS{alertstate="firing",alertname="Watchdog", severity="none"}[%dm])`, alertCheckSleepMinutes)
-
-	// Query to check for any critical severity alerts that have occurred within the last alertPeriodCheckMinutes.
-	criticalAlertQuery := fmt.Sprintf(`count_over_time(ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured|KubeAPILatencyHigh",alertstate="firing",severity="critical"}[%dm]) >= 1`, alertPeriodCheckMinutes)
+	testDuration := time.Now().Sub(start).Truncate(time.Second)
+
+	// Invariant: The watchdog alert should be firing continuously during the whole upgrade via the thanos
+	// querier (which should have no gaps when it queries the individual stores). Allow zero or one changes
+	// to the presence of this series (zero if data is preserved over upgrade, one if data is lost on upgrade).
+	// This would not catch the alert stopping firing, but we catch that in other places and tests.
+	watchdogQuery := fmt.Sprintf(`changes((max((ALERTS{alertstate="firing",alertname="Watchdog",severity="none"}) or (absent(ALERTS{alertstate="firing",alertname="Watchdog",severity="none"})*0)))[%s:1s]) > 1`, testDuration)
+	result, err := helper.RunQuery(watchdogQuery, ns, execPod.Name, t.url, t.bearerToken)
+	o.Expect(err).NotTo(o.HaveOccurred(), "unable to check watchdog alert over upgrade window")
+	if len(result.Data.Result) > 0 {
+		unexpectedViolations.Insert("Watchdog alert had missing intervals during the run, which may be a sign of a Prometheus outage in violation of the prometheus query SLO of 100% uptime during upgrade")
+	}
 
-	tests := map[string]bool{
-		watchdogQuery:      true,
-		criticalAlertQuery: false,
+	// Invariant: No non-info level alerts should have fired during the upgrade
+	firingAlertQuery := fmt.Sprintf(`
+sort_desc(
+count_over_time(ALERTS{alertstate="firing",severity!="info",alertname!~"Watchdog|AlertmanagerReceiversNotConfigured"}[%[1]s:1s])
+) > 0
+`, testDuration)
+	result, err = helper.RunQuery(firingAlertQuery, ns, execPod.Name, t.url, t.bearerToken)
+	o.Expect(err).NotTo(o.HaveOccurred(), "unable to check firing alerts during upgrade")
+	for _, series := range result.Data.Result {
+		labels := stripLabels(series.Metric, "alertname", "alertstate", "prometheus")
+		violation := fmt.Sprintf("alert %s fired for %s seconds with labels: %s", series.Metric["alertname"], series.Value, labelsAsSelector(labels))
+		if cause := firingAlertsWithBugs.Matches(series); cause != nil {
+			knownViolations.Insert(fmt.Sprintf("%s (open bug: %s)", violation, cause.Text))
+		} else {
+			unexpectedViolations.Insert(violation)
+		}
 	}
 
-	err := helper.RunQueries(tests, t.oc, ns, execPod.Name, t.url, t.bearerToken)
-	o.Expect(err).NotTo(o.HaveOccurred())
+	// Invariant: There should be no pending alerts 1m after the upgrade completes
+	pendingAlertQuery := `ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured",alertstate="pending",severity!="info"}`
+	result, err = helper.RunQuery(pendingAlertQuery, ns, execPod.Name, t.url, t.bearerToken)
+	o.Expect(err).NotTo(o.HaveOccurred(), "unable to retrieve pending alerts after upgrade")
+	for _, series := range result.Data.Result {
+		labels := stripLabels(series.Metric, "alertname", "alertstate", "prometheus")
+		violation := fmt.Sprintf("alert %s pending for %s seconds with labels: %s", series.Metric["alertname"], series.Value, labelsAsSelector(labels))
+		if cause := allowedPendingAlerts.Matches(series); cause != nil {
+			debug.Insert(fmt.Sprintf("%s (allowed: %s)", violation, cause.Text))
+			continue
+		}
+		if cause := pendingAlertsWithBugs.Matches(series); cause != nil {
+			knownViolations.Insert(fmt.Sprintf("%s (open bug: %s)", violation, cause.Text))
+		} else {
+			// treat pending errors as a flake right now because we are still trying to determine the scope
+			// TODO: move this to unexpectedViolations later
+			unexpectedViolationsAsFlakes.Insert(violation)
+		}
+	}
 
-	framework.Logf("No critical alerts firing post-upgrade")
+	if len(debug) > 0 {
+		framework.Logf("Alerts were detected during upgrade which are allowed:\n\n%s", strings.Join(debug.List(), "\n"))
+	}
+	if len(unexpectedViolations) > 0 {
+		if !tolerateDuringSkew {
+			framework.Failf("Unexpected alerts fired or pending during the upgrade:\n\n%s", strings.Join(unexpectedViolations.List(), "\n"))
+		}
+	}
+	if flakes := sets.NewString().Union(knownViolations).Union(unexpectedViolations).Union(unexpectedViolationsAsFlakes); len(flakes) > 0 {
+		disruption.FrameworkFlakef(f, "Unexpected alert behavior during upgrade:\n\n%s", strings.Join(flakes.List(), "\n"))
+	}
+	framework.Logf("No alerts fired during upgrade")
 }
 
 // Teardown cleans up any remaining resources.
 func (t *UpgradeTest) Teardown(f *framework.Framework) {
 	// rely on the namespace deletion to clean up everything
 }
+
+func labelsAsSelector(l model.LabelSet) string {
+	return l.String()
+}
diff --git a/test/extended/machines/cluster.go b/test/extended/machines/cluster.go
index 4dd78b6577c8..5e6cd1866e41 100644
--- a/test/extended/machines/cluster.go
+++ b/test/extended/machines/cluster.go
@@ -66,7 +66,7 @@ var _ = g.Describe("[sig-node] Managed cluster", func() {
 	)
 	g.BeforeEach(func() {
 		var ok bool
-		url, bearerToken, ok = prometheus.LocatePrometheus(oc)
+		url, _, bearerToken, ok = prometheus.LocatePrometheus(oc)
 		if !ok {
 			e2e.Failf("Prometheus could not be located on this cluster, failing prometheus test")
 		}
diff --git a/test/extended/prometheus/prometheus.go b/test/extended/prometheus/prometheus.go
index 00836c8b0170..9a6acbb0b917 100644
--- a/test/extended/prometheus/prometheus.go
+++ b/test/extended/prometheus/prometheus.go
@@ -45,7 +45,7 @@ var _ = g.Describe("[sig-instrumentation][Late] Alerts", func() {
 	)
 	g.BeforeEach(func() {
 		var ok bool
-		url, bearerToken, ok = helper.LocatePrometheus(oc)
+		url, _, bearerToken, ok = helper.LocatePrometheus(oc)
 		if !ok {
 			e2e.Failf("Prometheus could not be located on this cluster, failing prometheus test")
 		}
@@ -133,12 +133,12 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 	var (
 		oc = exutil.NewCLIWithoutNamespace("prometheus")
 
-		url, bearerToken string
+		url, prometheusURL, bearerToken string
 	)
 
 	g.BeforeEach(func() {
 		var ok bool
-		url, bearerToken, ok = helper.LocatePrometheus(oc)
+		url, prometheusURL, bearerToken, ok = helper.LocatePrometheus(oc)
 		if !ok {
 			e2e.Failf("Prometheus could not be located on this cluster, failing prometheus test")
 		}
@@ -183,7 +183,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			g.By("checking the prometheus metrics path")
 			var metrics map[string]*dto.MetricFamily
 			o.Expect(wait.PollImmediate(10*time.Second, 2*time.Minute, func() (bool, error) {
-				results, err := getInsecureURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/metrics", url))
+				results, err := getInsecureURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/metrics", prometheusURL))
 				if err != nil {
 					e2e.Logf("unable to get metrics: %v", err)
 					return false, nil
@@ -222,7 +222,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			// expect all endpoints within 60 seconds
 			var lastErrs []error
 			o.Expect(wait.PollImmediate(10*time.Second, 2*time.Minute, func() (bool, error) {
-				contents, err := getBearerTokenURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/api/v1/targets", url), bearerToken)
+				contents, err := getBearerTokenURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/api/v1/targets", prometheusURL), bearerToken)
 				o.Expect(err).NotTo(o.HaveOccurred())
 
 				targets := &prometheusTargets{}
@@ -269,7 +269,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			g.By("verifying all targets are exposing metrics over secure channel")
 			var insecureTargets []error
-			contents, err := getBearerTokenURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/api/v1/targets", url), bearerToken)
+			contents, err := getBearerTokenURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/api/v1/targets", prometheusURL), bearerToken)
 			o.Expect(err).NotTo(o.HaveOccurred())
 
 			targets := &prometheusTargets{}
@@ -299,6 +299,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			}
 			o.Expect(insecureTargets).To(o.BeEmpty(), "some services expose metrics over insecure channel")
 		})
+
 		g.It("should have a AlertmanagerReceiversNotConfigured alert in firing state", func() {
 			ns := oc.SetupNamespace()
 			execPod := exutil.CreateExecPodOrFail(oc.AdminKubeClient(), ns, "execpod")
@@ -314,6 +315,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 
 			e2e.Logf("AlertmanagerReceiversNotConfigured alert is firing")
 		})
+
 		g.It("should have important platform topology metrics", func() {
 			oc.SetupProject()
 			ns := oc.Namespace()
@@ -340,6 +342,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			err := helper.RunQueries(tests, oc, ns, execPod.Name, url, bearerToken)
 			o.Expect(err).NotTo(o.HaveOccurred())
 		})
+
 		g.It("should have non-Pod host cAdvisor metrics", func() {
 			ns := oc.SetupNamespace()
 			execPod := exutil.CreateExecPodOrFail(oc.AdminKubeClient(), ns, "execpod")
@@ -353,6 +356,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			err := helper.RunQueries(tests, oc, ns, execPod.Name, url, bearerToken)
 			o.Expect(err).NotTo(o.HaveOccurred())
 		})
+
 		g.It("shouldn't have failing rules evaluation", func() {
 			oc.SetupProject()
 			ns := oc.Namespace()
@@ -370,6 +374,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			err := helper.RunQueries(tests, oc, ns, execPod.Name, url, bearerToken)
 			o.Expect(err).NotTo(o.HaveOccurred())
 		})
+
 		networking.InOpenShiftSDNContext(func() {
 			g.It("should be able to get the sdn ovs flows", func() {
 				ns := oc.SetupNamespace()
@@ -386,6 +391,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 				o.Expect(err).NotTo(o.HaveOccurred())
 			})
 		})
+
 		g.It("shouldn't report any alerts in firing state apart from Watchdog and AlertmanagerReceiversNotConfigured [Early]", func() {
 			if len(os.Getenv("TEST_UNSUPPORTED_ALLOW_VERSION_SKEW")) > 0 {
 				e2eskipper.Skipf("Test is disabled to allow cluster components to have different versions, and skewed versions trigger multiple other alerts")
@@ -403,6 +409,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			err := helper.RunQueries(tests, oc, ns, execPod.Name, url, bearerToken)
 			o.Expect(err).NotTo(o.HaveOccurred())
 		})
+
 		g.It("should provide ingress metrics", func() {
 			ns := oc.SetupNamespace()
 
@@ -413,7 +420,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			var lastErrs []error
 			o.Expect(wait.PollImmediate(10*time.Second, 4*time.Minute, func() (bool, error) {
-				contents, err := getBearerTokenURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/api/v1/targets", url), bearerToken)
+				contents, err := getBearerTokenURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/api/v1/targets", prometheusURL), bearerToken)
 				o.Expect(err).NotTo(o.HaveOccurred())
 
 				targets := &prometheusTargets{}
@@ -440,6 +447,7 @@ var _ = g.Describe("[sig-instrumentation] Prometheus", func() {
 			err := helper.RunQueries(queries, oc, ns, execPod.Name, url, bearerToken)
 			o.Expect(err).NotTo(o.HaveOccurred())
 		})
+
 		g.It("should provide named network metrics", func() {
 			ns := oc.SetupNamespace()
diff --git a/test/extended/prometheus/prometheus_builds.go b/test/extended/prometheus/prometheus_builds.go
index 5be6d0927750..1b7dd0e89cc3 100644
--- a/test/extended/prometheus/prometheus_builds.go
+++ b/test/extended/prometheus/prometheus_builds.go
@@ -26,7 +26,7 @@ var _ = g.Describe("[sig-instrumentation][sig-builds][Feature:Builds] Prometheus
 	)
 	g.BeforeEach(func() {
 		var ok bool
-		url, bearerToken, ok = helper.LocatePrometheus(oc)
+		url, _, bearerToken, ok = helper.LocatePrometheus(oc)
 		if !ok {
 			e2eskipper.Skipf("Prometheus could not be located on this cluster, skipping prometheus test")
 		}
diff --git a/test/extended/prometheus/storage.go b/test/extended/prometheus/storage.go
index 492ba57bcc1e..84f5f46244ec 100644
--- a/test/extended/prometheus/storage.go
+++ b/test/extended/prometheus/storage.go
@@ -29,7 +29,7 @@ var _ = g.Describe("[sig-storage][Late] Metrics", func() {
 
 	g.BeforeEach(func() {
 		var ok bool
-		url, bearerToken, ok = helper.LocatePrometheus(oc)
+		url, _, bearerToken, ok = helper.LocatePrometheus(oc)
 		if !ok {
 			e2e.Failf("Prometheus could not be located on this cluster, failing prometheus test")
 		}
diff --git a/test/extended/util/prometheus/helpers.go b/test/extended/util/prometheus/helpers.go
index f47ed59aa8db..2744c857b8cd 100644
--- a/test/extended/util/prometheus/helpers.go
+++ b/test/extended/util/prometheus/helpers.go
@@ -62,11 +62,11 @@ func waitForServiceAccountInNamespace(c clientset.Interface, ns, serviceAccountN
 	return err
 }
 
-// LocatePrometheus uses an exisitng CLI to return information used to make http requests to Prometheus
-func LocatePrometheus(oc *exutil.CLI) (url, bearerToken string, ok bool) {
+// LocatePrometheus uses an existing CLI to return information used to make http requests to Prometheus.
+func LocatePrometheus(oc *exutil.CLI) (queryURL, prometheusURL, bearerToken string, ok bool) {
 	_, err := oc.AdminKubeClient().CoreV1().Services("openshift-monitoring").Get(context.Background(), "prometheus-k8s", metav1.GetOptions{})
 	if kapierrs.IsNotFound(err) {
-		return "", "", false
+		return "", "", "", false
 	}
 
 	waitForServiceAccountInNamespace(oc.AdminKubeClient(), "openshift-monitoring", "prometheus-k8s", 2*time.Minute)
@@ -91,18 +91,27 @@ func LocatePrometheus(oc *exutil.CLI) (url, bearerToken string, ok bool) {
 	}
 	o.Expect(bearerToken).ToNot(o.BeEmpty())
 
-	return "https://prometheus-k8s.openshift-monitoring.svc:9091", bearerToken, true
+	return "https://thanos-querier.openshift-monitoring.svc:9091", "https://prometheus-k8s.openshift-monitoring.svc:9091", bearerToken, true
 }
 
-// ExpectPrometheus uses an existing framework to return information used to make http requests to Prometheus
-func ExpectPrometheus(f *framework.Framework) (url, bearerToken string, oc *exutil.CLI, ok bool) {
-
-	// Must use version that's can run within Ginkgo It
-	oc = exutil.NewCLIWithFramework(f)
-
-	url, bearerToken, ok = LocatePrometheus(oc)
+func RunQuery(query, ns, execPodName, baseURL, bearerToken string) (*PrometheusResponse, error) {
+	url := fmt.Sprintf("%s/api/v1/query?%s", baseURL, (url.Values{"query": []string{query}}).Encode())
+	contents, err := GetBearerTokenURLViaPod(ns, execPodName, url, bearerToken)
+	if err != nil {
+		return nil, fmt.Errorf("unable to execute query %s: %v", query, err)
+	}
 
-	return url, bearerToken, ok
+	// unmarshal the response and report a failed query status as an error
+	var result PrometheusResponse
+	if err := json.Unmarshal([]byte(contents), &result); err != nil {
+		return nil, fmt.Errorf("unable to parse query response for %s: %v", query, err)
+	}
+	metrics := result.Data.Result
+	if result.Status != "success" {
+		data, _ := json.MarshalIndent(metrics, "", " ")
+		return nil, fmt.Errorf("promQL query: %s had reported incorrect status:\n%s", query, data)
+	}
+	return &result, nil
 }
 
 // RunQueries executes Prometheus queries and checks provided expected result.
@@ -119,33 +128,21 @@ func RunQueries(promQueries map[string]bool, oc *exutil.CLI, ns, execPodName, ba
 		// and introduced at https://github.com/prometheus/client_golang/blob/master/api/prometheus/v1/api.go are vendored into
 		// openshift/origin, look to replace this homegrown http request / query param with that API
 		g.By("perform prometheus metric query " + query)
-		url := fmt.Sprintf("%s/api/v1/query?%s", baseURL, (url.Values{"query": []string{query}}).Encode())
-		contents, err := GetBearerTokenURLViaPod(ns, execPodName, url, bearerToken)
+		result, err := RunQuery(query, ns, execPodName, baseURL, bearerToken)
 		if err != nil {
-			return err
-		}
-
-		// check query result, if this is a new error log it, otherwise remain silent
-		var result PrometheusResponse
-		if err := json.Unmarshal([]byte(contents), &result); err != nil {
-			framework.Logf("unable to parse query response for %s: %v", query, err)
-			continue
-		}
-		metrics := result.Data.Result
-		if result.Status != "success" {
-			data, _ := json.Marshal(metrics)
-			msg := fmt.Sprintf("promQL query: %s had reported incorrect status:\n%s", query, data)
-			if prev, ok := queryErrors[query]; !ok || prev.Error() != msg {
-				framework.Logf("%s", msg)
+			msg := err.Error()
+			if prev, ok := queryErrors[query]; ok && prev.Error() != msg {
+				framework.Logf("%s", prev.Error())
 			}
-			queryErrors[query] = fmt.Errorf(msg)
+			queryErrors[query] = err
 			continue
 		}
+		metrics := result.Data.Result
 		if (len(metrics) > 0 && !expected) || (len(metrics) == 0 && expected) {
-			data, _ := json.Marshal(metrics)
-			msg := fmt.Sprintf("promQL query: %s had reported incorrect results:\n%s", query, data)
-			if prev, ok := queryErrors[query]; !ok || prev.Error() != msg {
-				framework.Logf("%s", msg)
+			data, _ := json.MarshalIndent(result.Data.Result, "", " ")
+			msg := fmt.Sprintf("promQL query returned unexpected results:\n%s\n%s", query, data)
+			if prev, ok := queryErrors[query]; ok && prev.Error() != msg {
+				framework.Logf("%s", prev.Error())
 			}
 			queryErrors[query] = fmt.Errorf(msg)
 			continue
@@ -163,11 +160,11 @@ func RunQueries(promQueries map[string]bool, oc *exutil.CLI, ns, execPodName, ba
 	}
 
 	if len(queryErrors) != 0 {
-		exutil.DumpPodLogsStartingWith("prometheus-0", oc)
+		exutil.DumpPodLogsStartingWith("prometheus-k8s-", oc)
 	}
 
 	var errs []error
-	for query, err := range queryErrors {
-		errs = append(errs, fmt.Errorf("query failed: %s: %s", query, err))
+	for _, err := range queryErrors {
+		errs = append(errs, err)
 	}
 	return errors.NewAggregate(errs)
 }
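
Note for reviewers: a minimal, self-contained sketch of how the new MetricConditions matching in alert.go above is intended to behave, using the vendored github.com/prometheus/common/model types. The types mirror the ones added in the patch; the sample labels (the pod name in particular) are hypothetical and only for illustration.

package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

// MetricCondition pairs a label selector with an explanatory note (usually a bug link).
type MetricCondition struct {
	Selector map[string]string
	Text     string
}

type MetricConditions []MetricCondition

// Matches returns the first condition whose selector labels all match the sample, or nil.
func (c MetricConditions) Matches(sample *model.Sample) *MetricCondition {
	for i, condition := range c {
		matches := true
		for name, value := range condition.Selector {
			if sample.Metric[model.LabelName(name)] != model.LabelValue(value) {
				matches = false
				break
			}
		}
		if matches {
			return &c[i]
		}
	}
	return nil
}

func main() {
	known := MetricConditions{{
		Selector: map[string]string{"alertname": "KubePodNotReady", "namespace": "openshift-kube-apiserver-operator"},
		Text:     "https://bugzilla.redhat.com/show_bug.cgi?id=1939580",
	}}
	// A firing ALERTS sample as returned by the query; extra labels do not prevent a match.
	sample := &model.Sample{Metric: model.Metric{
		"alertname": "KubePodNotReady",
		"namespace": "openshift-kube-apiserver-operator",
		"pod":       "kube-apiserver-operator-7d9cbbd8f-x2k4q", // hypothetical pod name
	}}
	if cause := known.Matches(sample); cause != nil {
		fmt.Println("known violation, open bug:", cause.Text)
	}
}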
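The watchdog and firing-alert invariants rely on PromQL subqueries evaluated over the whole upgrade window at 1s resolution. The sketch below shows what those query strings expand to for a hypothetical 47-minute upgrade; in the test, testDuration is derived from the measured start time rather than hard-coded.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical upgrade duration; the test computes time.Now().Sub(start).Truncate(time.Second).
	testDuration := (47 * time.Minute).Truncate(time.Second)

	// Subquery over the full window: more than one change in the presence of the Watchdog series
	// means the series had gaps, i.e. Prometheus data loss beyond the single expected loss on upgrade.
	watchdogQuery := fmt.Sprintf(`changes((max((ALERTS{alertstate="firing",alertname="Watchdog",severity="none"}) or (absent(ALERTS{alertstate="firing",alertname="Watchdog",severity="none"})*0)))[%s:1s]) > 1`, testDuration)

	// Any non-info alert (other than the always-firing Watchdog and the known
	// AlertmanagerReceiversNotConfigured) that fired at any point in the window is a violation.
	firingAlertQuery := fmt.Sprintf(`
sort_desc(
count_over_time(ALERTS{alertstate="firing",severity!="info",alertname!~"Watchdog|AlertmanagerReceiversNotConfigured"}[%[1]s:1s])
) > 0
`, testDuration)

	fmt.Println(watchdogQuery) // the range renders as [47m0s:1s]
	fmt.Println(firingAlertQuery)
}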