diff --git a/pkg/operator/controller/monitoring-dashboard/controller.go b/pkg/operator/controller/monitoring-dashboard/controller.go new file mode 100644 index 0000000000..b6e60d4106 --- /dev/null +++ b/pkg/operator/controller/monitoring-dashboard/controller.go @@ -0,0 +1,67 @@ +package monitoringdashboard + +import ( + "context" + "fmt" + + configv1 "github.com/openshift/api/config/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +const ( + controllerName = "monitoring_dashboard_controller" +) + +// New creates the monitoring dashboard controller. This is the controller +// that handles all the logic about the monitoring dashboard +func New(mgr manager.Manager) (controller.Controller, error) { + operatorCache := mgr.GetCache() + reconciler := &reconciler{ + client: mgr.GetClient(), + } + c, err := controller.New(controllerName, mgr, controller.Options{Reconciler: reconciler}) + if err != nil { + return nil, err + } + + CMPredicate := predicate.NewPredicateFuncs(func(o client.Object) bool { + return o.GetName() == dashboardConfigMapName + }) + + if err := c.Watch(source.Kind(operatorCache, &corev1.ConfigMap{}), &handler.EnqueueRequestForObject{}, CMPredicate); err != nil { + return nil, err + } + + if err := c.Watch(source.Kind(operatorCache, &configv1.Infrastructure{}), &handler.EnqueueRequestForObject{}); err != nil { + return nil, err + } + + return c, nil +} + +// reconciler handles the actual monitoringdashboard reconciliation logic in response to events. +type reconciler struct { + client client.Client +} + +// Reconcile will look at the cluster configuration and create the monitoring dashboard accordingly +func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + infraConfig := &configv1.Infrastructure{} + if err := r.client.Get(ctx, types.NamespacedName{Name: "cluster"}, infraConfig); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get infrastructure 'config': %v", err) + } + + if err := r.ensureMonitoringDashboard(ctx, infraConfig.Status); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure monitoring dashboard: %v", err) + } + + return reconcile.Result{}, nil +} diff --git a/pkg/operator/controller/monitoring-dashboard/dashboard.json b/pkg/operator/controller/monitoring-dashboard/dashboard.json new file mode 100644 index 0000000000..3871440b25 --- /dev/null +++ b/pkg/operator/controller/monitoring-dashboard/dashboard.json @@ -0,0 +1,1448 @@ +{ + "__inputs": [], + "__requires": [], + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "panels": [], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "format": "Bps", + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(haproxy_server_bytes_in_total[1m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Current Total Incoming Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "format": "Bps", + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(haproxy_server_bytes_out_total[1m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Current Total Outgoing Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "format": "", + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(haproxy_server_http_responses_total{code=~\"4xx|5xx\", route!=\"\"}[$__range])) / sum(increase(haproxy_server_http_responses_total{route!=\"\"}[$__range]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [ + 2, + 10 + ], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Error Rate", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "format": "ms", + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(haproxy_server_http_average_response_latency_milliseconds != 0)", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [ + 2, + 10 + ], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Server Average Response Latency", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(haproxy_server_bytes_in_total{route!=\"\"}[1m])) by (route) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ route }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(haproxy_server_bytes_out_total{route!=\"\"}[1m])) by (route) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ route }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(rate(haproxy_server_http_responses_total{code=~\"4xx|5xx\", route!=\"\"}[1m]) != 0) by (route) / sum(rate(haproxy_server_http_responses_total{route!=\"\"}[1m])) by (route))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ route }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Server Response Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,avg(haproxy_server_http_average_response_latency_milliseconds{route!=\"\"} != 0) by (route) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ route }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average HTTP Server Response Latency (ms)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 Per Route", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(haproxy_server_bytes_in_total{exported_namespace!=\"\"}[1m])) by (exported_namespace) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ exported_namespace }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(haproxy_server_bytes_out_total{exported_namespace!=\"\"}[1m])) by (exported_namespace) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ exported_namespace }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(rate(haproxy_server_http_responses_total{code=~\"4xx|5xx\", route!=\"\"}[1m]) != 0) by (exported_namespace) / sum(rate(haproxy_server_http_responses_total{route!=\"\"}[1m])) by (exported_namespace))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ exported_namespace }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Server Response Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,avg(haproxy_server_http_average_response_latency_milliseconds{exported_namespace!=\"\"} != 0) by (exported_namespace) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ exported_namespace }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average HTTP Server Response Latency (ms)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 Per Namespace", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(haproxy_server_bytes_in_total{route!=\"\"}[1m])) by (service) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ service }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(haproxy_server_bytes_out_total{route!=\"\"}[1m])) by (service) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ service }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(rate(haproxy_server_http_responses_total{code=~\"4xx|5xx\", route!=\"\"}[1m]) != 0) by (service) / sum(rate(haproxy_server_http_responses_total{route!=\"\"}[1m])) by (service))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ service }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Server Response Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,avg(haproxy_server_http_average_response_latency_milliseconds{route!=\"\"} != 0) by (service) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ service }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average HTTP Server Response Latency (ms)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 20, + "w": 25, + "x": 0, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,count(count(haproxy_server_up == 1) by (route, service)) by (service) != 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ service }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of Routes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 Per Shard", + "titleSize": "h6" + } + ], + "schemaVersion": 16, + "tags": [ + "networking-mixin" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Networking / Ingress", + "version": 0 +} diff --git a/pkg/operator/controller/monitoring-dashboard/monitoring_dashboard.go b/pkg/operator/controller/monitoring-dashboard/monitoring_dashboard.go new file mode 100644 index 0000000000..0565d1e8e1 --- /dev/null +++ b/pkg/operator/controller/monitoring-dashboard/monitoring_dashboard.go @@ -0,0 +1,111 @@ +package monitoringdashboard + +import ( + "context" + _ "embed" + "fmt" + "reflect" + + configv1 "github.com/openshift/api/config/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +const ( + dashboardConfigMapName = "grafana-dashboard-ingress-operator" + dashboardConfigMapNamespace = "openshift-config-managed" + consoleDashboardLabel = "console.openshift.io/dashboard" + DashboardFileName = "dashboard.json" +) + +// ensureMonitoringDashboard creates, updates or deletes an operator generated +// configmap containing the dashboard for ingress operator monitoring. +// Return any errors. +func (r *reconciler) ensureMonitoringDashboard(ctx context.Context, infraStatus configv1.InfrastructureStatus) error { + current, err := r.currentMonitoringDashboard(ctx) + if err != nil { + return fmt.Errorf("failed to get current monitoring dashboard: %v", err) + } + + desired := desiredMonitoringDashboard(ctx, infraStatus, current) + + switch { + case current == nil && desired != nil: + err = r.client.Create(ctx, desired) + case current != nil && desired != nil: + if dashboardNeedsUpdate(current, desired) { + err = r.client.Update(ctx, desired) + } + case current != nil && desired == nil: + err = r.client.Delete(ctx, current) + case current == nil && desired == nil: + // nothing to do + } + + return err +} + +func ConfigMapName() types.NamespacedName { + return types.NamespacedName{ + Namespace: dashboardConfigMapNamespace, + Name: dashboardConfigMapName, + } +} + +// currentMonitoringDashboard retrieves the existing monitoring dashboard ConfigMap if it exists, otherwise returns nil. +// If an error occurs during the retrieval, it returns the error. +func (r *reconciler) currentMonitoringDashboard(ctx context.Context) (*corev1.ConfigMap, error) { + configmap := &corev1.ConfigMap{} + name := ConfigMapName() + if err := r.client.Get(ctx, name, configmap); err != nil { + if errors.IsNotFound(err) { + return nil, nil + } + return nil, err + } + return configmap, nil +} + +// dashboardJSON is the string representation of the embedded 'dashboard.json' file. +// +//go:embed dashboard.json +var dashboardJSON string + +// desiredMonitoringDashboard return the desired configmap for the monitoring dashboard or nil if the +// configmap should not be deployed +func desiredMonitoringDashboard(ctx context.Context, infraStatus configv1.InfrastructureStatus, current *corev1.ConfigMap) *corev1.ConfigMap { + // If control plane topology is set to external, we do not deploy the dashboard + if infraStatus.ControlPlaneTopology == configv1.ExternalTopologyMode { + return nil + } + desired := corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: dashboardConfigMapName, + Namespace: dashboardConfigMapNamespace, + Labels: map[string]string{ + consoleDashboardLabel: "true", + }, + }, + Data: map[string]string{ + DashboardFileName: dashboardJSON, + }, + } + if current != nil { + desired.SetResourceVersion(current.GetResourceVersion()) + } + return &desired +} + +// dashboardNeedsUpdate compares the current dashboard configmap +// and desired ConfigMaps. It returns true if the data within the +// current ConfigMap does not match the desired ConfigMap, or if the +// console label dashboard is false or not present. This indicates +// that the dashboard data requires an update. +func dashboardNeedsUpdate(current *corev1.ConfigMap, desired *corev1.ConfigMap) bool { + if labelValue, ok := current.ObjectMeta.Labels[consoleDashboardLabel]; !ok || labelValue != "true" { + return true + } + return !reflect.DeepEqual(current.Data, desired.Data) +} diff --git a/pkg/operator/controller/monitoring-dashboard/monitoring_dashboard_test.go b/pkg/operator/controller/monitoring-dashboard/monitoring_dashboard_test.go new file mode 100644 index 0000000000..7723f1d921 --- /dev/null +++ b/pkg/operator/controller/monitoring-dashboard/monitoring_dashboard_test.go @@ -0,0 +1,261 @@ +package monitoringdashboard + +import ( + "context" + "reflect" + "testing" + + configv1 "github.com/openshift/api/config/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func newConfigMap() *corev1.ConfigMap { + return &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: dashboardConfigMapName, + Namespace: "openshift-config-managed", + Labels: map[string]string{ + consoleDashboardLabel: "true", + }, + }, + Data: map[string]string{ + DashboardFileName: dashboardJSON, + }, + } +} + +// TestDashboardNeedsUpdate checks if the dashboardNeedsUpdate function +// accurately determines the need for dashboard ConfigMap updates under various scenarios. +func TestDashboardNeedsUpdate(t *testing.T) { + type testInputs struct { + current *corev1.ConfigMap + desired *corev1.ConfigMap + } + type testOutputs struct { + updateNeeded bool + } + testCases := []struct { + description string + inputs testInputs + output testOutputs + }{ + { + description: "Identical configmaps", + inputs: testInputs{ + current: newConfigMap(), + desired: newConfigMap(), + }, + output: testOutputs{ + updateNeeded: false, + }, + }, + { + description: "Missing dashboard in configmap", + inputs: testInputs{ + current: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dashboardConfigMapName", + Namespace: "openshift-config-managed", + Labels: map[string]string{ + consoleDashboardLabel: "true", + }, + }, + Data: map[string]string{}, + }, + desired: newConfigMap(), + }, + output: testOutputs{ + updateNeeded: true, + }, + }, + { + description: "Wrong dashboard value", + inputs: testInputs{ + current: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dashboardConfigMapName", + Namespace: "openshift-config-managed", + Labels: map[string]string{ + consoleDashboardLabel: "true", + }, + }, + Data: map[string]string{ + DashboardFileName: "corrupted text", + }, + }, + desired: newConfigMap(), + }, + output: testOutputs{ + updateNeeded: true, + }, + }, + { + description: "Second unwanted dashboard", + inputs: testInputs{ + current: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dashboardConfigMapName", + Namespace: "openshift-config-managed", + Labels: map[string]string{ + consoleDashboardLabel: "true", + }, + }, + Data: map[string]string{ + DashboardFileName: dashboardJSON, + "dashboard2.json": dashboardJSON, + }, + }, + desired: newConfigMap(), + }, + output: testOutputs{ + updateNeeded: true, + }, + }, + { + description: "Missing label", + inputs: testInputs{ + current: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dashboardConfigMapName", + Namespace: "openshift-config-managed", + Labels: map[string]string{}, + }, + Data: map[string]string{ + DashboardFileName: dashboardJSON, + }, + }, + desired: newConfigMap(), + }, + output: testOutputs{ + updateNeeded: true, + }, + }, + { + description: "Label set to false", + inputs: testInputs{ + current: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dashboardConfigMapName", + Namespace: "openshift-config-managed", + Labels: map[string]string{ + consoleDashboardLabel: "false", + }, + }, + Data: map[string]string{ + DashboardFileName: dashboardJSON, + }, + }, + desired: newConfigMap(), + }, + output: testOutputs{ + updateNeeded: true, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + expected := tc.output.updateNeeded + actual := dashboardNeedsUpdate(tc.inputs.current, tc.inputs.desired) + if expected != actual { + t.Errorf("expected %v, got %v", expected, actual) + } + }) + } +} + +// TestDesiredMonitoringDashboard verifies that the function +// desiredMonitoringDashboard correctly creates a monitoring dashboard +// ConfigMap based on the ControlPlaneTopology value. It ensures no ConfigMap +// is returned for ExternalTopologyMode and checks for a correct ConfigMap in +// other cases. +func TestDesiredMonitoringDashboard(t *testing.T) { + type testInputs struct { + infraStatus configv1.InfrastructureStatus + current *corev1.ConfigMap + } + type testOutputs struct { + configMap *corev1.ConfigMap + } + testCases := []struct { + description string + inputs testInputs + output testOutputs + }{ + { + description: "No dashboard if topology is external", + inputs: testInputs{ + infraStatus: configv1.InfrastructureStatus{ + ControlPlaneTopology: configv1.ExternalTopologyMode, + }, + current: nil, + }, + output: testOutputs{ + configMap: nil, + }, + }, + { + description: "Dashboard expected if topology is not external", + inputs: testInputs{ + infraStatus: configv1.InfrastructureStatus{ + ControlPlaneTopology: configv1.SingleReplicaTopologyMode, + }, + current: nil, + }, + output: testOutputs{ + configMap: newConfigMap(), + }, + }, + { + description: "Desired must use current resource version", + inputs: testInputs{ + infraStatus: configv1.InfrastructureStatus{ + ControlPlaneTopology: configv1.SingleReplicaTopologyMode, + }, + current: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: dashboardConfigMapName, + Namespace: "openshift-config-managed", + Labels: map[string]string{ + consoleDashboardLabel: "true", + }, + ResourceVersion: "32", + }, + Data: map[string]string{ + DashboardFileName: dashboardJSON, + }, + }, + }, + output: testOutputs{ + configMap: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: dashboardConfigMapName, + Namespace: "openshift-config-managed", + Labels: map[string]string{ + consoleDashboardLabel: "true", + }, + ResourceVersion: "32", + }, + Data: map[string]string{ + DashboardFileName: dashboardJSON, + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + expected := tc.output.configMap + actual := desiredMonitoringDashboard(context.TODO(), tc.inputs.infraStatus, tc.inputs.current) + if expected == nil && actual != nil { + t.Errorf("expected %v, got %v", expected, actual) + } else if expected != nil && actual == nil { + t.Errorf("expected %v, got %v", expected, actual) + } else if !reflect.DeepEqual(expected, actual) { + t.Errorf("expected %v, got %v", expected, actual) + } + }) + } +} diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 7e19fcf3f7..be28712dda 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -10,6 +10,7 @@ import ( "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" "github.com/openshift/library-go/pkg/operator/v1helpers" + monitoringdashboard "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/monitoring-dashboard" routemetricscontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/route-metrics" errorpageconfigmapcontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/sync-http-error-code-configmap" "github.com/openshift/library-go/pkg/operator/onepodpernodeccontroller" @@ -274,6 +275,11 @@ func New(config operatorconfig.Config, kubeConfig *rest.Config) (*Operator, erro return nil, fmt.Errorf("failed to create route metrics controller: %w", err) } + // Set up the route monitoring dashboard controller. + if _, err := monitoringdashboard.New(mgr); err != nil { + return nil, fmt.Errorf("failed to create monitoring dashboard controller: %w", err) + } + // Set up the gatewayclass controller. This controller is unmanaged by // the manager; the gatewayapi controller starts it after it creates the // Gateway API CRDs. diff --git a/test/e2e/all_test.go b/test/e2e/all_test.go index c8ce27ab54..14ab641b82 100644 --- a/test/e2e/all_test.go +++ b/test/e2e/all_test.go @@ -113,5 +113,6 @@ func TestAll(t *testing.T) { t.Run("TestRouteHardStopAfterEnableOnIngressConfig", TestRouteHardStopAfterEnableOnIngressConfig) t.Run("TestRouteHardStopAfterEnableOnIngressControllerHasPriorityOverIngressConfig", TestRouteHardStopAfterEnableOnIngressControllerHasPriorityOverIngressConfig) t.Run("TestHostNetworkPortBinding", TestHostNetworkPortBinding) + t.Run("TestDashboardCreation", TestDashboardCreation) }) } diff --git a/test/e2e/dashboard_test.go b/test/e2e/dashboard_test.go new file mode 100644 index 0000000000..913f6a97eb --- /dev/null +++ b/test/e2e/dashboard_test.go @@ -0,0 +1,82 @@ +//go:build e2e +// +build e2e + +package e2e + +import ( + "context" + "reflect" + "testing" + + configv1 "github.com/openshift/api/config/v1" + monitoringdashboard "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/monitoring-dashboard" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "time" +) + +func TestDashboardCreation(t *testing.T) { + t.Parallel() + + infraConfig := &configv1.Infrastructure{} + if err := kclient.Get(context.TODO(), types.NamespacedName{Name: "cluster"}, infraConfig); err != nil { + t.Fatalf("failed to get infraConfig: %v", err) + } + + dashboardCM := &corev1.ConfigMap{} + if err := kclient.Get(context.TODO(), monitoringdashboard.ConfigMapName(), dashboardCM); err != nil { + if errors.IsNotFound(err) && infraConfig.Status.ControlPlaneTopology == configv1.ExternalTopologyMode { + // Dashboard is not created when external topology is externel + return + } + t.Fatalf("failed to get dashboard configmap: %v", err) + } + + initialData := dashboardCM.Data + + // Change dashboard in configmap and check for update from the operator + dashboardCM.Data = map[string]string{ + monitoringdashboard.DashboardFileName: "", + } + if err := kclient.Update(context.TODO(), dashboardCM); err != nil { + t.Fatalf("failed to update dashboard configmap: %v", err) + } + err := wait.PollImmediate(1*time.Second, 1*time.Minute, func() (bool, error) { + err := kclient.Get(context.TODO(), monitoringdashboard.ConfigMapName(), dashboardCM) + if err != nil { + t.Logf("Failed to get ConfigMap, retrying... Error: %v", err) + return false, nil + } + if dashboard, ok := dashboardCM.Data[monitoringdashboard.DashboardFileName]; ok && dashboard != "" { + return true, nil + } + t.Logf("ConfigMap not yet updated, retrying...") + return false, nil + }) + if err != nil { + t.Fatalf("failed to observe configmap: %v", err) + } + + // Delete configmap and check for update from the operator + dashboardCM.Data = map[string]string{ + monitoringdashboard.DashboardFileName: "", + } + if err := kclient.Delete(context.TODO(), dashboardCM); err != nil { + t.Fatalf("failed to delete dashboard configmap: %v", err) + } + err = wait.PollImmediate(1*time.Second, 1*time.Minute, func() (bool, error) { + if err := kclient.Get(context.TODO(), monitoringdashboard.ConfigMapName(), dashboardCM); err != nil { + t.Logf("failed to get configmap: %v, retying...", err) + return false, nil + } + return true, nil + }) + if err != nil { + t.Fatalf("failed to observe configmap: %v", err) + } + if !reflect.DeepEqual(dashboardCM.Data, initialData) { + t.Fatalf("data mismatch") + } +}