diff --git a/README.md b/README.md index 5dc50e1d..a1cf9104 100644 --- a/README.md +++ b/README.md @@ -224,20 +224,20 @@ Further Information | elasticsearch_clusterinfo_up | gauge | 1 | Up metric for the cluster info collector | elasticsearch_clusterinfo_version_info | gauge | 6 | Constant metric with ES version information as labels | elasticsearch_slm_stats_up | gauge | 0 | Up metric for SLM collector -| elasticsearch_slm_stats_total_scrapes | gauge | 0 | Number of scrapes for SLM collector -| elasticsearch_slm_stats_json_parse_failures | gauge | 0 | JSON parse failures for SLM collector -| elasticsearch_slm_stats_retention_runs | gauge | 0 | Total retention runs -| elasticsearch_slm_stats_retention_failed | gauge | 0 | Total failed retention runs -| elasticsearch_slm_stats_retention_timed_out | gauge | 0 | Total retention run timeouts -| elasticsearch_slm_stats_retention_retention_deletion_time_millis | gauge | 0 | Retention run deletion time -| elasticsearch_slm_stats_total_snapshots_taken | gauge | 0 | Total snapshots taken -| elasticsearch_slm_stats_total_snapshots_failed | gauge | 0 | Total snapshots failed -| elasticsearch_slm_stats_total_snapshots_deleted | gauge | 0 | Total snapshots deleted -| elasticsearch_slm_stats_total_snapshots_failed | gauge | 0 | Total snapshots failed -| elasticsearch_slm_stats_snapshots_taken | gauge | 1 | Snapshots taken by policy -| elasticsearch_slm_stats_snapshots_failed | gauge | 1 | Snapshots failed by policy -| elasticsearch_slm_stats_snapshots_deleted | gauge | 1 | Snapshots deleted by policy -| elasticsearch_slm_stats_snapshot_deletion_failures | gauge | 1 | Snapshot deletion failures by policy +| elasticsearch_slm_stats_total_scrapes | counter | 0 | Number of scrapes for SLM collector +| elasticsearch_slm_stats_json_parse_failures | counter | 0 | JSON parse failures for SLM collector +| elasticsearch_slm_stats_retention_runs_total | counter | 0 | Total retention runs +| elasticsearch_slm_stats_retention_failed_total | 
counter | 0 | Total failed retention runs +| elasticsearch_slm_stats_retention_timed_out_total | counter | 0 | Total retention run timeouts +| elasticsearch_slm_stats_retention_deletion_time_seconds | gauge | 0 | Retention run deletion time +| elasticsearch_slm_stats_total_snapshots_taken_total | counter | 0 | Total snapshots taken +| elasticsearch_slm_stats_total_snapshots_failed_total | counter | 0 | Total snapshots failed +| elasticsearch_slm_stats_total_snapshots_deleted_total | counter | 0 | Total snapshots deleted +| elasticsearch_slm_stats_total_snapshot_deletion_failures_total | counter | 0 | Total snapshot deletion failures +| elasticsearch_slm_stats_snapshots_taken_total | counter | 1 | Snapshots taken by policy +| elasticsearch_slm_stats_snapshots_failed_total | counter | 1 | Snapshots failed by policy +| elasticsearch_slm_stats_snapshots_deleted_total | counter | 1 | Snapshots deleted by policy +| elasticsearch_slm_stats_snapshot_deletion_failures_total | counter | 1 | Snapshot deletion failures by policy | elasticsearch_slm_stats_operation_mode | gauge | 1 | SLM operation mode (Running, stopping, stopped) diff --git a/collector/slm.go b/collector/slm.go index 7a005b71..07543429 100644 --- a/collector/slm.go +++ b/collector/slm.go @@ -90,9 +90,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }), slmMetrics: []*slmMetric{ { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "retention_runs"), + prometheus.BuildFQName(namespace, "slm_stats", "retention_runs_total"), "Total retention runs", nil, nil, ), @@ -101,9 +101,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "retention_failed"), + prometheus.BuildFQName(namespace, "slm_stats", "retention_failed_total"), "Total failed retention 
runs", nil, nil, ), @@ -112,9 +112,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "retention_timed_out"), + prometheus.BuildFQName(namespace, "slm_stats", "retention_timed_out_total"), "Total timed out retention runs", nil, nil, ), @@ -134,9 +134,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_taken"), + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_taken_total"), "Total snapshots taken", nil, nil, ), @@ -145,9 +145,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_failed"), + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_failed_total"), "Total snapshots failed", nil, nil, ), @@ -156,9 +156,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_deleted"), + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_deleted_total"), "Total snapshots deleted", nil, nil, ), @@ -167,9 +167,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "total_snapshot_deletion_failures"), + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshot_deletion_failures_total"), "Total snapshot deletion failures", nil, nil, ), @@ 
-180,9 +180,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { }, policyMetrics: []*policyMetric{ { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "snapshots_taken"), + prometheus.BuildFQName(namespace, "slm_stats", "snapshots_taken_total"), "Total snapshots taken", defaultPolicyLabels, nil, ), @@ -192,9 +192,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { Labels: defaultPolicyLabelValues, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "snapshots_failed"), + prometheus.BuildFQName(namespace, "slm_stats", "snapshots_failed_total"), "Total snapshots failed", defaultPolicyLabels, nil, ), @@ -204,9 +204,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { Labels: defaultPolicyLabelValues, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "snapshots_deleted"), + prometheus.BuildFQName(namespace, "slm_stats", "snapshots_deleted_total"), "Total snapshots deleted", defaultPolicyLabels, nil, ), @@ -216,9 +216,9 @@ func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { Labels: defaultPolicyLabelValues, }, { - Type: prometheus.GaugeValue, + Type: prometheus.CounterValue, Desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "slm_stats", "snapshot_deletion_failures"), + prometheus.BuildFQName(namespace, "slm_stats", "snapshot_deletion_failures_total"), "Total snapshot deletion failures", defaultPolicyLabels, nil, ),