diff --git a/.circleci/config.yml b/.circleci/config.yml index 3c467f05..219f765f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ executors: # This must match .promu.yml. golang: docker: - - image: circleci/golang:1.17 + - image: cimg/go:1.18 jobs: test: executor: golang diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..202ae236 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 662ea3b6..136d7d4b 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -21,6 +21,9 @@ jobs: uses: actions/setup-go@v2 with: go-version: 1.18.x + - name: Install snmp_exporter/generator dependencies + run: sudo apt-get update && sudo apt-get -y install libsnmp-dev + if: github.repository == 'prometheus/snmp_exporter' - name: Lint uses: golangci/golangci-lint-action@v3.1.0 with: diff --git a/.promu.yml b/.promu.yml index 1bf5271f..9f6d8a0d 100644 --- a/.promu.yml +++ b/.promu.yml @@ -1,6 +1,6 @@ go: # This must match .circle/config.yml. 
- version: 1.17 + version: 1.18 repository: path: github.com/prometheus-community/elasticsearch_exporter build: diff --git a/.yamllint b/.yamllint new file mode 100644 index 00000000..3878a31d --- /dev/null +++ b/.yamllint @@ -0,0 +1,28 @@ +--- +extends: default + +rules: + braces: + max-spaces-inside: 1 + level: error + brackets: + max-spaces-inside: 1 + level: error + commas: disable + comments: disable + comments-indentation: disable + document-start: disable + indentation: + spaces: consistent + indent-sequences: consistent + key-duplicates: + ignore: | + config/testdata/section_key_dup.bad.yml + line-length: disable + truthy: + ignore: | + .github/workflows/codeql-analysis.yml + .github/workflows/funcbench.yml + .github/workflows/fuzzing.yml + .github/workflows/prombench.yml + .github/workflows/golangci-lint.yml diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 9a1aff41..d325872b 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,3 +1,3 @@ -## Prometheus Community Code of Conduct +# Prometheus Community Code of Conduct -Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). +Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md). diff --git a/README.md b/README.md index de19a387..44753fb5 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ elasticsearch_exporter --help | es.no-aliases | 1.0.4rc1 | If true, exclude informational aliases metrics. | false | | es.shards | 1.0.3rc1 | If true, query stats for all indices in the cluster, including shard-level stats (implies `es.indices=true`). | false | | es.snapshots | 1.0.4rc1 | If true, query stats for the cluster snapshots. | false | +| es.slm | | If true, query stats for SLM. | false | | es.timeout | 1.0.2 | Timeout for trying to get stats from Elasticsearch. 
(ex: 20s) | 5s | | es.ca | 1.0.2 | Path to PEM file that contains trusted Certificate Authorities for the Elasticsearch connection. | | | es.client-private-key | 1.0.2 | Path to PEM file that contains the private key for client auth when connecting to Elasticsearch. | | @@ -87,6 +88,7 @@ es.indices | `indices` `monitor` (per index or `*`) | All actions that are requi es.indices_settings | `indices` `monitor` (per index or `*`) | es.shards | not sure if `indices` or `cluster` `monitor` or both | es.snapshots | `cluster:admin/snapshot/status` and `cluster:admin/repository/get` | [ES Forum Post](https://discuss.elastic.co/t/permissions-for-backup-user-with-x-pack/88057) +es.slm | `read_slm` Further Information - [Build in Users](https://www.elastic.co/guide/en/elastic-stack-overview/7.3/built-in-users.html) @@ -222,6 +224,23 @@ Further Information | elasticsearch_clusterinfo_last_retrieval_success_ts | gauge | 1 | Timestamp of the last successful cluster info retrieval | elasticsearch_clusterinfo_up | gauge | 1 | Up metric for the cluster info collector | elasticsearch_clusterinfo_version_info | gauge | 6 | Constant metric with ES version information as labels +| elasticsearch_slm_stats_up | gauge | 0 | Up metric for SLM collector +| elasticsearch_slm_stats_total_scrapes | counter | 0 | Number of scrapes for SLM collector +| elasticsearch_slm_stats_json_parse_failures | counter | 0 | JSON parse failures for SLM collector +| elasticsearch_slm_stats_retention_runs_total | counter | 0 | Total retention runs +| elasticsearch_slm_stats_retention_failed_total | counter | 0 | Total failed retention runs +| elasticsearch_slm_stats_retention_timed_out_total | counter | 0 | Total retention run timeouts +| elasticsearch_slm_stats_retention_deletion_time_seconds | gauge | 0 | Retention run deletion time +| elasticsearch_slm_stats_total_snapshots_taken_total | counter | 0 | Total snapshots taken +| elasticsearch_slm_stats_total_snapshots_failed_total | counter | 0 | Total 
snapshots failed +| elasticsearch_slm_stats_total_snapshots_deleted_total | counter | 0 | Total snapshots deleted +| elasticsearch_slm_stats_total_snapshot_deletion_failures_total | counter | 0 | Total snapshot deletion failures +| elasticsearch_slm_stats_snapshots_taken_total | counter | 1 | Snapshots taken by policy +| elasticsearch_slm_stats_snapshots_failed_total | counter | 1 | Snapshots failed by policy +| elasticsearch_slm_stats_snapshots_deleted_total | counter | 1 | Snapshots deleted by policy +| elasticsearch_slm_stats_snapshot_deletion_failures_total | counter | 1 | Snapshot deletion failures by policy +| elasticsearch_slm_stats_operation_mode | gauge | 1 | SLM operation mode (Running, stopping, stopped) + ### Alerts & Recording Rules diff --git a/SECURITY.md b/SECURITY.md index 67741f01..fed02d85 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -3,4 +3,4 @@ The Prometheus security policy, including how to report vulnerabilities, can be found here: -https://prometheus.io/docs/operating/security/ + diff --git a/collector/cluster_info.go b/collector/cluster_info.go new file mode 100644 index 00000000..3649a214 --- /dev/null +++ b/collector/cluster_info.go @@ -0,0 +1,109 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package collector + +import ( + "context" + "encoding/json" + "io" + "net/http" + "net/url" + + "github.com/blang/semver" + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" +) + +func init() { + registerCollector("cluster-info", defaultEnabled, NewClusterInfo) +} + +type ClusterInfoCollector struct { + logger log.Logger + u *url.URL + hc *http.Client +} + +func NewClusterInfo(logger log.Logger, u *url.URL, hc *http.Client) (Collector, error) { + return &ClusterInfoCollector{ + logger: logger, + u: u, + hc: hc, + }, nil +} + +var clusterInfoDesc = map[string]*prometheus.Desc{ + "version": prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "version"), + "Elasticsearch version information.", + []string{ + "cluster", + "cluster_uuid", + "build_date", + "build_hash", + "version", + "lucene_version", + }, + nil, + ), +} + +// ClusterInfoResponse is the cluster info retrievable from the / endpoint +type ClusterInfoResponse struct { + Name string `json:"name"` + ClusterName string `json:"cluster_name"` + ClusterUUID string `json:"cluster_uuid"` + Version VersionInfo `json:"version"` + Tagline string `json:"tagline"` +} + +// VersionInfo is the version info retrievable from the / endpoint, embedded in ClusterInfoResponse +type VersionInfo struct { + Number semver.Version `json:"number"` + BuildHash string `json:"build_hash"` + BuildDate string `json:"build_date"` + BuildSnapshot bool `json:"build_snapshot"` + LuceneVersion semver.Version `json:"lucene_version"` +} + +func (c *ClusterInfoCollector) Update(ctx context.Context, ch chan<- prometheus.Metric) error { + resp, err := c.hc.Get(c.u.String()) + if err != nil { + return err + } + defer resp.Body.Close() + b, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + var info ClusterInfoResponse + err = json.Unmarshal(b, &info) + if err != nil { + return err + } + + ch <- prometheus.MustNewConstMetric( + clusterInfoDesc["version"], + prometheus.GaugeValue, + 1, + 
info.ClusterName, + info.ClusterUUID, + info.Version.BuildDate, + info.Version.BuildHash, + info.Version.Number.String(), + info.Version.LuceneVersion.String(), + ) + + return nil +} diff --git a/collector/collector.go b/collector/collector.go index e31141f3..c08a9994 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -16,6 +16,8 @@ package collector import ( "context" + "errors" + "fmt" "net/http" "net/url" "sync" @@ -24,10 +26,26 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "gopkg.in/alecthomas/kingpin.v2" ) -// Namespace defines the common namespace to be used by all metrics. -const namespace = "elasticsearch" +const ( + // Namespace defines the common namespace to be used by all metrics. + namespace = "elasticsearch" + + defaultEnabled = true + // defaultDisabled = false +) + +type factoryFunc func(logger log.Logger, u *url.URL, hc *http.Client) (Collector, error) + +var ( + factories = make(map[string]factoryFunc) + initiatedCollectorsMtx = sync.Mutex{} + initiatedCollectors = make(map[string]Collector) + collectorState = make(map[string]*bool) + forcedCollectors = map[string]bool{} // collectors which have been explicitly enabled or disabled +) var ( scrapeDurationDesc = prometheus.NewDesc( @@ -50,16 +68,92 @@ type Collector interface { Update(context.Context, chan<- prometheus.Metric) error } +func registerCollector(name string, isDefaultEnabled bool, createFunc factoryFunc) { + var helpDefaultState string + if isDefaultEnabled { + helpDefaultState = "enabled" + } else { + helpDefaultState = "disabled" + } + + // Create flag for this collector + flagName := fmt.Sprintf("collector.%s", name) + flagHelp := fmt.Sprintf("Enable the %s collector (default: %s).", name, helpDefaultState) + defaultValue := fmt.Sprintf("%v", isDefaultEnabled) + + flag := kingpin.Flag(flagName, flagHelp).Default(defaultValue).Action(collectorFlagAction(name)).Bool() + collectorState[name] = flag + 
+ // Register the create function for this collector + factories[name] = createFunc +} + type ElasticsearchCollector struct { Collectors map[string]Collector logger log.Logger + esURL *url.URL + httpClient *http.Client } +type Option func(*ElasticsearchCollector) error + // NewElasticsearchCollector creates a new ElasticsearchCollector -func NewElasticsearchCollector(logger log.Logger, httpClient *http.Client, esURL *url.URL) (*ElasticsearchCollector, error) { +func NewElasticsearchCollector(logger log.Logger, filters []string, options ...Option) (*ElasticsearchCollector, error) { + e := &ElasticsearchCollector{logger: logger} + // Apply options to customize the collector + for _, o := range options { + if err := o(e); err != nil { + return nil, err + } + } + + f := make(map[string]bool) + for _, filter := range filters { + enabled, exist := collectorState[filter] + if !exist { + return nil, fmt.Errorf("missing collector: %s", filter) + } + if !*enabled { + return nil, fmt.Errorf("disabled collector: %s", filter) + } + f[filter] = true + } collectors := make(map[string]Collector) + initiatedCollectorsMtx.Lock() + defer initiatedCollectorsMtx.Unlock() + for key, enabled := range collectorState { + if !*enabled || (len(f) > 0 && !f[key]) { + continue + } + if collector, ok := initiatedCollectors[key]; ok { + collectors[key] = collector + } else { + collector, err := factories[key](log.With(logger, "collector", key), e.esURL, e.httpClient) + if err != nil { + return nil, err + } + collectors[key] = collector + initiatedCollectors[key] = collector + } + } + + e.Collectors = collectors + + return e, nil +} - return &ElasticsearchCollector{Collectors: collectors, logger: logger}, nil +func WithElasticsearchURL(esURL *url.URL) Option { + return func(e *ElasticsearchCollector) error { + e.esURL = esURL + return nil + } +} + +func WithHTTPClient(hc *http.Client) Option { + return func(e *ElasticsearchCollector) error { + e.httpClient = hc + return nil + } } // Describe 
implements the prometheus.Collector interface. @@ -89,7 +183,11 @@ func execute(ctx context.Context, name string, c Collector, ch chan<- prometheus var success float64 if err != nil { - _ = level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err) + if IsNoDataError(err) { + _ = level.Debug(logger).Log("msg", "collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err) + } else { + _ = level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err) + } success = 0 } else { _ = level.Debug(logger).Log("msg", "collector succeeded", "name", name, "duration_seconds", duration.Seconds()) @@ -98,3 +196,22 @@ func execute(ctx context.Context, name string, c Collector, ch chan<- prometheus ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name) ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name) } + +// collectorFlagAction generates a new action function for the given collector +// to track whether it has been explicitly enabled or disabled from the command line. +// A new action function is needed for each collector flag because the ParseContext +// does not contain information about which flag called the action. +// See: https://github.com/alecthomas/kingpin/issues/294 +func collectorFlagAction(collector string) func(ctx *kingpin.ParseContext) error { + return func(ctx *kingpin.ParseContext) error { + forcedCollectors[collector] = true + return nil + } +} + +// ErrNoData indicates the collector found no data to collect, but had no other error. 
+var ErrNoData = errors.New("collector returned no data") + +func IsNoDataError(err error) bool { + return errors.Is(err, ErrNoData) +} diff --git a/collector/shards.go b/collector/shards.go new file mode 100644 index 00000000..31f8934a --- /dev/null +++ b/collector/shards.go @@ -0,0 +1,177 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "path" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + defaultNodeShardLabels = []string{"node"} + + defaultNodeShardLabelValues = func(node string) []string { + return []string{ + node, + } + } +) + +// ShardResponse has shard's node and index info +type ShardResponse struct { + Index string `json:"index"` + Shard string `json:"shard"` + Node string `json:"node"` +} + +// Shards information struct +type Shards struct { + logger log.Logger + client *http.Client + url *url.URL + + nodeShardMetrics []*nodeShardMetric + jsonParseFailures prometheus.Counter +} + +type nodeShardMetric struct { + Type prometheus.ValueType + Desc *prometheus.Desc + Value func(shards float64) float64 + Labels func(node string) []string +} + +// NewShards defines Shards Prometheus metrics +func NewShards(logger log.Logger, client *http.Client, url *url.URL) *Shards { + return &Shards{ + logger: logger, + client: client, + url: url, + + nodeShardMetrics: 
[]*nodeShardMetric{ + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "node_shards", "total"), + "Total shards per node", + defaultNodeShardLabels, nil, + ), + Value: func(shards float64) float64 { + return shards + }, + Labels: defaultNodeShardLabelValues, + }}, + + jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{ + Name: prometheus.BuildFQName(namespace, "node_shards", "json_parse_failures"), + Help: "Number of errors while parsing JSON.", + }), + } +} + +// Describe Shards +func (s *Shards) Describe(ch chan<- *prometheus.Desc) { + ch <- s.jsonParseFailures.Desc() + + for _, metric := range s.nodeShardMetrics { + ch <- metric.Desc + } +} + +func (s *Shards) getAndParseURL(u *url.URL) ([]ShardResponse, error) { + res, err := s.client.Get(u.String()) + if err != nil { + return nil, fmt.Errorf("failed to get from %s://%s:%s%s: %w", + u.Scheme, u.Hostname(), u.Port(), u.Path, err) + } + + defer func() { + err = res.Body.Close() + if err != nil { + _ = level.Warn(s.logger).Log( + "msg", "failed to close http.Client", + "err", err, + ) + } + }() + + if res.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode) + } + var sfr []ShardResponse + if err := json.NewDecoder(res.Body).Decode(&sfr); err != nil { + s.jsonParseFailures.Inc() + return nil, err + } + return sfr, nil +} + +func (s *Shards) fetchAndDecodeShards() ([]ShardResponse, error) { + + u := *s.url + u.Path = path.Join(u.Path, "/_cat/shards") + q := u.Query() + q.Set("format", "json") + u.RawQuery = q.Encode() + sfr, err := s.getAndParseURL(&u) + if err != nil { + return sfr, err + } + return sfr, err +} + +// Collect number of shards on each node +func (s *Shards) Collect(ch chan<- prometheus.Metric) { + + defer func() { + ch <- s.jsonParseFailures + }() + + sr, err := s.fetchAndDecodeShards() + if err != nil { + _ = level.Warn(s.logger).Log( + "msg", "failed to fetch and decode node shards 
stats", + "err", err, + ) + return + } + + nodeShards := make(map[string]float64) + + for _, shard := range sr { + if val, ok := nodeShards[shard.Node]; ok { + nodeShards[shard.Node] = val + 1 + } else { + nodeShards[shard.Node] = 1 + } + } + + for node, shards := range nodeShards { + for _, metric := range s.nodeShardMetrics { + ch <- prometheus.MustNewConstMetric( + metric.Desc, + metric.Type, + metric.Value(shards), + metric.Labels(node)..., + ) + } + } +} diff --git a/collector/slm.go b/collector/slm.go new file mode 100644 index 00000000..84465517 --- /dev/null +++ b/collector/slm.go @@ -0,0 +1,401 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package collector + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "net/url" + "path" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" +) + +type policyMetric struct { + Type prometheus.ValueType + Desc *prometheus.Desc + Value func(policyStats PolicyStats) float64 + Labels func(policyStats PolicyStats) []string +} + +type slmMetric struct { + Type prometheus.ValueType + Desc *prometheus.Desc + Value func(slmStats SLMStatsResponse) float64 +} + +type slmStatusMetric struct { + Type prometheus.ValueType + Desc *prometheus.Desc + Value func(slmStatus SLMStatusResponse, operationMode string) float64 + Labels func(operationMode string) []string +} + +var ( + defaultPolicyLabels = []string{"policy"} + defaultPolicyLabelValues = func(policyStats PolicyStats) []string { + return []string{policyStats.Policy} + } + + statuses = []string{"RUNNING", "STOPPING", "STOPPED"} +) + +// SLM information struct +type SLM struct { + logger log.Logger + client *http.Client + url *url.URL + + up prometheus.Gauge + totalScrapes, jsonParseFailures prometheus.Counter + + slmMetrics []*slmMetric + policyMetrics []*policyMetric + slmStatusMetric *slmStatusMetric +} + +// NewSLM defines SLM Prometheus metrics +func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM { + return &SLM{ + logger: logger, + client: client, + url: url, + + up: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: prometheus.BuildFQName(namespace, "slm_stats", "up"), + Help: "Was the last scrape of the ElasticSearch SLM endpoint successful.", + }), + totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{ + Name: prometheus.BuildFQName(namespace, "slm_stats", "total_scrapes"), + Help: "Current total ElasticSearch SLM scrapes.", + }), + jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{ + Name: prometheus.BuildFQName(namespace, "slm_stats", "json_parse_failures"), + Help: "Number of errors while parsing 
JSON.", + }), + slmMetrics: []*slmMetric{ + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "retention_runs_total"), + "Total retention runs", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.RetentionRuns) + }, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "retention_failed_total"), + "Total failed retention runs", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.RetentionFailed) + }, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "retention_timed_out_total"), + "Total timed out retention runs", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.RetentionTimedOut) + }, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "retention_deletion_time_seconds"), + "Retention run deletion time", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.RetentionDeletionTimeMillis) / 1000 + }, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_taken_total"), + "Total snapshots taken", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.TotalSnapshotsTaken) + }, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_failed_total"), + "Total snapshots failed", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.TotalSnapshotsFailed) + }, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_deleted_total"), + 
"Total snapshots deleted", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.TotalSnapshotsDeleted) + }, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "total_snapshot_deletion_failures_total"), + "Total snapshot deletion failures", + nil, nil, + ), + Value: func(slmStats SLMStatsResponse) float64 { + return float64(slmStats.TotalSnapshotDeletionFailures) + }, + }, + }, + policyMetrics: []*policyMetric{ + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "snapshots_taken_total"), + "Total snapshots taken", + defaultPolicyLabels, nil, + ), + Value: func(policyStats PolicyStats) float64 { + return float64(policyStats.SnapshotsTaken) + }, + Labels: defaultPolicyLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "snapshots_failed_total"), + "Total snapshots failed", + defaultPolicyLabels, nil, + ), + Value: func(policyStats PolicyStats) float64 { + return float64(policyStats.SnapshotsFailed) + }, + Labels: defaultPolicyLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "snapshots_deleted_total"), + "Total snapshots deleted", + defaultPolicyLabels, nil, + ), + Value: func(policyStats PolicyStats) float64 { + return float64(policyStats.SnapshotsDeleted) + }, + Labels: defaultPolicyLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "snapshot_deletion_failures_total"), + "Total snapshot deletion failures", + defaultPolicyLabels, nil, + ), + Value: func(policyStats PolicyStats) float64 { + return float64(policyStats.SnapshotDeletionFailures) + }, + Labels: defaultPolicyLabelValues, + }, + }, + slmStatusMetric: &slmStatusMetric{ + Type: prometheus.GaugeValue, + 
Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "slm_stats", "operation_mode"), + "Operating status of SLM", + []string{"operation_mode"}, nil, + ), + Value: func(slmStatus SLMStatusResponse, operationMode string) float64 { + if slmStatus.OperationMode == operationMode { + return 1 + } + return 0 + }, + }, + } +} + +// Describe adds SLM metrics descriptions +func (s *SLM) Describe(ch chan<- *prometheus.Desc) { + ch <- s.slmStatusMetric.Desc + + for _, metric := range s.slmMetrics { + ch <- metric.Desc + } + + for _, metric := range s.policyMetrics { + ch <- metric.Desc + } + + ch <- s.up.Desc() + ch <- s.totalScrapes.Desc() + ch <- s.jsonParseFailures.Desc() +} + +func (s *SLM) fetchAndDecodeSLMStats() (SLMStatsResponse, error) { + var ssr SLMStatsResponse + + u := *s.url + u.Path = path.Join(u.Path, "/_slm/stats") + res, err := s.client.Get(u.String()) + if err != nil { + return ssr, fmt.Errorf("failed to get slm stats from %s://%s:%s%s: %w", + u.Scheme, u.Hostname(), u.Port(), u.Path, err) + } + + defer func() { + err = res.Body.Close() + if err != nil { + _ = level.Warn(s.logger).Log( + "msg", "failed to close http.Client", + "err", err, + ) + } + }() + + if res.StatusCode != http.StatusOK { + return ssr, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode) + } + + bts, err := ioutil.ReadAll(res.Body) + if err != nil { + s.jsonParseFailures.Inc() + return ssr, err + } + + if err := json.Unmarshal(bts, &ssr); err != nil { + s.jsonParseFailures.Inc() + return ssr, err + } + + return ssr, nil +} + +func (s *SLM) fetchAndDecodeSLMStatus() (SLMStatusResponse, error) { + var ssr SLMStatusResponse + + u := *s.url + u.Path = path.Join(u.Path, "/_slm/status") + res, err := s.client.Get(u.String()) + if err != nil { + return ssr, fmt.Errorf("failed to get slm status from %s://%s:%s%s: %w", + u.Scheme, u.Hostname(), u.Port(), u.Path, err) + } + + defer func() { + err = res.Body.Close() + if err != nil { + _ = level.Warn(s.logger).Log( + 
"msg", "failed to close http.Client", + "err", err, + ) + } + }() + + if res.StatusCode != http.StatusOK { + return ssr, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode) + } + + bts, err := ioutil.ReadAll(res.Body) + if err != nil { + s.jsonParseFailures.Inc() + return ssr, err + } + + if err := json.Unmarshal(bts, &ssr); err != nil { + s.jsonParseFailures.Inc() + return ssr, err + } + + return ssr, nil +} + +// Collect gets SLM metric values +func (s *SLM) Collect(ch chan<- prometheus.Metric) { + s.totalScrapes.Inc() + defer func() { + ch <- s.up + ch <- s.totalScrapes + ch <- s.jsonParseFailures + }() + + slmStatusResp, err := s.fetchAndDecodeSLMStatus() + if err != nil { + s.up.Set(0) + _ = level.Warn(s.logger).Log( + "msg", "failed to fetch and decode slm status", + "err", err, + ) + return + } + + slmStatsResp, err := s.fetchAndDecodeSLMStats() + if err != nil { + s.up.Set(0) + _ = level.Warn(s.logger).Log( + "msg", "failed to fetch and decode slm stats", + "err", err, + ) + return + } + + s.up.Set(1) + + for _, status := range statuses { + ch <- prometheus.MustNewConstMetric( + s.slmStatusMetric.Desc, + s.slmStatusMetric.Type, + s.slmStatusMetric.Value(slmStatusResp, status), + status, + ) + } + + for _, metric := range s.slmMetrics { + ch <- prometheus.MustNewConstMetric( + metric.Desc, + metric.Type, + metric.Value(slmStatsResp), + ) + } + + for _, metric := range s.policyMetrics { + for _, policy := range slmStatsResp.PolicyStats { + ch <- prometheus.MustNewConstMetric( + metric.Desc, + metric.Type, + metric.Value(policy), + metric.Labels(policy)..., + ) + } + } +} diff --git a/collector/slm_response.go b/collector/slm_response.go new file mode 100644 index 00000000..b1cfc1b1 --- /dev/null +++ b/collector/slm_response.go @@ -0,0 +1,42 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +// SLMStatsResponse is a representation of the SLM stats +type SLMStatsResponse struct { + RetentionRuns int64 `json:"retention_runs"` + RetentionFailed int64 `json:"retention_failed"` + RetentionTimedOut int64 `json:"retention_timed_out"` + RetentionDeletionTime string `json:"retention_deletion_time"` + RetentionDeletionTimeMillis int64 `json:"retention_deletion_time_millis"` + TotalSnapshotsTaken int64 `json:"total_snapshots_taken"` + TotalSnapshotsFailed int64 `json:"total_snapshots_failed"` + TotalSnapshotsDeleted int64 `json:"total_snapshots_deleted"` + TotalSnapshotDeletionFailures int64 `json:"total_snapshot_deletion_failures"` + PolicyStats []PolicyStats `json:"policy_stats"` +} + +// PolicyStats is a representation of SLM stats for specific policies +type PolicyStats struct { + Policy string `json:"policy"` + SnapshotsTaken int64 `json:"snapshots_taken"` + SnapshotsFailed int64 `json:"snapshots_failed"` + SnapshotsDeleted int64 `json:"snapshots_deleted"` + SnapshotDeletionFailures int64 `json:"snapshot_deletion_failures"` +} + +// SLMStatusResponse is a representation of the SLM status +type SLMStatusResponse struct { + OperationMode string `json:"operation_mode"` +} diff --git a/collector/slm_test.go b/collector/slm_test.go new file mode 100644 index 00000000..5bbea562 --- /dev/null +++ b/collector/slm_test.go @@ -0,0 +1,65 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/go-kit/log" +) + +func TestSLM(t *testing.T) { + // Testcases created using: + + // docker run -d -p 9200:9200 -e discovery.type=single-node -e path.repo=/tmp/backups docker.elastic.co/elasticsearch/elasticsearch:7.15.0-arm64 + // curl -XPUT http://127.0.0.1:9200/_snapshot/my_repository -H 'Content-Type: application/json' -d '{"type":"url","settings":{"url":"file:/tmp/backups"}}' + // curl -XPUT http://127.0.0.1:9200/_slm/policy/everything -H 'Content-Type: application/json' -d '{"schedule":"0 */15 * * * ?","name":"","repository":"my_repository","config":{"indices":".*","include_global_state":true,"ignore_unavailable":true},"retention":{"expire_after":"7d"}}' + // curl http://127.0.0.1:9200/_slm/stats (Numbers manually tweaked) + + tcs := map[string]string{ + "7.15.0": `{"retention_runs":9,"retention_failed":0,"retention_timed_out":0,"retention_deletion_time":"1.2m","retention_deletion_time_millis":72491,"total_snapshots_taken":103,"total_snapshots_failed":2,"total_snapshots_deleted":20,"total_snapshot_deletion_failures":0,"policy_stats":[{"policy":"everything","snapshots_taken":50,"snapshots_failed":2,"snapshots_deleted":20,"snapshot_deletion_failures":0}]}`, + } + for ver, out := range tcs { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintln(w, out) + })) + defer ts.Close() + + u, err := url.Parse(ts.URL) + if err != nil { + t.Fatalf("Failed to parse URL: %s", err) + } + s := 
NewSLM(log.NewNopLogger(), http.DefaultClient, u) + stats, err := s.fetchAndDecodeSLMStats() + if err != nil { + t.Fatalf("Failed to fetch or decode snapshots stats: %s", err) + } + t.Logf("[%s] SLM Response: %+v", ver, stats) + slmStats := stats + policyStats := stats.PolicyStats[0] + + if slmStats.TotalSnapshotsTaken != 103 { + t.Errorf("Bad number of total snapshots taken") + } + + if policyStats.SnapshotsTaken != 50 { + t.Errorf("Bad number of policy snapshots taken") + } + } + +} diff --git a/examples/grafana/dashboard.json b/examples/grafana/dashboard.json index 08d10084..66f2ab1a 100644 --- a/examples/grafana/dashboard.json +++ b/examples/grafana/dashboard.json @@ -1416,7 +1416,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(node_memory_MemTotal{cluster=~\"$cluster\"})", + "expr": "sum(node_memory_MemTotal_bytes{cluster=~\"$cluster\"})", "intervalFactor": 2, "legendFormat": "", "metric": "", @@ -1501,7 +1501,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(node_memory_MemFree{cluster=~\"$cluster\"})", + "expr": "sum(node_memory_MemFree_bytes{cluster=~\"$cluster\"})", "intervalFactor": 2, "legendFormat": "", "refId": "A", @@ -1585,7 +1585,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(node_memory_MemAvailable{cluster=~\"$cluster\"})", + "expr": "sum(node_memory_MemAvailable_bytes{cluster=~\"$cluster\"})", "intervalFactor": 2, "legendFormat": "", "refId": "A", @@ -1829,7 +1829,7 @@ } ], "dsType": "elasticsearch", - "expr": "avg(irate(node_cpu{cluster=~\"$cluster\"}[10s])) by(mode) *100", + "expr": "avg(irate(node_cpu_seconds_total{cluster=~\"$cluster\"}[60s])) by(mode) *100", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ mode }}", @@ -2247,4 +2247,4 @@ "title": "ElasticSearch", "uid": "n_nxrE_mk", "version": 2 -} \ No newline at end of file +} diff --git a/go.mod b/go.mod index 43b6a591..20a1a5d7 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,26 @@ module github.com/prometheus-community/elasticsearch_exporter -go 1.16 +go 
1.17 require ( github.com/blang/semver v3.5.2-0.20180723201105-3c1074078d32+incompatible github.com/go-kit/log v0.2.0 github.com/imdario/mergo v0.3.12 - github.com/prometheus/client_golang v1.11.0 - github.com/prometheus/common v0.31.1 + github.com/prometheus/client_golang v1.12.1 + github.com/prometheus/common v0.34.0 gopkg.in/alecthomas/kingpin.v2 v2.2.6 ) + +require ( + github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect + github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/go-logfmt/logfmt v0.5.1 // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect + github.com/prometheus/client_model v0.2.0 // indirect + github.com/prometheus/procfs v0.7.3 // indirect + golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 // indirect + google.golang.org/protobuf v1.26.0 // indirect +) diff --git a/go.sum b/go.sum index 85bc969a..9dd14644 100644 --- a/go.sum +++ b/go.sum @@ -47,8 +47,9 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/blang/semver v3.5.2-0.20180723201105-3c1074078d32+incompatible h1:8fBbhRkI5/0ocLFbrhPgnGUm0ogc+Gko1cRodPWDKX4= github.com/blang/semver v3.5.2-0.20180723201105-3c1074078d32+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= +github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod 
h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -100,8 +101,10 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -136,6 +139,7 @@ github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report 
v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= @@ -153,6 +157,7 @@ github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -163,8 +168,9 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= -github.com/prometheus/client_golang v1.11.0 h1:HNkLOAEQMIDv/K+04rukrLx6ch7msSRwf3/SASFAGtQ= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= +github.com/prometheus/client_golang v1.12.1 h1:ZiaPsmm9uiBeaSMRznKsCDNtPCS0T3JVDGF+06gjBzk= +github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod 
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -173,13 +179,15 @@ github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6T github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= -github.com/prometheus/common v0.31.1 h1:d18hG4PkHnNAKNMOmFuXFaiY8Us0nird/2m60uS1AMs= -github.com/prometheus/common v0.31.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= +github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= +github.com/prometheus/common v0.34.0 h1:RBmGO9d/FVjqHT0yUGQwBJhkwKV+wPCn7KGpvfab0uE= +github.com/prometheus/common v0.34.0/go.mod h1:gB3sOl7P0TvJabZpLY5uQMpUqRCPPCyRLCZYc7JZTNE= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= +github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= @@ -263,12 +271,15 @@ golang.org/x/net 
v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -312,15 +323,20 @@ golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40 h1:JWgyZ1qgdTaF3N3oxC+MdTV7qvEEgHo3otj+HB5CM7Q= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 h1:XfKQ4OlFl8okEOr5UvAqFRVj8pY/4yfcXrddB8qAbU0= +golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -442,8 +458,9 @@ google.golang.org/protobuf 
v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1 h1:7QnIQpGRHE5RnLKnESfDoxm2dTapTZua5a0kS0A+VXQ= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/main.go b/main.go index adaa47ac..b9407be0 100644 --- a/main.go +++ b/main.go @@ -85,6 +85,9 @@ func main() { esExportSnapshots = kingpin.Flag("es.snapshots", "Export stats for the cluster snapshots."). Default("false").Bool() + esExportSLM = kingpin.Flag("es.slm", + "Export stats for SLM snapshots."). + Default("false").Bool() esClusterInfoInterval = kingpin.Flag("es.clusterinfo.interval", "Cluster info update interval for the cluster label"). 
Default("5m").Duration() @@ -160,6 +163,20 @@ func main() { // version metric prometheus.MustRegister(version.NewCollector(name)) + // create the exporter + exporter, err := collector.NewElasticsearchCollector( + logger, + []string{}, + collector.WithElasticsearchURL(esURL), + collector.WithHTTPClient(httpClient), + ) + if err != nil { + _ = level.Error(logger).Log("msg", "failed to create Elasticsearch collector", "err", err) + os.Exit(1) + } + prometheus.MustRegister(exporter) + + // TODO(@sysadmind): Remove this when we have a better way to get the cluster name to down stream collectors. // cluster info retriever clusterInfoRetriever := clusterinfo.New(logger, httpClient, esURL, *esClusterInfoInterval) @@ -179,6 +196,10 @@ func main() { prometheus.MustRegister(collector.NewSnapshots(logger, httpClient, esURL)) } + if *esExportSLM { + prometheus.MustRegister(collector.NewSLM(logger, httpClient, esURL)) + } + if *esExportClusterSettings { prometheus.MustRegister(collector.NewClusterSettings(logger, httpClient, esURL)) }