diff --git a/receiver/prometheusreceiver/internal/metrics.go b/receiver/prometheusreceiver/internal/metrics.go
new file mode 100644
index 000000000000..12cbdeefae48
--- /dev/null
+++ b/receiver/prometheusreceiver/internal/metrics.go
@@ -0,0 +1,71 @@
+// Copyright The OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package internal
+
+import (
+	"go.opencensus.io/metric"
+	"go.opencensus.io/metric/metricdata"
+	"go.opencensus.io/metric/metricproducer"
+)
+
+// metricsRegistry holds the metrics that this package reports about the
+// endpoints it scrapes. init registers it with the global OpenCensus
+// metric producer manager.
+var metricsRegistry = metric.NewRegistry()
+
+// upGauge is keyed by the "instance" label: its entry for an instance is 1
+// after recordInstanceAsUp and 0 after recordInstanceAsDown.
+//
+// It is built inside a function literal so that the registration error stays
+// local instead of becoming a package-level "err" variable that any other
+// file in this package could assign to by accident.
+var upGauge = func() *metric.Float64Gauge {
+	g, err := metricsRegistry.AddFloat64Gauge(
+		"up",
+		metric.WithDescription("Whether the endpoint is alive or not"),
+		metric.WithLabelKeys("instance"),
+		metric.WithUnit(metricdata.UnitDimensionless))
+	if err != nil {
+		// Nothing else in this file can work without the gauge, so abort
+		// package initialization.
+		panic(err)
+	}
+	return g
+}()
+
+// init adds metricsRegistry to the global OpenCensus metric producer manager.
+func init() {
+	metricproducer.GlobalManager().AddProducer(metricsRegistry)
+}
+
+// recordInstanceAsUp sets the "up" gauge entry for instanceValue to 1.
+func recordInstanceAsUp(instanceValue string) {
+	setUpGauge(instanceValue, 1)
+}
+
+// recordInstanceAsDown sets the "up" gauge entry for instanceValue to 0.
+func recordInstanceAsDown(instanceValue string) {
+	setUpGauge(instanceValue, 0)
+}
+
+// setUpGauge stores value in the "up" gauge entry labelled with instanceValue.
+// It panics if the entry cannot be obtained from the gauge.
+func setUpGauge(instanceValue string, value float64) {
+	ent, err := upGauge.GetEntry(metricdata.NewLabelValue(instanceValue))
+	if err != nil {
+		panic(err)
+	}
+	ent.Set(value)
+}
diff --git a/receiver/prometheusreceiver/internal/metricsbuilder.go b/receiver/prometheusreceiver/internal/metricsbuilder.go
index 3ffe7c159d62..72fa0a19ad97 100644
--- a/receiver/prometheusreceiver/internal/metricsbuilder.go
+++ 
b/receiver/prometheusreceiver/internal/metricsbuilder.go @@ -93,29 +93,40 @@ func (b *metricBuilder) AddDataPoint(ls labels.Labels, t int64, v float64) error b.numTimeseries++ b.droppedTimeseries++ return errMetricNameNotFound + case isInternalMetric(metricName): b.hasInternalMetric = true lm := ls.Map() delete(lm, model.MetricNameLabel) + if metricName != scrapeUpMetricName { + return nil + } + // See https://www.prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series // up: 1 if the instance is healthy, i.e. reachable, or 0 if the scrape failed. - if metricName == scrapeUpMetricName && v != 1.0 { - if v == 0.0 { - b.logger.Warn("Failed to scrape Prometheus endpoint", - zap.Int64("scrape_timestamp", t), - zap.String("target_labels", fmt.Sprintf("%v", lm))) - } else { - b.logger.Warn("The 'up' metric contains invalid value", - zap.Float64("value", v), - zap.Int64("scrape_timestamp", t), - zap.String("target_labels", fmt.Sprintf("%v", lm))) - } + instanceValue := lm["instance"] + switch v { + case 1.0: // The instance is up! + recordInstanceAsUp(instanceValue) + + case 0.0: // The instance is definitely down. + recordInstanceAsDown(instanceValue) + b.logger.Warn("Failed to scrape Prometheus endpoint", + zap.Int64("scrape_timestamp", t), + zap.String("target_labels", fmt.Sprintf("%v", lm))) + + default: // We got an invalid value for "up" + recordInstanceAsDown(instanceValue) + b.logger.Warn("The 'up' metric contains invalid value", + zap.Float64("value", v), + zap.Int64("scrape_timestamp", t), + zap.String("target_labels", fmt.Sprintf("%v", lm))) } return nil + case b.useStartTimeMetric && b.matchStartTimeMetric(metricName): b.startTime = v } - b.hasData = true if b.currentMf != nil && !b.currentMf.IsSameFamily(metricName) {