From 6068c5d69ddd00517d4beb4b6676b0bca875f7b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Fri, 31 Oct 2025 09:22:02 +0100 Subject: [PATCH 1/2] [receiver/prometheusreceiver] fix staleness tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the dependency upgrade #43890 there was the PR https://github.com/prometheus/prometheus/pull/16429 which changed the provisions for staleness tracking. Now the code only does the tracking if the series was successfully appended in storage. This is indicated by a non-zero storage reference returned by the appender. Since we used to return 0 in all cases, the staleness tracking is now broken. Solution is to return a fake reference of 1, just to indicate success. Alternative solution would be to make a unique reference for each series label set, but that incurs an overhead which we can avoid. Signed-off-by: György Krajcsovits --- .../prometheus-receiver-fix-stalenss.yaml | 27 +++++++++++++++++++ .../internal/staleness_end_to_end_test.go | 1 - .../internal/transaction.go | 14 ++++++++-- .../metrics_receiver_non_numerical_test.go | 1 - .../metrics_receiver_protobuf_test.go | 1 - 5 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 .chloggen/prometheus-receiver-fix-stalenss.yaml diff --git a/.chloggen/prometheus-receiver-fix-stalenss.yaml b/.chloggen/prometheus-receiver-fix-stalenss.yaml new file mode 100644 index 0000000000000..d9c1c94ee8b92 --- /dev/null +++ b/.chloggen/prometheus-receiver-fix-stalenss.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: receiver/prometheus + +# A brief description of the change. 
Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fix missing staleness tracking, which caused "no recorded value" data points not to be emitted. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [43893] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/receiver/prometheusreceiver/internal/staleness_end_to_end_test.go b/receiver/prometheusreceiver/internal/staleness_end_to_end_test.go index a400fc5492620..8e25918fe220e 100644 --- a/receiver/prometheusreceiver/internal/staleness_end_to_end_test.go +++ b/receiver/prometheusreceiver/internal/staleness_end_to_end_test.go @@ -43,7 +43,6 @@ import ( // Prometheus remotewrite exporter that staleness markers are emitted per timeseries. 
// See https://github.com/open-telemetry/opentelemetry-collector/issues/3413 func TestStalenessMarkersEndToEnd(t *testing.T) { - t.Skip("Skipping test until https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/43893 is resolved") if testing.Short() { t.Skip("This test can take a long time") } diff --git a/receiver/prometheusreceiver/internal/transaction.go b/receiver/prometheusreceiver/internal/transaction.go index 1d68db9b7f2d6..0dc8742a8dc26 100644 --- a/receiver/prometheusreceiver/internal/transaction.go +++ b/receiver/prometheusreceiver/internal/transaction.go @@ -187,9 +187,14 @@ func (t *transaction) Append(_ storage.SeriesRef, ls labels.Labels, atMs int64, err = curMF.addSeries(seriesRef, metricName, ls, atMs, val) if err != nil { t.logger.Warn("failed to add datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls)) + // never return errors, as that fails the whole scrape + // return ref==0 indicating that the series was not added + return 0, nil } - return 0, nil // never return errors, as that fails the whole scrape + // never return errors, as that fails the whole scrape + // return ref==1 indicating that the series was added and needs staleness tracking + return 1, nil } // detectAndStoreNativeHistogramStaleness returns true if it detects @@ -350,9 +355,14 @@ func (t *transaction) AppendHistogram(_ storage.SeriesRef, ls labels.Labels, atM } if err != nil { t.logger.Warn("failed to add histogram datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls)) + // never return errors, as that fails the whole scrape + // return ref==0 indicating that the series was not added + return 0, nil } - return 0, nil // never return errors, as that fails the whole scrape + // never return errors, as that fails the whole scrape + // return ref==1 indicating that the series was added and needs staleness tracking + return 1, nil } func (t *transaction) AppendCTZeroSample(_ storage.SeriesRef, 
ls labels.Labels, atMs, ctMs int64) (storage.SeriesRef, error) { diff --git a/receiver/prometheusreceiver/metrics_receiver_non_numerical_test.go b/receiver/prometheusreceiver/metrics_receiver_non_numerical_test.go index e646b8fd99b64..11564da6062c9 100644 --- a/receiver/prometheusreceiver/metrics_receiver_non_numerical_test.go +++ b/receiver/prometheusreceiver/metrics_receiver_non_numerical_test.go @@ -46,7 +46,6 @@ var totalScrapes = 10 // TestStaleNaNs validates that staleness marker gets generated when the timeseries is no longer present func TestStaleNaNs(t *testing.T) { - t.Skip("Skipping test until https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/43893 is resolved") var mockResponses []mockPrometheusResponse for i := range totalScrapes { if i%2 == 0 { diff --git a/receiver/prometheusreceiver/metrics_receiver_protobuf_test.go b/receiver/prometheusreceiver/metrics_receiver_protobuf_test.go index 5a9545478450f..5c4caf5f22c68 100644 --- a/receiver/prometheusreceiver/metrics_receiver_protobuf_test.go +++ b/receiver/prometheusreceiver/metrics_receiver_protobuf_test.go @@ -529,7 +529,6 @@ func TestNativeVsClassicHistogramScrapeViaProtobuf(t *testing.T) { } func TestStaleExponentialHistogram(t *testing.T) { - t.Skip("Skipping test until https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/43893 is resolved") mf := &dto.MetricFamily{ Name: "test_counter", Type: dto.MetricType_COUNTER, From 320ea0000db2282dc0c7f844b88309db6641f8f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Fri, 31 Oct 2025 10:04:53 +0100 Subject: [PATCH 2/2] Follow up change in series tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are now correctly tracking what series were added to "storage". Which means that the number of series added is only 13 on the first scrape. 
Debug logs: Running tool: /home/krajo/opt/go/bin/go test -timeout 30s -run ^TestEndToEndSummarySupport$ github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusexporter number of series added: 13 number of series added: 0 number of series added: 0 number of series added: 0 number of series added: 0 number of series added: 0 number of series added: 0 number of series added: 0 Signed-off-by: György Krajcsovits --- exporter/prometheusexporter/end_to_end_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/prometheusexporter/end_to_end_test.go b/exporter/prometheusexporter/end_to_end_test.go index dffd799dd1c19..928eb722d34f8 100644 --- a/exporter/prometheusexporter/end_to_end_test.go +++ b/exporter/prometheusexporter/end_to_end_test.go @@ -138,7 +138,7 @@ func TestEndToEndSummarySupport(t *testing.T) { `test_scrape_samples_scraped.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". 13 .*`, `. HELP test_scrape_series_added The approximate number of new series in this scrape`, `. TYPE test_scrape_series_added gauge`, - `test_scrape_series_added.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". 13 .*`, + `test_scrape_series_added.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". (0|13) .*`, `. HELP test_up The scraping was successful`, `. 
TYPE test_up gauge`, `test_up.instance="127.0.0.1:.*",job="otel-collector",otel_scope_name=\"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver\",otel_scope_schema_url=\"\",otel_scope_version=\"latest\". 1 .*`,