From e3f52942a3e7f340ab5f5f798db63d8ad4a43778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 18 Jun 2025 16:12:01 +0200 Subject: [PATCH 1/7] test(prometheusreceiver): add unit test for target allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This test checks target allocator + scrape e2e. The test fails if: - Prometheus logged something above the WARN level - Prometheus reports failed scrape pools in metrics - No metrics are in the result Signed-off-by: György Krajcsovits --- .../metrics_receiver_target_allocator_test.go | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go diff --git a/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go new file mode 100644 index 0000000000000..1bde1bfdb97be --- /dev/null +++ b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go @@ -0,0 +1,182 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package prometheusreceiver + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" + promConfig "github.com/prometheus/prometheus/config" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "gopkg.in/yaml.v3" + + promTestUtil "github.com/prometheus/client_golang/prometheus/testutil" + promHTTP "github.com/prometheus/prometheus/discovery/http" + promTG "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/config/confighttp" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/receiver/receivertest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver/internal/metadata" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver/targetallocator" +) + +const exportedMetrics = ` +# HELP test_gauge0 This is my gauge +# TYPE test_gauge0 gauge +test_gauge0{label1="value1",label2="value2"} 10 +` + +func TestTargetAllocatorConfigLoad(t *testing.T) { + // Make a prometheus exporter that can serve some metrics. + mockProm := newMockPrometheus(map[string][]mockPrometheusResponse{ + "/metrics": { + { + code: 200, + data: exportedMetrics, + }, + }, + }) + t.Cleanup(func() { mockProm.srv.Close() }) + + // Fake TargetAllocator to serve discovery and targets. + tas := newMockTargetAllocator(mockProm.srv.Listener.Addr().String()) + t.Cleanup(func() { tas.srv.Close() }) + + promSDConfig := &promHTTP.SDConfig{ + RefreshInterval: model.Duration(30 * time.Second), + URL: tas.srv.URL, + } + + pCfg, err := promConfig.Load(string(""), promslog.NewNopLogger()) + require.NoError(t, err) + + config := &Config{ + PrometheusConfig: (*PromConfig)(pCfg), + StartTimeMetricRegex: "", + TargetAllocator: &targetallocator.Config{ + ClientConfig: confighttp.ClientConfig{ + Endpoint: tas.srv.URL, + }, + CollectorID: "1", + HTTPSDConfig: (*targetallocator.PromHTTPSDConfig)(promSDConfig), + Interval: 60 * time.Second, + }, + } + + cms := new(consumertest.MetricsSink) + settings := receivertest.NewNopSettings(metadata.Type) + logsOverWarn := atomic.Int64{} + settings.Logger, err = zap.NewDevelopment(zap.Hooks(func(logentry zapcore.Entry) error { + if logentry.Level >= zapcore.WarnLevel { + logsOverWarn.Add(1) + } + return nil + })) + require.NoError(t, err) + receiver, err := newPrometheusReceiver(settings, config, cms) + require.NoError(t, err, "Failed to create Prometheus receiver") + receiver.skipOffsetting = true + + require.NoError(t, receiver.Start(context.Background(), componenttest.NewNopHost()), "Failed to start Prometheus receiver") + t.Cleanup(func() { + require.NoError(t, receiver.Shutdown(context.Background())) + }) + + metricsCount := 0 + require.Eventually(t, func() bool { + metrics := cms.AllMetrics() + // Scrape was a success and we got metrics. + if len(metrics) > 0 { + metricsCount = len(metrics) + return true + } + // There was a log line above WARN level. + if logsOverWarn.Load() > 0 { + return true + } + return false + }, 30*time.Second, 100*time.Millisecond, "Failed to scrape the metrics via target allocator") + + require.Zero(t, logsOverWarn.Load(), "There are log messages over the WARN level, see logs") + + require.NoError(t, promTestUtil.GatherAndCompare(receiver.registry, bytes.NewBufferString(fmt.Sprintf(` + # TYPE prometheus_target_scrape_pools_failed_total counter + # HELP prometheus_target_scrape_pools_failed_total Total number of scrape pool creations that failed. + prometheus_target_scrape_pools_failed_total{receiver="%s"} 0 +`, receiver.settings.ID)), "prometheus_target_scrape_pools_failed_total"), "Prometheus scrape manager reports failed scrape pools") + + require.Positive(t, metricsCount, "No metrics were scraped even though successful") +} + +type mockTargetAllocator struct { + address string + srv *httptest.Server +} + +func newMockTargetAllocator(address string) *mockTargetAllocator { + s := &mockTargetAllocator{ + address: address, + } + srv := httptest.NewServer(s) + s.srv = srv + return s +} + +type sdResponse []*promTG.Group + +func (mp *mockTargetAllocator) ServeHTTP(rw http.ResponseWriter, req *http.Request) { + if strings.HasSuffix(req.URL.Path, "/scrape_configs") { + job := make(map[string]any) + job["job_name"] = "test" + job["metrics_path"] = "/metrics" + job["scrape_interval"] = "1s" + job["scrape_timeout"] = "500ms" + job["scrape_protocols"] = []string{"OpenMetricsText1.0.0, OpenMetricsText0.0.1, PrometheusText0.0.4"} + job["metric_name_validation_scheme"] = "legacy" + job["metric_name_escaping_scheme"] = "underscores" + + result := make(map[string]any) + result["test"] = job + + data, err := yaml.Marshal(&result) + if err != nil { + return + } + + rw.Write(data) + return + } + + sdResponse := sdResponse{ + { + Targets: []model.LabelSet{ + { + model.AddressLabel: model.LabelValue(mp.address), + model.SchemeLabel: "http", + }, + }, + }, + } + + data, err := json.Marshal(&sdResponse) + if err != nil { + return + } + rw.Header().Set("Content-Type", "application/json") + rw.Write(data) +} From 523ffc56ad688e80e9ee4ceeeabfa2cd8e777634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 18 Jun 2025 16:16:56 +0200 Subject: [PATCH 2/7] chore(prometheusreceiver): make the test fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- .../metrics_receiver_target_allocator_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go index 1bde1bfdb97be..667d808c71b37 100644 --- a/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go +++ b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go @@ -147,8 +147,6 @@ func (mp *mockTargetAllocator) ServeHTTP(rw http.ResponseWriter, req *http.Reque job["scrape_interval"] = "1s" job["scrape_timeout"] = "500ms" job["scrape_protocols"] = []string{"OpenMetricsText1.0.0, OpenMetricsText0.0.1, PrometheusText0.0.4"} - job["metric_name_validation_scheme"] = "legacy" - job["metric_name_escaping_scheme"] = "underscores" result := make(map[string]any) result["test"] = job From 3ef7fe2c2232b89cadb144bdec76ae3bf780398c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 18 Jun 2025 16:34:08 +0200 Subject: [PATCH 3/7] chore(prometheusreceiver): add changelog entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- ...receiver-targetallocator-unset-config.yaml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .chloggen/prometheusreceiver-targetallocator-unset-config.yaml diff --git a/.chloggen/prometheusreceiver-targetallocator-unset-config.yaml b/.chloggen/prometheusreceiver-targetallocator-unset-config.yaml new file mode 100644 index 0000000000000..ee3ebdc01dd4a --- /dev/null +++ b/.chloggen/prometheusreceiver-targetallocator-unset-config.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: prometheusreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fix invalid metric name validation error in scrape start from target allocator. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [40788] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: Prometheus made setting metric_name_validation_scheme, metric_name_escaping_scheme mandatory mandatory, use sane defaults. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] From c267d0fffe599a3cf679d5f913d1f44ac430028d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 18 Jun 2025 16:48:59 +0200 Subject: [PATCH 4/7] chore(prometheusreceiver): fix lint issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- .../metrics_receiver_target_allocator_test.go | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go index 667d808c71b37..a09f82f70004c 100644 --- a/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go +++ b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go @@ -15,14 +15,10 @@ import ( "testing" "time" + promTestUtil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" "github.com/prometheus/common/promslog" promConfig "github.com/prometheus/prometheus/config" - "go.uber.org/zap" - "go.uber.org/zap/zapcore" - "gopkg.in/yaml.v3" - - promTestUtil "github.com/prometheus/client_golang/prometheus/testutil" promHTTP "github.com/prometheus/prometheus/discovery/http" promTG "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/stretchr/testify/require" @@ -30,6 +26,9 @@ import ( "go.opentelemetry.io/collector/config/confighttp" "go.opentelemetry.io/collector/consumer/consumertest" "go.opentelemetry.io/collector/receiver/receivertest" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "gopkg.in/yaml.v3" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver/internal/metadata" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver/targetallocator" @@ -58,7 +57,7 @@ func TestTargetAllocatorConfigLoad(t *testing.T) { t.Cleanup(func() { tas.srv.Close() }) promSDConfig := &promHTTP.SDConfig{ - RefreshInterval: model.Duration(30 * time.Second), + RefreshInterval: model.Duration(45 * time.Second), URL: tas.srv.URL, } @@ -137,8 +136,6 @@ func newMockTargetAllocator(address string) *mockTargetAllocator { return s } -type sdResponse []*promTG.Group - func (mp *mockTargetAllocator) ServeHTTP(rw http.ResponseWriter, req *http.Request) { if strings.HasSuffix(req.URL.Path, "/scrape_configs") { job := make(map[string]any) @@ -156,11 +153,11 @@ func (mp *mockTargetAllocator) ServeHTTP(rw http.ResponseWriter, req *http.Reque return } - rw.Write(data) + _, _ = rw.Write(data) return } - sdResponse := sdResponse{ + response := []*promTG.Group{ { Targets: []model.LabelSet{ { @@ -171,10 +168,10 @@ func (mp *mockTargetAllocator) ServeHTTP(rw http.ResponseWriter, req *http.Reque }, } - data, err := json.Marshal(&sdResponse) + data, err := json.Marshal(&response) if err != nil { return } rw.Header().Set("Content-Type", "application/json") - rw.Write(data) + _, _ = rw.Write(data) } From f69940e61eb3c2908fe5ddba63d664cd61bfd418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 18 Jun 2025 16:55:08 +0200 Subject: [PATCH 5/7] fix(prometheusreceiver): missing defaults for metric name conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set sane defaults if target allocator didn't return value for metric_name_validation_scheme metric_name_escaping_scheme The defaults are "legacy" and "underscores" Signed-off-by: György Krajcsovits --- receiver/prometheusreceiver/targetallocator/manager.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/receiver/prometheusreceiver/targetallocator/manager.go b/receiver/prometheusreceiver/targetallocator/manager.go index 1ee8e3d1bcac2..660c316bf6115 100644 --- a/receiver/prometheusreceiver/targetallocator/manager.go +++ b/receiver/prometheusreceiver/targetallocator/manager.go @@ -154,6 +154,14 @@ func (m *Manager) sync(compareHash uint64, httpClient *http.Client) (uint64, err scrapeConfig.ScrapeFallbackProtocol = promconfig.PrometheusText0_0_4 } + if scrapeConfig.MetricNameValidationScheme == "" { + scrapeConfig.MetricNameValidationScheme = promconfig.LegacyValidationConfig + } + + if scrapeConfig.MetricNameEscapingScheme == "" { + scrapeConfig.MetricNameEscapingScheme = model.EscapeUnderscores + } + m.promCfg.ScrapeConfigs = append(m.promCfg.ScrapeConfigs, scrapeConfig) } From 62b0fd872f6273cceafdb8f814709b860d99cfcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Thu, 19 Jun 2025 10:15:51 +0200 Subject: [PATCH 6/7] fix(prometheusreceiver): call prometheus to validate scrape config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in the target allocator when we receive a scrape config. This makes sure that globals are applied and defaults are set. Signed-off-by: György Krajcsovits --- .../metrics_receiver_target_allocator_test.go | 9 +++------ .../prometheusreceiver/targetallocator/manager.go | 13 +++++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go index a09f82f70004c..ae3f6deafa824 100644 --- a/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go +++ b/receiver/prometheusreceiver/metrics_receiver_target_allocator_test.go @@ -40,7 +40,7 @@ const exportedMetrics = ` test_gauge0{label1="value1",label2="value2"} 10 ` -func TestTargetAllocatorConfigLoad(t *testing.T) { +func TestTargetAllocatorProvidesEmptyScrapeConfig(t *testing.T) { // Make a prometheus exporter that can serve some metrics. mockProm := newMockPrometheus(map[string][]mockPrometheusResponse{ "/metrics": { @@ -61,7 +61,7 @@ func TestTargetAllocatorConfigLoad(t *testing.T) { URL: tas.srv.URL, } - pCfg, err := promConfig.Load(string(""), promslog.NewNopLogger()) + pCfg, err := promConfig.Load("", promslog.NewNopLogger()) require.NoError(t, err) config := &Config{ @@ -140,10 +140,7 @@ func (mp *mockTargetAllocator) ServeHTTP(rw http.ResponseWriter, req *http.Reque if strings.HasSuffix(req.URL.Path, "/scrape_configs") { job := make(map[string]any) job["job_name"] = "test" - job["metrics_path"] = "/metrics" - job["scrape_interval"] = "1s" - job["scrape_timeout"] = "500ms" - job["scrape_protocols"] = []string{"OpenMetricsText1.0.0, OpenMetricsText0.0.1, PrometheusText0.0.4"} + // Do not set any fields in the scrape config to verify that we have sane defaults. result := make(map[string]any) result["test"] = job diff --git a/receiver/prometheusreceiver/targetallocator/manager.go b/receiver/prometheusreceiver/targetallocator/manager.go index 660c316bf6115..117c2b64095eb 100644 --- a/receiver/prometheusreceiver/targetallocator/manager.go +++ b/receiver/prometheusreceiver/targetallocator/manager.go @@ -154,12 +154,17 @@ func (m *Manager) sync(compareHash uint64, httpClient *http.Client) (uint64, err scrapeConfig.ScrapeFallbackProtocol = promconfig.PrometheusText0_0_4 } - if scrapeConfig.MetricNameValidationScheme == "" { - scrapeConfig.MetricNameValidationScheme = promconfig.LegacyValidationConfig + // TODO(krajorama): remove once https://github.com/prometheus/prometheus/issues/16750 + // is solved and we're sure we want the UTF-8 as default. + if m.promCfg.GlobalConfig.MetricNameValidationScheme == "" { + m.promCfg.GlobalConfig.MetricNameValidationScheme = promconfig.LegacyValidationConfig } - if scrapeConfig.MetricNameEscapingScheme == "" { - scrapeConfig.MetricNameEscapingScheme = model.EscapeUnderscores + // Validate the scrape config and also fill in the defaults from the global config as needed. + err = scrapeConfig.Validate(m.promCfg.GlobalConfig) + if err != nil { + m.settings.Logger.Error("Failed to validate the scrape configuration", zap.Error(err)) + return 0, err } m.promCfg.ScrapeConfigs = append(m.promCfg.ScrapeConfigs, scrapeConfig) From 4eb465376b427c4cc486cdf812e65286d2c21b82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Fri, 20 Jun 2025 16:37:01 +0200 Subject: [PATCH 7/7] Update related issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- .../prometheusreceiver-targetallocator-unset-config.yaml | 2 +- receiver/prometheusreceiver/targetallocator/manager.go | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.chloggen/prometheusreceiver-targetallocator-unset-config.yaml b/.chloggen/prometheusreceiver-targetallocator-unset-config.yaml index ee3ebdc01dd4a..a031533d2f20a 100644 --- a/.chloggen/prometheusreceiver-targetallocator-unset-config.yaml +++ b/.chloggen/prometheusreceiver-targetallocator-unset-config.yaml @@ -10,7 +10,7 @@ component: prometheusreceiver note: Fix invalid metric name validation error in scrape start from target allocator. # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. -issues: [40788] +issues: [35459,40788] # (Optional) One or more lines of additional information to render under the primary note. # These lines will be padded with 2 spaces and then inserted directly into the document. diff --git a/receiver/prometheusreceiver/targetallocator/manager.go b/receiver/prometheusreceiver/targetallocator/manager.go index 117c2b64095eb..c20fbf7f9b7e8 100644 --- a/receiver/prometheusreceiver/targetallocator/manager.go +++ b/receiver/prometheusreceiver/targetallocator/manager.go @@ -154,8 +154,10 @@ func (m *Manager) sync(compareHash uint64, httpClient *http.Client) (uint64, err scrapeConfig.ScrapeFallbackProtocol = promconfig.PrometheusText0_0_4 } - // TODO(krajorama): remove once https://github.com/prometheus/prometheus/issues/16750 - // is solved and we're sure we want the UTF-8 as default. + // TODO(krajorama): remove once + // https://github.com/prometheus/prometheus/issues/16750 is solved + // https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/35459 + // is implemented and is default. if m.promCfg.GlobalConfig.MetricNameValidationScheme == "" { m.promCfg.GlobalConfig.MetricNameValidationScheme = promconfig.LegacyValidationConfig }