diff --git a/CHANGELOG.md b/CHANGELOG.md index dd4c93ac051..97f28dcb9ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,13 +24,16 @@ Main (unreleased) - Added `send_traceparent` option for `tracing` config to enable traceparent header propagation. (@MyDigitalLife) +- Add `delay` option to `prometheus.exporter.cloudwatch` component to delay scraping of metrics to account for CloudWatch ingestion latency. (@tmeijn) + +- Export `yace_.*` metrics from the underlying YACE Exporter to `prometheus.exporter.cloudwatch`. (@tmeijn) + - (_Public Preview_) Additions to `database_observability.mysql` and `database_observability.postgres` components: - `explain_plans` - always send an explain plan log message for each query, even skipped or errored queries. (@rgeyer) ### Bugfixes - - (_Public Preview_) Additions to `database_observability.postgres` component: - `schema_details` - fixes collection of schema details for mixed case table names (@fridgepoet) diff --git a/docs/sources/reference/components/prometheus/prometheus.exporter.cloudwatch.md b/docs/sources/reference/components/prometheus/prometheus.exporter.cloudwatch.md index 88f1b4b2bd5..1c5269772bb 100644 --- a/docs/sources/reference/components/prometheus/prometheus.exporter.cloudwatch.md +++ b/docs/sources/reference/components/prometheus/prometheus.exporter.cloudwatch.md @@ -196,6 +196,7 @@ You can configure the `discovery` block one or multiple times to scrape metrics | `type` | `string` | CloudWatch service alias (`"alb"`, `"ec2"`, etc) or namespace name (`"AWS/EC2"`, `"AWS/S3"`, etc). Refer to [supported-services][] for a complete list. | | yes | | `custom_tags` | `map(string)` | Custom tags to be added as a list of key / value pairs. When exported to Prometheus format, the label name follows the following format: `custom_tag_{key}`. | `{}` | no | | `dimension_name_requirements` | `list(string)` | List of metric dimensions to query. 
Before querying metric values, the total list of metrics are filtered to only those that contain exactly this list of dimensions. An empty or undefined list results in all dimension combinations being included. | `{}` | no | +| `delay` | `duration` | Move the time window of the CloudWatch metrics query back by this duration, to allow for CloudWatch ingestion latency. | `0` | no | | `nil_to_zero` | `bool` | When `true`, `NaN` metric values are converted to 0. Individual metrics can override this value in the [metric][] block. | `true` | no | | `recently_active_only` | `bool` | Only return metrics that have been active in the last 3 hours. | `false` | no | | `search_tags` | `map(string)` | List of key / value pairs to use for tag filtering (all must match). The value can be a regular expression. | `{}` | no | @@ -293,6 +294,7 @@ You can configure the `custom_namespace` block multiple times to scrape metrics | `namespace` | `string` | CloudWatch metric namespace. | | yes | | `regions` | `list(string)` | List of AWS regions. | | yes | | `custom_tags` | `map(string)` | Custom tags to be added as a list of key / value pairs. When exported to Prometheus format, the label name follows the following format: `custom_tag_{key}`. | `{}` | no | +| `delay` | `duration` | Move the time window of the CloudWatch metrics query back by this duration, to allow for CloudWatch ingestion latency. | `0` | no | | `dimension_name_requirements` | `list(string)` | List of metric dimensions to query. Before querying metric values, the total list of metrics are filtered to only those that contain exactly this list of dimensions. An empty or undefined list results in all dimension combinations being included. | `{}` | no | | `nil_to_zero` | `bool` | When `true`, `NaN` metric values are converted to 0. Individual metrics can override this value in the [metric][] block. | `true` | no | | `recently_active_only` | `bool` | Only return metrics that have been active in the last 3 hours. 
| `false` | no | diff --git a/internal/component/prometheus/exporter/cloudwatch/config.go b/internal/component/prometheus/exporter/cloudwatch/config.go index 0741b4523c1..560f530a44d 100644 --- a/internal/component/prometheus/exporter/cloudwatch/config.go +++ b/internal/component/prometheus/exporter/cloudwatch/config.go @@ -60,6 +60,7 @@ type DiscoveryJob struct { DimensionNameRequirements []string `alloy:"dimension_name_requirements,attr,optional"` RecentlyActiveOnly bool `alloy:"recently_active_only,attr,optional"` Metrics []Metric `alloy:"metric,block"` + Delay time.Duration `alloy:"delay,attr,optional"` //TODO: Remove NilToZero, because it is deprecated upstream. NilToZero *bool `alloy:"nil_to_zero,attr,optional"` } @@ -76,6 +77,7 @@ type StaticJob struct { Namespace string `alloy:"namespace,attr"` Dimensions Dimensions `alloy:"dimensions,attr"` Metrics []Metric `alloy:"metric,block"` + Delay time.Duration `alloy:"delay,attr,optional"` /* NOTE(review): this attribute is accepted for static blocks, but no visible change forwards it to YACE and the tests expect Delay 0 for static jobs - confirm whether it should be wired through or dropped. */ //TODO: Remove NilToZero, because it is deprecated upstream. NilToZero *bool `alloy:"nil_to_zero,attr,optional"` } @@ -88,6 +90,7 @@ type CustomNamespaceJob struct { Namespace string `alloy:"namespace,attr"` RecentlyActiveOnly bool `alloy:"recently_active_only,attr,optional"` Metrics []Metric `alloy:"metric,block"` + Delay time.Duration `alloy:"delay,attr,optional"` //TODO: Remove NilToZero, because it is deprecated upstream. 
NilToZero *bool `alloy:"nil_to_zero,attr,optional"` } @@ -215,7 +218,6 @@ func convertToYACE(a Arguments) (yaceModel.JobsConfig, error) { if err != nil { return yaceModel.JobsConfig{}, err } - cloudwatch_exporter.PatchYACEDefaults(&modelConf) return modelConf, nil } @@ -246,7 +248,7 @@ func toYACEMetrics(ms []Metric, jobNilToZero *bool) []*yaceConf.Metric { for _, m := range ms { periodSeconds := int64(m.Period.Seconds()) lengthSeconds := periodSeconds - // If length is other than zero, that is, is configured, override the default period vaue + // If length is other than zero, that is, it is configured, override the default period value if m.Length != 0 { lengthSeconds = int64(m.Length.Seconds()) } @@ -266,10 +268,6 @@ func toYACEMetrics(ms []Metric, jobNilToZero *bool) []*yaceConf.Metric { Period: periodSeconds, Length: lengthSeconds, - // Delay moves back the time window for whom CloudWatch is requested data. Since we are already adjusting - // this with RoundingPeriod (see toYACEDiscoveryJob), we should omit this setting. - Delay: 0, - NilToZero: nilToZero, AddCloudwatchTimestamp: m.AddCloudwatchTimestamp, }) @@ -316,7 +314,10 @@ func toYACEDiscoveryJob(rj DiscoveryJob) *yaceConf.Job { // metrics, with the smallest period in the retrieved batch. RoundingPeriod: nil, RecentlyActiveOnly: rj.RecentlyActiveOnly, - Metrics: toYACEMetrics(rj.Metrics, nilToZero), + JobLevelMetricFields: yaceConf.JobLevelMetricFields{ + Delay: int64(rj.Delay.Seconds()), + }, + Metrics: toYACEMetrics(rj.Metrics, nilToZero), } return job } @@ -337,7 +338,10 @@ func toYACECustomNamespaceJob(cn CustomNamespaceJob) *yaceConf.CustomNamespace { // metrics, with the smallest period in the retrieved batch. 
RoundingPeriod: nil, RecentlyActiveOnly: cn.RecentlyActiveOnly, - Metrics: toYACEMetrics(cn.Metrics, nilToZero), + JobLevelMetricFields: yaceConf.JobLevelMetricFields{ + Delay: int64(cn.Delay.Seconds()), + }, + Metrics: toYACEMetrics(cn.Metrics, nilToZero), } } diff --git a/internal/component/prometheus/exporter/cloudwatch/config_test.go b/internal/component/prometheus/exporter/cloudwatch/config_test.go index 07f30f05e7e..cdbb1524d04 100644 --- a/internal/component/prometheus/exporter/cloudwatch/config_test.go +++ b/internal/component/prometheus/exporter/cloudwatch/config_test.go @@ -222,6 +222,65 @@ custom_namespace "customEC2Metrics" { } ` +const discoveryJobDelayConfig = ` +sts_region = "us-east-2" +debug = true +discovery { + type = "AWS/EC2" + regions = ["us-east-2"] + delay = "2m" + metric { + name = "CPUUtilization" + statistics = ["Average"] + period = "5m" + } + metric { + name = "NetworkIn" + statistics = ["Sum"] + period = "5m" + } +} +` + +const staticJobDelayConfig = ` +sts_region = "us-east-2" +debug = true +static "test_instance" { + regions = ["us-east-2"] + namespace = "AWS/EC2" + dimensions = { + "InstanceId" = "i-test", + } + metric { + name = "CPUUtilization" + statistics = ["Average"] + period = "5m" + } +} +` + +const customNamespaceDelayConfig = ` +sts_region = "eu-west-1" + +custom_namespace "testMetrics" { + namespace = "TestMetrics" + regions = ["us-east-1"] + delay = "30s" + + metric { + name = "metric1" + statistics = ["Average"] + period = "1m" + } + + metric { + name = "metric2" + statistics = ["Sum"] + period = "1m" + } +} +` + func TestCloudwatchComponentConfig(t *testing.T) { type testcase struct { raw string @@ -560,6 +619,110 @@ func TestCloudwatchComponentConfig(t *testing.T) { }, }, }, + "discovery job with delay": { + raw: discoveryJobDelayConfig, + expected: yaceModel.JobsConfig{ + StsRegion: "us-east-2", + DiscoveryJobs: []yaceModel.DiscoveryJob{ + { + Regions: []string{"us-east-2"}, + Roles: []yaceModel.Role{{}}, + Type: 
"AWS/EC2", + SearchTags: []yaceModel.SearchTag{}, + CustomTags: []yaceModel.Tag{}, + Metrics: []*yaceModel.MetricConfig{ + { + Name: "CPUUtilization", + Statistics: []string{"Average"}, + Period: 300, + Length: 300, + Delay: 120, // 2 minutes + NilToZero: defaultNilToZero, + }, + { + Name: "NetworkIn", + Statistics: []string{"Sum"}, + Period: 300, + Length: 300, + Delay: 120, // 2 minutes + NilToZero: defaultNilToZero, + }, + }, + RoundingPeriod: nil, + ExportedTagsOnMetrics: []string{}, + DimensionsRegexps: []yaceModel.DimensionsRegexp{ + { + Regexp: regexp.MustCompile("instance/(?P<InstanceId>[^/]+)"), + DimensionsNames: []string{"InstanceId"}, + }, + }, + }, + }, + }, + }, + "static job with delay": { + raw: staticJobDelayConfig, + expected: yaceModel.JobsConfig{ + StsRegion: "us-east-2", + StaticJobs: []yaceModel.StaticJob{ + { + Name: "test_instance", + Roles: []yaceModel.Role{{}}, + Regions: []string{"us-east-2"}, + Namespace: "AWS/EC2", + CustomTags: []yaceModel.Tag{}, + Dimensions: []yaceModel.Dimension{ + { + Name: "InstanceId", + Value: "i-test", + }, + }, + Metrics: []*yaceModel.MetricConfig{{ + Name: "CPUUtilization", + Statistics: []string{"Average"}, + Period: 300, + Length: 300, + Delay: 0, // Delay not supported for static jobs + NilToZero: defaultNilToZero, + }}, + }, + }, + }, + }, + "custom namespace job with delay": { + raw: customNamespaceDelayConfig, + expected: yaceModel.JobsConfig{ + StsRegion: "eu-west-1", + CustomNamespaceJobs: []yaceModel.CustomNamespaceJob{ + { + Name: "testMetrics", + Regions: []string{"us-east-1"}, + Roles: []yaceModel.Role{{}}, + CustomTags: []yaceModel.Tag{}, + Namespace: "TestMetrics", + Metrics: []*yaceModel.MetricConfig{ + { + Name: "metric1", + Statistics: []string{"Average"}, + Period: 60, + Length: 60, + Delay: 30, // 30 seconds + NilToZero: defaultNilToZero, + }, + { + Name: "metric2", + Statistics: []string{"Sum"}, + Period: 60, + Length: 60, + Delay: 30, // 30 seconds + NilToZero: defaultNilToZero, + }, + }, + 
RoundingPeriod: nil, + }, + }, + }, + }, } { t.Run(name, func(t *testing.T) { args := Arguments{} diff --git a/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter.go b/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter.go index 902161bb8de..7efc87cf397 100644 --- a/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter.go +++ b/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter.go @@ -72,6 +72,12 @@ func (e *exporter) MetricsHandler() (http.Handler, error) { defer e.cachingClientFactory.Clear() reg := prometheus.NewRegistry() + // Register YACE's own collectors on the per-scrape registry; a failure is logged with its cause and is not fatal. + for _, metric := range yace.Metrics { + if err := reg.Register(metric); err != nil { + e.logger.Debug("Could not register cloudwatch api metric", "err", err) + } + } err := yace.UpdateMetrics( context.Background(), e.logger, diff --git a/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter_decoupled.go b/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter_decoupled.go index e06138a04be..99f51abb3f0 100644 --- a/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter_decoupled.go +++ b/internal/static/integrations/cloudwatch_exporter/cloudwatch_exporter_decoupled.go @@ -100,6 +100,12 @@ func (e *asyncExporter) scrape(ctx context.Context) { defer e.cachingClientFactory.Clear() reg := prometheus.NewRegistry() + // Register YACE's own collectors on the per-scrape registry; a failure is logged with its cause and is not fatal. + for _, metric := range yace.Metrics { + if err := reg.Register(metric); err != nil { + e.logger.Debug("Could not register cloudwatch api metric", "err", err) + } + } err := yace.UpdateMetrics( ctx, e.logger, diff --git a/internal/static/integrations/cloudwatch_exporter/config.go b/internal/static/integrations/cloudwatch_exporter/config.go index 132427ed6f3..4ad226be822 100644 --- a/internal/static/integrations/cloudwatch_exporter/config.go +++ b/internal/static/integrations/cloudwatch_exporter/config.go @@ -70,11 +70,12 @@ type TagsPerNamespace map[string][]string type DiscoveryJob struct { InlineRegionAndRoles `yaml:",inline"` InlineCustomTags 
`yaml:",inline"` - SearchTags []Tag `yaml:"search_tags"` - Type string `yaml:"type"` - DimensionNameRequirements []string `yaml:"dimension_name_requirements"` - Metrics []Metric `yaml:"metrics"` - NilToZero *bool `yaml:"nil_to_zero,omitempty"` + SearchTags []Tag `yaml:"search_tags"` + Type string `yaml:"type"` + DimensionNameRequirements []string `yaml:"dimension_name_requirements"` + Metrics []Metric `yaml:"metrics"` + Delay time.Duration `yaml:"delay,omitempty"` + NilToZero *bool `yaml:"nil_to_zero,omitempty"` } // StaticJob will scrape metrics that match all defined dimensions. @@ -231,28 +232,10 @@ func toYACEConfig(c *Config) (yaceModel.JobsConfig, bool, error) { if err != nil { return yaceModel.JobsConfig{}, fipsEnabled, err } - PatchYACEDefaults(&modelConf) return modelConf, fipsEnabled, nil } -// PatchYACEDefaults overrides some default values YACE applies after validation. -func PatchYACEDefaults(yc *yaceModel.JobsConfig) { - // YACE doesn't allow during validation a zero-delay in each metrics scrape. Override this behaviour since it's taken - // into account by the rounding period. - // https://github.com/prometheus-community/yet-another-cloudwatch-exporter/blob/7e5949124bb5f26353eeff298724a5897de2a2a4/pkg/config/config.go#L320 - for _, job := range yc.DiscoveryJobs { - for _, metric := range job.Metrics { - metric.Delay = 0 - } - } - for _, staticConf := range yc.StaticJobs { - for _, metric := range staticConf.Metrics { - metric.Delay = 0 - } - } -} - func toYACEStaticJob(job StaticJob) *yaceConf.Static { nilToZero := job.NilToZero if nilToZero == nil { @@ -298,6 +281,9 @@ func toYACEDiscoveryJob(job *DiscoveryJob) *yaceConf.Job { // By setting RoundingPeriod to nil, the exporter will align the start and end times for retrieving CloudWatch // metrics, with the smallest period in the retrieved batch. 
RoundingPeriod: nil, + JobLevelMetricFields: yaceConf.JobLevelMetricFields{ + Delay: int64(job.Delay.Seconds()), + }, } return &yaceJob } @@ -328,10 +314,6 @@ func toYACEMetrics(metrics []Metric, jobNilToZero *bool) []*yaceConf.Metric { Period: periodSeconds, Length: lengthSeconds, - // Delay moves back the time window for whom CloudWatch is requested data. Since we are already adjusting - // this with RoundingPeriod (see toYACEDiscoveryJob), we should omit this setting. - Delay: 0, - NilToZero: nilToZero, AddCloudwatchTimestamp: &addCloudwatchTimestamp, }) diff --git a/internal/static/integrations/cloudwatch_exporter/config_test.go b/internal/static/integrations/cloudwatch_exporter/config_test.go index 5dc5cbd9cda..5321a0f127f 100644 --- a/internal/static/integrations/cloudwatch_exporter/config_test.go +++ b/internal/static/integrations/cloudwatch_exporter/config_test.go @@ -186,6 +186,50 @@ static: - Average ` +// for testing configuration with both discovery and static jobs +const configString4 = ` +sts_region: us-east-2 +discovery: + exported_tags: + AWS/EC2: + - name + - type + jobs: + - type: AWS/EC2 + search_tags: + - key: instance_type + value: spot + regions: + - us-east-2 + roles: + - role_arn: arn:aws:iam::878167871295:role/yace_testing + custom_tags: + - key: alias + value: tesis + delay: 1m + metrics: + - name: CPUUtilization + period: 5m + statistics: + - Maximum + - Average +static: + - regions: + - us-east-2 + name: custom_tesis_metrics + namespace: CoolApp + dimensions: + - name: PURCHASES_SERVICE + value: CoolService + - name: APP_VERSION + value: 1.0 + metrics: + - name: KPIs + period: 5m + statistics: + - Average +` + var ( falsePtr = false truePtr = true @@ -353,6 +397,60 @@ var expectedConfig3 = model.JobsConfig{ CustomNamespaceJobs: []model.CustomNamespaceJob(nil), } +var expectedConfig4 = model.JobsConfig{ + StsRegion: "us-east-2", + DiscoveryJobs: []model.DiscoveryJob{{ + Regions: []string{"us-east-2"}, + Type: "AWS/EC2", + Roles: 
[]model.Role{{RoleArn: "arn:aws:iam::878167871295:role/yace_testing", ExternalID: ""}}, + SearchTags: []model.SearchTag{{Key: "instance_type", Value: regexp.MustCompile("spot")}}, + CustomTags: []model.Tag{{Key: "alias", Value: "tesis"}}, + DimensionNameRequirements: []string(nil), + Metrics: []*model.MetricConfig{ + { + Name: "CPUUtilization", + Statistics: []string{"Maximum", "Average"}, + Period: 300, + Length: 300, + Delay: 60, // Delay applied from job level + NilToZero: true, + AddCloudwatchTimestamp: false, + }, + }, + RoundingPeriod: (*int64)(nil), + RecentlyActiveOnly: false, + ExportedTagsOnMetrics: []string{"name", "type"}, + IncludeContextOnInfoMetrics: false, + DimensionsRegexps: []model.DimensionsRegexp{{ + Regexp: regexp.MustCompile("instance/(?P<InstanceId>[^/]+)"), + DimensionsNames: []string{"InstanceId"}, + }}, + }}, + StaticJobs: []model.StaticJob{{ + Name: "custom_tesis_metrics", + Regions: []string{"us-east-2"}, + Roles: []model.Role{{RoleArn: "", ExternalID: ""}}, + Namespace: "CoolApp", + CustomTags: []model.Tag{}, + Dimensions: []model.Dimension{ + {Name: "PURCHASES_SERVICE", Value: "CoolService"}, + {Name: "APP_VERSION", Value: "1.0"}, + }, + Metrics: []*model.MetricConfig{ + { + Name: "KPIs", + Statistics: []string{"Average"}, + Period: 300, + Length: 300, + Delay: 0, + NilToZero: true, + AddCloudwatchTimestamp: false, + }, + }, + }}, + CustomNamespaceJobs: []model.CustomNamespaceJob(nil), +} + func TestTranslateConfigToYACEConfig(t *testing.T) { c := Config{} err := yaml.Unmarshal([]byte(configString), &c) @@ -392,6 +490,33 @@ func TestTranslateNilToZeroConfigToYACEConfig(t *testing.T) { require.EqualValues(t, truePtr, fipsEnabled) } +func TestTranslateMixedJobsConfigToYACEConfig(t *testing.T) { + c := Config{} + err := yaml.Unmarshal([]byte(configString4), &c) + require.NoError(t, err, "failed to unmarshal config") + + logger, err := logging.New(io.Discard, logging.DefaultOptions) + require.NoError(t, err) + + yaceConf, fipsEnabled, err := 
ToYACEConfig(&c, logger) + require.NoError(t, err, "failed to translate to YACE configuration") + + require.EqualValues(t, expectedConfig4, yaceConf) + require.EqualValues(t, truePtr, fipsEnabled) + + // Verify that delay is applied to discovery job metrics but remains 0 for static job metrics + for _, job := range yaceConf.DiscoveryJobs { + for _, metric := range job.Metrics { + require.Equal(t, int64(60), metric.Delay, "delay should be applied to discovery job metrics") + } + } + for _, job := range yaceConf.StaticJobs { + for _, metric := range job.Metrics { + require.Equal(t, int64(0), metric.Delay, "delay should remain 0 for static job metrics") + } + } +} + func TestCloudwatchExporterConfigInstanceKey(t *testing.T) { cfg1 := &Config{ STSRegion: "us-east-2",