Skip to content

Commit

Permalink
[connector/spanmetrics] Add feature to expire metrics (#31106)
Browse files Browse the repository at this point in the history
**Description:** 
Adds a new feature to expire metrics that are considered stale. If no
new spans are received within a given time frame, on the next export
cycle, the metrics are considered expired and will no longer be exported
by the `spanmetricsconnector`.

This intends to solve a situation where a service is no longer producing
spans (e.g. because it was removed), but the metrics for such spans keep
being produced indefinitely. See the linked issue for more details.

The feature can be configured by setting the `metrics_expiration` option. The
current behavior (metrics never expire) is kept as the default.

**Link to tracking Issue:** #30559

**Testing:** Added unit tests and tested manually as well.

**Documentation:** Updated in-code documentation and README.

---------

Signed-off-by: Matej Gera <[email protected]>
  • Loading branch information
matej-g authored Mar 12, 2024
1 parent c999301 commit 3fecbcc
Show file tree
Hide file tree
Showing 8 changed files with 265 additions and 87 deletions.
27 changes: 27 additions & 0 deletions .chloggen/spanmetrics-feature-metrics-expiration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: spanmetricsconnector

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `metrics_expiration` option to enable expiration of metrics if spans are not received within a certain time frame.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [30559]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: The feature can be configured by specifying the desired duration in the `metrics_expiration` option. By default, the expiration is disabled (set to 0).

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
2 changes: 2 additions & 0 deletions connector/spanmetricsconnector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ The following settings can be optionally configured:
One of either `AGGREGATION_TEMPORALITY_CUMULATIVE` or `AGGREGATION_TEMPORALITY_DELTA`.
- `namespace`: Defines the namespace of the generated metrics. If `namespace` is provided, the generated metric names will be prefixed with `namespace.`.
- `metrics_flush_interval` (default: `15s`): Defines the flush interval of the generated metrics.
- `metrics_expiration` (default: `0`): Defines the expiration time as `time.Duration`, after which, if no new spans are received, metrics will no longer be exported. Setting to `0` means the metrics will never expire (default behavior).
- `exemplars`: Use to configure how to attach exemplars to histograms
- `enabled` (default: `false`): enabling will add spans as Exemplars.
- `events`: Use to configure the events metric.
Expand Down Expand Up @@ -154,6 +155,7 @@ connectors:
dimensions_cache_size: 1000
aggregation_temporality: "AGGREGATION_TEMPORALITY_CUMULATIVE"
metrics_flush_interval: 15s
metrics_expiration: 5m
events:
enabled: true
dimensions:
Expand Down
12 changes: 12 additions & 0 deletions connector/spanmetricsconnector/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ type Config struct {
// MetricsFlushInterval is the time period between when metrics are flushed or emitted to the configured MetricsExporter.
MetricsFlushInterval time.Duration `mapstructure:"metrics_flush_interval"`

// MetricsExpiration is the time period after which, if no new spans are received, metrics are considered stale and will no longer be exported.
// Default value (0) means that the metrics will never expire.
MetricsExpiration time.Duration `mapstructure:"metrics_expiration"`

// Namespace is the namespace of the metrics emitted by the connector.
Namespace string `mapstructure:"namespace"`

Expand Down Expand Up @@ -127,6 +131,14 @@ func (c Config) Validate() error {
return errors.New("use either `explicit` or `exponential` buckets histogram")
}

if c.MetricsFlushInterval < 0 {
return fmt.Errorf("invalid metrics_flush_interval: %v, the duration should be positive", c.MetricsFlushInterval)
}

if c.MetricsExpiration < 0 {
return fmt.Errorf("invalid metrics_expiration: %v, the duration should be positive", c.MetricsExpiration)
}

return nil
}

Expand Down
4 changes: 4 additions & 0 deletions connector/spanmetricsconnector/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ func TestLoadConfig(t *testing.T) {
id: component.NewIDWithName(metadata.Type, "invalid_histogram_unit"),
errorMessage: "unknown Unit \"h\"",
},
{
id: component.NewIDWithName(metadata.Type, "invalid_metrics_expiration"),
errorMessage: "the duration should be positive",
},
{
id: component.NewIDWithName(metadata.Type, "exemplars_enabled"),
expected: &Config{
Expand Down
29 changes: 25 additions & 4 deletions connector/spanmetricsconnector/connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ type resourceMetrics struct {
attributes pcommon.Map
// startTimestamp captures when the first data points for this resource are recorded.
startTimestamp pcommon.Timestamp
// lastSeen captures when the last data points for this resource were recorded.
lastSeen time.Time
}

type dimension struct {
Expand Down Expand Up @@ -289,12 +291,25 @@ func (p *connectorImp) resetState() {
p.resourceMetrics.RemoveEvictedItems()
p.metricKeyToDimensions.RemoveEvictedItems()

// Exemplars are only relevant to this batch of traces, so must be cleared within the lock
if p.config.Histogram.Disable {
// If no histogram and no metrics expiration is configured, we can skip the remaining operations.
// Enabling either of these features requires iterating over the resource metrics and operating on each one.
if p.config.Histogram.Disable && p.config.MetricsExpiration == 0 {
return
}
p.resourceMetrics.ForEach(func(_ resourceKey, m *resourceMetrics) {
m.histograms.Reset(true)

now := time.Now()
p.resourceMetrics.ForEach(func(k resourceKey, m *resourceMetrics) {
// Exemplars are only relevant to this batch of traces, so must be cleared within the lock
if !p.config.Histogram.Disable {
m.histograms.Reset(true)
}

// If metrics expiration is configured, remove metrics that haven't been seen for longer than the expiration period.
if p.config.MetricsExpiration > 0 {
if now.Sub(m.lastSeen) >= p.config.MetricsExpiration {
p.resourceMetrics.Remove(k)
}
}
})

}
Expand Down Expand Up @@ -425,6 +440,12 @@ func (p *connectorImp) getOrCreateResourceMetrics(attr pcommon.Map) *resourceMet
}
p.resourceMetrics.Add(key, v)
}

// If expiration is enabled, track the last seen time.
if p.config.MetricsExpiration > 0 {
v.lastSeen = time.Now()
}

return v
}

Expand Down
Loading

0 comments on commit 3fecbcc

Please sign in to comment.