diff --git a/.chloggen/config.yaml b/.chloggen/config.yaml index 9cc37469bd91d..3b148bad0f21f 100644 --- a/.chloggen/config.yaml +++ b/.chloggen/config.yaml @@ -215,6 +215,7 @@ components: - processor/scalewaydetector - processor/schema - processor/span + - processor/spanpruning - processor/sumologic - processor/tail_sampling - processor/transform diff --git a/.chloggen/spanpruning-processor.yaml b/.chloggen/spanpruning-processor.yaml new file mode 100644 index 0000000000000..bf6c5ca9576e7 --- /dev/null +++ b/.chloggen/spanpruning-processor.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: new_component + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: processor/spanpruning + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add spanpruning processor for intelligent trace data reduction through span aggregation + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [3] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [user] diff --git a/processor/spanpruningprocessor/Makefile b/processor/spanpruningprocessor/Makefile new file mode 100644 index 0000000000000..ded7a36092dc3 --- /dev/null +++ b/processor/spanpruningprocessor/Makefile @@ -0,0 +1 @@ +include ../../Makefile.Common diff --git a/processor/spanpruningprocessor/README.md b/processor/spanpruningprocessor/README.md new file mode 100644 index 0000000000000..2587ab2c00f12 --- /dev/null +++ b/processor/spanpruningprocessor/README.md @@ -0,0 +1,614 @@ +# Span Pruning Processor + + +| Status | | +| ------------- |-----------| +| Stability | [alpha]: traces | +| Distributions | [contrib] | +| Issues | [![Open issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aopen%20label%3Aprocessor%2Fspanpruning%20&label=open&color=orange&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Aprocessor%2Fspanpruning) [![Closed issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aclosed%20label%3Aprocessor%2Fspanpruning%20&label=closed&color=blue&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aclosed+is%3Aissue+label%3Aprocessor%2Fspanpruning) | +| Code coverage | [![codecov](https://codecov.io/github/open-telemetry/opentelemetry-collector-contrib/graph/main/badge.svg?component=processor_spanpruning)](https://app.codecov.io/gh/open-telemetry/opentelemetry-collector-contrib/tree/main/?components%5B0%5D=processor_spanpruning&displayType=list) | +| [Code Owners](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/CONTRIBUTING.md#becoming-a-code-owner) | [@portertech](https://www.github.com/portertech), [@csmarchbanks](https://www.github.com/csmarchbanks) | + +[alpha]: 
https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-stability.md#alpha +[contrib]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib + + +## Overview + +The Span Pruning Processor identifies duplicate or similar leaf spans within a single trace, groups them, and replaces each group with a single aggregated summary span. When leaf spans are aggregated, the processor also recursively aggregates their parent spans if all children of those parents are being aggregated. + +**Leaf spans** are spans that are not referenced as a parent by any other span in the trace. They typically represent the last actions in an execution call stack (e.g., individual database queries, HTTP calls to external services). + +Spans are grouped by: +1. **Span name** - spans must have the same name +2. **Span kind** - spans must have the same kind (Internal, Server, Client, Producer, Consumer) +3. **Status code** - spans must have the same status (OK, Error, or Unset) +4. **TraceState** - spans must have identical TraceState values (for Consistent Probability Sampling compatibility) +5. **Configured attributes** - spans must have matching values for attributes specified in `group_by_attributes` +6. **Parent span name** - leaf spans must share the same parent span name to be grouped together + +Parent spans are eligible for aggregation when all of their children are aggregated, they share the same name, kind, and status code, and they are not root spans. + +Optionally, the processor can detect **duration outliers** using statistical methods (IQR or MAD) and either annotate summary spans with outlier correlations or **preserve outlier spans** as individual spans for debugging while still aggregating normal spans. + +This processor is useful for reducing trace data volume while preserving meaningful information about repeated operations. 
+ +## Use Cases + +- **Database query optimization**: When an application makes many similar database queries (e.g., N+1 queries), aggregate them into a single summary span +- **Batch operations**: Consolidate many similar leaf operations into a single representative span +- **Cost reduction**: Reduce trace storage costs by eliminating redundant span data + +## Configuration + +```yaml +processors: + spanpruning: + # Attributes to use for grouping similar leaf spans (supports glob patterns) + # Spans with the same name AND same values for matching attributes will be grouped + # Examples: + # - "db.*" matches db.operation, db.name, db.statement, etc. + # - "http.request.*" matches http.request.method, http.request.header, etc. + # - "db.operation" matches only the exact key "db.operation" + group_by_attributes: + - "db.*" + - "http.method" + + # Minimum number of similar leaf spans required before aggregation + # Default: 5 + min_spans_to_aggregate: 3 + + # Maximum depth of parent span aggregation above leaf spans + # 0 = only aggregate leaf spans (no parent aggregation) + # -1 = unlimited depth + # Default: 1 + max_parent_depth: 1 + + # Prefix for aggregation statistics attributes + # Default: "aggregation." + aggregation_attribute_prefix: "batch." + + # Upper bounds for histogram buckets (latency distribution) + # Default: [5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2.5s, 5s, 10s] + # Set to empty list to disable histogram + aggregation_histogram_buckets: [10ms, 50ms, 100ms, 500ms, 1s] + + # Enable attribute loss analysis during aggregation + # Default: false (reduces telemetry overhead) + # When enabled, analyzes attribute differences, records metrics, and adds summary attributes + enable_attribute_loss_analysis: false + + # Attribute loss exemplar sampling rate + # Fraction of attribute-loss metric recordings that include trace exemplars. 
+ # Range: 0.0 (disabled) to 1.0 (always) + # Default: 0.01 (1%) + attribute_loss_exemplar_sample_rate: 0.01 + + # Enable measurement of serialized trace sizes before and after pruning + # When enabled, records bytes_received and bytes_emitted metrics + # This requires serializing the trace data which can be expensive for large batches + # Default: false + enable_bytes_metrics: false + + # Enable IQR or MAD outlier detection and attribute correlation + # When enabled, adds duration_median_ns and outlier_correlated_attributes + # to summary spans + # Default: false + enable_outlier_analysis: false + + # Outlier analysis configuration (optional) + outlier_analysis: + # Statistical method for outlier detection + # "iqr" (default): Interquartile Range method + # "mad": Median Absolute Deviation method (more robust to extreme outliers) + method: iqr + + # IQR multiplier for outlier detection threshold (when method=iqr) + # Outliers are spans with duration > Q3 + (iqr_multiplier * IQR) + # Common values: 1.5 (standard), 3.0 (extreme only) + # Default: 1.5 + iqr_multiplier: 1.5 + + # MAD multiplier for outlier detection threshold (when method=mad) + # Outliers are spans with duration > median + (mad_multiplier * MAD * 1.4826) + # Common values: 2.5-3.0 (standard), 3.5+ (extreme only) + # Default: 3.0 + mad_multiplier: 3.0 + + # Minimum group size for reliable outlier detection (IQR or MAD) + # Groups smaller than this skip outlier analysis + # Must be at least 4 (need quartiles) + # Default: 7 + min_group_size: 7 + + # Minimum fraction of outliers that must share an attribute value + # for it to be reported as correlated + # Range: (0.0, 1.0] + # Default: 0.75 (75% of outliers must share the value) + correlation_min_occurrence: 0.75 + + # Maximum fraction of normal spans that can have the correlated value + # Lower values mean stronger signal + # Range: [0.0, 1.0) + # Default: 0.25 (at most 25% of normal spans can have the value) + correlation_max_normal_occurrence: 0.25 + + # Maximum 
correlated attributes to report in summary span attribute + # Default: 5 + max_correlated_attributes: 5 + + # Preserve outlier spans as individual spans instead of aggregating + # When true, only normal spans are aggregated; outliers remain in the trace + # Default: false + preserve_outliers: false + + # Maximum number of outlier spans to preserve per aggregation group + # Spans are selected by most extreme duration first + # 0 = preserve all detected outliers + # Default: 2 + max_preserved_outliers: 2 + + # Only preserve outliers when a strong attribute correlation is found + # This avoids preserving outliers that are just random variance + # Default: false + preserve_only_with_correlation: false +``` + +## Configuration Options + +| Field | Type | Default | Description | +|-----|--|---------|-------| +| `group_by_attributes` | []string | [] | Attribute patterns for grouping (supports glob patterns like `db.*`) | +| `min_spans_to_aggregate` | int | 5 | Minimum group size before aggregation occurs | +| `max_parent_depth` | int | 1 | Max depth of parent aggregation (0=none, -1=unlimited) | +| `aggregation_attribute_prefix` | string | "aggregation." | Prefix for aggregation statistics attributes | +| `aggregation_histogram_buckets` | []time.Duration | `[5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2.5s, 5s, 10s]` | Upper bounds for histogram buckets | +| `enable_attribute_loss_analysis` | bool | false | Enable attribute loss analysis (adds metrics and span attributes showing attribute differences) | +| `attribute_loss_exemplar_sample_rate` | float64 | 0.01 | Fraction of attribute-loss metric recordings that include trace exemplars (0.0–1.0). Only applies when `enable_attribute_loss_analysis` is true. 
| +| `enable_bytes_metrics` | bool | false | Enable measurement of serialized trace sizes (bytes_received/bytes_emitted metrics) | +| `enable_outlier_analysis` | bool | false | Enable outlier detection and correlation analysis | +| `outlier_analysis.method` | string | "iqr" | Statistical method: "iqr" or "mad" | +| `outlier_analysis.iqr_multiplier` | float64 | 1.5 | IQR threshold multiplier (when method=iqr) | +| `outlier_analysis.mad_multiplier` | float64 | 3.0 | MAD threshold multiplier (when method=mad) | +| `outlier_analysis.min_group_size` | int | 7 | Minimum group size for outlier analysis | +| `outlier_analysis.correlation_min_occurrence` | float64 | 0.75 | Minimum outlier occurrence fraction for correlation | +| `outlier_analysis.correlation_max_normal_occurrence` | float64 | 0.25 | Maximum normal occurrence fraction for correlation | +| `outlier_analysis.max_correlated_attributes` | int | 5 | Maximum correlated attributes to report | +| `outlier_analysis.preserve_outliers` | bool | false | Keep outliers as individual spans instead of aggregating | +| `outlier_analysis.max_preserved_outliers` | int | 2 | Max outliers to preserve per group (0=preserve all) | +| `outlier_analysis.preserve_only_with_correlation` | bool | false | Only preserve outliers if a strong correlation is found | + +### Glob Pattern Support + +The `group_by_attributes` field supports glob patterns for matching attribute keys: + +| Pattern | Matches | +|-----|--| +| `db.*` | `db.operation`, `db.name`, `db.statement`, etc. | +| `http.request.*` | `http.request.method`, `http.request.header.content-type`, etc. | +| `rpc.*` | `rpc.method`, `rpc.service`, `rpc.system`, etc. | +| `db.operation` | Only the exact key `db.operation` | + +When multiple attributes match a pattern, they are all included in the grouping key (sorted alphabetically for consistency). 
+ +## Summary Span + +When spans are aggregated, the summary span includes: + +### Properties +- **Name**: Original span name (e.g., `SELECT`) +- **TraceID**: Same as original spans +- **SpanID**: Newly generated unique ID +- **ParentSpanID**: Same as original spans (common parent) +- **Kind**: Same as template span (inherited from slowest span) +- **StartTimestamp**: Earliest start time of all spans in the group +- **EndTimestamp**: Latest end time of all spans in the group +- **Status**: Same as original spans (spans are grouped by status code) +- **TraceState**: Inherited from the template span (preserved for Consistent Probability Sampling compatibility) +- **Attributes**: Inherited from the slowest span in the group + +> **Note**: The summary span's duration (`EndTimestamp - StartTimestamp`) represents the total time window covered by all aggregated spans, which may exceed `duration_max_ns`. For example, if spans overlap or are staggered, the time range can be larger than any individual span's duration. Use `duration_max_ns` to find the slowest individual operation. + +### What Gets Aggregated Away + +When spans are aggregated into a summary span, the following data from non-template spans is **lost**: + +| Data | Behavior | +|------|----------| +| **Span Events** | Only the template (slowest) span's events are preserved | +| **Span Links** | Only the template span's links are preserved | +| **Attributes** | Non-matching attribute values are lost (see [attribute loss analysis](#optional-attribute-loss-metrics)) | +| **Individual Timestamps** | Original start/end times replaced by the group's time range | +| **SpanIDs** | Original SpanIDs are replaced by a single summary SpanID | + +To understand attribute loss, enable `enable_attribute_loss_analysis: true` which adds `diverse_attributes` and `missing_attributes` to summary spans. 
+ +### Aggregation Attributes +The following attributes are added to the summary span (shown with default `aggregation_attribute_prefix: "aggregation."`): + +| Attribute | Type | Description | +|-----------|------|-------------| +| `is_summary` | bool | Always `true` to identify summary spans | +| `span_count` | int64 | Number of spans that were aggregated | +| `duration_min_ns` | int64 | Minimum duration in nanoseconds | +| `duration_max_ns` | int64 | Maximum duration in nanoseconds | +| `duration_avg_ns` | int64 | Average duration in nanoseconds | +| `duration_total_ns` | int64 | Total duration in nanoseconds | +| `histogram_bucket_bounds_s` | []float64 | Bucket upper bounds in seconds (excludes +Inf) | +| `histogram_bucket_counts` | []int64 | Cumulative count per bucket (includes +Inf bucket) | + +#### Optional Outlier Analysis Attributes + +When `enable_outlier_analysis: true`, the following additional attributes are added: + +| Attribute | Type | Description | +|-----------|------|-------------| +| `duration_median_ns` | int64 | Median duration (more robust than average for skewed distributions) | +| `outlier_correlated_attributes` | string | Attributes that distinguish outliers from normal spans (format: `key=value(outlier%/normal%), ...`) | + +### Histogram Buckets + +The histogram provides a latency distribution of the aggregated spans. The buckets are cumulative, meaning each bucket count includes all spans with duration less than or equal to the bucket boundary. 
+ +**Example** with buckets `[10ms, 50ms, 100ms]` and 5 spans with durations `[5ms, 15ms, 25ms, 75ms, 150ms]`: +- `histogram_bucket_bounds_s`: `[0.01, 0.05, 0.1]` +- `histogram_bucket_counts`: `[1, 3, 4, 5]` + - Bucket 0 (≤10ms): 1 span (5ms) + - Bucket 1 (≤50ms): 3 spans (5ms, 15ms, 25ms) + - Bucket 2 (≤100ms): 4 spans (5ms, 15ms, 25ms, 75ms) + - Bucket 3 (+Inf): 5 spans (all) + +### Outlier Analysis (Optional) + +When `enable_outlier_analysis: true`, the processor detects duration outliers and identifies attributes that correlate with slow spans. + +#### Detection Methods + +The processor supports two statistical methods for outlier detection: + +| Method | Formula | Characteristics | +|--------|---------|----------------| +| **IQR** (default) | `threshold = Q3 + (multiplier × IQR)` | Standard method; sensitive to moderate outliers; uses quartiles | +| **MAD** | `threshold = median + (multiplier × MAD × 1.4826)` | More robust to extreme outliers; uses median | + +**When to use each:** + +- **IQR**: Best for typical distributions with moderate outliers. Standard choice for most use cases. +- **MAD**: Better when you have extreme outliers that would skew IQR calculations, or when you need more stable detection thresholds. + +#### How It Works + +**IQR (Interquartile Range) Method:** +1. Sort spans by duration +2. Calculate Q1 (25th percentile) and Q3 (75th percentile) +3. Calculate IQR = Q3 - Q1 +4. Flag spans with duration > Q3 + (iqr_multiplier × IQR) as outliers + +**MAD (Median Absolute Deviation) Method:** +1. Sort spans by duration and find the median +2. Calculate |duration - median| for each span +3. MAD = median of those deviations +4. 
Flag spans with duration > median + (mad_multiplier × MAD × 1.4826) as outliers + +*Note: The 1.4826 scale factor makes MAD comparable to standard deviation for normal distributions.* + +**Attribute Correlation** (same for both methods): +- Compare attribute values between outliers and normal spans +- Find attribute values that appear frequently in outliers but rarely in normal spans +- Report the strongest correlations based on the configured thresholds + +#### Configuration Example + +```yaml +processors: + spanpruning: + enable_outlier_analysis: true + outlier_analysis: + method: iqr # or "mad" for more robustness + iqr_multiplier: 1.5 # Standard outlier threshold (IQR method) + mad_multiplier: 3.0 # Standard outlier threshold (MAD method) + min_group_size: 7 # Skip groups with <7 spans + correlation_min_occurrence: 0.75 # 75% of outliers must share value + correlation_max_normal_occurrence: 0.25 # <25% of normal spans can have it + max_correlated_attributes: 5 # Report top 5 correlations +``` + +#### Example Output + +``` +SELECT (summary, span_count: 20) + aggregation.duration_avg_ns: 45000000 + aggregation.duration_median_ns: 8000000 + aggregation.outlier_correlated_attributes: "db.cache_hit=false(100%/0%), db.shard=7(80%/10%)" +``` + +**Interpretation:** +- **Median vs Avg**: Large difference (8ms vs 45ms) indicates outliers are skewing the average +- **Primary correlation**: All outliers (100%) had `cache_hit=false`, while 0% of normal spans did +- **Secondary correlation**: 80% of outliers hit shard 7, but only 10% of normal spans did + +This helps identify root causes of latency issues: +- Cache misses +- Specific database shards +- Failed retries +- Timeout scenarios + +#### When to Use + +- **Enable** when you need to understand why some operations are slow +- **Disable** (default) to minimize overhead when outlier analysis isn't needed +- Works best with groups of 10+ spans for statistical reliability + +#### Performance Impact + +- **Computational 
overhead**: Sorts durations, calculates quartiles (IQR) or median deviations (MAD), counts attribute occurrences +- **No overhead when disabled**: No sorting or calculations are performed +- **Recommended**: Use `min_group_size: 7` or higher to skip analysis on small groups + +### Preserving Outlier Spans (Optional) + +When `outlier_analysis.preserve_outliers: true`, detected outlier spans are **kept as individual spans** instead of being aggregated. This provides: + +- **Full visibility** into slow operations for debugging +- **Preserved context**: Original attributes, events, and links remain intact +- **Selective aggregation**: Only prune repetitive normal spans + +#### Configuration + +```yaml +processors: + spanpruning: + enable_outlier_analysis: true + outlier_analysis: + preserve_outliers: true # Keep outliers as individual spans + max_preserved_outliers: 2 # Keep top 2 slowest outliers per group + preserve_only_with_correlation: false # Preserve even without correlation +``` + +#### Configuration Options + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `preserve_outliers` | bool | false | Keep outliers as individual spans instead of aggregating | +| `max_preserved_outliers` | int | 2 | Max outliers to preserve per group (0=preserve all detected) | +| `preserve_only_with_correlation` | bool | false | Only preserve outliers if a strong attribute correlation is found | + +#### Example Output + +**Before** (10 similar SELECT spans, 2 are outliers): +``` +handler +├── SELECT - 5ms (normal) +├── SELECT - 6ms (normal) +├── SELECT - 7ms (normal) +├── SELECT - 8ms (normal) +├── SELECT - 9ms (normal) +├── SELECT - 10ms (normal) +├── SELECT - 11ms (normal) +├── SELECT - 12ms (normal) +├── SELECT - 500ms (outlier, cache_hit=false) +└── SELECT - 600ms (outlier, cache_hit=false) +``` + +**After** (with `preserve_outliers: true`, `max_preserved_outliers: 2`): +``` +handler +├── SELECT (summary, span_count=8) ← Normal spans aggregated +│ - 
aggregation.preserved_outlier_count: 2 +│ - aggregation.outlier_correlated_attributes: "cache_hit=false(100%/0%)" +├── SELECT - 500ms ← Outlier preserved +│ - aggregation.is_preserved_outlier: true +│ - aggregation.summary_span_id: "abc123" +│ - cache_hit: false +└── SELECT - 600ms ← Outlier preserved + - aggregation.is_preserved_outlier: true + - aggregation.summary_span_id: "abc123" + - cache_hit: false +``` + +#### Summary Span Attributes (When Preserving Outliers) + +| Attribute | Type | Description | +|-----------|------|-------------| +| `preserved_outlier_count` | int64 | Number of outlier spans preserved | +| `preserved_outlier_span_ids` | []string | SpanIDs of preserved outliers | + +#### Preserved Outlier Span Attributes + +| Attribute | Type | Description | +|-----------|------|-------------| +| `is_preserved_outlier` | bool | Identifies span as a preserved outlier | +| `summary_span_id` | string | SpanID of the associated summary span | + +#### Behavior Notes + +- **Parent aggregation**: Parents can still be aggregated if all their children are either aggregated or preserved as outliers +- **Skip aggregation**: If preserving outliers leaves too few normal spans (below `min_spans_to_aggregate`), the entire group is left unchanged +- **Selection order**: Outliers are preserved starting with the most extreme (longest duration) first + +## Pipeline Placement + +This processor is designed to work best when placed after processors that ensure complete traces are available: + +```yaml +service: + pipelines: + traces: + receivers: [otlp] + processors: [groupbytrace, spanpruning, batch] + exporters: [otlp] +``` + +Or with tail sampling: + +```yaml +service: + pipelines: + traces: + receivers: [otlp] + processors: [tail_sampling, spanpruning, batch] + exporters: [otlp] +``` + +## Example + +### Basic Example + +A trace with repeated database queries (some failing): + +**Before Processing:** +``` +root-span (parent) +├── SELECT (leaf) - duration: 10ms, 
db.operation: select, status: OK +├── SELECT (leaf) - duration: 15ms, db.operation: select, status: OK +├── SELECT (leaf) - duration: 12ms, db.operation: select, status: OK +├── SELECT (leaf) - duration: 50ms, db.operation: select, status: Error +├── SELECT (leaf) - duration: 45ms, db.operation: select, status: Error +└── INSERT (leaf) - duration: 20ms, db.operation: insert, status: OK +``` + +**After Processing (with `min_spans_to_aggregate: 2`):** +``` +root-span (parent) +├── SELECT (summary, status: OK) +│ - aggregation.is_summary: true +│ - aggregation.span_count: 3 +│ - aggregation.duration_min_ns: 10000000 +│ - aggregation.duration_max_ns: 15000000 +│ - aggregation.duration_avg_ns: 12333333 +├── SELECT (summary, status: Error) +│ - aggregation.is_summary: true +│ - aggregation.span_count: 2 +│ - aggregation.duration_min_ns: 45000000 +│ - aggregation.duration_max_ns: 50000000 +│ - aggregation.duration_avg_ns: 47500000 +└── INSERT (unchanged - only 1 span, below threshold) +``` + +Note: Spans with different status codes are grouped separately, preserving error information. + +### Recursive Parent Aggregation Example + +When spans are aggregated, the processor also checks if their parent spans can be aggregated. Parent spans are eligible for aggregation when: +1. All of their children are being aggregated +2. They share the same name, kind, and status code with other eligible parents +3. They are not root spans (must have a parent) +4. 
At least 2 parents meet the criteria + +**Before Processing (with `min_spans_to_aggregate: 2`, `group_by_attributes: ["db.op"]`):** +``` +root +├── handler (status: OK) +│ └── SELECT (db.op=select, status: OK) ───┐ +├── handler (status: OK) │ leaf group A: 3 OK SELECTs +│ └── SELECT (db.op=select, status: OK) ───┤ +├── handler (status: OK) │ +│ └── SELECT (db.op=select, status: OK) ───┘ +├── handler (status: Error) +│ └── SELECT (db.op=select, status: Error) ┐ leaf group B: 2 Error SELECTs +├── handler (status: Error) │ +│ └── SELECT (db.op=select, status: Error) ┘ +├── handler (status: OK) +│ └── INSERT (db.op=insert, status: OK) ──── only 1, below threshold +└── worker (status: OK) + └── SELECT (db.op=select, status: OK) ──── different parent name +``` + +**After Processing:** +``` +root +├── handler (summary, status: OK, span_count: 3) +│ └── SELECT (summary, status: OK, span_count: 3) +├── handler (summary, status: Error, span_count: 2) +│ └── SELECT (summary, status: Error, span_count: 2) +├── handler (status: OK) +│ └── INSERT (status: OK) ─────────────────────────── unchanged +└── worker (status: OK) + └── SELECT (status: OK) ─────────────────────────── unchanged +``` + +**Why each span was handled this way:** + +| Span | Result | Reason | +|------|--------|--------| +| 3x handler (OK) with SELECT children | Aggregated | All children aggregated, same name+kind+status | +| 3x SELECT (OK) under handler | Aggregated | Same name + kind + status + attributes + parent name | +| 2x handler (Error) with SELECT children | Aggregated | All children aggregated, same name+kind+status | +| 2x SELECT (Error) under handler | Aggregated | Same name + kind + status + attributes + parent name | +| handler (OK) with INSERT child | Unchanged | Child not aggregated (only 1 INSERT) | +| INSERT (OK) | Unchanged | Below threshold (only 1 span) | +| worker (OK) | Unchanged | Child not aggregated | +| SELECT (OK) under worker | Unchanged | Different parent name than other SELECTs | + 
+## Limitations + +- Requires complete traces for accurate leaf detection +- Summary span inherits attributes from the slowest span in the group +- Parent spans are only aggregated when ALL their children are aggregated + +## Consistent Probability Sampling (CPS) Compatibility + +The processor is designed to be compatible with [Consistent Probability Sampling](https://opentelemetry.io/docs/specs/otel/trace/tracestate-probability-sampling/) (CPS). CPS uses TraceState to carry sampling metadata (`ot=th:...;rv:...`) where: + +- `th` (threshold) indicates the sampling probability threshold +- `rv` (randomness value) provides consistent randomness for sampling decisions + +**Why TraceState matters for aggregation:** + +Spans with different TraceState values represent different sampling populations with different "adjusted counts" (weights). Aggregating them together would produce statistically incorrect summaries and break downstream sampling decisions. + +The processor uses **exact TraceState matching** (not just the `th` value) because: +- The `rv` value affects sampling decisions +- Vendor-specific keys may have semantic meaning +- Key ordering may be significant + +## Telemetry + +The processor emits the following metrics to help monitor its operation: + +### Counters + +| Metric | Description | +|--------|-------------| +| `otelcol_processor_spanpruning_spans_received` | Total number of spans received by the processor | +| `otelcol_processor_spanpruning_spans_pruned` | Total number of spans removed by aggregation | +| `otelcol_processor_spanpruning_aggregations_created` | Total number of aggregation summary spans created | +| `otelcol_processor_spanpruning_traces_processed` | Total number of traces processed | +| `otelcol_processor_spanpruning_outliers_detected` | Total spans identified as outliers by analysis (when `enable_outlier_analysis: true`) | +| `otelcol_processor_spanpruning_outliers_preserved` | Total outlier spans kept as individual spans (when 
`preserve_outliers: true`) | +| `otelcol_processor_spanpruning_outliers_correlations_detected` | Total aggregation groups where outliers had correlated attributes | +| `otelcol_processor_spanpruning_bytes_received` | Total bytes of serialized traces received (when `enable_bytes_metrics: true`) | +| `otelcol_processor_spanpruning_bytes_emitted` | Total bytes of serialized traces emitted after pruning (when `enable_bytes_metrics: true`) | + +### Histograms + +| Metric | Description | +|--------|-------------| +| `otelcol_processor_spanpruning_aggregation_group_size` | Distribution of the number of spans per aggregation group | +| `otelcol_processor_spanpruning_processing_duration` | Time taken to process each batch of traces (in seconds) | + +### Optional Attribute Loss Metrics + +When `enable_attribute_loss_analysis: true`, the processor also emits metrics about attribute loss during aggregation. These metrics help you understand how much information is being lost when spans are grouped together. + +To correlate these metrics back to traces, a configurable fraction of these metric recordings can include trace exemplars via `attribute_loss_exemplar_sample_rate`. Sampling is applied per aggregation group, and the exemplar context is taken from the slowest span in the group. 
+ +#### Histograms (Optional) + +| Metric | Description | +|--------|-------------| +| `otelcol_processor_spanpruning_leaf_attribute_diversity_loss` | Attribute values lost due to diversity per leaf aggregation (when leaf spans have different attribute values) | +| `otelcol_processor_spanpruning_leaf_attribute_loss` | Attribute keys lost due to absence per leaf aggregation (when some spans don't have an attribute that others do) | +| `otelcol_processor_spanpruning_parent_attribute_diversity_loss` | Attribute values lost due to diversity per parent aggregation | +| `otelcol_processor_spanpruning_parent_attribute_loss` | Attribute keys lost due to absence per parent aggregation | + +Attribute loss analysis is **disabled by default** (`enable_attribute_loss_analysis: false`) to reduce overhead. When enabled, the processor: +- Analyzes attribute differences across spans being aggregated +- Records histogram metrics for loss tracking +- Adds `diverse_attributes` and `missing_attributes` summary attributes to aggregated spans + +These metrics can be used to: +- Monitor the effectiveness of span pruning (compare `spans_received` vs `spans_pruned`) +- Track the compression ratio achieved by aggregation +- Identify processing bottlenecks via `processing_duration` +- Understand aggregation patterns via `aggregation_group_size` diff --git a/processor/spanpruningprocessor/aggregation.go b/processor/spanpruningprocessor/aggregation.go new file mode 100644 index 0000000000000..a6e3abd96856c --- /dev/null +++ b/processor/spanpruningprocessor/aggregation.go @@ -0,0 +1,237 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "encoding/binary" + "math/rand/v2" + "sort" + "time" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +// aggregationGroup captures 
the spans to aggregate along with execution +// metadata (tree depth, preassigned summary ID, and attribute loss info). +type aggregationGroup struct { + nodes []*spanNode // nodes to aggregate (replaces []spanInfo for efficiency) + depth int // tree depth (0 = leaf, 1 = parent of leaf, etc.) + summarySpanID pcommon.SpanID // SpanID of the summary span (assigned before creation) + lossInfo attributeLossSummary // attribute loss info (diverse + missing) + templateNode *spanNode // node to use as summary template (longest duration) + outlierAnalysis *outlierAnalysisResult // IQR analysis results + preservedOutliers []*spanNode // outliers to keep as individual spans +} + +// aggregationPlan orders aggregation groups for top-down execution and +// carries precomputed summary span IDs. +type aggregationPlan struct { + groups []aggregationGroup +} + +// findLongestDurationNode returns the node with the longest duration. +func findLongestDurationNode(nodes []*spanNode) *spanNode { + if len(nodes) == 0 { + return nil + } + longest := nodes[0] + // pcommon.Timestamp is uint64 nanoseconds; direct subtraction avoids + // creating intermediate time.Time objects (2 per span otherwise). + longestDuration := int64(longest.span.EndTimestamp()) - int64(longest.span.StartTimestamp()) + for _, node := range nodes[1:] { + duration := int64(node.span.EndTimestamp()) - int64(node.span.StartTimestamp()) + if duration > longestDuration { + longest = node + longestDuration = duration + } + } + return longest +} + +// generateSpanID produces a non-cryptographic span ID suitable for summary +// spans; uniqueness is sufficient, not randomness strength. +func generateSpanID() pcommon.SpanID { + var id [8]byte + binary.BigEndian.PutUint64(id[:], rand.Uint64()) + return pcommon.SpanID(id) +} + +// buildAggregationPlan sorts aggregation groups by depth (parents before +// children) and preassigns summary SpanIDs to avoid conflicts during writes. 
+func (*spanPruningProcessor) buildAggregationPlan(groups map[string]aggregationGroup) aggregationPlan { + // Convert map to slice with pre-allocation + groupSlice := make([]aggregationGroup, 0, len(groups)) + for key := range groups { + groupSlice = append(groupSlice, groups[key]) + } + + // Sort by depth descending (highest depth first = top-down) + sort.Slice(groupSlice, func(i, j int) bool { + return groupSlice[i].depth > groupSlice[j].depth + }) + + // Pre-assign SpanIDs for all summary spans + for i := range groupSlice { + groupSlice[i].summarySpanID = generateSpanID() + } + + return aggregationPlan{groups: groupSlice} +} + +// executeAggregations performs the top-down creation of summary spans, batch +// removes originals, and returns the number of pruned spans. +func (p *spanPruningProcessor) executeAggregations(plan aggregationPlan) int { + // Track which parent SpanID should map to which summary SpanID + parentReplacements := make(map[pcommon.SpanID]pcommon.SpanID, len(plan.groups)*4) + + // Track spans to remove per ScopeSpans for batch removal + spansToRemove := make(map[ptrace.ScopeSpans]map[pcommon.SpanID]struct{}, len(plan.groups)) + prunedCount := 0 + + prefix := p.config.AggregationAttributePrefix + + for i := range plan.groups { + group := &plan.groups[i] + // Calculate statistics and time range in single pass + data := p.calculateAggregationData(group.nodes) + + // Determine the parent SpanID for the summary span + // Use the first node's parent as template + originalParentID := group.nodes[0].span.ParentSpanID() + + // Check if the parent is being replaced by a summary span + summaryParentID := originalParentID + if replacementID, exists := parentReplacements[originalParentID]; exists { + summaryParentID = replacementID + } + + // Create summary span with correct parent + p.createSummarySpanWithParent(*group, data, summaryParentID) + + // Mark preserved outliers with reference to summary span + if len(group.preservedOutliers) > 0 { + for _, 
outlier := range group.preservedOutliers { + // Outliers become siblings of the summary span + outlier.span.SetParentSpanID(summaryParentID) + outlier.span.Attributes().PutBool(prefix+"is_preserved_outlier", true) + outlier.span.Attributes().PutStr(prefix+"summary_span_id", + group.summarySpanID.String()) + } + } + + // Record that these original span IDs should be replaced by the summary span ID + for _, node := range group.nodes { + spanID := node.span.SpanID() + parentReplacements[spanID] = group.summarySpanID + scopeSpans := node.scopeSpans + if spansToRemove[scopeSpans] == nil { + spansToRemove[scopeSpans] = make(map[pcommon.SpanID]struct{}, len(group.nodes)) + } + spansToRemove[scopeSpans][spanID] = struct{}{} + } + prunedCount += len(group.nodes) + } + + // Batch remove all marked spans in a single pass per ScopeSpans + for scopeSpans, spanIDs := range spansToRemove { + scopeSpans.Spans().RemoveIf(func(span ptrace.Span) bool { + _, shouldRemove := spanIDs[span.SpanID()] + return shouldRemove + }) + } + + return prunedCount +} + +// createSummarySpanWithParent builds the summary span for an aggregation +// group, wiring it under the provided parent SpanID and attaching stats +// and attribute-loss annotations. 
+func (p *spanPruningProcessor) createSummarySpanWithParent(group aggregationGroup, data aggregationData, parentSpanID pcommon.SpanID) ptrace.Span { + // Use the template node (longest duration span) as a template + templateNode := group.templateNode + templateSpan := templateNode.span + scopeSpans := templateNode.scopeSpans + + // Create new span in the same ScopeSpans as the template span + newSpan := scopeSpans.Spans().AppendEmpty() + + // Copy basic properties from template + newSpan.SetName(templateSpan.Name()) + newSpan.SetTraceID(templateSpan.TraceID()) + newSpan.SetSpanID(group.summarySpanID) + newSpan.SetParentSpanID(parentSpanID) + newSpan.SetKind(templateSpan.Kind()) + + // Set timestamps from aggregation data + newSpan.SetStartTimestamp(data.earliestStart) + newSpan.SetEndTimestamp(data.latestEnd) + + // Copy attributes from template + templateSpan.Attributes().CopyTo(newSpan.Attributes()) + + // Copy status from template + templateSpan.Status().CopyTo(newSpan.Status()) + + // Copy TraceState from template for Consistent Probability Sampling compatibility + newSpan.TraceState().FromRaw(templateSpan.TraceState().AsRaw()) + + // Add aggregation statistics as attributes + prefix := p.config.AggregationAttributePrefix + newSpan.Attributes().PutBool(prefix+"is_summary", true) + newSpan.Attributes().PutInt(prefix+"span_count", data.count) + newSpan.Attributes().PutInt(prefix+"duration_min_ns", int64(data.minDuration)) + newSpan.Attributes().PutInt(prefix+"duration_max_ns", int64(data.maxDuration)) + newSpan.Attributes().PutInt(prefix+"duration_total_ns", int64(data.sumDuration)) + if data.count > 0 { + newSpan.Attributes().PutInt(prefix+"duration_avg_ns", int64(data.sumDuration)/data.count) + } + + // Add outlier analysis attributes when enabled + if group.outlierAnalysis != nil { + newSpan.Attributes().PutInt(prefix+"duration_median_ns", int64(group.outlierAnalysis.median)) + + if len(group.outlierAnalysis.correlations) > 0 { + 
newSpan.Attributes().PutStr(prefix+"outlier_correlated_attributes", + formatCorrelations(group.outlierAnalysis.correlations)) + } + + // Track preserved outliers + if len(group.preservedOutliers) > 0 { + newSpan.Attributes().PutInt(prefix+"preserved_outlier_count", + int64(len(group.preservedOutliers))) + + // List preserved outlier span IDs + outlierIDs := newSpan.Attributes().PutEmptySlice(prefix + "preserved_outlier_span_ids") + for _, outlier := range group.preservedOutliers { + outlierIDs.AppendEmpty().SetStr(outlier.span.SpanID().String()) + } + } + } + + // Add histogram attributes if enabled + if len(p.config.AggregationHistogramBuckets) > 0 { + // Add bucket bounds (in seconds) + bucketBoundsSlice := newSpan.Attributes().PutEmptySlice(prefix + "histogram_bucket_bounds_s") + for _, bucket := range p.config.AggregationHistogramBuckets { + bucketBoundsSlice.AppendEmpty().SetDouble(float64(bucket) / float64(time.Second)) + } + + // Add bucket counts + bucketCountsSlice := newSpan.Attributes().PutEmptySlice(prefix + "histogram_bucket_counts") + for _, count := range data.bucketCounts { + bucketCountsSlice.AppendEmpty().SetInt(count) + } + } + + // Add attribute loss info when detected + if len(group.lossInfo.diverse) > 0 { + newSpan.Attributes().PutStr(prefix+"diverse_attributes", formatAttributeCardinality(group.lossInfo.diverse)) + } + if len(group.lossInfo.missing) > 0 { + newSpan.Attributes().PutStr(prefix+"missing_attributes", formatAttributeCardinality(group.lossInfo.missing)) + } + + return newSpan +} diff --git a/processor/spanpruningprocessor/aggregation_test.go b/processor/spanpruningprocessor/aggregation_test.go new file mode 100644 index 0000000000000..55e86827e5a83 --- /dev/null +++ b/processor/spanpruningprocessor/aggregation_test.go @@ -0,0 +1,136 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor + +import ( + "testing" + + "github.com/stretchr/testify/assert" + 
"github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +func TestFindLongestDurationNode_Empty(t *testing.T) { + result := findLongestDurationNode(nil) + assert.Nil(t, result) + + result = findLongestDurationNode([]*spanNode{}) + assert.Nil(t, result) +} + +func TestFindLongestDurationNode_SingleNode(t *testing.T) { + nodes := createSpanNodesWithDurations(t, []int64{100}) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[0], result) +} + +func TestFindLongestDurationNode_LongestFirst(t *testing.T) { + // Longest duration is first in the slice + nodes := createSpanNodesWithDurations(t, []int64{500, 100, 200}) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[0], result, "should return first node (500ns)") +} + +func TestFindLongestDurationNode_LongestMiddle(t *testing.T) { + // Longest duration is in the middle + nodes := createSpanNodesWithDurations(t, []int64{100, 500, 200}) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[1], result, "should return middle node (500ns)") +} + +func TestFindLongestDurationNode_LongestLast(t *testing.T) { + // Longest duration is last in the slice + nodes := createSpanNodesWithDurations(t, []int64{100, 200, 500}) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[2], result, "should return last node (500ns)") +} + +func TestFindLongestDurationNode_EqualDurations(t *testing.T) { + // All durations are equal - should return first + nodes := createSpanNodesWithDurations(t, []int64{100, 100, 100}) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[0], result, "should return first node when all equal") +} + +func TestFindLongestDurationNode_LargeDurations(t *testing.T) { + // Test with large duration values (milliseconds in 
nanoseconds) + durations := []int64{ + 1_000_000, // 1ms + 500_000_000, // 500ms + 100_000_000, // 100ms + } + nodes := createSpanNodesWithDurations(t, durations) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[1], result, "should return node with 500ms duration") +} + +func TestFindLongestDurationNode_ZeroDuration(t *testing.T) { + // Test with zero duration spans + nodes := createSpanNodesWithDurations(t, []int64{0, 100, 0}) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[1], result, "should return node with non-zero duration") +} + +func TestFindLongestDurationNode_ManyNodes(t *testing.T) { + // Test with many nodes to verify iteration works correctly + durations := make([]int64, 100) + for i := range durations { + durations[i] = int64(i * 10) + } + // Set one in the middle to be the longest + durations[50] = 99999 + + nodes := createSpanNodesWithDurations(t, durations) + + result := findLongestDurationNode(nodes) + require.NotNil(t, result) + assert.Equal(t, nodes[50], result, "should return node at index 50 with longest duration") +} + +// createSpanNodesWithDurations creates span nodes with specified durations in nanoseconds +func createSpanNodesWithDurations(t *testing.T, durationsNs []int64) []*spanNode { + t.Helper() + + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + nodes := make([]*spanNode, 0, len(durationsNs)) + baseTime := int64(1000000000) + + for i, duration := range durationsNs { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("test") + span.SetStartTimestamp(pcommon.Timestamp(baseTime)) + 
span.SetEndTimestamp(pcommon.Timestamp(baseTime + duration)) + + nodes = append(nodes, &spanNode{ + span: span, + scopeSpans: ss, + }) + } + + return nodes +} diff --git a/processor/spanpruningprocessor/attribute_loss.go b/processor/spanpruningprocessor/attribute_loss.go new file mode 100644 index 0000000000000..46897387f3bae --- /dev/null +++ b/processor/spanpruningprocessor/attribute_loss.go @@ -0,0 +1,147 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "sort" + "strconv" + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// attributeCardinality records how many distinct values an attribute key has +// across spans being aggregated. +type attributeCardinality struct { + key string + uniqueValues int +} + +// attributeLossSummary separates two types of attribute loss during aggregation: +// - diverse: attributes present in ALL spans but with different values +// - missing: attributes absent from SOME spans (won't be in summary) +type attributeLossSummary struct { + diverse []attributeCardinality // present in all spans, multiple values + missing []attributeCardinality // absent from some spans +} + +// isEmpty reports whether no attribute loss was detected. +func (s attributeLossSummary) isEmpty() bool { + return len(s.diverse) == 0 && len(s.missing) == 0 +} + +// analyzeAttributeLoss examines spans being aggregated to determine which +// attributes will lose information in the summary span. The template node's +// attributes are preserved on the summary. Results include: +// - diverse: present in all spans but with multiple unique values (loss = unique - 1) +// - missing: absent from some spans (loss depends on template presence) +// Both slices are sorted by uniqueValues descending. 
+func analyzeAttributeLoss(nodes []*spanNode, template *spanNode) attributeLossSummary { + if len(nodes) < 2 || template == nil { + return attributeLossSummary{} + } + + numSpans := len(nodes) + templateSpan := template.span + + // Track unique values and presence count per attribute key + // map[attributeKey]map[attributeValue]struct{} + attributeValues := make(map[string]map[string]struct{}) + // map[attributeKey]count of spans that have this attribute + attributePresence := make(map[string]int) + + for _, node := range nodes { + node.span.Attributes().Range(func(k string, v pcommon.Value) bool { + if attributeValues[k] == nil { + attributeValues[k] = make(map[string]struct{}) + } + attributeValues[k][v.AsString()] = struct{}{} + attributePresence[k]++ + return true + }) + } + + var result attributeLossSummary + + for key, values := range attributeValues { + presence := attributePresence[key] + uniqueCount := len(values) + + if presence < numSpans { + // Attribute missing from some spans - presence loss + // Loss depends on whether template has this attribute + _, templateHasAttr := templateSpan.Attributes().Get(key) + var lostCount int + if templateHasAttr { + // Template's value is preserved, lose the rest + lostCount = uniqueCount - 1 + } else { + // Template lacks it, summary lacks it, lose all values + lostCount = uniqueCount + } + if lostCount > 0 { + result.missing = append(result.missing, attributeCardinality{ + key: key, + uniqueValues: lostCount, + }) + } + } else if uniqueCount > 1 { + // Present in all spans but with different values - diversity loss + // Summary span keeps one value (from template), so loss = uniqueCount - 1 + result.diverse = append(result.diverse, attributeCardinality{ + key: key, + uniqueValues: uniqueCount - 1, + }) + } + } + + // Sort both slices by uniqueValues descending, then key ascending + sortFunc := func(slice []attributeCardinality) { + sort.Slice(slice, func(i, j int) bool { + if slice[i].uniqueValues != 
slice[j].uniqueValues { + return slice[i].uniqueValues > slice[j].uniqueValues + } + return slice[i].key < slice[j].key + }) + } + sortFunc(result.diverse) + sortFunc(result.missing) + + return result +} + +// maxLostAttributesEntries bounds how many attribute keys are serialized into +// the loss strings to prevent excessively long attribute values. +const maxLostAttributesEntries = 10 + +// formatAttributeCardinality formats attribute cardinality as "key(count),..." +// truncated to maxLostAttributesEntries with an ellipsis when needed. +func formatAttributeCardinality(attrs []attributeCardinality) string { + if len(attrs) == 0 { + return "" + } + + truncated := len(attrs) > maxLostAttributesEntries + count := len(attrs) + if truncated { + count = maxLostAttributesEntries + } + + var sb strings.Builder + for i, attr := range attrs[:count] { + if i > 0 { + sb.WriteByte(',') + } + sb.WriteString(attr.key) + sb.WriteByte('(') + sb.WriteString(strconv.Itoa(attr.uniqueValues)) + sb.WriteByte(')') + } + + if truncated { + sb.WriteString(",...") + } + + return sb.String() +} diff --git a/processor/spanpruningprocessor/attribute_loss_test.go b/processor/spanpruningprocessor/attribute_loss_test.go new file mode 100644 index 0000000000000..18a057be00925 --- /dev/null +++ b/processor/spanpruningprocessor/attribute_loss_test.go @@ -0,0 +1,616 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor + +import ( + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + "go.opentelemetry.io/collector/processor/processortest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" +) + +func TestAnalyzeAttributeLoss_NoDiversity(t *testing.T) { + // All spans 
have identical attributes - no diversity loss + nodes := createTestSpanNodes(t, []map[string]string{ + {"db.operation": "select", "db.name": "users"}, + {"db.operation": "select", "db.name": "users"}, + {"db.operation": "select", "db.name": "users"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + assert.True(t, result.isEmpty(), "no loss when all values are identical") +} + +func TestAnalyzeAttributeLoss_WithDiversity(t *testing.T) { + // Spans have different values for some attributes (all present in all spans) + nodes := createTestSpanNodes(t, []map[string]string{ + {"db.operation": "select", "db.name": "users", "db.statement": "SELECT * FROM users"}, + {"db.operation": "select", "db.name": "orders", "db.statement": "SELECT * FROM orders"}, + {"db.operation": "select", "db.name": "products", "db.statement": "SELECT * FROM products"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + + // Should have 2 attributes with diversity: db.name (3 values, 2 lost) and db.statement (3 values, 2 lost) + assert.Len(t, result.diverse, 2, "should have 2 diverse attributes") + assert.Empty(t, result.missing, "no missing attributes") + + // Verify sorted by uniqueValues descending (both have 2 lost values) + assert.Equal(t, 2, result.diverse[0].uniqueValues) + assert.Equal(t, 2, result.diverse[1].uniqueValues) + + // Keys should be sorted alphabetically when uniqueValues are equal + keys := []string{result.diverse[0].key, result.diverse[1].key} + assert.Contains(t, keys, "db.name") + assert.Contains(t, keys, "db.statement") +} + +func TestAnalyzeAttributeLoss_MixedDiversity(t *testing.T) { + // Some attributes have diversity, others don't + nodes := createTestSpanNodes(t, []map[string]string{ + {"db.operation": "select", "http.method": "GET", "http.route": "/api/users"}, + {"db.operation": "select", "http.method": "POST", "http.route": "/api/users"}, + {"db.operation": "select", "http.method": "PUT", "http.route": "/api/users"}, + }) + + result := 
analyzeAttributeLoss(nodes, nodes[0]) + + // Only http.method has diversity (3 values, 2 lost) + // db.operation and http.route have identical values + assert.Len(t, result.diverse, 1, "should have 1 diverse attribute") + assert.Equal(t, "http.method", result.diverse[0].key) + assert.Equal(t, 2, result.diverse[0].uniqueValues) + assert.Empty(t, result.missing) +} + +func TestAnalyzeAttributeLoss_SortOrder(t *testing.T) { + // Test that results are sorted by uniqueValues descending, then key ascending + nodes := createTestSpanNodes(t, []map[string]string{ + {"a": "1", "b": "1", "c": "1"}, + {"a": "2", "b": "2", "c": "1"}, + {"a": "3", "b": "2", "c": "1"}, + {"a": "4", "b": "2", "c": "1"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + + // a has 4 unique values (3 lost), b has 2 unique values (1 lost), c has 1 (no diversity) + assert.Len(t, result.diverse, 2) + assert.Equal(t, "a", result.diverse[0].key) + assert.Equal(t, 3, result.diverse[0].uniqueValues) // 4 - 1 = 3 lost + assert.Equal(t, "b", result.diverse[1].key) + assert.Equal(t, 1, result.diverse[1].uniqueValues) // 2 - 1 = 1 lost +} + +func TestAnalyzeAttributeLoss_SingleNode(t *testing.T) { + // Single node should return empty (no aggregation happening) + nodes := createTestSpanNodes(t, []map[string]string{ + {"db.operation": "select"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + assert.True(t, result.isEmpty(), "single node should return empty") +} + +func TestAnalyzeAttributeLoss_EmptyNodes(t *testing.T) { + result := analyzeAttributeLoss(nil, nil) + assert.True(t, result.isEmpty()) + + result = analyzeAttributeLoss([]*spanNode{}, nil) + assert.True(t, result.isEmpty()) +} + +func TestAnalyzeAttributeLoss_MissingAttributes(t *testing.T) { + // Attributes not present in ALL spans should be in 'missing' + // span1 (template) has {a, b}, span2 has {a, c}, span3 has {a, b} + // 'a' is in all with diversity -> diverse (2 lost) + // 'b' missing from span2, but template has it -> 1 
lost (value "2") + // 'c' missing from span1 (template) and span3 -> 1 lost (all values) + nodes := createTestSpanNodes(t, []map[string]string{ + {"a": "1", "b": "1"}, + {"a": "2", "c": "1"}, + {"a": "3", "b": "2"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + + // 'a' has 3 unique values, present in all -> diverse (2 lost) + assert.Len(t, result.diverse, 1) + assert.Equal(t, "a", result.diverse[0].key) + assert.Equal(t, 2, result.diverse[0].uniqueValues) // 3 - 1 = 2 lost + + // 'b' and 'c' are missing from some spans -> missing + assert.Len(t, result.missing, 2) + + // 'b': template has b=1, other value is b=2 -> 1 lost + // 'c': template lacks c, value c=1 exists -> 1 lost + // Both have 1 lost, sorted alphabetically + assert.Equal(t, "b", result.missing[0].key) + assert.Equal(t, 1, result.missing[0].uniqueValues) + assert.Equal(t, "c", result.missing[1].key) + assert.Equal(t, 1, result.missing[1].uniqueValues) +} + +func TestAnalyzeAttributeLoss_AllMissingFromSome(t *testing.T) { + // When each span has a unique attribute, all should be in 'missing' + // span1 (template) has {a}, span2 has {b}, span3 has {c} + // Template has 'a', so 'a' has 0 lost (only 1 value, template keeps it) + // Template lacks 'b' and 'c', so they lose all their values (1 each) + nodes := createTestSpanNodes(t, []map[string]string{ + {"a": "1"}, + {"b": "1"}, + {"c": "1"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + + // No attribute is present in all spans, so diverse should be empty + assert.Empty(t, result.diverse) + + // 'a' has 1 value, template has it -> 0 lost (not reported) + // 'b' and 'c' have 1 value each, template lacks them -> 1 lost each + assert.Len(t, result.missing, 2, "b and c missing from template") + for _, attr := range result.missing { + assert.Equal(t, 1, attr.uniqueValues, "1 value lost for %s", attr.key) + assert.NotEqual(t, "a", attr.key, "a should not be in missing (template has it, no loss)") + } +} + +func 
TestAnalyzeAttributeLoss_BothDiverseAndMissing(t *testing.T) { + // Test that a single aggregation can have both diverse and missing attributes + // span1 (template): {a:1, b:1} + // span2: {a:2, b:1, c:1} <- c only here + // span3: {a:3, b:1, c:2} <- c only here with different value + // Result: a is diverse (2 lost), c is missing and template lacks it (2 lost) + nodes := createTestSpanNodes(t, []map[string]string{ + {"a": "1", "b": "1"}, + {"a": "2", "b": "1", "c": "1"}, + {"a": "3", "b": "1", "c": "2"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + + // 'a' has 3 unique values, present in all -> diverse (2 lost) + assert.Len(t, result.diverse, 1) + assert.Equal(t, "a", result.diverse[0].key) + assert.Equal(t, 2, result.diverse[0].uniqueValues) // 3 - 1 = 2 lost + + // 'c' missing from template, has 2 values -> 2 lost + assert.Len(t, result.missing, 1) + assert.Equal(t, "c", result.missing[0].key) + assert.Equal(t, 2, result.missing[0].uniqueValues) + + // 'b' has 1 value, present in all -> no loss + assert.False(t, result.isEmpty(), "should have both diverse and missing") +} + +func TestAnalyzeAttributeLoss_MissingWithTemplateHavingIt(t *testing.T) { + // When template has the attribute, only non-template values are lost + // span1 (template): {a:1, c:1} + // span2: {a:2} <- c missing + // span3: {a:3} <- c missing + // Result: a is diverse (2 lost), c is missing but template has it (0 lost) + nodes := createTestSpanNodes(t, []map[string]string{ + {"a": "1", "c": "1"}, + {"a": "2"}, + {"a": "3"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + + // 'a' has 3 unique values -> 2 lost + assert.Len(t, result.diverse, 1) + assert.Equal(t, "a", result.diverse[0].key) + assert.Equal(t, 2, result.diverse[0].uniqueValues) + + // 'c' missing from others but template has c=1 -> 0 lost (not reported) + assert.Empty(t, result.missing, "c preserved by template") +} + +func TestAnalyzeAttributeLoss_EmptyStringValues(t *testing.T) { + // Empty string is 
a valid distinct value + nodes := createTestSpanNodes(t, []map[string]string{ + {"a": "", "b": "value"}, + {"a": "non-empty", "b": ""}, + {"a": "", "b": "value"}, + }) + + result := analyzeAttributeLoss(nodes, nodes[0]) + + // 'a' has 2 unique values: "" and "non-empty" -> 1 lost + // 'b' has 2 unique values: "value" and "" -> 1 lost + assert.Len(t, result.diverse, 2) + assert.Empty(t, result.missing) + + // Both should show 1 lost value (2 unique - 1 kept = 1 lost) + for _, attr := range result.diverse { + assert.Equal(t, 1, attr.uniqueValues, "attribute %s should have 1 lost value", attr.key) + } +} + +func TestFormatAttributeCardinality_Empty(t *testing.T) { + result := formatAttributeCardinality(nil) + assert.Empty(t, result) + + result = formatAttributeCardinality([]attributeCardinality{}) + assert.Empty(t, result) +} + +func TestFormatAttributeCardinality_Single(t *testing.T) { + attrs := []attributeCardinality{ + {key: "db.statement", uniqueValues: 12}, + } + + result := formatAttributeCardinality(attrs) + assert.Equal(t, "db.statement(12)", result) +} + +func TestFormatAttributeCardinality_Multiple(t *testing.T) { + attrs := []attributeCardinality{ + {key: "db.statement", uniqueValues: 12}, + {key: "db.system", uniqueValues: 3}, + {key: "http.route", uniqueValues: 2}, + } + + result := formatAttributeCardinality(attrs) + assert.Equal(t, "db.statement(12),db.system(3),http.route(2)", result) +} + +func TestFormatAttributeCardinality_Truncation(t *testing.T) { + // Create more than maxLostAttributesEntries (10) entries + attrs := make([]attributeCardinality, 15) + for i := range 15 { + attrs[i] = attributeCardinality{ + key: "attr" + strconv.Itoa(i), + uniqueValues: 15 - i, // descending order + } + } + + result := formatAttributeCardinality(attrs) + + // Should only have first 10 entries plus ",..." 
+ assert.Contains(t, result, "attr0(15)") + assert.Contains(t, result, "attr9(6)") + assert.NotContains(t, result, "attr10") + assert.True(t, strings.HasSuffix(result, ",..."), "should end with truncation indicator") +} + +func TestFormatAttributeCardinality_ExactlyMax(t *testing.T) { + // Exactly maxLostAttributesEntries (10) entries should not be truncated + attrs := make([]attributeCardinality, 10) + for i := range 10 { + attrs[i] = attributeCardinality{ + key: "attr" + strconv.Itoa(i), + uniqueValues: 10 - i, + } + } + + result := formatAttributeCardinality(attrs) + + assert.NotContains(t, result, "...", "should not have truncation indicator at exactly max") + assert.Contains(t, result, "attr9(1)") +} + +// Integration test: verify diverse_attributes is set on parent summary spans +func TestLeafSpanPruning_DiverseAttributesOnParentSummary(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.MaxParentDepth = -1 // Enable parent aggregation + cfg.EnableAttributeLossAnalysis = true + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with parent spans that have different attributes + td := createTestTraceWithDiverseParentAttributes(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Find the parent aggregated span + handlerAgg, found := findSummarySpanByName(td, "handler") + require.True(t, found, "handler summary should exist") + + // Check for diverse_attributes attribute + diverseAttrs, exists := handlerAgg.Attributes().Get("aggregation.diverse_attributes") + require.True(t, exists, "diverse_attributes should exist on parent summary span") + + // Verify the format contains user.id (which has different values) + diverseAttrsStr := diverseAttrs.Str() + assert.Contains(t, diverseAttrsStr, "user.id", "should contain user.id") + assert.Contains(t, 
diverseAttrsStr, "(2)", "should show 2 lost values (3 unique - 1 kept)") +} + +func TestLeafSpanPruning_NoAttributeLossOnIdenticalLeafSpans(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.EnableAttributeLossAnalysis = true + cfg.MaxParentDepth = 0 // Disable parent aggregation + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with identical leaf spans + td := createTestTraceWithLeafSpans(t, 3, "SELECT", map[string]string{"db.operation": "select"}) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Find the leaf summary span + summarySpan, found := findSummarySpanByName(td, "SELECT") + require.True(t, found, "SELECT summary should exist") + + // Leaf spans are grouped by identical attributes, so no loss attributes + _, diverseExists := summarySpan.Attributes().Get("aggregation.diverse_attributes") + _, missingExists := summarySpan.Attributes().Get("aggregation.missing_attributes") + assert.False(t, diverseExists, "diverse_attributes should NOT exist on identical spans") + assert.False(t, missingExists, "missing_attributes should NOT exist on identical spans") +} + +// Integration test: verify diverse_attributes is set on leaf summary spans with diverse attributes +func TestLeafSpanPruning_DiverseAttributesOnLeafSummary(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.EnableAttributeLossAnalysis = true + cfg.MaxParentDepth = 0 // Disable parent aggregation + cfg.GroupByAttributes = []string{"db.operation"} // Only group by db.operation + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with leaf spans that have same db.operation but different db.statement + 
td := createTestTraceWithDiverseLeafAttributes(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Find the leaf summary span + summarySpan, found := findSummarySpanByName(td, "db_query") + require.True(t, found, "db_query summary should exist") + + // Leaf spans have diverse db.statement values + diverseAttrs, exists := summarySpan.Attributes().Get("aggregation.diverse_attributes") + require.True(t, exists, "diverse_attributes should exist on leaf summary span with diverse attributes") + + // Verify the format contains the attribute key with diversity + diverseAttrsStr := diverseAttrs.Str() + assert.Contains(t, diverseAttrsStr, "db.statement", "should contain db.statement") + assert.Contains(t, diverseAttrsStr, "(2)", "should show 2 lost values (3 unique - 1 kept)") +} + +// Integration test: verify missing_attributes is set when attributes are absent from some spans +func TestLeafSpanPruning_MissingAttributesOnLeafSummary(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.EnableAttributeLossAnalysis = true + cfg.MaxParentDepth = 0 + cfg.GroupByAttributes = []string{"db.operation"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with leaf spans where some have extra attributes + td := createTestTraceWithMissingLeafAttributes(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Find the leaf summary span + summarySpan, found := findSummarySpanByName(td, "db_query") + require.True(t, found, "db_query summary should exist") + + // Check for missing_attributes + missingAttrs, exists := summarySpan.Attributes().Get("aggregation.missing_attributes") + require.True(t, exists, "missing_attributes should exist") + + missingAttrsStr := missingAttrs.Str() + assert.Contains(t, missingAttrsStr, "extra.attr", "should contain extra 
attributes (missing from some spans)") +} + +// Helper function to create test span nodes +func createTestSpanNodes(t *testing.T, attrSets []map[string]string) []*spanNode { + t.Helper() + + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + nodes := make([]*spanNode, 0, len(attrSets)) + for i, attrs := range attrSets { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("test") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + + for k, v := range attrs { + span.Attributes().PutStr(k, v) + } + + nodes = append(nodes, &spanNode{ + span: span, + scopeSpans: ss, + }) + } + + return nodes +} + +// Helper to create trace with diverse parent attributes +func createTestTraceWithDiverseParentAttributes(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + rootSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Root span + root := ss.Spans().AppendEmpty() + root.SetTraceID(traceID) + root.SetSpanID(rootSpanID) + root.SetName("root") + root.Status().SetCode(ptrace.StatusCodeOk) + + // 3 handler spans with different user.id attributes + userIDs := []string{"user-001", "user-002", "user-003"} + for i := range 3 { + handlerID := pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0}) + handler := ss.Spans().AppendEmpty() + handler.SetTraceID(traceID) + handler.SetSpanID(handlerID) + handler.SetParentSpanID(rootSpanID) + handler.SetName("handler") + 
handler.Status().SetCode(ptrace.StatusCodeOk) + handler.Attributes().PutStr("user.id", userIDs[i]) + handler.Attributes().PutStr("http.method", "GET") // Same for all + + // Each handler has a leaf SELECT span (for aggregation trigger) + selectSpan := ss.Spans().AppendEmpty() + selectSpan.SetTraceID(traceID) + selectSpan.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + selectSpan.SetParentSpanID(handlerID) + selectSpan.SetName("SELECT") + selectSpan.Status().SetCode(ptrace.StatusCodeOk) + selectSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + selectSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +// Helper to create trace with diverse leaf attributes (same grouping key, different other attributes) +func createTestTraceWithDiverseLeafAttributes(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parent := ss.Spans().AppendEmpty() + parent.SetTraceID(traceID) + parent.SetSpanID(parentSpanID) + parent.SetName("parent") + parent.Status().SetCode(ptrace.StatusCodeOk) + + // 3 leaf spans with same db.operation (grouping key) but different db.statement + statements := []string{ + "SELECT * FROM users WHERE id=1", + "SELECT * FROM users WHERE id=2", + "SELECT * FROM orders WHERE user_id=1", + } + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.Status().SetCode(ptrace.StatusCodeOk) + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + span.Attributes().PutStr("db.operation", "select") // Same for all (grouping key) + 
span.Attributes().PutStr("db.statement", statements[i]) // Different for each + } + + return td +} + +// Helper to create trace with missing leaf attributes (some spans have extra attrs) +func createTestTraceWithMissingLeafAttributes(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parent := ss.Spans().AppendEmpty() + parent.SetTraceID(traceID) + parent.SetSpanID(parentSpanID) + parent.SetName("parent") + parent.Status().SetCode(ptrace.StatusCodeOk) + + // 3 leaf spans with varying attribute presence + // Regardless of which span becomes template, some attributes will be missing + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.Status().SetCode(ptrace.StatusCodeOk) + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + span.Attributes().PutStr("db.operation", "select") // Same for all + + // Each span has a different extra attribute + // This ensures missing_attributes is always set regardless of template selection + switch i { + case 0: + span.Attributes().PutStr("extra.attr0", "value0") + case 1: + span.Attributes().PutStr("extra.attr1", "value1") + case 2: + span.Attributes().PutStr("extra.attr2", "value2") + } + } + + return td +} + +// Test that attribute loss analysis is skipped when EnableAttributeLossAnalysis is false +func TestLeafSpanPruning_AttributeLossAnalysisDisabled(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.EnableAttributeLossAnalysis = false // Explicitly 
disabled + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with spans that WOULD have diverse attributes if analysis was enabled + td := createTestTraceWithDiverseLeafAttributes(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Find the summary span + summarySpan, found := findSummarySpanByName(td, "db_query") + require.True(t, found, "db_query summary should exist") + + // Verify NO attribute loss attributes are added when analysis is disabled + _, diverseExists := summarySpan.Attributes().Get("aggregation.diverse_attributes") + _, missingExists := summarySpan.Attributes().Get("aggregation.missing_attributes") + + assert.False(t, diverseExists, "diverse_attributes should NOT exist when EnableAttributeLossAnalysis is false") + assert.False(t, missingExists, "missing_attributes should NOT exist when EnableAttributeLossAnalysis is false") + + // Verify aggregation still works (span_count should exist) + spanCount, exists := summarySpan.Attributes().Get("aggregation.span_count") + assert.True(t, exists, "span_count should exist") + assert.Equal(t, int64(3), spanCount.Int()) +} diff --git a/processor/spanpruningprocessor/benchmark_testdata_test.go b/processor/spanpruningprocessor/benchmark_testdata_test.go new file mode 100644 index 0000000000000..631701420edb5 --- /dev/null +++ b/processor/spanpruningprocessor/benchmark_testdata_test.go @@ -0,0 +1,241 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor + +import ( + "fmt" + "time" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +// testTraceID is a fixed trace ID used across all test trace generators. +var testTraceID = pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + +// makeSpanID converts a uint64 to a SpanID. 
+func makeSpanID(id uint64) pcommon.SpanID { + return pcommon.SpanID([8]byte{ + byte(id >> 56), byte(id >> 48), byte(id >> 40), byte(id >> 32), + byte(id >> 24), byte(id >> 16), byte(id >> 8), byte(id), + }) +} + +// generateTestTrace creates a flat test trace: root -> parents -> leaves. +func generateTestTrace(numSpans, leafSpansPerParent int) ptrace.Traces { + td := ptrace.NewTraces() + ss := td.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty() + + // Root span + root := ss.Spans().AppendEmpty() + root.SetTraceID(testTraceID) + root.SetSpanID(makeSpanID(1)) + root.SetName("root") + root.SetKind(ptrace.SpanKindServer) + root.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + root.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(time.Second))) + root.Status().SetCode(ptrace.StatusCodeOk) + + spanID := uint64(2) + numParents := max((numSpans-1)/leafSpansPerParent, 1) + + // Parent spans + parentIDs := make([]pcommon.SpanID, 0, numParents) + for i := 0; i < numParents && spanID < uint64(numSpans); i++ { + span := ss.Spans().AppendEmpty() + span.SetTraceID(testTraceID) + id := makeSpanID(spanID) + span.SetSpanID(id) + span.SetParentSpanID(root.SpanID()) + span.SetName(fmt.Sprintf("parent-%d", i)) + span.SetKind(ptrace.SpanKindInternal) + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(100 * time.Millisecond))) + span.Status().SetCode(ptrace.StatusCodeOk) + parentIDs = append(parentIDs, id) + spanID++ + } + + // Leaf spans + for i := 0; spanID < uint64(numSpans+1); i++ { + span := ss.Spans().AppendEmpty() + span.SetTraceID(testTraceID) + span.SetSpanID(makeSpanID(spanID)) + span.SetParentSpanID(parentIDs[i%len(parentIDs)]) + span.SetName("leaf-operation") + span.SetKind(ptrace.SpanKindClient) + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(10 * time.Millisecond))) + 
span.Status().SetCode(ptrace.StatusCodeOk) + span.Attributes().PutStr("http.method", "GET") + span.Attributes().PutStr("http.url", "/api/data") + spanID++ + } + + return td +} + +// generateSparseTrace creates a trace where only a small fraction aggregates. +func generateSparseTrace(numSpans, minSpans int) ptrace.Traces { + td := ptrace.NewTraces() + ss := td.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty() + + // Root span + root := ss.Spans().AppendEmpty() + root.SetTraceID(testTraceID) + root.SetSpanID(makeSpanID(1)) + root.SetName("root") + root.SetKind(ptrace.SpanKindServer) + root.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + root.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(time.Second))) + root.Status().SetCode(ptrace.StatusCodeOk) + + spanID := uint64(2) + + // Handler spans (unique names, won't aggregate) + numHandlers := numSpans / 10 + handlerIDs := make([]pcommon.SpanID, 0, numHandlers) + for i := range numHandlers { + span := ss.Spans().AppendEmpty() + span.SetTraceID(testTraceID) + id := makeSpanID(spanID) + span.SetSpanID(id) + span.SetParentSpanID(root.SpanID()) + span.SetName(fmt.Sprintf("handler-%d", i)) + span.SetKind(ptrace.SpanKindInternal) + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(100 * time.Millisecond))) + span.Status().SetCode(ptrace.StatusCodeOk) + handlerIDs = append(handlerIDs, id) + spanID++ + } + + // Unique leaf spans (won't aggregate) + numRepeated := minSpans * 2 + numUnique := numSpans - numHandlers - 1 - numRepeated + for i := 0; i < numUnique && spanID < uint64(numSpans+1); i++ { + span := ss.Spans().AppendEmpty() + span.SetTraceID(testTraceID) + span.SetSpanID(makeSpanID(spanID)) + span.SetParentSpanID(handlerIDs[i%len(handlerIDs)]) + span.SetName(fmt.Sprintf("unique-op-%d", i)) + span.SetKind(ptrace.SpanKindClient) + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + 
span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(10 * time.Millisecond))) + span.Status().SetCode(ptrace.StatusCodeOk) + span.Attributes().PutStr("db.system", "postgresql") + spanID++ + } + + // Repeated leaf spans (will aggregate) + if len(handlerIDs) > 0 { + targetHandler := handlerIDs[0] + for i := 0; i < numRepeated && spanID < uint64(numSpans+1); i++ { + span := ss.Spans().AppendEmpty() + span.SetTraceID(testTraceID) + span.SetSpanID(makeSpanID(spanID)) + span.SetParentSpanID(targetHandler) + span.SetName("SELECT") + span.SetKind(ptrace.SpanKindClient) + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(10 * time.Millisecond))) + span.Status().SetCode(ptrace.StatusCodeOk) + span.Attributes().PutStr("db.system", "postgresql") + span.Attributes().PutStr("db.operation", "select") + spanID++ + } + } + + return td +} + +// generateDeepTrace creates a trace with specified depth and branching factor. +// Each level has spans with the same name, enabling parent aggregation. 
+func generateDeepTrace(depth, branchingFactor, leafsPerBranch, maxSpans int) ptrace.Traces { + td := ptrace.NewTraces() + ss := td.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty() + + spanID := uint64(1) + totalSpans := 0 + + // Root span + root := ss.Spans().AppendEmpty() + root.SetTraceID(testTraceID) + root.SetSpanID(makeSpanID(spanID)) + root.SetName("root") + root.SetKind(ptrace.SpanKindServer) + root.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + root.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(time.Second))) + root.Status().SetCode(ptrace.StatusCodeOk) + spanID++ + totalSpans++ + + // Build tree level by level + currentLevel := []pcommon.SpanID{root.SpanID()} + + for d := 1; d < depth && totalSpans < maxSpans; d++ { + nextLevel := make([]pcommon.SpanID, 0, len(currentLevel)*branchingFactor) + levelName := fmt.Sprintf("level-%d", d) + + for _, parentID := range currentLevel { + for b := 0; b < branchingFactor && totalSpans < maxSpans; b++ { + span := ss.Spans().AppendEmpty() + span.SetTraceID(testTraceID) + id := makeSpanID(spanID) + span.SetSpanID(id) + span.SetParentSpanID(parentID) + span.SetName(levelName) + span.SetKind(ptrace.SpanKindInternal) + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(100 * time.Millisecond))) + span.Status().SetCode(ptrace.StatusCodeOk) + nextLevel = append(nextLevel, id) + spanID++ + totalSpans++ + } + } + currentLevel = nextLevel + } + + // Add leaf spans + for _, parentID := range currentLevel { + for l := 0; l < leafsPerBranch && totalSpans < maxSpans; l++ { + span := ss.Spans().AppendEmpty() + span.SetTraceID(testTraceID) + span.SetSpanID(makeSpanID(spanID)) + span.SetParentSpanID(parentID) + span.SetName("db-query") + span.SetKind(ptrace.SpanKindClient) + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now())) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(10 * 
time.Millisecond))) + span.Status().SetCode(ptrace.StatusCodeOk) + span.Attributes().PutStr("db.system", "postgresql") + span.Attributes().PutStr("db.operation", "select") + spanID++ + totalSpans++ + } + } + + return td +} + +// generateTestSpans extracts spanInfo slice from a trace for tree benchmarks. +func generateTestSpans(numSpans, leafSpansPerParent int) []spanInfo { + td := generateTestTrace(numSpans, leafSpansPerParent) + spans := make([]spanInfo, 0, numSpans) + + for i := 0; i < td.ResourceSpans().Len(); i++ { + for j := 0; j < td.ResourceSpans().At(i).ScopeSpans().Len(); j++ { + ss := td.ResourceSpans().At(i).ScopeSpans().At(j) + for k := 0; k < ss.Spans().Len(); k++ { + spans = append(spans, spanInfo{ + span: ss.Spans().At(k), + scopeSpans: ss, + }) + } + } + } + + return spans +} diff --git a/processor/spanpruningprocessor/config.go b/processor/spanpruningprocessor/config.go new file mode 100644 index 0000000000000..dd7b8fc645e87 --- /dev/null +++ b/processor/spanpruningprocessor/config.go @@ -0,0 +1,250 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "errors" + "fmt" + "strings" + "time" + + "github.com/gobwas/glob" + "go.opentelemetry.io/collector/component" +) + +// OutlierMethod defines the statistical method for outlier detection. +type OutlierMethod string + +const ( + // OutlierMethodIQR uses Interquartile Range for outlier detection. + // Threshold: Q3 + (IQRMultiplier * IQR) + OutlierMethodIQR OutlierMethod = "iqr" + + // OutlierMethodMAD uses Median Absolute Deviation for outlier detection. + // Threshold: median + (MADMultiplier * MAD * 1.4826) + // MAD is more robust to extreme outliers than IQR. + OutlierMethodMAD OutlierMethod = "mad" +) + +// OutlierAnalysisConfig controls outlier detection and attribute correlation. 
+type OutlierAnalysisConfig struct { + // Method selects the statistical method for outlier detection. + // Valid values: "iqr" (default), "mad" + Method OutlierMethod `mapstructure:"method"` + + // IQRMultiplier sets the threshold for IQR-based outlier detection. + // Outliers are spans with duration > Q3 + (IQRMultiplier * IQR). + // Common values: 1.5 (standard), 3.0 (extreme only). + // Default: 1.5 + IQRMultiplier float64 `mapstructure:"iqr_multiplier"` + + // MADMultiplier sets the threshold for MAD-based outlier detection. + // Outliers are spans with duration > median + (MADMultiplier * MAD * 1.4826). + // Common values: 2.5-3.0 (standard), 3.5+ (extreme only). + // Default: 3.0 + MADMultiplier float64 `mapstructure:"mad_multiplier"` + + // MinGroupSize is the minimum number of spans needed for reliable + // outlier detection. Groups smaller than this skip outlier analysis. + // Must be at least 4 (need quartiles). + // Default: 7 + MinGroupSize int `mapstructure:"min_group_size"` + + // CorrelationMinOccurrence is the minimum fraction of outliers that must + // share an attribute value for it to be reported as correlated. + // Range: (0.0, 1.0] + // Default: 0.75 (75% of outliers must share the value) + CorrelationMinOccurrence float64 `mapstructure:"correlation_min_occurrence"` + + // CorrelationMaxNormalOccurrence is the maximum fraction of normal spans + // that can have the correlated value. Lower values mean stronger signal. + // Range: [0.0, 1.0) + // Default: 0.25 (at most 25% of normal spans can have the value) + CorrelationMaxNormalOccurrence float64 `mapstructure:"correlation_max_normal_occurrence"` + + // MaxCorrelatedAttributes limits how many correlated attributes are + // reported in the summary span attribute. + // Default: 5 + MaxCorrelatedAttributes int `mapstructure:"max_correlated_attributes"` + + // PreserveOutliers controls whether outlier spans are kept as individual + // spans instead of being aggregated. 
When true, only normal spans are + // aggregated; outliers remain in the trace. + // Default: false (aggregate all, add summary attributes) + PreserveOutliers bool `mapstructure:"preserve_outliers"` + + // MaxPreservedOutliers limits how many outlier spans are preserved per + // aggregation group. Spans are selected by most extreme duration first. + // 0 = preserve all detected outliers. + // Default: 2 + MaxPreservedOutliers int `mapstructure:"max_preserved_outliers"` + + // PreserveOnlyWithCorrelation only preserves outliers when a strong + // attribute correlation is found. This avoids preserving outliers that + // are just random variance. + // Default: false + PreserveOnlyWithCorrelation bool `mapstructure:"preserve_only_with_correlation"` + + // MinOutlierThresholdPercent sets a minimum percentage above median that + // a span must exceed to be considered an outlier, regardless of statistical + // method. This prevents overly sensitive outlier detection when IQR or MAD + // is zero (tightly clustered data) or produces very small thresholds. + // Range: [0.0, 1.0+] + // Default: 0.1 (10% above median) + MinOutlierThresholdPercent float64 `mapstructure:"min_outlier_threshold_percent"` +} + +// Config defines the configuration options for the span pruning processor +// and the rules used to identify and aggregate similar spans. +type Config struct { + // GroupByAttributes lists attribute patterns used to decide which leaf spans + // belong in the same aggregation group. Spans must share the span name and + // have identical values for every matched attribute to be grouped. Patterns + // accept glob syntax, for example: + // - "db.*" matches db.operation, db.name, db.statement, etc. + // - "http.request.*" matches http.request.method, http.request.header, etc. + // - "service" matches only the exact key "service" + // Examples: ["db.*", "http.method"], ["rpc.*"]. 
+ GroupByAttributes []string `mapstructure:"group_by_attributes"` + + // MinSpansToAggregate is the minimum number of similar spans required before + // aggregation occurs. Groups smaller than this threshold are preserved. + // Default: 5 + MinSpansToAggregate int `mapstructure:"min_spans_to_aggregate"` + + // MaxParentDepth bounds how many ancestor levels above the aggregated leaves + // can also be aggregated. Use 0 to aggregate only leaves, -1 for unlimited + // depth, or a positive integer to cap traversal. + // Default: 1 + MaxParentDepth int `mapstructure:"max_parent_depth"` + + // AggregationAttributePrefix prefixes all aggregation-related attributes that + // are added to summary spans. + // Default: "aggregation." + AggregationAttributePrefix string `mapstructure:"aggregation_attribute_prefix"` + + // AggregationHistogramBuckets lists cumulative histogram bucket upper bounds + // for latency tracking on aggregated spans. Empty slice disables histograms. + // Example: [5*time.Millisecond, 10*time.Millisecond, 100*time.Millisecond] + // Default: [5*time.Millisecond, 10*time.Millisecond, 25*time.Millisecond, 50*time.Millisecond, 100*time.Millisecond, 250*time.Millisecond, 500*time.Millisecond, time.Second, 2500*time.Millisecond, 5*time.Second, 10*time.Second] + AggregationHistogramBuckets []time.Duration `mapstructure:"aggregation_histogram_buckets"` + + // EnableAttributeLossAnalysis toggles analysis of attribute loss during + // aggregation. When enabled, the processor compares attribute sets across + // aggregated spans, records loss metrics, and annotates summary spans. + // Default: false (to reduce telemetry overhead) + EnableAttributeLossAnalysis bool `mapstructure:"enable_attribute_loss_analysis"` + + // AttributeLossExemplarSampleRate controls the fraction of attribute-loss + // metric recordings that include exemplars when loss analysis is enabled. + // Range: 0.0 (disabled) to 1.0 (always). Default: 0.01 (1%). 
+ AttributeLossExemplarSampleRate float64 `mapstructure:"attribute_loss_exemplar_sample_rate"` + + // EnableOutlierAnalysis toggles IQR-based outlier detection and attribute + // correlation. When enabled, adds duration_median_ns and outlier_correlated_attributes + // to summary spans. + // Default: false + EnableOutlierAnalysis bool `mapstructure:"enable_outlier_analysis"` + + // EnableBytesMetrics toggles measurement of serialized trace sizes before + // and after pruning. When enabled, records bytes_received and bytes_emitted + // metrics. This requires serializing the trace data which can be expensive + // for large batches. + // Default: false + EnableBytesMetrics bool `mapstructure:"enable_bytes_metrics"` + + // OutlierAnalysis configures IQR-based outlier detection and + // attribute correlation for aggregation groups. + OutlierAnalysis OutlierAnalysisConfig `mapstructure:"outlier_analysis"` +} + +var _ component.Config = (*Config)(nil) + +// Validate checks if the processor configuration is valid +func (cfg *Config) Validate() error { + if cfg.MinSpansToAggregate < 2 { + return errors.New("min_spans_to_aggregate must be at least 2") + } + + if cfg.MaxParentDepth < -1 { + return errors.New("max_parent_depth must be -1 (unlimited) or >= 0") + } + + // Validate AggregationAttributePrefix + prefix := strings.TrimSpace(cfg.AggregationAttributePrefix) + if prefix == "" { + return errors.New("aggregation_attribute_prefix cannot be empty") + } + if strings.ContainsAny(prefix, " \t\n\r") { + return errors.New("aggregation_attribute_prefix cannot contain whitespace") + } + + // Validate GroupByAttributes glob patterns + for i, pattern := range cfg.GroupByAttributes { + if strings.TrimSpace(pattern) == "" { + return fmt.Errorf("group_by_attributes[%d] cannot be empty", i) + } + // Try to compile the same way processor.go does to catch invalid syntax early + _, err := glob.Compile(pattern) + if err != nil { + return fmt.Errorf("invalid glob pattern at 
group_by_attributes[%d]: %q: %w", i, pattern, err) + } + } + + // Validate histogram buckets + for i, bucket := range cfg.AggregationHistogramBuckets { + if bucket <= 0 { + return errors.New("histogram bucket values must be positive") + } + if i > 0 && bucket <= cfg.AggregationHistogramBuckets[i-1] { + return errors.New("histogram buckets must be sorted in ascending order") + } + } + + // Validate AttributeLossExemplarSampleRate + if cfg.AttributeLossExemplarSampleRate < 0 || cfg.AttributeLossExemplarSampleRate > 1 { + return errors.New("attribute_loss_exemplar_sample_rate must be between 0.0 and 1.0") + } + + if err := cfg.OutlierAnalysis.Validate(cfg.EnableOutlierAnalysis); err != nil { + return err + } + + return nil +} + +// Validate checks OutlierAnalysisConfig for invalid values. +func (cfg *OutlierAnalysisConfig) Validate(enabled bool) error { + if !enabled { + return nil // Skip validation when disabled + } + if cfg.Method != "" && cfg.Method != OutlierMethodIQR && cfg.Method != OutlierMethodMAD { + return fmt.Errorf("outlier_analysis.method must be %q or %q", OutlierMethodIQR, OutlierMethodMAD) + } + if cfg.IQRMultiplier <= 0 { + return errors.New("outlier_analysis.iqr_multiplier must be positive") + } + if cfg.MADMultiplier <= 0 { + return errors.New("outlier_analysis.mad_multiplier must be positive") + } + if cfg.MinGroupSize < 4 { + return errors.New("outlier_analysis.min_group_size must be at least 4") + } + if cfg.CorrelationMinOccurrence <= 0 || cfg.CorrelationMinOccurrence > 1 { + return errors.New("outlier_analysis.correlation_min_occurrence must be in range (0.0, 1.0]") + } + if cfg.CorrelationMaxNormalOccurrence < 0 || cfg.CorrelationMaxNormalOccurrence >= 1 { + return errors.New("outlier_analysis.correlation_max_normal_occurrence must be in range [0.0, 1.0)") + } + if cfg.MaxCorrelatedAttributes < 1 { + return errors.New("outlier_analysis.max_correlated_attributes must be at least 1") + } + if cfg.PreserveOutliers && cfg.MaxPreservedOutliers < 0 
{ + return errors.New("outlier_analysis.max_preserved_outliers must be >= 0") + } + if cfg.MinOutlierThresholdPercent < 0 { + return errors.New("outlier_analysis.min_outlier_threshold_percent must be >= 0") + } + return nil +} diff --git a/processor/spanpruningprocessor/config_test.go b/processor/spanpruningprocessor/config_test.go new file mode 100644 index 0000000000000..4c1568cc117bb --- /dev/null +++ b/processor/spanpruningprocessor/config_test.go @@ -0,0 +1,429 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor + +import ( + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/confmap/confmaptest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" +) + +var defaultHistogramBuckets = []time.Duration{ + 5 * time.Millisecond, + 10 * time.Millisecond, + 25 * time.Millisecond, + 50 * time.Millisecond, + 100 * time.Millisecond, + 250 * time.Millisecond, + 500 * time.Millisecond, + time.Second, + 2500 * time.Millisecond, + 5 * time.Second, + 10 * time.Second, +} + +func TestLoadConfig(t *testing.T) { + t.Parallel() + + tests := []struct { + id component.ID + expected *Config + errorMessage string + }{ + { + id: component.NewIDWithName(metadata.Type, ""), + expected: &Config{ + GroupByAttributes: []string{"db.operation"}, + MinSpansToAggregate: 5, + MaxParentDepth: 1, + AggregationAttributePrefix: "aggregation.", + AggregationHistogramBuckets: defaultHistogramBuckets, + EnableAttributeLossAnalysis: false, + AttributeLossExemplarSampleRate: 0.01, + EnableOutlierAnalysis: false, + OutlierAnalysis: OutlierAnalysisConfig{ + Method: OutlierMethodIQR, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + 
MaxCorrelatedAttributes: 5, + PreserveOutliers: false, + MaxPreservedOutliers: 2, + PreserveOnlyWithCorrelation: false, + MinOutlierThresholdPercent: 0.1, + }, + }, + }, + { + id: component.NewIDWithName(metadata.Type, "custom"), + expected: &Config{ + GroupByAttributes: []string{"db.operation", "db.name"}, + MinSpansToAggregate: 3, + MaxParentDepth: 1, + AggregationAttributePrefix: "batch.", + AggregationHistogramBuckets: defaultHistogramBuckets, + EnableAttributeLossAnalysis: false, + AttributeLossExemplarSampleRate: 0.01, + EnableOutlierAnalysis: false, + OutlierAnalysis: OutlierAnalysisConfig{ + Method: OutlierMethodIQR, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + PreserveOutliers: false, + MaxPreservedOutliers: 2, + PreserveOnlyWithCorrelation: false, + MinOutlierThresholdPercent: 0.1, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.id.String(), func(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "config.yaml")) + require.NoError(t, err) + + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(tt.id.String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + oCfg := cfg.(*Config) + if tt.errorMessage != "" { + assert.EqualError(t, oCfg.Validate(), tt.errorMessage) + return + } + + assert.NoError(t, oCfg.Validate()) + assert.Equal(t, tt.expected, oCfg) + }) + } +} + +func TestConfig_Validate(t *testing.T) { + tests := []struct { + name string + config *Config + expectError bool + }{ + { + name: "valid config", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + GroupByAttributes: []string{"db.operation"}, + }, + expectError: false, + }, + { + name: "min_spans_to_aggregate below minimum", + config: &Config{ + MinSpansToAggregate: 1, + }, + expectError: true, + }, + { + name: "min_spans_to_aggregate zero", 
+ config: &Config{ + MinSpansToAggregate: 0, + }, + expectError: true, + }, + { + name: "min_spans_to_aggregate negative", + config: &Config{ + MinSpansToAggregate: -1, + }, + expectError: true, + }, + { + name: "empty aggregation_attribute_prefix", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "", + }, + expectError: true, + }, + { + name: "whitespace-only aggregation_attribute_prefix", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: " ", + }, + expectError: true, + }, + { + name: "empty group_by_attributes pattern", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + GroupByAttributes: []string{"db.operation", ""}, + }, + expectError: true, + }, + { + name: "whitespace-only group_by_attributes pattern", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + GroupByAttributes: []string{"db.operation", " "}, + }, + expectError: true, + }, + { + name: "invalid glob pattern in group_by_attributes", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + GroupByAttributes: []string{"db.operation", "[invalid*"}, + }, + expectError: true, + }, + + { + name: "max_parent_depth unlimited", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + MaxParentDepth: -1, + }, + expectError: false, + }, + { + name: "invalid attribute_loss_exemplar_sample_rate negative", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + AttributeLossExemplarSampleRate: -0.1, + }, + expectError: true, + }, + { + name: "invalid attribute_loss_exemplar_sample_rate > 1", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + AttributeLossExemplarSampleRate: 1.5, + }, + expectError: true, + }, + { + name: "valid outlier analysis config", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: 
"aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + Method: OutlierMethodIQR, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: false, + }, + { + name: "valid outlier analysis with MAD method", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + Method: OutlierMethodMAD, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: false, + }, + { + name: "invalid outlier method", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + Method: "invalid", + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: true, + }, + { + name: "outlier analysis disabled skips validation", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: false, + OutlierAnalysis: OutlierAnalysisConfig{ + IQRMultiplier: -1, // invalid but ignored when disabled + }, + }, + expectError: false, + }, + { + name: "invalid outlier iqr_multiplier", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + IQRMultiplier: 0, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: true, + }, + { + name: "invalid outlier mad_multiplier", + config: &Config{ + 
MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + IQRMultiplier: 1.5, + MADMultiplier: 0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: true, + }, + { + name: "invalid outlier min_group_size", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + IQRMultiplier: 1.5, + MinGroupSize: 3, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: true, + }, + { + name: "invalid outlier correlation_min_occurrence zero", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: true, + }, + { + name: "invalid outlier correlation_max_normal_occurrence", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 1.0, + MaxCorrelatedAttributes: 5, + }, + }, + expectError: true, + }, + { + name: "invalid outlier max_correlated_attributes", + config: &Config{ + MinSpansToAggregate: 2, + AggregationAttributePrefix: "aggregation.", + EnableOutlierAnalysis: true, + OutlierAnalysis: OutlierAnalysisConfig{ + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 0, + }, + }, + expectError: true, + }, + } + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + err := tt.config.Validate() + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestEnableAttributeLossAnalysis(t *testing.T) { + factory := NewFactory() + + t.Run("disabled by default", func(t *testing.T) { + cfg := factory.CreateDefaultConfig().(*Config) + assert.False(t, cfg.EnableAttributeLossAnalysis) + }) + + t.Run("has correct sample rate default", func(t *testing.T) { + cfg := factory.CreateDefaultConfig().(*Config) + assert.Equal(t, 0.01, cfg.AttributeLossExemplarSampleRate) + }) + + t.Run("can be enabled", func(t *testing.T) { + c := &Config{ + EnableAttributeLossAnalysis: true, + } + assert.True(t, c.EnableAttributeLossAnalysis) + }) + + t.Run("can be disabled explicitly", func(t *testing.T) { + c := &Config{ + EnableAttributeLossAnalysis: false, + } + assert.False(t, c.EnableAttributeLossAnalysis) + }) +} diff --git a/processor/spanpruningprocessor/coverage.out b/processor/spanpruningprocessor/coverage.out new file mode 100644 index 0000000000000..11ba6292d16d8 --- /dev/null +++ b/processor/spanpruningprocessor/coverage.out @@ -0,0 +1,414 @@ +mode: set +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:15.63,17.2 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:19.64,21.2 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:51.68,53.2 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:56.45,59.44 3 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:59.44,61.3 1 0 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:66.126,68.29 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:68.29,70.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go:71.2,155.23 29 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:35.59,36.21 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:36.21,38.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:39.2,43.33 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:43.33,45.33 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:45.33,48.4 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:50.2,50.16 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:55.38,59.2 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:63.103,66.26 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:66.26,68.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:71.2,71.45 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:71.45,73.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:76.2,76.28 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:76.28,78.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:80.2,80.44 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:85.78,95.29 5 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:95.29,106.76 5 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:106.76,108.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:111.3,114.39 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:114.39,115.52 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:115.52,121.5 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:125.3,125.36 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:125.36,129.40 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:129.40,131.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:132.4,132.50 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:134.3,134.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:138.2,138.49 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:138.49,139.59 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:139.59,142.4 2 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:145.2,145.20 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:151.147,184.20 20 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:184.20,186.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:189.2,189.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:189.34,193.50 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:193.50,196.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:199.3,199.39 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:199.39,205.52 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:205.52,207.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:212.2,212.51 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:212.51,215.63 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:215.63,217.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:220.3,221.43 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:221.43,223.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:227.2,227.37 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:227.37,229.3 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:230.2,230.37 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:230.37,232.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/aggregation.go:234.2,234.16 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:30.46,32.2 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:40.87,41.39 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:41.39,43.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:45.2,54.29 5 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:54.29,55.69 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:55.69,56.33 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:56.33,58.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:59.4,61.15 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:65.2,67.43 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:67.43,71.26 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:71.26,76.23 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:76.23,79.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:79.10,82.5 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:83.4,83.21 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:83.21,88.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:89.9,89.29 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:89.29,96.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:100.2,100.49 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:100.49,101.41 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:101.41,102.54 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:102.54,104.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:105.4,105.38 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:108.2,111.15 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:120.70,121.21 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:121.21,123.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:125.2,127.15 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:127.15,129.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:131.2,132.37 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:132.37,133.12 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:133.12,135.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:136.3,139.20 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:142.2,142.15 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:142.15,144.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/attribute_loss.go:146.2,146.20 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:150.37,151.33 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:151.33,153.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:155.2,155.29 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:155.29,157.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:160.2,161.18 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:161.18,163.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:164.2,164.44 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:164.44,166.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:169.2,169.48 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:169.48,170.39 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:170.39,172.4 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:174.3,175.17 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:175.17,177.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:181.2,181.57 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:181.57,182.18 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:182.18,184.4 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:185.3,185.62 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:185.62,187.4 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:191.2,191.88 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:191.88,193.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:195.2,195.80 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:195.80,197.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:199.2,199.12 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:203.64,204.14 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:204.14,206.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:207.2,207.90 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:207.90,209.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:210.2,210.28 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:210.28,212.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:213.2,213.28 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:213.28,215.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:216.2,216.26 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:216.26,218.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:219.2,219.75 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:219.75,221.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:222.2,222.87 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:222.87,224.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:225.2,225.37 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:225.37,227.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:228.2,228.58 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:228.58,230.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/config.go:231.2,231.12 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/factory.go:21.37,26.2 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/factory.go:28.45,62.2 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/factory.go:69.29,73.16 3 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/factory.go:73.16,75.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/factory.go:77.2,78.16 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/factory.go:78.16,80.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/factory.go:82.2,89.44 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:17.18,19.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:25.71,40.57 9 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:40.57,41.47 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:41.47,42.31 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:42.31,44.10 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:47.3,47.14 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:51.2,52.30 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:52.30,54.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:55.2,58.27 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:58.27,63.3 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:65.2,65.25 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:70.75,79.2 7 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:84.73,86.25 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:86.25,88.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:90.2,95.24 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:95.24,99.3 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:102.2,106.22 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:111.98,114.33 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:114.33,117.3 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/grouping.go:118.2,118.15 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:38.91,40.26 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:40.26,42.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:45.2,46.29 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:46.29,52.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:53.2,53.44 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:53.44,55.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:58.2,59.18 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:59.18,61.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:63.2,66.16 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:67.24,68.90 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:69.10,70.90 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:73.2,76.17 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:76.17,77.50 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:77.50,81.4 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:85.2,85.57 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:85.57,93.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:96.2,112.3 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:123.103,128.14 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:128.14,130.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:130.8,132.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:135.2,144.30 7 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:144.30,145.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:145.34,147.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:147.9,149.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:152.2,152.46 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:163.103,168.14 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:168.14,170.3 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:170.8,172.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:175.2,176.30 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:176.30,178.14 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:178.14,180.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:181.3,181.22 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:185.2,188.14 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:188.14,190.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:190.8,192.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:196.2,197.14 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:197.14,200.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:200.8,203.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:206.2,208.30 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:208.30,209.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:209.34,211.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:211.9,213.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:216.2,216.46 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:227.26,236.46 6 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:236.46,237.48 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:237.48,239.34 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:239.34,240.13 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:243.4,244.55 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:244.55,246.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:247.4,249.39 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:249.39,250.13 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:253.4,259.6 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:263.2,263.28 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:263.28,265.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:268.2,268.47 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:268.47,269.53 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:269.53,271.4 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:272.3,272.51 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:275.2,275.39 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:275.39,277.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:279.2,279.21 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:283.87,285.30 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:285.30,286.75 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:286.75,287.24 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:287.24,289.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:290.4,291.15 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:294.2,294.15 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:298.69,299.28 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:299.28,301.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:303.2,304.33 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:304.33,305.12 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:305.12,307.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:308.3,311.27 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:313.2,313.20 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:317.48,319.2 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:327.30,328.71 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:328.71,330.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:333.2,333.72 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:333.72,335.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:338.2,339.93 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:339.93,341.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:344.2,345.39 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:345.39,347.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:349.2,352.29 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:352.29,353.47 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:353.47,355.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:355.9,357.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:361.2,361.47 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:361.47,363.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/outlier.go:365.2,365.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:43.143,46.48 2 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:46.48,48.17 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:48.17,50.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:51.3,53.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:56.2,62.8 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:66.66,69.2 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:73.73,75.15 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:75.15,77.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:78.2,78.15 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:78.15,80.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:81.2,81.30 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:86.113,92.2 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:96.108,101.48 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:101.48,102.68 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:102.68,104.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:106.2,113.35 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:113.35,116.3 2 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:119.2,119.25 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:119.25,123.3 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:125.2,125.16 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:130.99,134.33 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:134.33,137.35 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:137.35,140.37 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:140.37,147.5 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:151.2,151.19 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:157.84,160.29 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:160.29,162.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:165.2,166.33 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:166.33,168.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:171.2,179.29 5 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:179.29,181.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:187.126,190.25 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:190.25,192.3 1 0 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:195.2,204.42 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:204.42,205.48 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:205.48,206.12 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:209.3,213.37 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:213.37,217.57 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:217.57,219.44 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:219.44,221.6 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:225.4,225.73 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:225.73,233.35 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:233.35,235.6 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:238.5,238.59 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:238.59,239.14 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:245.3,249.36 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:249.36,254.45 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:254.45,257.5 2 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:259.4,259.27 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:259.27,268.5 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:271.3,281.39 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:281.39,283.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:284.3,287.45 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:287.45,289.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:292.2,292.33 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:292.33,294.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:298.2,298.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:298.34,300.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:303.2,306.26 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:306.26,308.69 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:308.69,309.9 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:313.3,314.32 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:314.32,315.9 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:319.3,320.40 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:320.40,323.4 2 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:326.3,327.46 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:327.46,328.22 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:328.22,329.13 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:332.4,333.38 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:333.38,337.58 2 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:337.58,339.45 2 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:339.45,341.7 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:346.4,350.37 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:350.37,355.46 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:355.46,358.6 2 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:360.5,360.28 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:360.28,369.6 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:372.4,380.31 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:380.31,382.5 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:383.4,383.47 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:386.3,386.28 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:386.28,387.9 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:391.3,392.10 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/processor.go:395.2,395.26 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:27.92,33.51 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:33.51,35.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:37.2,37.29 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:37.29,40.3 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:42.2,42.13 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:47.111,53.13 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:53.13,58.3 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:58.8,59.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:59.34,61.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:62.3,62.34 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:62.34,64.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:65.3,65.49 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:65.49,67.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:68.3,68.43 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:68.43,70.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:72.2,75.31 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:75.31,78.43 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:78.43,79.26 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:79.26,81.10 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:85.3,85.57 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/stats.go:85.57,87.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:36.76,41.21 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:41.21,43.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:46.2,46.29 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:46.29,53.3 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:57.2,60.37 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:60.37,62.25 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:62.25,65.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:65.9,65.63 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:65.63,69.30 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:69.30,71.5 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:72.4,72.51 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:73.9,76.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:80.2,81.37 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:81.37,82.18 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:82.18,84.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:88.2,88.19 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:88.19,91.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:91.8,91.52 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:91.52,93.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:95.2,95.27 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:95.27,98.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:100.2,100.13 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:104.45,106.2 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:111.106,112.26 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:112.26,114.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:116.2,117.34 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:117.34,118.45 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:118.45,120.4 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:122.2,122.24 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:127.67,128.27 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:128.27,130.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:132.2,135.35 3 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:135.35,136.25 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:136.25,137.47 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:137.47,140.5 2 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:144.2,144.19 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:149.82,151.17 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:151.17,153.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:156.2,156.24 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:156.24,158.3 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:161.2,161.27 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:161.27,163.3 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:166.2,166.38 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:166.38,167.59 1 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:167.59,169.4 1 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/tree.go:172.2,172.13 1 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:17.66,22.2 4 0 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:24.176,37.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:39.166,53.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:55.182,68.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:70.173,83.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:85.175,99.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:101.163,115.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:117.164,131.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:133.184,146.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:148.175,161.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:163.176,176.2 4 1 
+github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:178.158,192.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:194.160,208.2 4 1 +github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go:210.162,224.2 4 1 diff --git a/processor/spanpruningprocessor/doc.go b/processor/spanpruningprocessor/doc.go new file mode 100644 index 0000000000000..3f125a6ff26d1 --- /dev/null +++ b/processor/spanpruningprocessor/doc.go @@ -0,0 +1,11 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:generate mdatagen metadata.yaml + +// Package spanpruningprocessor detects duplicate or similar leaf spans within a +// single trace and replaces each set with a single aggregated summary span. +// Leaf spans are spans that are never referenced as a parent by another span. +// When all children of a parent are aggregated, the parent can also be +// aggregated, preserving the trace structure while reducing volume. +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" diff --git a/processor/spanpruningprocessor/documentation.md b/processor/spanpruningprocessor/documentation.md new file mode 100644 index 0000000000000..70dce50c6ecaf --- /dev/null +++ b/processor/spanpruningprocessor/documentation.md @@ -0,0 +1,79 @@ +[comment]: <> (Code generated by mdatagen. DO NOT EDIT.) + +# spanpruning + +## Internal Telemetry + +The following telemetry is emitted by this component. 
+ +### otelcol_processor_spanpruning_aggregation_group_size + +Distribution of spans per aggregation group [Development] + +| Unit | Metric Type | Value Type | Stability | +| ---- | ----------- | ---------- | --------- | +| {spans} | Histogram | Int | Development | + +### otelcol_processor_spanpruning_aggregations_created + +Total aggregation summary spans created [Development] + +| Unit | Metric Type | Value Type | Monotonic | Stability | +| ---- | ----------- | ---------- | --------- | --------- | +| {spans} | Sum | Int | true | Development | + +### otelcol_processor_spanpruning_outliers_correlations_detected + +Groups where outliers had correlated attributes [Development] + +| Unit | Metric Type | Value Type | Monotonic | Stability | +| ---- | ----------- | ---------- | --------- | --------- | +| {groups} | Sum | Int | true | Development | + +### otelcol_processor_spanpruning_outliers_detected + +Spans identified as outliers by analysis [Development] + +| Unit | Metric Type | Value Type | Monotonic | Stability | +| ---- | ----------- | ---------- | --------- | --------- | +| {spans} | Sum | Int | true | Development | + +### otelcol_processor_spanpruning_outliers_preserved + +Outlier spans kept (excluded from aggregation) [Development] + +| Unit | Metric Type | Value Type | Monotonic | Stability | +| ---- | ----------- | ---------- | --------- | --------- | +| {spans} | Sum | Int | true | Development | + +### otelcol_processor_spanpruning_processing_duration + +Time to process each batch of traces [Development] + +| Unit | Metric Type | Value Type | Stability | +| ---- | ----------- | ---------- | --------- | +| s | Histogram | Double | Development | + +### otelcol_processor_spanpruning_spans_pruned + +Total spans pruned/removed by aggregation [Development] + +| Unit | Metric Type | Value Type | Monotonic | Stability | +| ---- | ----------- | ---------- | --------- | --------- | +| {spans} | Sum | Int | true | Development | + +### 
otelcol_processor_spanpruning_spans_received + +Total spans received by the processor [Development] + +| Unit | Metric Type | Value Type | Monotonic | Stability | +| ---- | ----------- | ---------- | --------- | --------- | +| {spans} | Sum | Int | true | Development | + +### otelcol_processor_spanpruning_traces_processed + +Total traces processed [Development] + +| Unit | Metric Type | Value Type | Monotonic | Stability | +| ---- | ----------- | ---------- | --------- | --------- | +| {traces} | Sum | Int | true | Development | diff --git a/processor/spanpruningprocessor/factory.go b/processor/spanpruningprocessor/factory.go new file mode 100644 index 0000000000000..8a3457946b378 --- /dev/null +++ b/processor/spanpruningprocessor/factory.go @@ -0,0 +1,91 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "context" + "time" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/processor" + "go.opentelemetry.io/collector/processor/processorhelper" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" +) + +var processorCapabilities = consumer.Capabilities{MutatesData: true} + +// NewFactory returns a new factory for the Span Pruning processor. 
+func NewFactory() processor.Factory { + return processor.NewFactory( + metadata.Type, + createDefaultConfig, + processor.WithTraces(createTracesProcessor, metadata.TracesStability)) +} + +func createDefaultConfig() component.Config { + return &Config{ + MinSpansToAggregate: 5, + MaxParentDepth: 1, + AggregationAttributePrefix: "aggregation.", + AggregationHistogramBuckets: []time.Duration{ + 5 * time.Millisecond, + 10 * time.Millisecond, + 25 * time.Millisecond, + 50 * time.Millisecond, + 100 * time.Millisecond, + 250 * time.Millisecond, + 500 * time.Millisecond, + time.Second, + 2500 * time.Millisecond, + 5 * time.Second, + 10 * time.Second, + }, + EnableAttributeLossAnalysis: false, + AttributeLossExemplarSampleRate: 0.01, // 1% default + EnableOutlierAnalysis: false, + OutlierAnalysis: OutlierAnalysisConfig{ + Method: OutlierMethodIQR, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + PreserveOutliers: false, + MaxPreservedOutliers: 2, + PreserveOnlyWithCorrelation: false, + MinOutlierThresholdPercent: 0.1, + }, + } +} + +func createTracesProcessor( + ctx context.Context, + set processor.Settings, + cfg component.Config, + nextConsumer consumer.Traces, +) (processor.Traces, error) { + pCfg := cfg.(*Config) + + telemetryBuilder, err := metadata.NewTelemetryBuilder(set.TelemetrySettings) + if err != nil { + return nil, err + } + + p, err := newSpanPruningProcessor(set, pCfg, telemetryBuilder) + if err != nil { + return nil, err + } + + return processorhelper.NewTraces( + ctx, + set, + cfg, + nextConsumer, + p.processTraces, + processorhelper.WithCapabilities(processorCapabilities), + processorhelper.WithShutdown(p.shutdown)) +} diff --git a/processor/spanpruningprocessor/factory_test.go b/processor/spanpruningprocessor/factory_test.go new file mode 100644 index 0000000000000..0a18549eb4201 --- /dev/null +++ 
b/processor/spanpruningprocessor/factory_test.go @@ -0,0 +1,48 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/processor/processortest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" +) + +func TestFactory_Type(t *testing.T) { + factory := NewFactory() + assert.Equal(t, metadata.Type, factory.Type()) +} + +func TestFactory_CreateDefaultConfig(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + assert.NotNil(t, cfg) + assert.NoError(t, componenttest.CheckConfigStruct(cfg)) + + oCfg := cfg.(*Config) + assert.Equal(t, 5, oCfg.MinSpansToAggregate) + assert.Equal(t, "aggregation.", oCfg.AggregationAttributePrefix) +} + +func TestFactory_CreateTracesProcessor(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + tp, err := factory.CreateTraces( + t.Context(), + processortest.NewNopSettings(metadata.Type), + cfg, + consumertest.NewNop(), + ) + + require.NoError(t, err) + assert.NotNil(t, tp) +} diff --git a/processor/spanpruningprocessor/generated_component_test.go b/processor/spanpruningprocessor/generated_component_test.go new file mode 100644 index 0000000000000..2e0d80129dd08 --- /dev/null +++ b/processor/spanpruningprocessor/generated_component_test.go @@ -0,0 +1,153 @@ +// Code generated by mdatagen. DO NOT EDIT. 
+ +package spanpruningprocessor + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/confmap/confmaptest" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/plog" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/pdata/ptrace" + "go.opentelemetry.io/collector/processor" + "go.opentelemetry.io/collector/processor/processortest" +) + +var typ = component.MustNewType("spanpruning") + +func TestComponentFactoryType(t *testing.T) { + require.Equal(t, typ, NewFactory().Type()) +} + +func TestComponentConfigStruct(t *testing.T) { + require.NoError(t, componenttest.CheckConfigStruct(NewFactory().CreateDefaultConfig())) +} + +func TestComponentLifecycle(t *testing.T) { + factory := NewFactory() + + tests := []struct { + createFn func(ctx context.Context, set processor.Settings, cfg component.Config) (component.Component, error) + name string + }{ + + { + name: "traces", + createFn: func(ctx context.Context, set processor.Settings, cfg component.Config) (component.Component, error) { + return factory.CreateTraces(ctx, set, cfg, consumertest.NewNop()) + }, + }, + } + + cm, err := confmaptest.LoadConf("metadata.yaml") + require.NoError(t, err) + cfg := factory.CreateDefaultConfig() + sub, err := cm.Sub("tests::config") + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(&cfg)) + + for _, tt := range tests { + t.Run(tt.name+"-shutdown", func(t *testing.T) { + c, err := tt.createFn(context.Background(), processortest.NewNopSettings(typ), cfg) + require.NoError(t, err) + err = c.Shutdown(context.Background()) + require.NoError(t, err) + }) + t.Run(tt.name+"-lifecycle", func(t *testing.T) { + c, err := tt.createFn(context.Background(), processortest.NewNopSettings(typ), cfg) + 
require.NoError(t, err) + host := newMdatagenNopHost() + err = c.Start(context.Background(), host) + require.NoError(t, err) + require.NotPanics(t, func() { + switch tt.name { + case "logs": + e, ok := c.(processor.Logs) + require.True(t, ok) + logs := generateLifecycleTestLogs() + if !e.Capabilities().MutatesData { + logs.MarkReadOnly() + } + err = e.ConsumeLogs(context.Background(), logs) + case "metrics": + e, ok := c.(processor.Metrics) + require.True(t, ok) + metrics := generateLifecycleTestMetrics() + if !e.Capabilities().MutatesData { + metrics.MarkReadOnly() + } + err = e.ConsumeMetrics(context.Background(), metrics) + case "traces": + e, ok := c.(processor.Traces) + require.True(t, ok) + traces := generateLifecycleTestTraces() + if !e.Capabilities().MutatesData { + traces.MarkReadOnly() + } + err = e.ConsumeTraces(context.Background(), traces) + } + }) + require.NoError(t, err) + err = c.Shutdown(context.Background()) + require.NoError(t, err) + }) + } +} + +func generateLifecycleTestLogs() plog.Logs { + logs := plog.NewLogs() + rl := logs.ResourceLogs().AppendEmpty() + rl.Resource().Attributes().PutStr("resource", "R1") + l := rl.ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + l.Body().SetStr("test log message") + l.SetTimestamp(pcommon.NewTimestampFromTime(time.Now())) + return logs +} + +func generateLifecycleTestMetrics() pmetric.Metrics { + metrics := pmetric.NewMetrics() + rm := metrics.ResourceMetrics().AppendEmpty() + rm.Resource().Attributes().PutStr("resource", "R1") + m := rm.ScopeMetrics().AppendEmpty().Metrics().AppendEmpty() + m.SetName("test_metric") + dp := m.SetEmptyGauge().DataPoints().AppendEmpty() + dp.Attributes().PutStr("test_attr", "value_1") + dp.SetIntValue(123) + dp.SetTimestamp(pcommon.NewTimestampFromTime(time.Now())) + return metrics +} + +func generateLifecycleTestTraces() ptrace.Traces { + traces := ptrace.NewTraces() + rs := traces.ResourceSpans().AppendEmpty() + rs.Resource().Attributes().PutStr("resource", "R1") + 
span := rs.ScopeSpans().AppendEmpty().Spans().AppendEmpty() + span.Attributes().PutStr("test_attr", "value_1") + span.SetName("test_span") + span.SetStartTimestamp(pcommon.NewTimestampFromTime(time.Now().Add(-1 * time.Second))) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(time.Now())) + return traces +} + +var _ component.Host = (*mdatagenNopHost)(nil) + +type mdatagenNopHost struct{} + +func newMdatagenNopHost() component.Host { + return &mdatagenNopHost{} +} + +func (mnh *mdatagenNopHost) GetExtensions() map[component.ID]component.Component { + return nil +} + +func (mnh *mdatagenNopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { + return nil +} diff --git a/processor/spanpruningprocessor/generated_package_test.go b/processor/spanpruningprocessor/generated_package_test.go new file mode 100644 index 0000000000000..4615b705bd160 --- /dev/null +++ b/processor/spanpruningprocessor/generated_package_test.go @@ -0,0 +1,12 @@ +// Code generated by mdatagen. DO NOT EDIT. 
+ +package spanpruningprocessor + +import ( + "go.uber.org/goleak" + "testing" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/spanpruningprocessor/go.mod b/processor/spanpruningprocessor/go.mod new file mode 100644 index 0000000000000..c4fd5546e5d21 --- /dev/null +++ b/processor/spanpruningprocessor/go.mod @@ -0,0 +1,55 @@ +module github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor + +go 1.24.0 + +require ( + github.com/gobwas/glob v0.2.3 + github.com/stretchr/testify v1.11.1 + go.opentelemetry.io/collector/component v1.49.0 + go.opentelemetry.io/collector/component/componenttest v0.143.0 + go.opentelemetry.io/collector/confmap v1.49.0 + go.opentelemetry.io/collector/consumer v1.49.0 + go.opentelemetry.io/collector/consumer/consumertest v0.143.0 + go.opentelemetry.io/collector/pdata v1.49.0 + go.opentelemetry.io/collector/processor v1.49.0 + go.opentelemetry.io/collector/processor/processorhelper v0.143.0 + go.opentelemetry.io/collector/processor/processortest v0.143.0 + go.opentelemetry.io/otel/metric v1.39.0 + go.opentelemetry.io/otel/sdk/metric v1.39.0 + go.opentelemetry.io/otel/trace v1.39.0 + go.uber.org/goleak v1.3.0 + go.uber.org/zap v1.27.1 +) + +require ( + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-viper/mapstructure/v2 v2.4.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/hashicorp/go-version v1.8.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/knadh/koanf/maps v0.1.2 // indirect + github.com/knadh/koanf/providers/confmap v1.0.0 // indirect + github.com/knadh/koanf/v2 v2.3.0 // indirect + github.com/mitchellh/copystructure v1.2.0 // indirect + github.com/mitchellh/reflectwalk v1.0.2 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // 
indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/collector/component/componentstatus v0.143.0 // indirect + go.opentelemetry.io/collector/consumer/xconsumer v0.143.0 // indirect + go.opentelemetry.io/collector/featuregate v1.49.0 // indirect + go.opentelemetry.io/collector/pdata/pprofile v0.143.0 // indirect + go.opentelemetry.io/collector/pdata/testdata v0.143.0 // indirect + go.opentelemetry.io/collector/pipeline v1.49.0 // indirect + go.opentelemetry.io/collector/processor/xprocessor v0.143.0 // indirect + go.opentelemetry.io/otel v1.39.0 // indirect + go.opentelemetry.io/otel/sdk v1.39.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/sys v0.39.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/processor/spanpruningprocessor/go.sum b/processor/spanpruningprocessor/go.sum new file mode 100644 index 0000000000000..9d9119baa7f81 --- /dev/null +++ b/processor/spanpruningprocessor/go.sum @@ -0,0 +1,120 @@ +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod 
h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= +github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/go-version v1.8.0 h1:KAkNb1HAiZd1ukkxDFGmokVZe1Xy9HG6NUp+bPle2i4= +github.com/hashicorp/go-version v1.8.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/knadh/koanf/maps v0.1.2 h1:RBfmAW5CnZT+PJ1CVc1QSJKf4Xu9kxfQgYVQSu8hpbo= +github.com/knadh/koanf/maps v0.1.2/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI= +github.com/knadh/koanf/providers/confmap v1.0.0 h1:mHKLJTE7iXEys6deO5p6olAiZdG5zwp8Aebir+/EaRE= +github.com/knadh/koanf/providers/confmap v1.0.0/go.mod h1:txHYHiI2hAtF0/0sCmcuol4IDcuQbKTybiB1nOcUo1A= +github.com/knadh/koanf/v2 v2.3.0 h1:Qg076dDRFHvqnKG97ZEsi9TAg2/nFTa9hCdcSa1lvlM= +github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text 
v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= +github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= +github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= +github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod 
h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/collector/component v1.49.0 h1:iJ56qiTWNtTyqafDx/X6zMukGEF8UZJA/+HNyPGVbks= +go.opentelemetry.io/collector/component v1.49.0/go.mod h1:EZd8hSQkzy/SJwahBKLF/NXsdhBEteiP4B6KXN7Ttpg= +go.opentelemetry.io/collector/component/componentstatus v0.143.0 h1:mtjfxahSl7LqreJ1fKrvmVLWv5wM6gNcmcAhFIBQLpo= +go.opentelemetry.io/collector/component/componentstatus v0.143.0/go.mod h1:7Is2U4lChyTtkOOpnPZy2bHVnj8kDETVUUnEX3UYIMY= +go.opentelemetry.io/collector/component/componenttest v0.143.0 h1:63Z2/UaFQSHnBs5fKLZ2BP9WTM7OL6CalMadq86PpeQ= +go.opentelemetry.io/collector/component/componenttest v0.143.0/go.mod h1:zUC76cTk9l+P7+0GPXgXgj8J+LxxrTD0j8EJHfX6Xa8= +go.opentelemetry.io/collector/confmap v1.49.0 h1:QUUymb4To6wgxDpD5USPkFqqsTe97vIEUmAmldXsvOM= +go.opentelemetry.io/collector/confmap v1.49.0/go.mod h1:nXdTzIrHuIJ6Q30Woy/JgeHRnCvEmao6AEFZJiP28T4= +go.opentelemetry.io/collector/consumer v1.49.0 h1:xNQxfM/5P+wYrwl6IaU35RsLA8ANM74okG1ahZdWO0c= +go.opentelemetry.io/collector/consumer v1.49.0/go.mod h1:LAzZPC8d2CpmLqXpn3K4zTM/z8a6VxA0hMGOE9MWXxo= +go.opentelemetry.io/collector/consumer/consumertest v0.143.0 h1:69w92MikFVvzV22VFkjmddELHV1V3BlIKWb4L+epcgM= +go.opentelemetry.io/collector/consumer/consumertest v0.143.0/go.mod h1:Qi4RlpzDuO/2+k+UrV9Nw0Km2UlunnN1RU8nIhsI/LA= +go.opentelemetry.io/collector/consumer/xconsumer v0.143.0 h1:m5NjAWhKczxWzsCENEmQoiKdIK0yfOR3Rn0c5J0puMQ= +go.opentelemetry.io/collector/consumer/xconsumer v0.143.0/go.mod h1:7hyToLEwxC4PwGjjTsSdLAiiABUh6Mg5poJb9BC/gP0= +go.opentelemetry.io/collector/featuregate v1.49.0 h1:4UfnqTvSvm6GkeD/w39LYLPmnZDfk4f+grkWuyl0NPU= +go.opentelemetry.io/collector/featuregate v1.49.0/go.mod h1:/1bclXgP91pISaEeNulRxzzmzMTm4I5Xih2SnI4HRSo= +go.opentelemetry.io/collector/internal/testutil v0.143.0 h1:rp3vIsOhXg/H3YXuStdggGTLuU+Udf1BdDIF/I7+Tyk= +go.opentelemetry.io/collector/internal/testutil v0.143.0/go.mod h1:YAD9EAkwh/l5asZNbEBEUCqEjoL1OKMjAMoPjPqH76c= 
+go.opentelemetry.io/collector/pdata v1.49.0 h1:h6V3rdLNxweI3K8B5SZzjMiVdsPPBB1TPAWwZkCtGZE= +go.opentelemetry.io/collector/pdata v1.49.0/go.mod h1:gidKN58CUnhd4DSM61UzPKWjXmG0vyoIn7dd+URZW9A= +go.opentelemetry.io/collector/pdata/pprofile v0.143.0 h1:qFrT+33PvKGr1F8yCpn3ysGWmEXYJjMvDKTGcwPKP1A= +go.opentelemetry.io/collector/pdata/pprofile v0.143.0/go.mod h1:RCZhNPEvZ1ctaPxDJ7tUdfVwGd0ee8uY4h4twq+01PE= +go.opentelemetry.io/collector/pdata/testdata v0.143.0 h1:csvYoOv8c6vD8pZ4dmkkfsjk1qVhaIUbNBWkSGx1VWo= +go.opentelemetry.io/collector/pdata/testdata v0.143.0/go.mod h1:DLjTEVsK9+lTsEuyjNKNaEdfWEM2wYeMCNl7waSlpfg= +go.opentelemetry.io/collector/pipeline v1.49.0 h1:JlczxvcgjnwMP2bm55lHt8A3eBE/qIv/Swv5twBOUpg= +go.opentelemetry.io/collector/pipeline v1.49.0/go.mod h1:xUrAqiebzYbrgxyoXSkk6/Y3oi5Sy3im2iCA51LwUAI= +go.opentelemetry.io/collector/processor v1.49.0 h1:vALRR0gW+WIoE2ERTJo381FHLUfypOsJZw3mTPA2/hw= +go.opentelemetry.io/collector/processor v1.49.0/go.mod h1:fGWONigLHkkoDODevNv6BIZIfk/gZxxIBe0QZXL1pBI= +go.opentelemetry.io/collector/processor/processorhelper v0.143.0 h1:agwy9xsJSih5vzP9cMZo/GBTOvbhR1ShyWvqbq58bIE= +go.opentelemetry.io/collector/processor/processorhelper v0.143.0/go.mod h1:mudWeMoxEX2TzWsu/kEyhthhbNhS2HEbfH48ehtbeig= +go.opentelemetry.io/collector/processor/processortest v0.143.0 h1:QPNLk7eRLQulS3EH9CMkuxV4+wte5BjlYGZoGlbz/74= +go.opentelemetry.io/collector/processor/processortest v0.143.0/go.mod h1:oGDwx8e2BeS8glxfkehswTRics/s8WGzN5LPKywoxWU= +go.opentelemetry.io/collector/processor/xprocessor v0.143.0 h1:8UXrve/Ak0c5jNI1VqTUiyxPMkMMwYEcqANgLX92SK8= +go.opentelemetry.io/collector/processor/xprocessor v0.143.0/go.mod h1:0pSR0Fj+gTMRgfOg6/Wg5AGE5GTIqAAVIPZwe7SiB/4= +go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= +go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= +go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= 
+go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= +go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= +go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= +go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= +go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= +go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= +go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +go.opentelemetry.io/proto/slim/otlp v1.9.0 h1:fPVMv8tP3TrsqlkH1HWYUpbCY9cAIemx184VGkS6vlE= +go.opentelemetry.io/proto/slim/otlp v1.9.0/go.mod h1:xXdeJJ90Gqyll+orzUkY4bOd2HECo5JofeoLpymVqdI= +go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.2.0 h1:o13nadWDNkH/quoDomDUClnQBpdQQ2Qqv0lQBjIXjE8= +go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.2.0/go.mod h1:Gyb6Xe7FTi/6xBHwMmngGoHqL0w29Y4eW8TGFzpefGA= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.2.0 h1:EiUYvtwu6PMrMHVjcPfnsG3v+ajPkbUeH+IL93+QYyk= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.2.0/go.mod h1:mUUHKFiN2SST3AhJ8XhJxEoeVW12oqfXog0Bo8W3Ec4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc= +go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/sys v0.39.0 
h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/processor/spanpruningprocessor/grouping.go b/processor/spanpruningprocessor/grouping.go new file mode 100644 index 0000000000000..6e326104aa411 --- /dev/null +++ b/processor/spanpruningprocessor/grouping.go @@ -0,0 +1,134 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "sort" + "strings" + "sync" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +// builderPool reduces allocations in the hot path by reusing string builders. +var builderPool = sync.Pool{ + New: func() any { + return &strings.Builder{} + }, +} + +// buildGroupKey assembles the grouping key for a span using its name, +// status, and configured attribute matches. A pooled builder minimizes +// allocations in this frequently executed path. 
+func (p *spanPruningProcessor) buildGroupKey(span ptrace.Span) string {
+	// Key layout: <name>|kind=<kind>|status=<code>|ts=<tracestate>, followed
+	// by one "|<key>=<value>" segment per matched attribute in sorted key order.
+	// NOTE(review): names and values are written unescaped, so a value that
+	// contains "|" can yield the same key as a different attribute set —
+	// confirm this is acceptable for grouping.
+	// NOTE(review): strings.Builder.Reset drops the underlying buffer, so the
+	// pool recycles only the Builder struct — confirm the pool is worth keeping.
+	builder := builderPool.Get().(*strings.Builder)
+	builder.Reset()
+	defer builderPool.Put(builder)
+
+	builder.WriteString(span.Name())
+
+	// Include span kind in grouping key
+	builder.WriteString("|kind=")
+	builder.WriteString(span.Kind().String())
+
+	// Include status code in grouping key
+	builder.WriteString("|status=")
+	builder.WriteString(span.Status().Code().String())
+
+	// Include TraceState for Consistent Probability Sampling (CPS) compatibility.
+	// Spans with different TraceState values (e.g., different sampling thresholds)
+	// represent different sampling populations and must not be aggregated together.
+	builder.WriteString("|ts=")
+	builder.WriteString(span.TraceState().AsRaw())
+
+	// Fast path: without attribute patterns nothing can match, so skip the
+	// attribute scan and the per-span map and slice allocations.
+	if len(p.attributePatterns) == 0 {
+		return builder.String()
+	}
+
+	// Collect all matching attribute key-value pairs
+	matchedAttrs := make(map[string]string)
+	span.Attributes().Range(func(key string, value pcommon.Value) bool {
+		for _, pattern := range p.attributePatterns {
+			if pattern.glob.Match(key) {
+				matchedAttrs[key] = value.AsString()
+				break // Only match each key once
+			}
+		}
+		return true
+	})
+
+	// Nothing matched: the key is complete, so skip the sort bookkeeping.
+	if len(matchedAttrs) == 0 {
+		return builder.String()
+	}
+
+	// Sort keys for consistent ordering in the group key
+	keys := make([]string, 0, len(matchedAttrs))
+	for k := range matchedAttrs {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+
+	// Build the group key with sorted attribute key-value pairs
+	for _, key := range keys {
+		builder.WriteString("|")
+		builder.WriteString(key)
+		builder.WriteString("=")
+		builder.WriteString(matchedAttrs[key])
+	}
+
+	return builder.String()
+}
+
+// buildParentGroupKey constructs a parent grouping key from the span name,
+// kind, status code, and TraceState; attributes are intentionally excluded
+// for parent aggregation.
+func (*spanPruningProcessor) buildParentGroupKey(span ptrace.Span) string {
+	// The key is "<name>|kind=<kind>|status=<code>|ts=<tracestate>"; the
+	// pieces are fixed in number, so they are joined in one concatenation.
+	// TraceState is part of the key for CPS compatibility.
+	return span.Name() +
+		"|kind=" + span.Kind().String() +
+		"|status=" + span.Status().Code().String() +
+		"|ts=" + span.TraceState().AsRaw()
+}
+
+// buildLeafGroupKey returns the grouping key for a leaf node: the regular
+// group key of the node's span, prefixed with "parent=<parent span name>|"
+// when the node has a parent. The key is stored on the node and reused on
+// later calls.
+func (p *spanPruningProcessor) buildLeafGroupKey(node *spanNode) string {
+	// Serve the stored key when this node has been keyed before.
+	if cached := node.groupKey; cached != "" {
+		return cached
+	}
+
+	// Prefix with the parent span name so that leaves are grouped per parent name.
+	prefix := ""
+	if parent := node.parent; parent != nil {
+		prefix = "parent=" + parent.span.Name() + "|"
+	}
+
+	// Append the regular group key and remember the result on the node.
+	key := prefix + p.buildGroupKey(node.span)
+	node.groupKey = key
+	return key
+}
+
+// groupLeafNodesByKey groups leaf nodes by their derived key so that spans
+// with identical grouping characteristics can be aggregated together.
+func (p *spanPruningProcessor) groupLeafNodesByKey(leafNodes []*spanNode) map[string][]*spanNode { + // Pre-size map based on expected number of groups (assume ~1/4 unique groups) + groups := make(map[string][]*spanNode, len(leafNodes)/4+1) + for _, node := range leafNodes { + key := p.buildLeafGroupKey(node) + groups[key] = append(groups[key], node) + } + return groups +} diff --git a/processor/spanpruningprocessor/internal/metadata/generated_status.go b/processor/spanpruningprocessor/internal/metadata/generated_status.go new file mode 100644 index 0000000000000..71cf4cf138adb --- /dev/null +++ b/processor/spanpruningprocessor/internal/metadata/generated_status.go @@ -0,0 +1,16 @@ +// Code generated by mdatagen. DO NOT EDIT. + +package metadata + +import ( + "go.opentelemetry.io/collector/component" +) + +var ( + Type = component.MustNewType("spanpruning") + ScopeName = "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" +) + +const ( + TracesStability = component.StabilityLevelAlpha +) diff --git a/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go b/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go new file mode 100644 index 0000000000000..055a03557bb9b --- /dev/null +++ b/processor/spanpruningprocessor/internal/metadata/generated_telemetry.go @@ -0,0 +1,170 @@ +// Code generated by mdatagen. DO NOT EDIT. 
+ +package metadata + +import ( + "errors" + "sync" + + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" + + "go.opentelemetry.io/collector/component" +) + +func Meter(settings component.TelemetrySettings) metric.Meter { + return settings.MeterProvider.Meter("github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor") +} + +func Tracer(settings component.TelemetrySettings) trace.Tracer { + return settings.TracerProvider.Tracer("github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor") +} + +// TelemetryBuilder provides an interface for components to report telemetry +// as defined in metadata and user config. +type TelemetryBuilder struct { + meter metric.Meter + mu sync.Mutex + registrations []metric.Registration + ProcessorSpanpruningAggregationGroupSize metric.Int64Histogram + ProcessorSpanpruningAggregationsCreated metric.Int64Counter + ProcessorSpanpruningBytesEmitted metric.Int64Counter + ProcessorSpanpruningBytesReceived metric.Int64Counter + ProcessorSpanpruningLeafAttributeDiversityLoss metric.Int64Histogram + ProcessorSpanpruningLeafAttributeLoss metric.Int64Histogram + ProcessorSpanpruningOutliersCorrelationsDetected metric.Int64Counter + ProcessorSpanpruningOutliersDetected metric.Int64Counter + ProcessorSpanpruningOutliersPreserved metric.Int64Counter + ProcessorSpanpruningParentAttributeDiversityLoss metric.Int64Histogram + ProcessorSpanpruningParentAttributeLoss metric.Int64Histogram + ProcessorSpanpruningProcessingDuration metric.Float64Histogram + ProcessorSpanpruningSpansPruned metric.Int64Counter + ProcessorSpanpruningSpansReceived metric.Int64Counter + ProcessorSpanpruningTracesProcessed metric.Int64Counter +} + +// TelemetryBuilderOption applies changes to default builder. 
+type TelemetryBuilderOption interface { + apply(*TelemetryBuilder) +} + +type telemetryBuilderOptionFunc func(mb *TelemetryBuilder) + +func (tbof telemetryBuilderOptionFunc) apply(mb *TelemetryBuilder) { + tbof(mb) +} + +// Shutdown unregister all registered callbacks for async instruments. +func (builder *TelemetryBuilder) Shutdown() { + builder.mu.Lock() + defer builder.mu.Unlock() + for _, reg := range builder.registrations { + reg.Unregister() + } +} + +// NewTelemetryBuilder provides a struct with methods to update all internal telemetry +// for a component +func NewTelemetryBuilder(settings component.TelemetrySettings, options ...TelemetryBuilderOption) (*TelemetryBuilder, error) { + builder := TelemetryBuilder{} + for _, op := range options { + op.apply(&builder) + } + builder.meter = Meter(settings) + var err, errs error + builder.ProcessorSpanpruningAggregationGroupSize, err = builder.meter.Int64Histogram( + "otelcol_processor_spanpruning_aggregation_group_size", + metric.WithDescription("Distribution of spans per aggregation group [Development]"), + metric.WithUnit("{spans}"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningAggregationsCreated, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_aggregations_created", + metric.WithDescription("Total aggregation summary spans created [Development]"), + metric.WithUnit("{spans}"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningBytesEmitted, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_bytes_emitted", + metric.WithDescription("Total bytes of serialized traces emitted after pruning [Development]"), + metric.WithUnit("By"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningBytesReceived, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_bytes_received", + metric.WithDescription("Total bytes of serialized traces received before pruning [Development]"), + metric.WithUnit("By"), + ) + errs = 
errors.Join(errs, err) + builder.ProcessorSpanpruningLeafAttributeDiversityLoss, err = builder.meter.Int64Histogram( + "otelcol_processor_spanpruning_leaf_attribute_diversity_loss", + metric.WithDescription("Attribute values lost due to diversity per leaf aggregation [Development]"), + metric.WithUnit("{values}"), + metric.WithExplicitBucketBoundaries([]float64{0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20}...), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningLeafAttributeLoss, err = builder.meter.Int64Histogram( + "otelcol_processor_spanpruning_leaf_attribute_loss", + metric.WithDescription("Attribute keys lost due to absence per leaf aggregation [Development]"), + metric.WithUnit("{keys}"), + metric.WithExplicitBucketBoundaries([]float64{0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20}...), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningOutliersCorrelationsDetected, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_outliers_correlations_detected", + metric.WithDescription("Groups where outliers had correlated attributes [Development]"), + metric.WithUnit("{groups}"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningOutliersDetected, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_outliers_detected", + metric.WithDescription("Spans identified as outliers by analysis [Development]"), + metric.WithUnit("{spans}"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningOutliersPreserved, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_outliers_preserved", + metric.WithDescription("Outlier spans kept (excluded from aggregation) [Development]"), + metric.WithUnit("{spans}"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningParentAttributeDiversityLoss, err = builder.meter.Int64Histogram( + "otelcol_processor_spanpruning_parent_attribute_diversity_loss", + metric.WithDescription("Attribute values lost due to diversity per parent aggregation [Development]"), + 
metric.WithUnit("{values}"), + metric.WithExplicitBucketBoundaries([]float64{0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20}...), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningParentAttributeLoss, err = builder.meter.Int64Histogram( + "otelcol_processor_spanpruning_parent_attribute_loss", + metric.WithDescription("Attribute keys lost due to absence per parent aggregation [Development]"), + metric.WithUnit("{keys}"), + metric.WithExplicitBucketBoundaries([]float64{0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20}...), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningProcessingDuration, err = builder.meter.Float64Histogram( + "otelcol_processor_spanpruning_processing_duration", + metric.WithDescription("Time to process each batch of traces [Development]"), + metric.WithUnit("s"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningSpansPruned, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_spans_pruned", + metric.WithDescription("Total spans pruned/removed by aggregation [Development]"), + metric.WithUnit("{spans}"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningSpansReceived, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_spans_received", + metric.WithDescription("Total spans received by the processor [Development]"), + metric.WithUnit("{spans}"), + ) + errs = errors.Join(errs, err) + builder.ProcessorSpanpruningTracesProcessed, err = builder.meter.Int64Counter( + "otelcol_processor_spanpruning_traces_processed", + metric.WithDescription("Total traces processed [Development]"), + metric.WithUnit("{traces}"), + ) + errs = errors.Join(errs, err) + return &builder, errs +} diff --git a/processor/spanpruningprocessor/internal/metadata/generated_telemetry_test.go b/processor/spanpruningprocessor/internal/metadata/generated_telemetry_test.go new file mode 100644 index 0000000000000..ae0341135ba4f --- /dev/null +++ 
b/processor/spanpruningprocessor/internal/metadata/generated_telemetry_test.go @@ -0,0 +1,74 @@ +// Code generated by mdatagen. DO NOT EDIT. + +package metadata + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/metric" + embeddedmetric "go.opentelemetry.io/otel/metric/embedded" + noopmetric "go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/trace" + embeddedtrace "go.opentelemetry.io/otel/trace/embedded" + nooptrace "go.opentelemetry.io/otel/trace/noop" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" +) + +type mockMeter struct { + noopmetric.Meter + name string +} +type mockMeterProvider struct { + embeddedmetric.MeterProvider +} + +func (m mockMeterProvider) Meter(name string, opts ...metric.MeterOption) metric.Meter { + return mockMeter{name: name} +} + +type mockTracer struct { + nooptrace.Tracer + name string +} + +type mockTracerProvider struct { + embeddedtrace.TracerProvider +} + +func (m mockTracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.Tracer { + return mockTracer{name: name} +} + +func TestProviders(t *testing.T) { + set := component.TelemetrySettings{ + MeterProvider: mockMeterProvider{}, + TracerProvider: mockTracerProvider{}, + } + + meter := Meter(set) + if m, ok := meter.(mockMeter); ok { + require.Equal(t, "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor", m.name) + } else { + require.Fail(t, "returned Meter not mockMeter") + } + + tracer := Tracer(set) + if m, ok := tracer.(mockTracer); ok { + require.Equal(t, "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor", m.name) + } else { + require.Fail(t, "returned Meter not mockTracer") + } +} + +func TestNewTelemetryBuilder(t *testing.T) { + set := componenttest.NewNopTelemetrySettings() + applied := false + _, err := NewTelemetryBuilder(set, telemetryBuilderOptionFunc(func(b 
*TelemetryBuilder) { + applied = true + })) + require.NoError(t, err) + require.True(t, applied) +} diff --git a/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go b/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go new file mode 100644 index 0000000000000..b9207fcc75eac --- /dev/null +++ b/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest.go @@ -0,0 +1,256 @@ +// Code generated by mdatagen. DO NOT EDIT. + +package metadatatest + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/processor" + "go.opentelemetry.io/collector/processor/processortest" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/metric/metricdata/metricdatatest" +) + +func NewSettings(tt *componenttest.Telemetry) processor.Settings { + set := processortest.NewNopSettings(processortest.NopType) + set.ID = component.NewID(component.MustNewType("spanpruning")) + set.TelemetrySettings = tt.NewTelemetrySettings() + return set +} + +func AssertEqualProcessorSpanpruningAggregationGroupSize(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.HistogramDataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_aggregation_group_size", + Description: "Distribution of spans per aggregation group [Development]", + Unit: "{spans}", + Data: metricdata.Histogram[int64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_aggregation_group_size") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) 
+} + +func AssertEqualProcessorSpanpruningAggregationsCreated(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_aggregations_created", + Description: "Total aggregation summary spans created [Development]", + Unit: "{spans}", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_aggregations_created") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningBytesEmitted(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_bytes_emitted", + Description: "Total bytes of serialized traces emitted after pruning [Development]", + Unit: "By", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_bytes_emitted") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningBytesReceived(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_bytes_received", + Description: "Total bytes of serialized traces received before pruning [Development]", + Unit: "By", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_bytes_received") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) 
+} + +func AssertEqualProcessorSpanpruningLeafAttributeDiversityLoss(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.HistogramDataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_leaf_attribute_diversity_loss", + Description: "Attribute values lost due to diversity per leaf aggregation [Development]", + Unit: "{values}", + Data: metricdata.Histogram[int64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_leaf_attribute_diversity_loss") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningLeafAttributeLoss(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.HistogramDataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_leaf_attribute_loss", + Description: "Attribute keys lost due to absence per leaf aggregation [Development]", + Unit: "{keys}", + Data: metricdata.Histogram[int64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_leaf_attribute_loss") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) 
+} + +func AssertEqualProcessorSpanpruningOutliersCorrelationsDetected(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_outliers_correlations_detected", + Description: "Groups where outliers had correlated attributes [Development]", + Unit: "{groups}", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_outliers_correlations_detected") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningOutliersDetected(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_outliers_detected", + Description: "Spans identified as outliers by analysis [Development]", + Unit: "{spans}", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_outliers_detected") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningOutliersPreserved(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_outliers_preserved", + Description: "Outlier spans kept (excluded from aggregation) [Development]", + Unit: "{spans}", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_outliers_preserved") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) 
+} + +func AssertEqualProcessorSpanpruningParentAttributeDiversityLoss(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.HistogramDataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_parent_attribute_diversity_loss", + Description: "Attribute values lost due to diversity per parent aggregation [Development]", + Unit: "{values}", + Data: metricdata.Histogram[int64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_parent_attribute_diversity_loss") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningParentAttributeLoss(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.HistogramDataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_parent_attribute_loss", + Description: "Attribute keys lost due to absence per parent aggregation [Development]", + Unit: "{keys}", + Data: metricdata.Histogram[int64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_parent_attribute_loss") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningProcessingDuration(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.HistogramDataPoint[float64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_processing_duration", + Description: "Time to process each batch of traces [Development]", + Unit: "s", + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_processing_duration") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) 
+} + +func AssertEqualProcessorSpanpruningSpansPruned(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_spans_pruned", + Description: "Total spans pruned/removed by aggregation [Development]", + Unit: "{spans}", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_spans_pruned") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningSpansReceived(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_spans_received", + Description: "Total spans received by the processor [Development]", + Unit: "{spans}", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_spans_received") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) +} + +func AssertEqualProcessorSpanpruningTracesProcessed(t *testing.T, tt *componenttest.Telemetry, dps []metricdata.DataPoint[int64], opts ...metricdatatest.Option) { + want := metricdata.Metrics{ + Name: "otelcol_processor_spanpruning_traces_processed", + Description: "Total traces processed [Development]", + Unit: "{traces}", + Data: metricdata.Sum[int64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: true, + DataPoints: dps, + }, + } + got, err := tt.GetMetric("otelcol_processor_spanpruning_traces_processed") + require.NoError(t, err) + metricdatatest.AssertEqual(t, want, got, opts...) 
+} diff --git a/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest_test.go b/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest_test.go new file mode 100644 index 0000000000000..8a4d87f8074c2 --- /dev/null +++ b/processor/spanpruningprocessor/internal/metadatatest/generated_telemetrytest_test.go @@ -0,0 +1,84 @@ +// Code generated by mdatagen. DO NOT EDIT. + +package metadatatest + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/metric/metricdata/metricdatatest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" + "go.opentelemetry.io/collector/component/componenttest" +) + +func TestSetupTelemetry(t *testing.T) { + testTel := componenttest.NewTelemetry() + tb, err := metadata.NewTelemetryBuilder(testTel.NewTelemetrySettings()) + require.NoError(t, err) + defer tb.Shutdown() + tb.ProcessorSpanpruningAggregationGroupSize.Record(context.Background(), 1) + tb.ProcessorSpanpruningAggregationsCreated.Add(context.Background(), 1) + tb.ProcessorSpanpruningBytesEmitted.Add(context.Background(), 1) + tb.ProcessorSpanpruningBytesReceived.Add(context.Background(), 1) + tb.ProcessorSpanpruningLeafAttributeDiversityLoss.Record(context.Background(), 1) + tb.ProcessorSpanpruningLeafAttributeLoss.Record(context.Background(), 1) + tb.ProcessorSpanpruningOutliersCorrelationsDetected.Add(context.Background(), 1) + tb.ProcessorSpanpruningOutliersDetected.Add(context.Background(), 1) + tb.ProcessorSpanpruningOutliersPreserved.Add(context.Background(), 1) + tb.ProcessorSpanpruningParentAttributeDiversityLoss.Record(context.Background(), 1) + tb.ProcessorSpanpruningParentAttributeLoss.Record(context.Background(), 1) + tb.ProcessorSpanpruningProcessingDuration.Record(context.Background(), 1) + tb.ProcessorSpanpruningSpansPruned.Add(context.Background(), 1) + 
tb.ProcessorSpanpruningSpansReceived.Add(context.Background(), 1) + tb.ProcessorSpanpruningTracesProcessed.Add(context.Background(), 1) + AssertEqualProcessorSpanpruningAggregationGroupSize(t, testTel, + []metricdata.HistogramDataPoint[int64]{{}}, metricdatatest.IgnoreValue(), + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningAggregationsCreated(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningBytesEmitted(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningBytesReceived(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningLeafAttributeDiversityLoss(t, testTel, + []metricdata.HistogramDataPoint[int64]{{}}, metricdatatest.IgnoreValue(), + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningLeafAttributeLoss(t, testTel, + []metricdata.HistogramDataPoint[int64]{{}}, metricdatatest.IgnoreValue(), + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningOutliersCorrelationsDetected(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningOutliersDetected(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningOutliersPreserved(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningParentAttributeDiversityLoss(t, testTel, + []metricdata.HistogramDataPoint[int64]{{}}, metricdatatest.IgnoreValue(), + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningParentAttributeLoss(t, testTel, + []metricdata.HistogramDataPoint[int64]{{}}, metricdatatest.IgnoreValue(), + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningProcessingDuration(t, testTel, + 
[]metricdata.HistogramDataPoint[float64]{{}}, metricdatatest.IgnoreValue(), + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningSpansPruned(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningSpansReceived(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + AssertEqualProcessorSpanpruningTracesProcessed(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + + require.NoError(t, testTel.Shutdown(context.Background())) +} diff --git a/processor/spanpruningprocessor/metadata.yaml b/processor/spanpruningprocessor/metadata.yaml new file mode 100644 index 0000000000000..b23453a51377a --- /dev/null +++ b/processor/spanpruningprocessor/metadata.yaml @@ -0,0 +1,130 @@ +type: spanpruning + +status: + class: processor + stability: + alpha: [traces] + distributions: [contrib] + codeowners: + active: [portertech, csmarchbanks] + +telemetry: + metrics: + processor_spanpruning_aggregation_group_size: + enabled: true + description: Distribution of spans per aggregation group + unit: "{spans}" + stability: { level: development } + histogram: { value_type: int } + + processor_spanpruning_aggregations_created: + enabled: true + description: Total aggregation summary spans created + unit: "{spans}" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_bytes_emitted: + enabled: false + description: Total bytes of serialized traces emitted after pruning + unit: "By" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_bytes_received: + enabled: false + description: Total bytes of serialized traces received before pruning + unit: "By" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_leaf_attribute_diversity_loss: + enabled: false + description: Attribute 
values lost due to diversity per leaf aggregation + unit: "{values}" + stability: { level: development } + histogram: + value_type: int + bucket_boundaries: [0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20] + + processor_spanpruning_leaf_attribute_loss: + enabled: false + description: Attribute keys lost due to absence per leaf aggregation + unit: "{keys}" + stability: { level: development } + histogram: + value_type: int + bucket_boundaries: [0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20] + + processor_spanpruning_outliers_correlations_detected: + enabled: true + description: Groups where outliers had correlated attributes + unit: "{groups}" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_outliers_detected: + enabled: true + description: Spans identified as outliers by analysis + unit: "{spans}" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_outliers_preserved: + enabled: true + description: Outlier spans kept (excluded from aggregation) + unit: "{spans}" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_parent_attribute_diversity_loss: + enabled: false + description: Attribute values lost due to diversity per parent aggregation + unit: "{values}" + stability: { level: development } + histogram: + value_type: int + bucket_boundaries: [0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20] + + processor_spanpruning_parent_attribute_loss: + enabled: false + description: Attribute keys lost due to absence per parent aggregation + unit: "{keys}" + stability: { level: development } + histogram: + value_type: int + bucket_boundaries: [0, 1, 2, 3, 4, 5, 6, 8, 10, 15, 20] + + processor_spanpruning_processing_duration: + enabled: true + description: Time to process each batch of traces + unit: s + stability: { level: development } + histogram: { value_type: double } + + processor_spanpruning_spans_pruned: + enabled: true + description: Total 
spans pruned/removed by aggregation + unit: "{spans}" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_spans_received: + enabled: true + description: Total spans received by the processor + unit: "{spans}" + stability: { level: development } + sum: { value_type: int, monotonic: true } + + processor_spanpruning_traces_processed: + enabled: true + description: Total traces processed + unit: "{traces}" + stability: { level: development } + sum: { value_type: int, monotonic: true } + +tests: + config: + group_by_attributes: + - "db.operation" + min_spans_to_aggregate: 2 diff --git a/processor/spanpruningprocessor/outlier.go b/processor/spanpruningprocessor/outlier.go new file mode 100644 index 0000000000000..73d16a43d55ec --- /dev/null +++ b/processor/spanpruningprocessor/outlier.go @@ -0,0 +1,361 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "cmp" + "fmt" + "slices" + "sort" + "strings" + "time" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// outlierAnalysisResult contains outlier analysis and attribute correlations. +type outlierAnalysisResult struct { + median time.Duration + correlations []attributeCorrelation + outlierIndices []int // indices of outlier spans (sorted by duration desc) + normalIndices []int // indices of normal spans + hasOutliers bool // true if any outliers detected +} + +// attributeCorrelation represents an attribute value that distinguishes outliers. +type attributeCorrelation struct { + key string + value string + outlierOccurrence float64 // fraction of outliers with this value + normalOccurrence float64 // fraction of normal spans with this value + score float64 // outlierOccurrence - normalOccurrence +} + +// analyzeOutliers performs outlier detection and attribute correlation. 
+// Returns nil if group is too small or no meaningful correlations found. +func analyzeOutliers(nodes []*spanNode, cfg OutlierAnalysisConfig) *outlierAnalysisResult { + n := len(nodes) + if n < cfg.MinGroupSize { + return nil + } + + // Collect and sort durations + durations := make([]indexedDuration, n) + for i, node := range nodes { + // Use raw timestamps to avoid time.Time allocations + durations[i] = indexedDuration{ + index: i, + duration: time.Duration(node.span.EndTimestamp() - node.span.StartTimestamp()), + } + } + sort.Slice(durations, func(i, j int) bool { + return durations[i].duration < durations[j].duration + }) + + // Determine method (default to IQR) + method := cfg.Method + if method == "" { + method = OutlierMethodIQR + } + + var outlierIndices, normalIndices []int + var median time.Duration + + switch method { + case OutlierMethodMAD: + outlierIndices, normalIndices, median = detectOutliersMAD(durations, cfg.MADMultiplier, cfg.MinOutlierThresholdPercent) + default: // IQR + outlierIndices, normalIndices, median = detectOutliersIQR(durations, cfg.IQRMultiplier, cfg.MinOutlierThresholdPercent) + } + + hasOutliers := len(outlierIndices) > 0 + + // Sort outlier indices by duration descending (most extreme first) + if hasOutliers { + sort.Slice(outlierIndices, func(i, j int) bool { + iDur := getDuration(nodes[outlierIndices[i]]) + jDur := getDuration(nodes[outlierIndices[j]]) + return iDur > jDur + }) + } + + // Need both outliers and normals for correlation + if len(outlierIndices) == 0 || len(normalIndices) == 0 { + return &outlierAnalysisResult{ + median: median, + outlierIndices: outlierIndices, + normalIndices: normalIndices, + hasOutliers: hasOutliers, + } + } + + // Analyze attribute correlations + correlations := findCorrelations( + nodes, + outlierIndices, + normalIndices, + cfg.CorrelationMinOccurrence, + cfg.CorrelationMaxNormalOccurrence, + cfg.MaxCorrelatedAttributes, + ) + + return &outlierAnalysisResult{ + median: median, + 
correlations: correlations, + outlierIndices: outlierIndices, + normalIndices: normalIndices, + hasOutliers: true, + } +} + +// indexedDuration pairs an index with its duration for sorting. +type indexedDuration struct { + index int + duration time.Duration +} + +// detectOutliersIQR identifies outliers using Interquartile Range method. +// Returns (outlierIndices, normalIndices, median). +func detectOutliersIQR(durations []indexedDuration, multiplier, minThresholdPercent float64) ([]int, []int, time.Duration) { + n := len(durations) + + // Calculate median + var median time.Duration + if n%2 == 1 { + median = durations[n/2].duration + } else { + median = (durations[n/2-1].duration + durations[n/2].duration) / 2 + } + + // Calculate IQR + q1 := durations[n/4].duration + q3 := durations[3*n/4].duration + iqr := q3 - q1 + + // Calculate thresholds + statisticalThreshold := q3 + time.Duration(float64(iqr)*multiplier) + minimumThreshold := time.Duration(float64(median) * (1 + minThresholdPercent)) + upperThreshold := max(statisticalThreshold, minimumThreshold) + + // Classify spans (pre-allocate: outliers typically <20%) + outlierIndices := make([]int, 0, n/5+1) + normalIndices := make([]int, 0, n) + + for _, d := range durations { + if d.duration > upperThreshold { + outlierIndices = append(outlierIndices, d.index) + } else { + normalIndices = append(normalIndices, d.index) + } + } + + return outlierIndices, normalIndices, median +} + +// madScaleFactor converts MAD to a consistent scale with standard deviation. +// For normally distributed data, MAD ≈ 0.6745 * σ, so multiplying by 1.4826 +// makes MAD comparable to standard deviation. +const madScaleFactor = 1.4826 + +// detectOutliersMAD identifies outliers using Median Absolute Deviation method. +// Returns (outlierIndices, normalIndices, median). +// MAD is more robust to extreme outliers than IQR. 
+func detectOutliersMAD(durations []indexedDuration, multiplier, minThresholdPercent float64) ([]int, []int, time.Duration) { + n := len(durations) + + // Calculate median (durations are already sorted) + var median time.Duration + if n%2 == 1 { + median = durations[n/2].duration + } else { + median = (durations[n/2-1].duration + durations[n/2].duration) / 2 + } + + // Calculate absolute deviations from median + deviations := make([]time.Duration, n) + for i, d := range durations { + dev := d.duration - median + if dev < 0 { + dev = -dev + } + deviations[i] = dev + } + + // Sort deviations to find MAD (median of absolute deviations) + slices.SortFunc(deviations, cmp.Compare) + + var mad time.Duration + if n%2 == 1 { + mad = deviations[n/2] + } else { + mad = (deviations[n/2-1] + deviations[n/2]) / 2 + } + + // Calculate thresholds + statisticalThreshold := median + time.Duration(multiplier*madScaleFactor*float64(mad)) + minimumThreshold := time.Duration(float64(median) * (1 + minThresholdPercent)) + upperThreshold := max(statisticalThreshold, minimumThreshold) + + // Classify spans (pre-allocate: outliers typically <20%) + outlierIndices := make([]int, 0, n/5+1) + normalIndices := make([]int, 0, n) + for _, d := range durations { + if d.duration > upperThreshold { + outlierIndices = append(outlierIndices, d.index) + } else { + normalIndices = append(normalIndices, d.index) + } + } + + return outlierIndices, normalIndices, median +} + +// findCorrelations identifies attributes that distinguish outliers from normal spans. 
+func findCorrelations( + nodes []*spanNode, + outlierIndices []int, + normalIndices []int, + minOccurrence float64, + maxNormalOccurrence float64, + maxAttributes int, +) []attributeCorrelation { + outlierCounts := countAttributeValues(nodes, outlierIndices) + normalCounts := countAttributeValues(nodes, normalIndices) + + numOutliers := float64(len(outlierIndices)) + numNormals := float64(len(normalIndices)) + + var correlations []attributeCorrelation + + for key, valueCounts := range outlierCounts { + for value, outlierCount := range valueCounts { + outlierOcc := float64(outlierCount) / numOutliers + if outlierOcc < minOccurrence { + continue + } + + normalCount := 0 + if normalVals, exists := normalCounts[key]; exists { + normalCount = normalVals[value] + } + normalOcc := float64(normalCount) / numNormals + + if normalOcc > maxNormalOccurrence { + continue + } + + correlations = append(correlations, attributeCorrelation{ + key: key, + value: value, + outlierOccurrence: outlierOcc, + normalOccurrence: normalOcc, + score: outlierOcc - normalOcc, + }) + } + } + + if len(correlations) == 0 { + return nil + } + + // Sort by score descending, then key ascending for stability + sort.Slice(correlations, func(i, j int) bool { + if correlations[i].score != correlations[j].score { + return correlations[i].score > correlations[j].score + } + return correlations[i].key < correlations[j].key + }) + + if len(correlations) > maxAttributes { + correlations = correlations[:maxAttributes] + } + + return correlations +} + +// countAttributeValues counts key-value occurrences for given node indices. 
+func countAttributeValues(nodes []*spanNode, indices []int) map[string]map[string]int { + result := make(map[string]map[string]int) + for _, idx := range indices { + nodes[idx].span.Attributes().Range(func(k string, v pcommon.Value) bool { + if result[k] == nil { + result[k] = make(map[string]int) + } + result[k][v.AsString()]++ + return true + }) + } + return result +} + +// formatCorrelations produces "key=value(outlier%/normal%), ..." string. +func formatCorrelations(correlations []attributeCorrelation) string { + if len(correlations) == 0 { + return "" + } + + var sb strings.Builder + for i, c := range correlations { + if i > 0 { + sb.WriteString(", ") + } + fmt.Fprintf(&sb, "%s=%s(%.0f%%/%.0f%%)", + c.key, c.value, + c.outlierOccurrence*100, + c.normalOccurrence*100) + } + return sb.String() +} + +// getDuration calculates span duration efficiently. +func getDuration(node *spanNode) time.Duration { + return time.Duration(node.span.EndTimestamp() - node.span.StartTimestamp()) +} + +// filterOutlierNodes returns (normalNodes, outlierNodes) based on analysis. +// outlierNodes are sorted by duration descending (most extreme first). 
+func filterOutlierNodes( + nodes []*spanNode, + analysis *outlierAnalysisResult, + cfg OutlierAnalysisConfig, +) ([]*spanNode, []*spanNode) { + if analysis == nil || !cfg.PreserveOutliers || !analysis.hasOutliers { + return nodes, nil // No filtering + } + + // Skip preservation if no correlation found and that's required + if cfg.PreserveOnlyWithCorrelation && len(analysis.correlations) == 0 { + return nodes, nil + } + + // Limit preserved outliers if configured + preservedIndices := analysis.outlierIndices + if cfg.MaxPreservedOutliers > 0 && len(analysis.outlierIndices) > cfg.MaxPreservedOutliers { + preservedIndices = analysis.outlierIndices[:cfg.MaxPreservedOutliers] + } + + // Build set for O(1) lookup + outlierSet := make(map[int]struct{}, len(preservedIndices)) + for _, idx := range preservedIndices { + outlierSet[idx] = struct{}{} + } + + normalNodes := make([]*spanNode, 0, len(nodes)-len(preservedIndices)) + outlierNodes := make([]*spanNode, 0, len(preservedIndices)) + + for i, node := range nodes { + if _, isOutlier := outlierSet[i]; isOutlier { + outlierNodes = append(outlierNodes, node) + } else { + normalNodes = append(normalNodes, node) + } + } + + // Sort outlierNodes by duration descending to match preservedIndices order + sort.Slice(outlierNodes, func(i, j int) bool { + return getDuration(outlierNodes[i]) > getDuration(outlierNodes[j]) + }) + + return normalNodes, outlierNodes +} diff --git a/processor/spanpruningprocessor/outlier_test.go b/processor/spanpruningprocessor/outlier_test.go new file mode 100644 index 0000000000000..dd233ac6ead14 --- /dev/null +++ b/processor/spanpruningprocessor/outlier_test.go @@ -0,0 +1,746 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" 
+ "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +func TestAnalyzeOutliers(t *testing.T) { + ms := time.Millisecond + + defaultCfg := OutlierAnalysisConfig{ + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + } + + tests := []struct { + name string + durations []time.Duration + attrs []map[string]string + cfg OutlierAnalysisConfig + wantMedian time.Duration + wantCorrelations int + wantTopKey string + wantTopValue string + }{ + { + name: "clear outliers with correlation", + durations: []time.Duration{ + 5 * ms, 6 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms, // normal + 500 * ms, 600 * ms, // outliers + }, + attrs: []map[string]string{ + {"db.cache_hit": "true"}, + {"db.cache_hit": "true"}, + {"db.cache_hit": "true"}, + {"db.cache_hit": "true"}, + {"db.cache_hit": "true"}, + {"db.cache_hit": "true"}, + {"db.cache_hit": "true"}, + {"db.cache_hit": "true"}, + {"db.cache_hit": "false"}, // outlier + {"db.cache_hit": "false"}, // outlier + }, + cfg: defaultCfg, + wantMedian: (8*ms + 9*ms) / 2, + wantCorrelations: 1, + wantTopKey: "db.cache_hit", + wantTopValue: "false", + }, + { + name: "no outliers", + durations: []time.Duration{ + 5 * ms, 6 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, + }, + attrs: []map[string]string{ + {"key": "a"}, + {"key": "b"}, + {"key": "c"}, + {"key": "d"}, + {"key": "e"}, + {"key": "f"}, + {"key": "g"}, + }, + cfg: defaultCfg, + wantMedian: 7 * ms, + wantCorrelations: 0, + }, + { + name: "group too small", + durations: []time.Duration{5 * ms, 100 * ms, 200 * ms}, + attrs: []map[string]string{{"a": "1"}, {"a": "2"}, {"a": "3"}}, + cfg: defaultCfg, + wantMedian: 0, // nil result + }, + { + name: "all same duration - no outliers", + durations: []time.Duration{ + 10 * ms, 10 * ms, 10 * ms, 10 * ms, 10 * ms, 10 * ms, 10 * ms, + }, + attrs: []map[string]string{ + {"a": "1"}, + {"a": "2"}, + 
{"a": "3"}, + {"a": "4"}, + {"a": "5"}, + {"a": "6"}, + {"a": "7"}, + }, + cfg: defaultCfg, + wantMedian: 10 * ms, + wantCorrelations: 0, + }, + { + name: "outliers but no strong correlation", + durations: []time.Duration{ + 5 * ms, 6 * ms, 6 * ms, 7 * ms, 8 * ms, + 150 * ms, 200 * ms, + }, + attrs: []map[string]string{ + {"shard": "1"}, + {"shard": "2"}, + {"shard": "3"}, + {"shard": "1"}, + {"shard": "2"}, + {"shard": "1"}, + {"shard": "2"}, // outliers have same distribution as normals + }, + cfg: defaultCfg, + wantMedian: 7 * ms, + wantCorrelations: 0, // no strong correlation + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + nodes := makeNodesWithAttrs(tt.durations, tt.attrs) + result := analyzeOutliers(nodes, tt.cfg) + + if tt.wantMedian == 0 { + require.Nil(t, result) + return + } + + require.NotNil(t, result) + assert.Equal(t, tt.wantMedian, result.median) + assert.Len(t, result.correlations, tt.wantCorrelations) + + if tt.wantCorrelations > 0 { + assert.Equal(t, tt.wantTopKey, result.correlations[0].key) + assert.Equal(t, tt.wantTopValue, result.correlations[0].value) + } + }) + } +} + +func TestAnalyzeOutliers_IQRZeroDetectsSpikeOutlier(t *testing.T) { + ms := time.Millisecond + + cfg := OutlierAnalysisConfig{ + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + } + + durations := []time.Duration{10 * ms, 10 * ms, 10 * ms, 10 * ms, 10 * ms, 10 * ms, 1000 * ms} + attrs := []map[string]string{ + {"cache_hit": "true"}, + {"cache_hit": "true"}, + {"cache_hit": "true"}, + {"cache_hit": "true"}, + {"cache_hit": "true"}, + {"cache_hit": "true"}, + {"cache_hit": "false"}, // spike outlier + } + + nodes := makeNodesWithAttrs(durations, attrs) + result := analyzeOutliers(nodes, cfg) + + require.NotNil(t, result) + assert.Equal(t, 10*ms, result.median) + require.True(t, result.hasOutliers) + require.Len(t, result.outlierIndices, 1) + 
assert.Equal(t, 6, result.outlierIndices[0]) + require.Len(t, result.normalIndices, 6) + + // Ensure correlation is computed (outliers=1, normals=6) + require.NotEmpty(t, result.correlations) + assert.Equal(t, "cache_hit", result.correlations[0].key) + assert.Equal(t, "false", result.correlations[0].value) +} + +func TestAnalyzeOutliers_AllOutliersStillReturnsIndices(t *testing.T) { + ms := time.Millisecond + + // Negative multipliers are rejected by config validation, but analyzeOutliers + // should still behave consistently if called with a malformed config. + // With min_outlier_threshold_percent=0, anything above median is an outlier. + cfg := OutlierAnalysisConfig{ + IQRMultiplier: -100, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + MinOutlierThresholdPercent: 0, // Anything above median is outlier + } + + durations := []time.Duration{5 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms} + nodes := makeNodesWithAttrs(durations, nil) + result := analyzeOutliers(nodes, cfg) + + // With median=8ms and threshold=8ms (min_outlier_threshold_percent=0), + // spans 9ms, 10ms, 11ms are outliers (3 outliers, 4 normals) + require.NotNil(t, result) + require.True(t, result.hasOutliers) + assert.Len(t, result.outlierIndices, 3) + assert.Len(t, result.normalIndices, 4) +} + +func TestFormatCorrelations(t *testing.T) { + tests := []struct { + name string + correlations []attributeCorrelation + want string + }{ + { + name: "empty", + correlations: nil, + want: "", + }, + { + name: "single", + correlations: []attributeCorrelation{ + {key: "db.cache_hit", value: "false", outlierOccurrence: 1.0, normalOccurrence: 0.0}, + }, + want: "db.cache_hit=false(100%/0%)", + }, + { + name: "multiple", + correlations: []attributeCorrelation{ + {key: "db.cache_hit", value: "false", outlierOccurrence: 1.0, normalOccurrence: 0.0}, + {key: "db.shard", value: "7", outlierOccurrence: 0.8, normalOccurrence: 0.1}, + 
}, + want: "db.cache_hit=false(100%/0%), db.shard=7(80%/10%)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := formatCorrelations(tt.correlations) + assert.Equal(t, tt.want, got) + }) + } +} + +// makeNodesWithAttrs creates spanNodes with specified durations and attributes. +func makeNodesWithAttrs(durations []time.Duration, attrs []map[string]string) []*spanNode { + nodes := make([]*spanNode, len(durations)) + baseTime := pcommon.NewTimestampFromTime(time.Now()) + + for i, dur := range durations { + span := ptrace.NewSpan() + span.SetName("test") + span.SetStartTimestamp(baseTime) + span.SetEndTimestamp(pcommon.NewTimestampFromTime(baseTime.AsTime().Add(dur))) + + if i < len(attrs) { + for k, v := range attrs[i] { + span.Attributes().PutStr(k, v) + } + } + + nodes[i] = &spanNode{span: span} + } + return nodes +} + +func TestFilterOutlierNodes(t *testing.T) { + ms := time.Millisecond + + tests := []struct { + name string + durations []time.Duration + attrs []map[string]string + cfg OutlierAnalysisConfig + wantNormalCount int + wantOutlierCount int + wantPreservedDurations []time.Duration // Most extreme first + }{ + { + name: "preserves top 2 outliers", + durations: []time.Duration{ + 5 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms, 12 * ms, 13 * ms, 14 * ms, // normal (10 spans) + 500 * ms, 600 * ms, // outliers (2 spans) - ~17% of data, well outside normal range + }, + attrs: []map[string]string{ + {"key": "a"}, + {"key": "b"}, + {"key": "c"}, + {"key": "d"}, + {"key": "e"}, + {"key": "f"}, + {"key": "g"}, + {"key": "h"}, + {"key": "i"}, + {"key": "j"}, + {"key": "k"}, + {"key": "l"}, + }, + cfg: OutlierAnalysisConfig{ + PreserveOutliers: true, + MaxPreservedOutliers: 1, + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.5, + CorrelationMaxNormalOccurrence: 0.5, + MaxCorrelatedAttributes: 5, + }, + wantNormalCount: 11, // 10 normal + 1 outlier not preserved + wantOutlierCount: 1, + 
wantPreservedDurations: []time.Duration{600 * ms}, + }, + { + name: "preserve disabled returns all as normal", + durations: []time.Duration{ + 5 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms, 12 * ms, 13 * ms, 14 * ms, 500 * ms, + }, + attrs: []map[string]string{ + {"key": "a"}, + {"key": "b"}, + {"key": "c"}, + {"key": "d"}, + {"key": "e"}, + {"key": "f"}, + {"key": "g"}, + {"key": "h"}, + {"key": "i"}, + {"key": "j"}, + {"key": "k"}, + }, + cfg: OutlierAnalysisConfig{ + PreserveOutliers: false, + MinGroupSize: 7, + IQRMultiplier: 1.5, + CorrelationMinOccurrence: 0.5, + CorrelationMaxNormalOccurrence: 0.5, + MaxCorrelatedAttributes: 5, + }, + wantNormalCount: 11, + wantOutlierCount: 0, + }, + { + name: "preserves all outliers when max is 0", + durations: []time.Duration{ + 5 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms, 12 * ms, 13 * ms, 14 * ms, + 500 * ms, 600 * ms, + }, + attrs: []map[string]string{ + {"key": "a"}, + {"key": "b"}, + {"key": "c"}, + {"key": "d"}, + {"key": "e"}, + {"key": "f"}, + {"key": "g"}, + {"key": "h"}, + {"key": "i"}, + {"key": "j"}, + {"key": "k"}, + {"key": "l"}, + }, + cfg: OutlierAnalysisConfig{ + PreserveOutliers: true, + MaxPreservedOutliers: 0, // 0 = preserve all + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.5, + CorrelationMaxNormalOccurrence: 0.5, + MaxCorrelatedAttributes: 5, + }, + wantNormalCount: 10, + wantOutlierCount: 2, + wantPreservedDurations: []time.Duration{600 * ms, 500 * ms}, + }, + { + name: "skip preservation without correlation", + durations: []time.Duration{ + 5 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms, 12 * ms, 13 * ms, 14 * ms, + 500 * ms, 600 * ms, + }, + attrs: []map[string]string{ + // No distinguishing attributes - varied values + {"shard": "1"}, + {"shard": "2"}, + {"shard": "3"}, + {"shard": "1"}, + {"shard": "2"}, + {"shard": "3"}, + {"shard": "1"}, + {"shard": "2"}, + {"shard": "3"}, + {"shard": "1"}, + {"shard": "2"}, + {"shard": "3"}, + }, + 
cfg: OutlierAnalysisConfig{ + PreserveOutliers: true, + PreserveOnlyWithCorrelation: true, + MaxPreservedOutliers: 3, + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + wantNormalCount: 12, // All returned as normal (no correlation found) + wantOutlierCount: 0, + }, + { + name: "preserves with correlation when required", + durations: []time.Duration{ + 5 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms, 12 * ms, 13 * ms, 14 * ms, + 500 * ms, 600 * ms, + }, + attrs: []map[string]string{ + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "hit"}, + {"cache": "miss"}, + {"cache": "miss"}, // outliers + }, + cfg: OutlierAnalysisConfig{ + PreserveOutliers: true, + PreserveOnlyWithCorrelation: true, + MaxPreservedOutliers: 3, + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + }, + wantNormalCount: 10, + wantOutlierCount: 2, + wantPreservedDurations: []time.Duration{600 * ms, 500 * ms}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + nodes := makeNodesWithAttrs(tt.durations, tt.attrs) + analysis := analyzeOutliers(nodes, tt.cfg) + normal, outliers := filterOutlierNodes(nodes, analysis, tt.cfg) + + assert.Len(t, normal, tt.wantNormalCount) + assert.Len(t, outliers, tt.wantOutlierCount) + + if tt.wantPreservedDurations != nil { + for i, want := range tt.wantPreservedDurations { + got := getDuration(outliers[i]) + assert.Equal(t, want, got, "outlier %d duration", i) + } + } + }) + } +} + +func TestGetDuration(t *testing.T) { + baseTime := pcommon.NewTimestampFromTime(time.Now()) + + span := ptrace.NewSpan() + span.SetStartTimestamp(baseTime) + 
span.SetEndTimestamp(pcommon.NewTimestampFromTime(baseTime.AsTime().Add(100 * time.Millisecond))) + + node := &spanNode{span: span} + dur := getDuration(node) + + assert.Equal(t, 100*time.Millisecond, dur) +} + +func TestDetectOutliersMAD_Basic(t *testing.T) { + ms := time.Millisecond + + // Create sorted durations with clear outliers + durations := []indexedDuration{ + {0, 5 * ms}, + {1, 6 * ms}, + {2, 7 * ms}, + {3, 8 * ms}, + {4, 9 * ms}, + {5, 10 * ms}, + {6, 11 * ms}, + {7, 12 * ms}, + {8, 500 * ms}, + {9, 600 * ms}, // outliers + } + + outlierIndices, normalIndices, median := detectOutliersMAD(durations, 3.0, 0.1) + + // n=10, median = (durations[4] + durations[5]) / 2 = (9ms + 10ms) / 2 = 9.5ms + assert.Equal(t, (9*ms+10*ms)/2, median) + assert.Len(t, outlierIndices, 2) + assert.Len(t, normalIndices, 8) + assert.Contains(t, outlierIndices, 8) + assert.Contains(t, outlierIndices, 9) +} + +func TestDetectOutliersMAD_ZeroMAD(t *testing.T) { + ms := time.Millisecond + + // All same value except one spike + durations := []indexedDuration{ + {0, 10 * ms}, + {1, 10 * ms}, + {2, 10 * ms}, + {3, 10 * ms}, + {4, 10 * ms}, + {5, 10 * ms}, + {6, 1000 * ms}, // spike + } + + outlierIndices, normalIndices, median := detectOutliersMAD(durations, 3.0, 0.1) + + assert.Equal(t, 10*ms, median) + // With MAD=0 and 10% min threshold, threshold = 10ms * 1.1 = 11ms + // 1000ms > 11ms, so it's still an outlier + assert.Len(t, outlierIndices, 1) + assert.Equal(t, 6, outlierIndices[0]) + assert.Len(t, normalIndices, 6) +} + +func TestDetectOutliersMAD_BimodalDistribution(t *testing.T) { + ms := time.Millisecond + + // Cache hit/miss pattern: bimodal distribution + // Fast (cache hits): 5-15ms + // Slow (cache misses): 100-120ms + durations := []indexedDuration{ + {0, 5 * ms}, + {1, 7 * ms}, + {2, 8 * ms}, + {3, 10 * ms}, + {4, 12 * ms}, + {5, 15 * ms}, // cache hits + {6, 100 * ms}, + {7, 110 * ms}, + {8, 120 * ms}, // cache misses + } + + outlierIndices, normalIndices, median := 
detectOutliersMAD(durations, 3.0, 0.1) + + // Median should be around 12ms + assert.Equal(t, 12*ms, median) + // The slow cache misses should be outliers + assert.Len(t, outlierIndices, 3) + assert.Len(t, normalIndices, 6) +} + +func TestDetectOutliersMAD_SmallGroup(t *testing.T) { + ms := time.Millisecond + + // 7 spans (minimum valid group size) + durations := []indexedDuration{ + {0, 5 * ms}, + {1, 6 * ms}, + {2, 7 * ms}, + {3, 8 * ms}, + {4, 9 * ms}, + {5, 10 * ms}, + {6, 500 * ms}, // outlier + } + + outlierIndices, normalIndices, median := detectOutliersMAD(durations, 3.0, 0.1) + + assert.Equal(t, 8*ms, median) + assert.Len(t, outlierIndices, 1) + assert.Equal(t, 6, outlierIndices[0]) + assert.Len(t, normalIndices, 6) +} + +func TestAnalyzeOutliers_MethodSelection(t *testing.T) { + ms := time.Millisecond + + durations := []time.Duration{ + 5 * ms, 6 * ms, 7 * ms, 8 * ms, 9 * ms, 10 * ms, 11 * ms, 12 * ms, + 500 * ms, 600 * ms, // outliers + } + attrs := []map[string]string{ + {"key": "a"}, + {"key": "b"}, + {"key": "c"}, + {"key": "d"}, + {"key": "e"}, + {"key": "f"}, + {"key": "g"}, + {"key": "h"}, + {"key": "i"}, + {"key": "j"}, + } + + tests := []struct { + name string + method OutlierMethod + }{ + { + name: "default (empty) uses IQR", + method: "", + }, + { + name: "explicit IQR", + method: OutlierMethodIQR, + }, + { + name: "explicit MAD", + method: OutlierMethodMAD, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + nodes := makeNodesWithAttrs(durations, attrs) + cfg := OutlierAnalysisConfig{ + Method: tt.method, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.5, + CorrelationMaxNormalOccurrence: 0.5, + MaxCorrelatedAttributes: 5, + } + + result := analyzeOutliers(nodes, cfg) + + require.NotNil(t, result) + assert.True(t, result.hasOutliers) + }) + } +} + +func TestMADvsIQR_Comparison(t *testing.T) { + ms := time.Millisecond + + // Distribution with moderate outliers + // MAD 
should be more sensitive to this pattern + durations := []time.Duration{ + 10 * ms, 11 * ms, 12 * ms, 13 * ms, 14 * ms, + 15 * ms, 16 * ms, 17 * ms, 18 * ms, 19 * ms, + 100 * ms, // moderate outlier + } + + nodes := makeNodesWithAttrs(durations, nil) + + iqrCfg := OutlierAnalysisConfig{ + Method: OutlierMethodIQR, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.5, + CorrelationMaxNormalOccurrence: 0.5, + MaxCorrelatedAttributes: 5, + } + + madCfg := OutlierAnalysisConfig{ + Method: OutlierMethodMAD, + IQRMultiplier: 1.5, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.5, + CorrelationMaxNormalOccurrence: 0.5, + MaxCorrelatedAttributes: 5, + } + + iqrResult := analyzeOutliers(nodes, iqrCfg) + madResult := analyzeOutliers(nodes, madCfg) + + require.NotNil(t, iqrResult) + require.NotNil(t, madResult) + + // Both should detect the 100ms outlier + assert.True(t, iqrResult.hasOutliers) + assert.True(t, madResult.hasOutliers) + + // Both should have the same median + assert.Equal(t, iqrResult.median, madResult.median) +} + +func TestMinOutlierThresholdPercent(t *testing.T) { + ms := time.Millisecond + + // All same value except one that's slightly above (5% above median) + // With 10% threshold, it should NOT be an outlier + // With 0% threshold, it SHOULD be an outlier + durations := []indexedDuration{ + {0, 100 * ms}, + {1, 100 * ms}, + {2, 100 * ms}, + {3, 100 * ms}, + {4, 100 * ms}, + {5, 100 * ms}, + {6, 105 * ms}, // 5% above median + } + + t.Run("IQR with 10% threshold excludes 5% deviation", func(t *testing.T) { + // IQR=0, so statistical threshold = Q3 = 100ms + // Minimum threshold = 100ms * 1.10 = 110ms + // upperThreshold = max(100ms, 110ms) = 110ms + // 105ms is NOT > 110ms, so no outlier + outlierIndices, normalIndices, median := detectOutliersIQR(durations, 1.5, 0.10) + assert.Equal(t, 100*ms, median) + assert.Empty(t, outlierIndices) + assert.Len(t, normalIndices, 7) + }) + + t.Run("IQR 
with 0% threshold includes 5% deviation", func(t *testing.T) { + // IQR=0, so statistical threshold = Q3 = 100ms + // Minimum threshold = 100ms * 1.0 = 100ms + // upperThreshold = max(100ms, 100ms) = 100ms + // 105ms > 100ms, so it's an outlier + outlierIndices, normalIndices, median := detectOutliersIQR(durations, 1.5, 0.0) + assert.Equal(t, 100*ms, median) + assert.Len(t, outlierIndices, 1) + assert.Equal(t, 6, outlierIndices[0]) + assert.Len(t, normalIndices, 6) + }) + + t.Run("MAD with 10% threshold excludes 5% deviation", func(t *testing.T) { + // MAD=0, so statistical threshold = median = 100ms + // Minimum threshold = 100ms * 1.10 = 110ms + // upperThreshold = max(100ms, 110ms) = 110ms + // 105ms is NOT > 110ms, so no outlier + outlierIndices, normalIndices, median := detectOutliersMAD(durations, 3.0, 0.10) + assert.Equal(t, 100*ms, median) + assert.Empty(t, outlierIndices) + assert.Len(t, normalIndices, 7) + }) + + t.Run("MAD with 0% threshold includes 5% deviation", func(t *testing.T) { + // MAD=0, so statistical threshold = median = 100ms + // Minimum threshold = 100ms * 1.0 = 100ms + // upperThreshold = max(100ms, 100ms) = 100ms + // 105ms > 100ms, so it's an outlier + outlierIndices, normalIndices, median := detectOutliersMAD(durations, 3.0, 0.0) + assert.Equal(t, 100*ms, median) + assert.Len(t, outlierIndices, 1) + assert.Equal(t, 6, outlierIndices[0]) + assert.Len(t, normalIndices, 6) + }) +} diff --git a/processor/spanpruningprocessor/processor.go b/processor/spanpruningprocessor/processor.go new file mode 100644 index 0000000000000..860b8fc01d021 --- /dev/null +++ b/processor/spanpruningprocessor/processor.go @@ -0,0 +1,410 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "context" + "fmt" + "math/rand/v2" + "time" + + "github.com/gobwas/glob" + 
"go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + "go.opentelemetry.io/collector/processor" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" +) + +// spanInfo pairs a span with its ScopeSpans container for in-place edits. +type spanInfo struct { + span ptrace.Span + scopeSpans ptrace.ScopeSpans +} + +// attributePattern caches a compiled glob used for attribute key matching. +type attributePattern struct { + glob glob.Glob +} + +// spanPruningProcessor aggregates similar leaf spans (and eligible parents) +// according to configuration while emitting telemetry about pruning actions. +type spanPruningProcessor struct { + config *Config + logger *zap.Logger + attributePatterns []attributePattern + telemetryBuilder *metadata.TelemetryBuilder + enableAttributeLossAnalysis bool + enableBytesMetrics bool +} + +func newSpanPruningProcessor(set processor.Settings, cfg *Config, telemetryBuilder *metadata.TelemetryBuilder) (*spanPruningProcessor, error) { + // Compile glob patterns for group_by_attributes + patterns := make([]attributePattern, 0, len(cfg.GroupByAttributes)) + for _, pattern := range cfg.GroupByAttributes { + g, err := glob.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("invalid glob pattern %q: %w", pattern, err) + } + patterns = append(patterns, attributePattern{ + glob: g, + }) + } + + return &spanPruningProcessor{ + config: cfg, + logger: set.Logger, + attributePatterns: patterns, + telemetryBuilder: telemetryBuilder, + enableAttributeLossAnalysis: cfg.EnableAttributeLossAnalysis, + enableBytesMetrics: cfg.EnableBytesMetrics, + }, nil +} + +// shutdown releases processor resources, including telemetry providers. 
+func (p *spanPruningProcessor) shutdown(_ context.Context) error { + p.telemetryBuilder.Shutdown() + return nil +} + +// shouldSampleAttributeLossExemplar decides whether to attach exemplars to +// attribute-loss metrics based on the configured sampling rate. +func (p *spanPruningProcessor) shouldSampleAttributeLossExemplar() bool { + rate := p.config.AttributeLossExemplarSampleRate + if rate <= 0 { + return false + } + if rate >= 1 { + return true + } + return rand.Float64() < rate +} + +// createExemplarContext creates a context with span context for exemplar attachment. +// Uses direct type casting since pcommon and trace ID types are identical byte arrays. +func createExemplarContext(ctx context.Context, traceID pcommon.TraceID, spanID pcommon.SpanID) context.Context { + return trace.ContextWithSpanContext(ctx, trace.NewSpanContext(trace.SpanContextConfig{ + TraceID: trace.TraceID(traceID), + SpanID: trace.SpanID(spanID), + TraceFlags: trace.FlagsSampled, + })) +} + +// processTraces runs aggregation for each trace batch and records processor +// telemetry about received, pruned, and aggregated spans. 
+func (p *spanPruningProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { + start := time.Now() + + // Measure bytes received before processing + if p.enableBytesMetrics { + var m ptrace.ProtoMarshaler + p.telemetryBuilder.ProcessorSpanpruningBytesReceived.Add(ctx, int64(m.TracesSize(td))) + } + + // Count incoming spans + totalSpans := int64(0) + for i := 0; i < td.ResourceSpans().Len(); i++ { + for j := 0; j < td.ResourceSpans().At(i).ScopeSpans().Len(); j++ { + totalSpans += int64(td.ResourceSpans().At(i).ScopeSpans().At(j).Spans().Len()) + } + } + p.telemetryBuilder.ProcessorSpanpruningSpansReceived.Add(ctx, totalSpans) + + // Group spans by TraceID + traceSpans := p.groupSpansByTraceID(td) + + // Process each trace independently + tracesProcessed := int64(0) + for _, spans := range traceSpans { + p.processTrace(ctx, spans) + tracesProcessed++ + } + + // Record telemetry only when actual work was done + if tracesProcessed > 0 { + p.telemetryBuilder.ProcessorSpanpruningTracesProcessed.Add(ctx, tracesProcessed) + p.telemetryBuilder.ProcessorSpanpruningProcessingDuration.Record(ctx, + time.Since(start).Seconds()) + } + + // Measure bytes emitted after processing + if p.enableBytesMetrics { + var m ptrace.ProtoMarshaler + p.telemetryBuilder.ProcessorSpanpruningBytesEmitted.Add(ctx, int64(m.TracesSize(td))) + } + + return td, nil +} + +// groupSpansByTraceID flattens incoming data into a TraceID-indexed map so +// each trace can be analyzed independently. 
+func (*spanPruningProcessor) groupSpansByTraceID(td ptrace.Traces) map[pcommon.TraceID][]spanInfo { + traceSpans := make(map[pcommon.TraceID][]spanInfo) + + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + rs := rss.At(i) + ilss := rs.ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + ils := ilss.At(j) + spans := ils.Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + traceID := span.TraceID() + traceSpans[traceID] = append(traceSpans[traceID], spanInfo{ + span: span, + scopeSpans: ils, + }) + } + } + } + + return traceSpans +} + +// processTrace applies the pruning algorithm to a single trace: +// 1) analyze aggregation candidates bottom-up, 2) build a top-down execution +// plan, and 3) create summary spans while removing originals. +func (p *spanPruningProcessor) processTrace(ctx context.Context, spans []spanInfo) { + // Build trace tree + tree := p.buildTraceTree(spans) + if len(tree.nodeByID) == 0 { + return + } + + // Phase 1: Analyze aggregations (bottom-up) + aggregationGroups := p.analyzeAggregationsWithTree(ctx, tree) + if len(aggregationGroups) == 0 { + return + } + + // Phase 2: Build aggregation plan (order top-down) + plan := p.buildAggregationPlan(aggregationGroups) + + // Phase 3: Execute aggregations (top-down) and record pruned spans + prunedCount := p.executeAggregations(plan) + + // Record telemetry after aggregation is complete + p.telemetryBuilder.ProcessorSpanpruningSpansPruned.Add(ctx, int64(prunedCount)) + p.telemetryBuilder.ProcessorSpanpruningAggregationsCreated.Add(ctx, int64(len(plan.groups))) + for i := range plan.groups { + p.telemetryBuilder.ProcessorSpanpruningAggregationGroupSize.Record(ctx, int64(len(plan.groups[i].nodes))) + } +} + +// analyzeAggregationsWithTree performs Phase 1 using tree structure +// Uses markedForRemoval field on nodes instead of separate map for better performance +// Optimized to walk up from marked nodes instead of scanning all nodes +func (p *spanPruningProcessor) 
analyzeAggregationsWithTree(ctx context.Context, tree *traceTree) map[string]aggregationGroup { + // Step 1: Get pre-computed leaf nodes + leafNodes := tree.getLeaves() + if len(leafNodes) == 0 { + return nil + } + + // Step 2: Group similar leaf nodes + leafGroups := p.groupLeafNodesByKey(leafNodes) + + // Step 3: Filter groups meeting minimum threshold and mark nodes + // Pre-size based on expected number of groups + aggregationGroups := make(map[string]aggregationGroup, len(leafGroups)/2) + + // Track nodes marked in this round for candidate collection + var markedNodes []*spanNode + + for groupKey, nodes := range leafGroups { + if len(nodes) < p.config.MinSpansToAggregate { + continue + } + // Outlier analysis and filtering FIRST (before attribute loss) + var outlierResult *outlierAnalysisResult + var preservedOutliers []*spanNode + aggregateNodes := nodes + + if p.config.EnableOutlierAnalysis { + outlierResult = analyzeOutliers(nodes, p.config.OutlierAnalysis) + + // Record outlier metrics + if outlierResult != nil && outlierResult.hasOutliers { + p.telemetryBuilder.ProcessorSpanpruningOutliersDetected.Add(ctx, int64(len(outlierResult.outlierIndices))) + if len(outlierResult.correlations) > 0 { + p.telemetryBuilder.ProcessorSpanpruningOutliersCorrelationsDetected.Add(ctx, 1) + } + } + + // Filter out outliers to preserve them + if p.config.OutlierAnalysis.PreserveOutliers && outlierResult != nil { + aggregateNodes, preservedOutliers = filterOutlierNodes( + nodes, + outlierResult, + p.config.OutlierAnalysis, + ) + + // Record preserved outliers + if len(preservedOutliers) > 0 { + p.telemetryBuilder.ProcessorSpanpruningOutliersPreserved.Add(ctx, int64(len(preservedOutliers))) + } + + // Skip aggregation if too few normal spans remain + if len(aggregateNodes) < p.config.MinSpansToAggregate { + continue + } + } + } + + // Find template from filtered nodes (excludes preserved outliers) + templateNode := findLongestDurationNode(aggregateNodes) + + // Analyze 
attribute loss on filtered nodes with correct template + var lossInfo attributeLossSummary + if p.enableAttributeLossAnalysis { + lossInfo = analyzeAttributeLoss(aggregateNodes, templateNode) + + // Determine context for recording (with or without exemplar) + recordCtx := ctx + if p.shouldSampleAttributeLossExemplar() { + exemplarSpan := templateNode.span + recordCtx = createExemplarContext(ctx, exemplarSpan.TraceID(), exemplarSpan.SpanID()) + } + + if !lossInfo.isEmpty() { + p.telemetryBuilder.ProcessorSpanpruningLeafAttributeDiversityLoss.Record( + recordCtx, + int64(len(lossInfo.diverse)), + ) + p.telemetryBuilder.ProcessorSpanpruningLeafAttributeLoss.Record( + recordCtx, + int64(len(lossInfo.missing)), + ) + } + } + + aggregationGroups[groupKey] = aggregationGroup{ + nodes: aggregateNodes, + depth: 0, + lossInfo: lossInfo, + templateNode: templateNode, + outlierAnalysis: outlierResult, + preservedOutliers: preservedOutliers, + } + + // Mark only normal spans for removal + for _, node := range aggregateNodes { + node.markedForRemoval = true + } + markedNodes = append(markedNodes, aggregateNodes...) 
+ + // Mark outliers as preserved (not marked for removal) + for _, outlier := range preservedOutliers { + outlier.isPreservedOutlier = true + } + } + + if len(aggregationGroups) == 0 { + return nil + } + + // Step 4: Walk up the tree to find eligible parent spans recursively + // Respect MaxParentDepth: 0 = no parent aggregation, -1 = unlimited, >0 = limit + if p.config.MaxParentDepth == 0 { + return aggregationGroups + } + + // Collect initial parent candidates from marked leaf nodes + candidates := collectParentCandidates(markedNodes) + + depth := 1 + for len(candidates) > 0 { + // Check if we've reached the maximum parent depth limit + if p.config.MaxParentDepth > 0 && depth > p.config.MaxParentDepth { + break + } + + // Find eligible parents from candidates (walks up from marked nodes) + eligibleParents := p.findEligibleParentNodesFromCandidates(candidates) + if len(eligibleParents) == 0 { + break + } + + // Group parent candidates by name + status + parentGroups := make(map[string][]*spanNode) + for _, node := range eligibleParents { + parentKey := p.buildParentGroupKey(node.span) + parentGroups[parentKey] = append(parentGroups[parentKey], node) + } + + // Add parent groups (at least 2 parents to aggregate) + markedNodes = markedNodes[:0] // reset for this round + for parentKey, nodes := range parentGroups { + if len(nodes) < 2 { + continue + } + // Outlier analysis FIRST (before attribute loss) for consistency + var outlierResult *outlierAnalysisResult + if p.config.EnableOutlierAnalysis { + outlierResult = analyzeOutliers(nodes, p.config.OutlierAnalysis) + + // Record outlier metrics for parent groups + if outlierResult != nil && outlierResult.hasOutliers { + p.telemetryBuilder.ProcessorSpanpruningOutliersDetected.Add(ctx, int64(len(outlierResult.outlierIndices))) + if len(outlierResult.correlations) > 0 { + p.telemetryBuilder.ProcessorSpanpruningOutliersCorrelationsDetected.Add(ctx, 1) + } + } + } + + // Find the template node (longest duration) for this 
group + templateNode := findLongestDurationNode(nodes) + + // Analyze attribute loss for parent aggregation (only when enabled) + var lossInfo attributeLossSummary + if p.enableAttributeLossAnalysis { + lossInfo = analyzeAttributeLoss(nodes, templateNode) + + // Determine context for recording (with or without exemplar) + recordCtx := ctx + if p.shouldSampleAttributeLossExemplar() { + exemplarSpan := templateNode.span + recordCtx = createExemplarContext(ctx, exemplarSpan.TraceID(), exemplarSpan.SpanID()) + } + + if !lossInfo.isEmpty() { + p.telemetryBuilder.ProcessorSpanpruningParentAttributeDiversityLoss.Record( + recordCtx, + int64(len(lossInfo.diverse)), + ) + p.telemetryBuilder.ProcessorSpanpruningParentAttributeLoss.Record( + recordCtx, + int64(len(lossInfo.missing)), + ) + } + } + + aggregationGroups[parentKey] = aggregationGroup{ + nodes: nodes, + depth: depth, + lossInfo: lossInfo, + templateNode: templateNode, + outlierAnalysis: outlierResult, + } + // Mark parent nodes for removal + for _, node := range nodes { + node.markedForRemoval = true + } + markedNodes = append(markedNodes, nodes...) 
+ } + + if len(markedNodes) == 0 { + break + } + + // Collect next round of candidates from newly marked nodes + candidates = collectParentCandidates(markedNodes) + depth++ + } + + return aggregationGroups +} diff --git a/processor/spanpruningprocessor/processor_benchmark_test.go b/processor/spanpruningprocessor/processor_benchmark_test.go new file mode 100644 index 0000000000000..d9f8ae637031e --- /dev/null +++ b/processor/spanpruningprocessor/processor_benchmark_test.go @@ -0,0 +1,271 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor + +import ( + "testing" + + "go.opentelemetry.io/collector/pdata/ptrace" + "go.opentelemetry.io/collector/processor/processortest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" +) + +// BenchmarkProcessTrace_SmallTrace benchmarks processing a small trace (10 spans). +func BenchmarkProcessTrace_SmallTrace(b *testing.B) { + benchmarkProcessTrace(b, 10, 5) +} + +// BenchmarkProcessTrace_MediumTrace benchmarks processing a medium trace (100 spans). +func BenchmarkProcessTrace_MediumTrace(b *testing.B) { + benchmarkProcessTrace(b, 100, 20) +} + +// BenchmarkProcessTrace_LargeTrace benchmarks processing a large trace (1000 spans). +func BenchmarkProcessTrace_LargeTrace(b *testing.B) { + benchmarkProcessTrace(b, 1000, 50) +} + +// BenchmarkProcessTrace_SparseAggregation benchmarks sparse aggregation (~10% aggregate). +func BenchmarkProcessTrace_SparseAggregation(b *testing.B) { + benchmarkProcessTraceSparse(b, 1000, 5) +} + +// BenchmarkDeepTrace_Depth1 benchmarks deep trace with max_parent_depth=1. +func BenchmarkDeepTrace_Depth1(b *testing.B) { + benchmarkDeepTrace(b, 20, 3, 5, 1000, 1) +} + +// BenchmarkDeepTrace_Depth5 benchmarks deep trace with max_parent_depth=5. 
+func BenchmarkDeepTrace_Depth5(b *testing.B) { + benchmarkDeepTrace(b, 20, 3, 5, 1000, 5) +} + +// BenchmarkDeepTrace_Depth10 benchmarks deep trace with max_parent_depth=10. +func BenchmarkDeepTrace_Depth10(b *testing.B) { + benchmarkDeepTrace(b, 20, 3, 5, 1000, 10) +} + +// BenchmarkBuildTraceTree benchmarks tree construction. +func BenchmarkBuildTraceTree(b *testing.B) { + proc := newBenchmarkProcessor(b, 5) + spans := generateTestSpans(1000, 50) + + b.ResetTimer() + for b.Loop() { + _ = proc.buildTraceTree(spans) + } +} + +// BenchmarkGroupLeafNodes benchmarks leaf node grouping. +func BenchmarkGroupLeafNodes(b *testing.B) { + proc := newBenchmarkProcessor(b, 5) + spans := generateTestSpans(1000, 50) + tree := proc.buildTraceTree(spans) + leaves := tree.getLeaves() + + b.ResetTimer() + for b.Loop() { + for _, leaf := range leaves { + leaf.groupKey = "" + } + _ = proc.groupLeafNodesByKey(leaves) + } +} + +// BenchmarkFindEligibleParents benchmarks parent candidate discovery. +func BenchmarkFindEligibleParents(b *testing.B) { + proc := newBenchmarkProcessor(b, 5) + spans := generateTestSpans(1000, 50) + tree := proc.buildTraceTree(spans) + leaves := tree.getLeaves() + + for _, leaf := range leaves { + leaf.markedForRemoval = true + } + candidates := collectParentCandidates(leaves) + + b.ResetTimer() + for b.Loop() { + for _, c := range candidates { + c.markedForRemoval = false + } + _ = proc.findEligibleParentNodesFromCandidates(candidates) + } +} + +// BenchmarkBuildGroupKey benchmarks group key construction. +func BenchmarkBuildGroupKey(b *testing.B) { + proc := newBenchmarkProcessor(b, 1) + td := generateTestTrace(200, 5) + span := td.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(2) + + b.ResetTimer() + for b.Loop() { + _ = proc.buildGroupKey(span) + } +} + +// BenchmarkExecuteAggregations benchmarks the aggregation execution phase. 
+func BenchmarkExecuteAggregations(b *testing.B) { + proc := newBenchmarkProcessor(b, 1) + base := generateTestTrace(500, 5) + + b.ResetTimer() + for range b.N { //nolint:modernize // Manual timer control required, b.Loop() doesn't work with StopTimer/StartTimer + td := ptrace.NewTraces() + base.CopyTo(td) + + spans := spanInfosFromTraces(td) + tree := proc.buildTraceTree(spans) + leafGroups := proc.groupLeafNodesByKey(tree.getLeaves()) + + groups := make(map[string]aggregationGroup, len(leafGroups)) + for key, nodes := range leafGroups { + if len(nodes) >= proc.config.MinSpansToAggregate { + templateNode := findLongestDurationNode(nodes) + groups[key] = aggregationGroup{nodes: nodes, depth: 0, templateNode: templateNode} + } + } + + plan := proc.buildAggregationPlan(groups) + + b.StartTimer() + proc.executeAggregations(plan) + b.StopTimer() + } +} + +// newBenchmarkProcessor creates a processor configured for benchmarking. +func newBenchmarkProcessor(b *testing.B, maxParentDepth int) *spanPruningProcessor { + b.Helper() + + cfg := createDefaultConfig().(*Config) + cfg.GroupByAttributes = []string{"http.*", "db.*"} + cfg.MinSpansToAggregate = 5 + cfg.MaxParentDepth = maxParentDepth + + set := processortest.NewNopSettings(metadata.Type) + telemetryBuilder, err := metadata.NewTelemetryBuilder(set.TelemetrySettings) + if err != nil { + b.Fatal(err) + } + + proc, err := newSpanPruningProcessor(set, cfg, telemetryBuilder) + if err != nil { + b.Fatal(err) + } + return proc +} + +func spanInfosFromTraces(td ptrace.Traces) []spanInfo { + var spans []spanInfo + + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + ilss := rss.At(i).ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + ss := ilss.At(j) + ssSpans := ss.Spans() + for k := 0; k < ssSpans.Len(); k++ { + spans = append(spans, spanInfo{ + span: ssSpans.At(k), + scopeSpans: ss, + }) + } + } + } + + return spans +} + +func benchmarkProcessTrace(b *testing.B, numSpans, minSpans int) { + cfg := 
createDefaultConfig().(*Config) + cfg.MinSpansToAggregate = minSpans + cfg.GroupByAttributes = []string{"http.*"} + + set := processortest.NewNopSettings(metadata.Type) + telemetryBuilder, err := metadata.NewTelemetryBuilder(set.TelemetrySettings) + if err != nil { + b.Fatal(err) + } + + proc, err := newSpanPruningProcessor(set, cfg, telemetryBuilder) + if err != nil { + b.Fatal(err) + } + + td := generateTestTrace(numSpans, minSpans) + + b.ResetTimer() + for b.Loop() { + cloned := ptrace.NewTraces() + td.CopyTo(cloned) + _, err := proc.processTraces(b.Context(), cloned) + if err != nil { + b.Fatal(err) + } + } +} + +func benchmarkProcessTraceSparse(b *testing.B, numSpans, minSpans int) { + cfg := createDefaultConfig().(*Config) + cfg.MinSpansToAggregate = minSpans + cfg.GroupByAttributes = []string{"db.*"} + cfg.MaxParentDepth = 3 + + set := processortest.NewNopSettings(metadata.Type) + telemetryBuilder, err := metadata.NewTelemetryBuilder(set.TelemetrySettings) + if err != nil { + b.Fatal(err) + } + + proc, err := newSpanPruningProcessor(set, cfg, telemetryBuilder) + if err != nil { + b.Fatal(err) + } + + td := generateSparseTrace(numSpans, minSpans) + + b.ResetTimer() + for b.Loop() { + cloned := ptrace.NewTraces() + td.CopyTo(cloned) + _, err := proc.processTraces(b.Context(), cloned) + if err != nil { + b.Fatal(err) + } + } +} + +func benchmarkDeepTrace(b *testing.B, depth, branchingFactor, leafsPerBranch, maxSpans, maxParentDepth int) { + cfg := createDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.*"} + cfg.MaxParentDepth = maxParentDepth + + set := processortest.NewNopSettings(metadata.Type) + telemetryBuilder, err := metadata.NewTelemetryBuilder(set.TelemetrySettings) + if err != nil { + b.Fatal(err) + } + + proc, err := newSpanPruningProcessor(set, cfg, telemetryBuilder) + if err != nil { + b.Fatal(err) + } + + td := generateDeepTrace(depth, branchingFactor, leafsPerBranch, maxSpans) + 
b.ReportMetric(float64(td.ResourceSpans().At(0).ScopeSpans().At(0).Spans().Len()), "spans") + + b.ResetTimer() + for b.Loop() { + cloned := ptrace.NewTraces() + td.CopyTo(cloned) + _, err := proc.processTraces(b.Context(), cloned) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/processor/spanpruningprocessor/processor_test.go b/processor/spanpruningprocessor/processor_test.go new file mode 100644 index 0000000000000..3e1444eed3627 --- /dev/null +++ b/processor/spanpruningprocessor/processor_test.go @@ -0,0 +1,2521 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + "go.opentelemetry.io/collector/processor/processortest" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/metric/metricdata/metricdatatest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadata" + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor/internal/metadatatest" +) + +func TestNewTraces(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + require.NotNil(t, tp) +} + +func TestLeafSpanPruning_BasicAggregation(t *testing.T) { + // Test: 3 identical leaf spans should be aggregated into 1 summary span + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), 
cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithLeafSpans(t, 3, "SELECT", map[string]string{"db.operation": "select"}) + originalSpanCount := countSpans(td) + assert.Equal(t, 4, originalSpanCount) // 1 parent + 3 leaf spans + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After processing: should have 1 parent + 1 summary span + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) + + // Verify summary span exists with aggregation attributes + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan, "summary span should exist") + + // Check aggregation attributes + attrs := summarySpan.Attributes() + spanCount, exists := attrs.Get("aggregation.span_count") + assert.True(t, exists, "aggregation.span_count should exist") + assert.Equal(t, int64(3), spanCount.Int()) +} + +func TestLeafSpanPruning_BelowThreshold(t *testing.T) { + // Test: 1 leaf span with min_spans_to_aggregate=2 should not be aggregated + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithLeafSpans(t, 1, "SELECT", map[string]string{"db.operation": "select"}) + originalSpanCount := countSpans(td) + assert.Equal(t, 2, originalSpanCount) // 1 parent + 1 leaf span + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Should remain unchanged + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) +} + +func TestLeafSpanPruning_MixedLeafAndNonLeaf(t *testing.T) { + // Test: only aggregate leaf spans, not spans with children + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + 
require.NoError(t, err) + + // Create trace: root -> intermediate -> 3 leaf spans + td := createTestTraceWithIntermediateSpan(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 5, originalSpanCount) // 1 root + 1 intermediate + 3 leaf spans + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After: 1 root + 1 intermediate + 1 summary + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) +} + +func TestLeafSpanPruning_DifferentGroups(t *testing.T) { + // Test: spans with different attributes should stay in separate groups + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.operation"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with mixed operations: 3 SELECT + 2 INSERT + td := createTestTraceWithMixedOperations(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 6, originalSpanCount) // 1 parent + 3 SELECT + 2 INSERT + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After: 1 parent + 1 SELECT summary + 1 INSERT summary + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) +} + +func TestLeafSpanPruning_EmptyTrace(t *testing.T) { + // Test: empty trace should be handled gracefully + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := ptrace.NewTraces() + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + assert.Equal(t, 0, countSpans(td)) +} + +func TestLeafSpanPruning_SingleSpanTrace(t *testing.T) { + // Test: single span trace (root only) should not be modified + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + 
cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createSingleSpanTrace(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 1, originalSpanCount) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Should remain unchanged + finalSpanCount := countSpans(td) + assert.Equal(t, 1, finalSpanCount) +} + +func TestLeafSpanPruning_StatusAggregation(t *testing.T) { + // Test: spans with different status codes should be in separate groups + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with 4 OK spans and 2 Error spans (same name) + td := createTestTraceWithMixedStatusSpans(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 7, originalSpanCount) // 1 parent + 4 OK + 2 Error + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After: 1 parent + 1 OK summary (4 spans) + 1 Error summary (2 spans) + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) + + // Verify we have both an OK summary and an Error summary + okSummary := findSpanByNameAndStatus(td, "SELECT", ptrace.StatusCodeOk) + require.NotNil(t, okSummary) + okCount, _ := okSummary.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(4), okCount.Int()) + + errorSummary := findSpanByNameAndStatus(td, "SELECT", ptrace.StatusCodeError) + require.NotNil(t, errorSummary) + errorCount, _ := errorSummary.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(2), errorCount.Int()) +} + +func TestLeafSpanPruning_StatusBelowThreshold(t *testing.T) { + // Test: 1 OK span + 1 Error span should not aggregate (each group below threshold) + factory := NewFactory() + cfg := 
factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithErrorSpan(t) // Creates 1 OK + 1 Error span + originalSpanCount := countSpans(td) + assert.Equal(t, 3, originalSpanCount) // 1 parent + 1 OK + 1 Error + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Should remain unchanged - neither group meets threshold + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) +} + +func TestLeafSpanPruning_DurationStats(t *testing.T) { + // Test: verify duration statistics are calculated correctly + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create spans with known durations: 100ns, 200ns, 300ns + td := createTestTraceWithKnownDurations(t, []int64{100, 200, 300}) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan) + + attrs := summarySpan.Attributes() + + minDuration, _ := attrs.Get("aggregation.duration_min_ns") + assert.Equal(t, int64(100), minDuration.Int()) + + maxDuration, _ := attrs.Get("aggregation.duration_max_ns") + assert.Equal(t, int64(300), maxDuration.Int()) + + avgDuration, _ := attrs.Get("aggregation.duration_avg_ns") + assert.Equal(t, int64(200), avgDuration.Int()) // (100+200+300)/3 = 200 + + totalDuration, _ := attrs.Get("aggregation.duration_total_ns") + assert.Equal(t, int64(600), totalDuration.Int()) +} + +// Helper functions + +func createTestTraceWithLeafSpans(t *testing.T, numLeafSpans int, spanName string, attrs map[string]string) ptrace.Traces { //nolint:unparam + t.Helper() + td := ptrace.NewTraces() + rs := 
td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Create parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // Create leaf spans + for i := range numLeafSpans { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName(spanName) + span.SetStartTimestamp(pcommon.Timestamp(1000000000 + int64(i)*100)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100 + int64(i)*100)) + for k, v := range attrs { + span.Attributes().PutStr(k, v) + } + } + + return td +} + +// TestBytesMetrics_Enabled tests that bytes metrics are recorded when enabled +func TestBytesMetrics_Enabled(t *testing.T) { + testTel := componenttest.NewTelemetry() + defer func() { require.NoError(t, testTel.Shutdown(t.Context())) }() + + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.EnableBytesMetrics = true + + tp, err := factory.CreateTraces(t.Context(), metadatatest.NewSettings(testTel), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with 3 leaf spans that will be aggregated + td := createTestTraceWithLeafSpans(t, 3, "SELECT", map[string]string{"db.operation": "select"}) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Verify bytes_received metric was recorded (value > 0) + metadatatest.AssertEqualProcessorSpanpruningBytesReceived(t, testTel, + []metricdata.DataPoint[int64]{{}}, + metricdatatest.IgnoreTimestamp(), + metricdatatest.IgnoreValue()) // Just verify metric exists + + // Verify bytes_emitted metric was recorded (value > 0) + 
metadatatest.AssertEqualProcessorSpanpruningBytesEmitted(t, testTel, + []metricdata.DataPoint[int64]{{}}, + metricdatatest.IgnoreTimestamp(), + metricdatatest.IgnoreValue()) // Just verify metric exists +} + +// TestBytesMetrics_Disabled tests that bytes metrics are NOT recorded when disabled (default) +func TestBytesMetrics_Disabled(t *testing.T) { + testTel := componenttest.NewTelemetry() + defer func() { require.NoError(t, testTel.Shutdown(t.Context())) }() + + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + // EnableBytesMetrics defaults to false + + tp, err := factory.CreateTraces(t.Context(), metadatatest.NewSettings(testTel), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with 3 leaf spans that will be aggregated + td := createTestTraceWithLeafSpans(t, 3, "SELECT", map[string]string{"db.operation": "select"}) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Verify bytes_received metric was NOT recorded + _, err = testTel.GetMetric("otelcol_processor_spanpruning_bytes_received") + assert.Error(t, err, "bytes_received metric should not exist when disabled") + + // Verify bytes_emitted metric was NOT recorded + _, err = testTel.GetMetric("otelcol_processor_spanpruning_bytes_emitted") + assert.Error(t, err, "bytes_emitted metric should not exist when disabled") +} + +// TestOutlierMetrics_IQR tests that outlier metrics are recorded correctly with IQR method +func TestOutlierMetrics_IQR(t *testing.T) { + testTel := componenttest.NewTelemetry() + defer func() { require.NoError(t, testTel.Shutdown(t.Context())) }() + + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 5 + cfg.GroupByAttributes = []string{"db.operation"} + cfg.EnableOutlierAnalysis = true + cfg.OutlierAnalysis = OutlierAnalysisConfig{ + Method: OutlierMethodIQR, + PreserveOutliers: true, + MaxPreservedOutliers: 2, + IQRMultiplier: 1.5, + 
MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + } + + tp, err := factory.CreateTraces(t.Context(), metadatatest.NewSettings(testTel), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with outliers (using existing helper) + td := createTestTraceWithOutliers(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Verify outliers_detected metric (2 outliers from the trace) + metadatatest.AssertEqualProcessorSpanpruningOutliersDetected(t, testTel, + []metricdata.DataPoint[int64]{{Value: 2}}, + metricdatatest.IgnoreTimestamp()) + + // Verify outliers_preserved metric (2 outliers preserved due to MaxPreservedOutliers=2) + metadatatest.AssertEqualProcessorSpanpruningOutliersPreserved(t, testTel, + []metricdata.DataPoint[int64]{{Value: 2}}, + metricdatatest.IgnoreTimestamp()) + + // Verify outliers_correlations_detected metric (1 group with correlation: cache_hit=false) + metadatatest.AssertEqualProcessorSpanpruningOutliersCorrelationsDetected(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) +} + +// TestOutlierMetrics_MAD tests that outlier metrics are recorded correctly with MAD method +func TestOutlierMetrics_MAD(t *testing.T) { + testTel := componenttest.NewTelemetry() + defer func() { require.NoError(t, testTel.Shutdown(t.Context())) }() + + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 5 + cfg.GroupByAttributes = []string{"db.operation"} + cfg.EnableOutlierAnalysis = true + cfg.OutlierAnalysis = OutlierAnalysisConfig{ + Method: OutlierMethodMAD, + PreserveOutliers: true, + MaxPreservedOutliers: 2, + MADMultiplier: 3.0, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + } + + tp, err := factory.CreateTraces(t.Context(), metadatatest.NewSettings(testTel), cfg, 
consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with outliers + td := createTestTraceWithOutliers(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Verify outliers_detected metric (2 outliers from the trace) + metadatatest.AssertEqualProcessorSpanpruningOutliersDetected(t, testTel, + []metricdata.DataPoint[int64]{{Value: 2}}, + metricdatatest.IgnoreTimestamp()) + + // Verify outliers_preserved metric + metadatatest.AssertEqualProcessorSpanpruningOutliersPreserved(t, testTel, + []metricdata.DataPoint[int64]{{Value: 2}}, + metricdatatest.IgnoreTimestamp()) + + // Verify correlations detected + metadatatest.AssertEqualProcessorSpanpruningOutliersCorrelationsDetected(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) +} + +// TestOutlierMetrics_NoPreservation tests metrics when outliers are detected but not preserved +func TestOutlierMetrics_NoPreservation(t *testing.T) { + testTel := componenttest.NewTelemetry() + defer func() { require.NoError(t, testTel.Shutdown(t.Context())) }() + + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 5 + cfg.GroupByAttributes = []string{"db.operation"} + cfg.EnableOutlierAnalysis = true + cfg.OutlierAnalysis = OutlierAnalysisConfig{ + Method: OutlierMethodIQR, + PreserveOutliers: false, // Outliers detected but NOT preserved + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + } + + tp, err := factory.CreateTraces(t.Context(), metadatatest.NewSettings(testTel), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with outliers + td := createTestTraceWithOutliers(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Verify outliers_detected metric (outliers still detected) + metadatatest.AssertEqualProcessorSpanpruningOutliersDetected(t, testTel, + 
[]metricdata.DataPoint[int64]{{Value: 2}}, + metricdatatest.IgnoreTimestamp()) + + // Verify correlations detected (analysis still runs) + metadatatest.AssertEqualProcessorSpanpruningOutliersCorrelationsDetected(t, testTel, + []metricdata.DataPoint[int64]{{Value: 1}}, + metricdatatest.IgnoreTimestamp()) + + // outliers_preserved should NOT be recorded when PreserveOutliers=false + _, err = testTel.GetMetric("otelcol_processor_spanpruning_outliers_preserved") + assert.Error(t, err, "outliers_preserved should not be recorded when PreserveOutliers=false") +} + +func createTestTraceWithIntermediateSpan(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + rootSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + intermediateSpanID := pcommon.SpanID([8]byte{2, 0, 0, 0, 0, 0, 0, 0}) + + // Root span + rootSpan := ss.Spans().AppendEmpty() + rootSpan.SetTraceID(traceID) + rootSpan.SetSpanID(rootSpanID) + rootSpan.SetName("root") + + // Intermediate span (child of root, parent of leaves) + intermediateSpan := ss.Spans().AppendEmpty() + intermediateSpan.SetTraceID(traceID) + intermediateSpan.SetSpanID(intermediateSpanID) + intermediateSpan.SetParentSpanID(rootSpanID) + intermediateSpan.SetName("intermediate") + + // 3 leaf spans (children of intermediate) + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(intermediateSpanID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createTestTraceWithMixedOperations(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := 
rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 3 SELECT spans + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.Attributes().PutStr("db.operation", "select") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2 INSERT spans + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.Attributes().PutStr("db.operation", "insert") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createSingleSpanTrace(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + span := ss.Spans().AppendEmpty() + span.SetTraceID(pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})) + span.SetSpanID(pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0})) + span.SetName("root") + + return td +} + +func createTestTraceWithErrorSpan(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := 
ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // Leaf span with OK status + span1 := ss.Spans().AppendEmpty() + span1.SetTraceID(traceID) + span1.SetSpanID(pcommon.SpanID([8]byte{2, 0, 0, 0, 0, 0, 0, 0})) + span1.SetParentSpanID(parentSpanID) + span1.SetName("SELECT") + span1.Status().SetCode(ptrace.StatusCodeOk) + span1.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span1.SetEndTimestamp(pcommon.Timestamp(1000000100)) + + // Leaf span with Error status + span2 := ss.Spans().AppendEmpty() + span2.SetTraceID(traceID) + span2.SetSpanID(pcommon.SpanID([8]byte{2, 1, 0, 0, 0, 0, 0, 0})) + span2.SetParentSpanID(parentSpanID) + span2.SetName("SELECT") + span2.Status().SetCode(ptrace.StatusCodeError) + span2.Status().SetMessage("query failed") + span2.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span2.SetEndTimestamp(pcommon.Timestamp(1000000100)) + + return td +} + +func createTestTraceWithKnownDurations(t *testing.T, durationsNs []int64) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // Leaf spans with specific durations + baseTime := int64(1000000000) + for i, duration := range durationsNs { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(baseTime)) + span.SetEndTimestamp(pcommon.Timestamp(baseTime + duration)) + } + + return td +} + +func countSpans(td ptrace.Traces) int { + count 
:= 0 + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + ilss := rss.At(i).ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + count += ilss.At(j).Spans().Len() + } + } + return count +} + +// findSummarySpan finds the first summary span (with is_summary attribute set to true) +func findSummarySpan(td ptrace.Traces) ptrace.Span { + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + ilss := rss.At(i).ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + spans := ilss.At(j).Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + isSummary, exists := span.Attributes().Get("aggregation.is_summary") + if exists && isSummary.Bool() { + return span + } + } + } + } + return ptrace.Span{} +} + +func findSpanByNameAndStatus(td ptrace.Traces, spanName string, statusCode ptrace.StatusCode) ptrace.Span { + // findSpanByNameAndStatus finds a summary span by exact name and status code + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + ilss := rss.At(i).ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + spans := ilss.At(j).Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + isSummary, exists := span.Attributes().Get("aggregation.is_summary") + if exists && isSummary.Bool() && span.Name() == spanName && span.Status().Code() == statusCode { + return span + } + } + } + } + return ptrace.Span{} +} + +func createTestTraceWithMixedStatusSpans(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 4 leaf spans with OK status + for i := range 4 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + 
span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.Status().SetCode(ptrace.StatusCodeOk) + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2 leaf spans with Error status + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.Status().SetCode(ptrace.StatusCodeError) + span.Status().SetMessage("query failed") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +// Glob pattern matching tests + +func TestLeafSpanPruning_GlobPatternWildcard(t *testing.T) { + // Test: "db.*" pattern matches db.operation, db.name, db.statement + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.*"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with spans having multiple db.* attributes + td := createTestTraceWithMultipleDbAttrs(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 4, originalSpanCount) // 1 parent + 3 leaf spans + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // All 3 leaf spans have same db.* values, should aggregate to 1 + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) // 1 parent + 1 summary + + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan) + + attrs := summarySpan.Attributes() + spanCount, _ := attrs.Get("aggregation.span_count") + assert.Equal(t, int64(3), spanCount.Int()) +} + +func TestLeafSpanPruning_GlobPatternSeparatesGroups(t *testing.T) { + // Test: spans with different 
db.* values should be in separate groups + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.*"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with spans having different db.operation values + td := createTestTraceWithDifferentDbOperations(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 5, originalSpanCount) // 1 parent + 2 select + 2 insert + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // 2 select spans -> 1 summary, 2 insert spans -> 1 summary + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) // 1 parent + 2 summaries +} + +func TestLeafSpanPruning_GlobPatternMultiplePatterns(t *testing.T) { + // Test: multiple glob patterns ["db.*", "http.*"] + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.*", "http.*"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithDbAndHTTPAttrs(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 4, originalSpanCount) // 1 parent + 3 leaf spans + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // All spans have same db.* and http.* values, should aggregate + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) +} + +func TestLeafSpanPruning_GlobPatternExactMatch(t *testing.T) { + // Test: pattern without wildcard "db.operation" matches exactly + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.operation"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, 
consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithMultipleDbAttrs(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Should still group by db.operation exactly + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) +} + +func TestLeafSpanPruning_InvalidGlobPattern(t *testing.T) { + // Test: invalid glob pattern should return error during creation + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.GroupByAttributes = []string{"[invalid"} + + _, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid glob pattern") +} + +// Helper functions for glob pattern tests + +func createTestTraceWithMultipleDbAttrs(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 3 leaf spans with identical db.* attributes + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.Attributes().PutStr("db.operation", "select") + span.Attributes().PutStr("db.name", "users") + span.Attributes().PutStr("db.system", "postgresql") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createTestTraceWithDifferentDbOperations(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := 
td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 2 SELECT spans + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.Attributes().PutStr("db.operation", "select") + span.Attributes().PutStr("db.name", "users") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2 INSERT spans + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.Attributes().PutStr("db.operation", "insert") + span.Attributes().PutStr("db.name", "users") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createTestTraceWithDbAndHTTPAttrs(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 3 leaf spans with both db.* and http.* attributes + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + 
span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("api_call") + span.Attributes().PutStr("db.operation", "select") + span.Attributes().PutStr("db.name", "users") + span.Attributes().PutStr("http.method", "GET") + span.Attributes().PutStr("http.route", "/api/users") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func TestLeafSpanPruningProcessorWithHistogram(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + buckets []time.Duration + expectedBuckets []float64 + spanDurations []time.Duration + expectedBucketCounts []int64 + minSpansToAggregate int + expectedSpanCount int + shouldHaveHistogramAttributes bool + }{ + { + name: "aggregate 3 spans with histogram", + buckets: []time.Duration{ + 10 * time.Millisecond, + 50 * time.Millisecond, + 100 * time.Millisecond, + }, + expectedBuckets: []float64{0.01, 0.05, 0.1}, + spanDurations: []time.Duration{ + 5 * time.Millisecond, // Should fall in bucket 0 (<=10ms) + 15 * time.Millisecond, // Should fall in bucket 1 (<=50ms) + 25 * time.Millisecond, // Should fall in bucket 1 (<=50ms) + 75 * time.Millisecond, // Should fall in bucket 2 (<=100ms) + 150 * time.Millisecond, // Should fall in bucket 3 (+Inf) + }, + expectedBucketCounts: []int64{1, 3, 4, 5}, + minSpansToAggregate: 2, + expectedSpanCount: 2, // 1 original + 1 summary + shouldHaveHistogramAttributes: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a simple trace + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + rs.Resource().Attributes().PutStr("service.name", "test-service") + + // Create spans with specified durations + ils := rs.ScopeSpans().AppendEmpty() + spans := ils.Spans() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 
0, 0, 0, 0, 0}) + + // Create parent span + parentSpan := spans.AppendEmpty() + parentSpan.SetName("parent") + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + + // Create leaf spans with varying durations + for i, duration := range tt.spanDurations { + span := spans.AppendEmpty() + span.SetName("test-span") + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetKind(ptrace.SpanKindInternal) + + startTime := pcommon.Timestamp(1000000000) + endTime := pcommon.Timestamp(1000000000 + int64(duration)) + span.SetStartTimestamp(startTime) + span.SetEndTimestamp(endTime) + + // Add attributes to ensure they don't interfere with grouping + span.Attributes().PutStr("db.operation", "SELECT") + } + + // Create processor with custom buckets + cfg := &Config{ + GroupByAttributes: []string{"db.operation"}, + MinSpansToAggregate: tt.minSpansToAggregate, + AggregationAttributePrefix: "aggregation.", + AggregationHistogramBuckets: tt.buckets, + } + + ctx := t.Context() + set := processortest.NewNopSettings(metadata.Type) + + telemetryBuilder, err := metadata.NewTelemetryBuilder(set.TelemetrySettings) + require.NoError(t, err) + + p, err := newSpanPruningProcessor(set, cfg, telemetryBuilder) + assert.NoError(t, err) + + resultTd, err := p.processTraces(ctx, td) + assert.NoError(t, err) + + // Get the spans + spanCount := 0 + rss := resultTd.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + rs := rss.At(i) + ilss := rs.ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + ils := ilss.At(j) + spans := ils.Spans() + spanCount += spans.Len() + } + } + + // Verify we have the expected number of spans + assert.Equal(t, tt.expectedSpanCount, spanCount) + + // If we have a summary span, check its histogram attributes + var summarySpan ptrace.Span + foundSummary := false + rss = resultTd.ResourceSpans() + for i := 0; i < rss.Len() && !foundSummary; i++ { + rs := rss.At(i) + ilss := 
rs.ScopeSpans() + for j := 0; j < ilss.Len() && !foundSummary; j++ { + ils := ilss.At(j) + spans := ils.Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + isSummary, exists := span.Attributes().Get("aggregation.is_summary") + if exists && isSummary.Bool() { + summarySpan = span + foundSummary = true + break + } + } + } + } + + // If we are expecting histogram attributes, verify them + if tt.shouldHaveHistogramAttributes { + // Check if bucket bounds are present and correct + bounds, exists := summarySpan.Attributes().Get("aggregation.histogram_bucket_bounds_s") + assert.True(t, exists) + assert.Equal(t, len(tt.expectedBuckets), bounds.Slice().Len()) + + counts, exists := summarySpan.Attributes().Get("aggregation.histogram_bucket_counts") + assert.True(t, exists) + assert.Equal(t, len(tt.expectedBucketCounts), counts.Slice().Len()) + } + }) + } +} + +// TestLeafSpanPruning_RecursiveParentAggregation tests that parent spans are aggregated +// when all their children are aggregated +func TestLeafSpanPruning_RecursiveParentAggregation(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.op"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create the complex trace from the plan example + td := createTestTraceWithRecursiveAggregation(t) + + // Before: 1 root + 3 OK handlers + 3 OK SELECTs + 2 Error handlers + 2 Error SELECTs + 1 OK handler + 1 INSERT + 1 worker + 1 SELECT = 15 spans + originalSpanCount := countSpans(td) + assert.Equal(t, 15, originalSpanCount) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After: 1 root + 1 OK handler_aggregated + 1 OK SELECT_aggregated + 1 Error handler_aggregated + 1 Error SELECT_aggregated + 1 OK handler + 1 INSERT + 1 worker + 1 SELECT = 9 spans + finalSpanCount := 
countSpans(td) + assert.Equal(t, 9, finalSpanCount) + + // Verify aggregated spans exist + handlerOKAgg, found := findSpanByName(td, "handler", "Ok") + require.True(t, found, "OK handler summary should exist") + + handlerErrorAgg, found := findSpanByName(td, "handler", "Error") + require.True(t, found, "Error handler summary should exist") + + selectOKAgg, found := findSpanByName(td, "SELECT", "Ok") + require.True(t, found, "OK SELECT summary should exist") + + selectErrorAgg, found := findSpanByName(td, "SELECT", "Error") + require.True(t, found, "Error SELECT summary should exist") + + // Verify span counts + handlerOKCount, _ := handlerOKAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(3), handlerOKCount.Int()) + + handlerErrorCount, _ := handlerErrorAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(2), handlerErrorCount.Int()) + + selectOKCount, _ := selectOKAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(3), selectOKCount.Int()) + + selectErrorCount, _ := selectErrorAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(2), selectErrorCount.Int()) + + // Verify parent-child relationships + // SELECT_aggregated (OK) should be child of handler_aggregated (OK) + assert.Equal(t, handlerOKAgg.SpanID(), selectOKAgg.ParentSpanID()) + + // SELECT_aggregated (Error) should be child of handler_aggregated (Error) + assert.Equal(t, handlerErrorAgg.SpanID(), selectErrorAgg.ParentSpanID()) + + // Verify non-aggregated spans still exist + foundInsert := findSpanByExactName(td, "INSERT") + require.True(t, foundInsert, "INSERT span should still exist") + + foundWorker := findSpanByExactName(td, "worker") + require.True(t, foundWorker, "worker span should still exist") +} + +// TestLeafSpanPruning_ParentNotAggregatedIfChildrenMixed tests that parents are not +// aggregated if some children are aggregated but others are not +func TestLeafSpanPruning_ParentNotAggregatedIfChildrenMixed(t *testing.T) 
{ + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithMixedChildren(t) + + // Before: 1 root + 2 handlers + 3 SELECTs + 1 INSERT = 7 spans + originalSpanCount := countSpans(td) + assert.Equal(t, 7, originalSpanCount) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After: 1 root + 2 handlers + 1 SELECT_aggregated + 1 INSERT = 5 spans + // Handlers should NOT be aggregated because one has a non-aggregated child (INSERT) + finalSpanCount := countSpans(td) + assert.Equal(t, 5, finalSpanCount) + + // Verify handler_aggregated does NOT exist + _, found := findSummarySpanByName(td, "handler") + assert.False(t, found, "handler summary should NOT exist") + + // Verify SELECT_aggregated exists + selectAgg, found := findSummarySpanByName(td, "SELECT") + require.True(t, found, "SELECT summary should exist") + + selectCount, _ := selectAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(3), selectCount.Int()) + + // Verify original handler spans still exist + handlers := findAllSpansByExactName(td, "handler") + assert.Len(t, handlers, 2, "both handler spans should still exist") +} + +// TestLeafSpanPruning_RootSpansNotAggregated tests that root spans (with no parent) +// are never aggregated +func TestLeafSpanPruning_RootSpansNotAggregated(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithMultipleRoots(t) + + // Before: 3 root spans + 6 leaf spans (2 per root) = 9 spans + originalSpanCount := countSpans(td) + assert.Equal(t, 9, originalSpanCount) + + err = 
tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After: 3 root spans + 1 SELECT_aggregated = 4 spans + // Root spans should NOT be aggregated even though all children are aggregated + finalSpanCount := countSpans(td) + assert.Equal(t, 4, finalSpanCount) + + // Verify all root spans still exist + roots := findAllSpansByExactName(td, "root") + assert.Len(t, roots, 3, "all root spans should still exist") + + // Verify SELECT_aggregated exists + selectAgg, found := findSummarySpanByName(td, "SELECT") + require.True(t, found, "SELECT summary should exist") + + selectCount, _ := selectAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(6), selectCount.Int()) +} + +// TestLeafSpanPruning_ThreeLevelAggregation tests aggregation across three levels +func TestLeafSpanPruning_ThreeLevelAggregation(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.MaxParentDepth = -1 // unlimited for this test + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithThreeLevels(t) + + // Before: 1 root + 2 middleware + 4 handlers (2 per middleware) + 8 SELECTs (2 per handler) = 15 spans + originalSpanCount := countSpans(td) + assert.Equal(t, 15, originalSpanCount) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // After: 1 root + 1 middleware_aggregated + 1 handler_aggregated + 1 SELECT_aggregated = 4 spans + finalSpanCount := countSpans(td) + assert.Equal(t, 4, finalSpanCount) + + // Verify all aggregated spans exist + middlewareAgg, found := findSummarySpanByName(td, "middleware") + require.True(t, found, "middleware summary should exist") + + handlerAgg, found := findSummarySpanByName(td, "handler") + require.True(t, found, "handler summary should exist") + + selectAgg, found := findSummarySpanByName(td, "SELECT") + require.True(t, 
found, "SELECT summary should exist") + + // Verify parent-child relationships + // handler summary should be child of middleware summary + assert.Equal(t, middlewareAgg.SpanID(), handlerAgg.ParentSpanID()) + + // SELECT summary should be child of handler summary + assert.Equal(t, handlerAgg.SpanID(), selectAgg.ParentSpanID()) + + // Verify span counts + middlewareCount, _ := middlewareAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(2), middlewareCount.Int()) + + handlerCount, _ := handlerAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(4), handlerCount.Int()) + + selectCount, _ := selectAgg.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(8), selectCount.Int()) +} + +// Helper functions for new tests + +func createTestTraceWithRecursiveAggregation(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + rootSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Root span + root := ss.Spans().AppendEmpty() + root.SetTraceID(traceID) + root.SetSpanID(rootSpanID) + root.SetName("root") + root.Status().SetCode(ptrace.StatusCodeOk) + + // 3x handler (OK) -> SELECT (OK, db.op=select) + for i := range 3 { + handlerID := pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0}) + handler := ss.Spans().AppendEmpty() + handler.SetTraceID(traceID) + handler.SetSpanID(handlerID) + handler.SetParentSpanID(rootSpanID) + handler.SetName("handler") + handler.Status().SetCode(ptrace.StatusCodeOk) + + selectSpan := ss.Spans().AppendEmpty() + selectSpan.SetTraceID(traceID) + selectSpan.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + selectSpan.SetParentSpanID(handlerID) + selectSpan.SetName("SELECT") + selectSpan.Attributes().PutStr("db.op", "select") + selectSpan.Status().SetCode(ptrace.StatusCodeOk) + 
selectSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + selectSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2x handler (Error) -> SELECT (Error, db.op=select) + for i := range 2 { + handlerID := pcommon.SpanID([8]byte{4, byte(i), 0, 0, 0, 0, 0, 0}) + handler := ss.Spans().AppendEmpty() + handler.SetTraceID(traceID) + handler.SetSpanID(handlerID) + handler.SetParentSpanID(rootSpanID) + handler.SetName("handler") + handler.Status().SetCode(ptrace.StatusCodeError) + + selectSpan := ss.Spans().AppendEmpty() + selectSpan.SetTraceID(traceID) + selectSpan.SetSpanID(pcommon.SpanID([8]byte{5, byte(i), 0, 0, 0, 0, 0, 0})) + selectSpan.SetParentSpanID(handlerID) + selectSpan.SetName("SELECT") + selectSpan.Attributes().PutStr("db.op", "select") + selectSpan.Status().SetCode(ptrace.StatusCodeError) + selectSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + selectSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 1x handler (OK) -> INSERT (OK, db.op=insert) - below threshold + handlerID := pcommon.SpanID([8]byte{6, 0, 0, 0, 0, 0, 0, 0}) + handler := ss.Spans().AppendEmpty() + handler.SetTraceID(traceID) + handler.SetSpanID(handlerID) + handler.SetParentSpanID(rootSpanID) + handler.SetName("handler") + handler.Status().SetCode(ptrace.StatusCodeOk) + + insertSpan := ss.Spans().AppendEmpty() + insertSpan.SetTraceID(traceID) + insertSpan.SetSpanID(pcommon.SpanID([8]byte{7, 0, 0, 0, 0, 0, 0, 0})) + insertSpan.SetParentSpanID(handlerID) + insertSpan.SetName("INSERT") + insertSpan.Attributes().PutStr("db.op", "insert") + insertSpan.Status().SetCode(ptrace.StatusCodeOk) + insertSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + insertSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + + // 1x worker (OK) -> SELECT (OK, db.op=select) - different parent name + workerID := pcommon.SpanID([8]byte{8, 0, 0, 0, 0, 0, 0, 0}) + worker := ss.Spans().AppendEmpty() + worker.SetTraceID(traceID) + worker.SetSpanID(workerID) + 
worker.SetParentSpanID(rootSpanID) + worker.SetName("worker") + worker.Status().SetCode(ptrace.StatusCodeOk) + + selectSpan := ss.Spans().AppendEmpty() + selectSpan.SetTraceID(traceID) + selectSpan.SetSpanID(pcommon.SpanID([8]byte{9, 0, 0, 0, 0, 0, 0, 0})) + selectSpan.SetParentSpanID(workerID) + selectSpan.SetName("SELECT") + selectSpan.Attributes().PutStr("db.op", "select") + selectSpan.Status().SetCode(ptrace.StatusCodeOk) + selectSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + selectSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + + return td +} + +func createTestTraceWithMixedChildren(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + rootSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Root span + root := ss.Spans().AppendEmpty() + root.SetTraceID(traceID) + root.SetSpanID(rootSpanID) + root.SetName("root") + + // Handler 1 with 3 SELECTs (will be aggregated) + handler1ID := pcommon.SpanID([8]byte{2, 0, 0, 0, 0, 0, 0, 0}) + handler1 := ss.Spans().AppendEmpty() + handler1.SetTraceID(traceID) + handler1.SetSpanID(handler1ID) + handler1.SetParentSpanID(rootSpanID) + handler1.SetName("handler") + handler1.Status().SetCode(ptrace.StatusCodeOk) + + for i := range 3 { + selectSpan := ss.Spans().AppendEmpty() + selectSpan.SetTraceID(traceID) + selectSpan.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + selectSpan.SetParentSpanID(handler1ID) + selectSpan.SetName("SELECT") + selectSpan.Status().SetCode(ptrace.StatusCodeOk) + selectSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + selectSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // Handler 2 with 1 INSERT (not aggregated - mixed children) + handler2ID := pcommon.SpanID([8]byte{4, 0, 0, 0, 0, 0, 0, 0}) + handler2 := ss.Spans().AppendEmpty() + 
handler2.SetTraceID(traceID) + handler2.SetSpanID(handler2ID) + handler2.SetParentSpanID(rootSpanID) + handler2.SetName("handler") + handler2.Status().SetCode(ptrace.StatusCodeOk) + + insertSpan := ss.Spans().AppendEmpty() + insertSpan.SetTraceID(traceID) + insertSpan.SetSpanID(pcommon.SpanID([8]byte{5, 0, 0, 0, 0, 0, 0, 0})) + insertSpan.SetParentSpanID(handler2ID) + insertSpan.SetName("INSERT") + insertSpan.Status().SetCode(ptrace.StatusCodeOk) + insertSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + insertSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + + return td +} + +func createTestTraceWithMultipleRoots(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + + // 3 root spans, each with 2 SELECT children + for i := range 3 { + rootID := pcommon.SpanID([8]byte{byte(i + 1), 0, 0, 0, 0, 0, 0, 0}) + root := ss.Spans().AppendEmpty() + root.SetTraceID(traceID) + root.SetSpanID(rootID) + root.SetName("root") + root.Status().SetCode(ptrace.StatusCodeOk) + + for j := range 2 { + selectSpan := ss.Spans().AppendEmpty() + selectSpan.SetTraceID(traceID) + selectSpan.SetSpanID(pcommon.SpanID([8]byte{byte(i + 4), byte(j), 0, 0, 0, 0, 0, 0})) + selectSpan.SetParentSpanID(rootID) + selectSpan.SetName("SELECT") + selectSpan.Status().SetCode(ptrace.StatusCodeOk) + selectSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + selectSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + } + + return td +} + +func createTestTraceWithThreeLevels(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + rootSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Root span + root := 
ss.Spans().AppendEmpty() + root.SetTraceID(traceID) + root.SetSpanID(rootSpanID) + root.SetName("root") + + spanIDCounter := byte(2) + + // 2 middleware spans + for range 2 { + middlewareID := pcommon.SpanID([8]byte{spanIDCounter, 0, 0, 0, 0, 0, 0, 0}) + spanIDCounter++ + + middleware := ss.Spans().AppendEmpty() + middleware.SetTraceID(traceID) + middleware.SetSpanID(middlewareID) + middleware.SetParentSpanID(rootSpanID) + middleware.SetName("middleware") + middleware.Status().SetCode(ptrace.StatusCodeOk) + + // Each middleware has 2 handler spans + for range 2 { + handlerID := pcommon.SpanID([8]byte{spanIDCounter, 0, 0, 0, 0, 0, 0, 0}) + spanIDCounter++ + + handler := ss.Spans().AppendEmpty() + handler.SetTraceID(traceID) + handler.SetSpanID(handlerID) + handler.SetParentSpanID(middlewareID) + handler.SetName("handler") + handler.Status().SetCode(ptrace.StatusCodeOk) + + // Each handler has 2 SELECT spans + for range 2 { + selectSpan := ss.Spans().AppendEmpty() + selectSpan.SetTraceID(traceID) + selectSpan.SetSpanID(pcommon.SpanID([8]byte{spanIDCounter, 0, 0, 0, 0, 0, 0, 0})) + spanIDCounter++ + selectSpan.SetParentSpanID(handlerID) + selectSpan.SetName("SELECT") + selectSpan.Status().SetCode(ptrace.StatusCodeOk) + selectSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + selectSpan.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + } + } + + return td +} + +func findSpanByName(td ptrace.Traces, nameSubstring, statusCode string) (ptrace.Span, bool) { + // findSpanByName finds a summary span by name substring and status code string + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + rs := rss.At(i) + ilss := rs.ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + ils := ilss.At(j) + spans := ils.Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + isSummary, exists := span.Attributes().Get("aggregation.is_summary") + if strings.Contains(span.Name(), nameSubstring) && span.Status().Code().String() == statusCode && exists && 
isSummary.Bool() { + return span, true + } + } + } + } + return ptrace.Span{}, false +} + +func findSpanByExactName(td ptrace.Traces, name string) bool { + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + rs := rss.At(i) + ilss := rs.ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + ils := ilss.At(j) + spans := ils.Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + if span.Name() == name { + return true + } + } + } + } + return false +} + +// findSummarySpanByName finds a summary span (with is_summary attribute) by exact name +func findSummarySpanByName(td ptrace.Traces, name string) (ptrace.Span, bool) { + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + rs := rss.At(i) + ilss := rs.ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + ils := ilss.At(j) + spans := ils.Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + isSummary, exists := span.Attributes().Get("aggregation.is_summary") + if span.Name() == name && exists && isSummary.Bool() { + return span, true + } + } + } + } + return ptrace.Span{}, false +} + +func findAllSpansByExactName(td ptrace.Traces, name string) []ptrace.Span { + var result []ptrace.Span + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + rs := rss.At(i) + ilss := rs.ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + ils := ilss.At(j) + spans := ils.Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + if span.Name() == name { + result = append(result, span) + } + } + } + } + return result +} + +// Tree-based edge case tests + +func TestLeafSpanPruning_OrphanSpans(t *testing.T) { + // Test: orphan spans (parent not in trace) should still be processed as potential leaves + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := 
createTestTraceWithOrphanSpans(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 4, originalSpanCount) // 1 root + 3 orphan leaf spans + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Orphan spans with same name should still be aggregated + // 3 orphan SELECT spans -> 1 summary + 1 root + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) + + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan) + + attrs := summarySpan.Attributes() + spanCount, _ := attrs.Get("aggregation.span_count") + assert.Equal(t, int64(3), spanCount.Int()) +} + +func TestLeafSpanPruning_MultipleRootSpans(t *testing.T) { + // Test: multiple root spans (no parent) in a trace should be handled gracefully + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithMultipleRootsTree(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 5, originalSpanCount) // 2 roots + 3 leaf spans under first root + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // The 3 leaf spans under first root should aggregate + // Final: 2 roots + 1 summary = 3 + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) + + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan) + + attrs := summarySpan.Attributes() + spanCount, _ := attrs.Get("aggregation.span_count") + assert.Equal(t, int64(3), spanCount.Int()) +} + +func TestLeafSpanPruning_NoRootSpan(t *testing.T) { + // Test: trace with no root span (all spans have parents not in trace) + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + 
require.NoError(t, err) + + td := createTestTraceWithNoRoot(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 3, originalSpanCount) // 3 orphan leaf spans (all point to missing parent) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Should still aggregate the 3 orphan spans + finalSpanCount := countSpans(td) + assert.Equal(t, 1, finalSpanCount) // 1 summary + + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan) + + attrs := summarySpan.Attributes() + spanCount, _ := attrs.Get("aggregation.span_count") + assert.Equal(t, int64(3), spanCount.Int()) +} + +// Helper functions for tree edge case tests + +func createTestTraceWithOrphanSpans(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + rootSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + missingParentID := pcommon.SpanID([8]byte{99, 0, 0, 0, 0, 0, 0, 0}) // Not in trace + + // Root span + rootSpan := ss.Spans().AppendEmpty() + rootSpan.SetTraceID(traceID) + rootSpan.SetSpanID(rootSpanID) + rootSpan.SetName("root") + rootSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + rootSpan.SetEndTimestamp(pcommon.Timestamp(1000000500)) + + // 3 orphan leaf spans (parent not in trace) + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(missingParentID) // Parent not in trace + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(1000000100)) + span.SetEndTimestamp(pcommon.Timestamp(1000000200)) + } + + return td +} + +func createTestTraceWithMultipleRootsTree(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := 
pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + root1SpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + root2SpanID := pcommon.SpanID([8]byte{2, 0, 0, 0, 0, 0, 0, 0}) + + // First root span (earlier timestamp) + root1 := ss.Spans().AppendEmpty() + root1.SetTraceID(traceID) + root1.SetSpanID(root1SpanID) + root1.SetName("root1") + root1.SetStartTimestamp(pcommon.Timestamp(1000000000)) + root1.SetEndTimestamp(pcommon.Timestamp(1000000500)) + + // Second root span (later timestamp) + root2 := ss.Spans().AppendEmpty() + root2.SetTraceID(traceID) + root2.SetSpanID(root2SpanID) + root2.SetName("root2") + root2.SetStartTimestamp(pcommon.Timestamp(1000000100)) + root2.SetEndTimestamp(pcommon.Timestamp(1000000600)) + + // 3 leaf spans under first root + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(root1SpanID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(1000000100)) + span.SetEndTimestamp(pcommon.Timestamp(1000000200)) + } + + return td +} + +func createTestTraceWithNoRoot(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + missingParentID := pcommon.SpanID([8]byte{99, 0, 0, 0, 0, 0, 0, 0}) // Not in trace + + // 3 orphan leaf spans all pointing to missing parent + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(missingParentID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(1000000100 + uint64(i*10))) + span.SetEndTimestamp(pcommon.Timestamp(1000000200 + uint64(i*10))) + } + + return td +} + +// TestLeafSpanPruning_LongestDurationTemplate 
tests that the span with the longest +// duration is used as the template for the summary span +func TestLeafSpanPruning_LongestDurationTemplate(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + cfg.GroupByAttributes = []string{"db.operation"} + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with spans of varying durations and unique identifying attributes + // The span with the longest duration should become the template + td := createTestTraceWithVaryingDurations(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Find the summary span + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan, "summary span should exist") + + // Verify the template attribute from the longest-duration span is present + // The span with 500ns duration should be the template + identifier, exists := summarySpan.Attributes().Get("span.identifier") + require.True(t, exists, "span.identifier attribute should exist") + assert.Equal(t, "longest", identifier.Str(), "summary should use attributes from longest-duration span") + + // Verify duration stats + attrs := summarySpan.Attributes() + minDuration, _ := attrs.Get("aggregation.duration_min_ns") + assert.Equal(t, int64(100), minDuration.Int()) + + maxDuration, _ := attrs.Get("aggregation.duration_max_ns") + assert.Equal(t, int64(500), maxDuration.Int()) +} + +func createTestTraceWithVaryingDurations(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + 
parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // Define spans with different durations and unique identifiers + // Duration order: 100ns, 500ns, 200ns - the 500ns span should be template + spanConfigs := []struct { + duration int64 + identifier string + }{ + {100, "short"}, + {500, "longest"}, // This one should be the template + {200, "medium"}, + } + + baseTime := int64(1000000000) + for i, cfg := range spanConfigs { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("db_query") + span.SetStartTimestamp(pcommon.Timestamp(baseTime)) + span.SetEndTimestamp(pcommon.Timestamp(baseTime + cfg.duration)) + span.Attributes().PutStr("db.operation", "select") // Grouping key - same for all + span.Attributes().PutStr("span.identifier", cfg.identifier) // Unique per span + } + + return td +} + +// TestProcessorPreservesOutlierSpans tests that outlier spans are preserved as individual +// spans when preserve_outliers is enabled, while normal spans are aggregated. 
+func TestProcessorPreservesOutlierSpans(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 5 + cfg.GroupByAttributes = []string{"db.operation"} + cfg.EnableOutlierAnalysis = true + cfg.OutlierAnalysis = OutlierAnalysisConfig{ + PreserveOutliers: true, + MaxPreservedOutliers: 2, + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.75, + CorrelationMaxNormalOccurrence: 0.25, + MaxCorrelatedAttributes: 5, + } + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with outliers + td := createTestTraceWithOutliers(t) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Collect all spans from the result + var spans []ptrace.Span + for i := 0; i < td.ResourceSpans().Len(); i++ { + rs := td.ResourceSpans().At(i) + for j := 0; j < rs.ScopeSpans().Len(); j++ { + ss := rs.ScopeSpans().At(j) + for k := 0; k < ss.Spans().Len(); k++ { + spans = append(spans, ss.Spans().At(k)) + } + } + } + + // Find summary span + var summarySpan ptrace.Span + var outlierSpans []ptrace.Span + + for _, span := range spans { + if isSummary, exists := span.Attributes().Get("aggregation.is_summary"); exists && isSummary.Bool() { + summarySpan = span + } + if isOutlier, exists := span.Attributes().Get("aggregation.is_preserved_outlier"); exists && isOutlier.Bool() { + outlierSpans = append(outlierSpans, span) + } + } + + // Verify we have the expected structure: + // - 1 parent span (not aggregated) + // - 1 summary span (aggregated normal spans) + // - 2 preserved outlier spans + // Total: 4 spans + require.NotNil(t, summarySpan, "summary span should exist") + assert.Len(t, outlierSpans, 2, "should have 2 preserved outliers") + + // Verify summary span has correct count (10 total - 2 outliers = 8 aggregated) + spanCount, exists := 
summarySpan.Attributes().Get("aggregation.span_count") + require.True(t, exists) + assert.Equal(t, int64(8), spanCount.Int(), "summary should aggregate 8 normal spans") + + // Verify summary span has preserved outlier count + outlierCount, exists := summarySpan.Attributes().Get("aggregation.preserved_outlier_count") + require.True(t, exists) + assert.Equal(t, int64(2), outlierCount.Int(), "summary should track 2 preserved outliers") + + // Verify preserved outliers reference the summary span + summarySpanIDStr := summarySpan.SpanID().String() + for _, outlier := range outlierSpans { + summaryRef, exists := outlier.Attributes().Get("aggregation.summary_span_id") + require.True(t, exists, "preserved outlier should reference summary span") + assert.Equal(t, summarySpanIDStr, summaryRef.Str()) + } +} + +func createTestTraceWithOutliers(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("handler") + parentSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + parentSpan.SetEndTimestamp(pcommon.Timestamp(1001000000)) + + // Create 10 SELECT spans: 8 normal (5-12ms) and 2 outliers (500ms, 600ms) + // The outliers are ~50x slower than normal spans, well outside IQR threshold + baseTime := int64(1000000000) + ms := int64(1000000) // nanoseconds per millisecond + + normalDurations := []int64{5, 6, 7, 8, 9, 10, 11, 12} // 8 normal spans (ms) + outlierDurations := []int64{500, 600} // 2 outlier spans (ms) + + // Add normal spans + for i, dur := range normalDurations { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, 
byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(baseTime)) + span.SetEndTimestamp(pcommon.Timestamp(baseTime + dur*ms)) + span.Attributes().PutStr("db.operation", "SELECT") + span.Attributes().PutStr("cache_hit", "true") + } + + // Add outlier spans + for i, dur := range outlierDurations { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(baseTime)) + span.SetEndTimestamp(pcommon.Timestamp(baseTime + dur*ms)) + span.Attributes().PutStr("db.operation", "SELECT") + span.Attributes().PutStr("cache_hit", "false") // Distinguishing attribute for outliers + } + + return td +} + +// TestProcessorSkipsAggregationWhenTooFewNormalSpans tests that aggregation is skipped +// when preserving outliers would leave too few normal spans to aggregate. 
+func TestProcessorSkipsAggregationWhenTooFewNormalSpans(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 11 // Require 11 spans to aggregate + cfg.GroupByAttributes = []string{"db.operation"} + cfg.EnableOutlierAnalysis = true + cfg.OutlierAnalysis = OutlierAnalysisConfig{ + PreserveOutliers: true, + MaxPreservedOutliers: 0, // Preserve all outliers + IQRMultiplier: 1.5, + MinGroupSize: 7, + CorrelationMinOccurrence: 0.5, + CorrelationMaxNormalOccurrence: 0.5, + MaxCorrelatedAttributes: 5, + } + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + // Create trace with 13 leaf spans: 10 normal + 3 outliers + // After filtering outliers, only 10 normal remain, which is < 11 (MinSpansToAggregate) + // So aggregation should be skipped entirely + td := createTestTraceWithManyOutliers(t) + + initialSpanCount := countSpans(td) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + finalSpanCount := countSpans(td) + + // No aggregation should occur - all spans preserved + assert.Equal(t, initialSpanCount, finalSpanCount, "no spans should be aggregated") + + // Verify no summary span exists by checking span count matches expectations + // If aggregation happened, we'd have fewer spans (aggregated + summary instead of all original) + // Since span counts are equal, no aggregation occurred +} + +// TraceState grouping tests for Consistent Probability Sampling (CPS) compatibility + +func TestLeafSpanPruning_TraceStateGrouping_SameTraceState(t *testing.T) { + // Test: spans with identical TraceState should be aggregated together + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := 
createTestTraceWithSameTraceState(t, "ot=th:fd70a4;rv:12345") + originalSpanCount := countSpans(td) + assert.Equal(t, 4, originalSpanCount) // 1 parent + 3 leaf spans + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // All 3 leaf spans have same TraceState, should aggregate to 1 + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) // 1 parent + 1 summary + + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan) + + attrs := summarySpan.Attributes() + spanCount, _ := attrs.Get("aggregation.span_count") + assert.Equal(t, int64(3), spanCount.Int()) + + // Verify TraceState is preserved in summary span + assert.Equal(t, "ot=th:fd70a4;rv:12345", summarySpan.TraceState().AsRaw()) +} + +func TestLeafSpanPruning_TraceStateGrouping_DifferentThresholds(t *testing.T) { + // Test: spans with different th (threshold) values should be in separate groups + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithDifferentTraceStates(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 6, originalSpanCount) // 1 parent + 3 spans (th:fd70a4) + 2 spans (th:fa00) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // 3 spans with th:fd70a4 -> 1 summary + // 2 spans with th:fa00 -> 1 summary + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) // 1 parent + 2 summaries + + // Verify we have two summary spans with different TraceState values + summaries := findAllSummarySpans(td) + assert.Len(t, summaries, 2, "should have 2 summary spans") + + // Collect TraceState values from summaries + traceStates := make(map[string]int64) + for _, summary := range summaries { + ts := summary.TraceState().AsRaw() + count, _ := summary.Attributes().Get("aggregation.span_count") 
+ traceStates[ts] = count.Int() + } + + assert.Equal(t, int64(3), traceStates["ot=th:fd70a4;rv:12345"], "th:fd70a4 group should have 3 spans") + assert.Equal(t, int64(2), traceStates["ot=th:fa00;rv:12345"], "th:fa00 group should have 2 spans") +} + +func TestLeafSpanPruning_TraceStateGrouping_MixedWithEmpty(t *testing.T) { + // Test: spans with TraceState and spans without should be in separate groups + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithMixedTraceState(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 6, originalSpanCount) // 1 parent + 3 spans (with TraceState) + 2 spans (empty) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // 3 spans with TraceState -> 1 summary + // 2 spans with empty TraceState -> 1 summary + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) // 1 parent + 2 summaries + + summaries := findAllSummarySpans(td) + assert.Len(t, summaries, 2, "should have 2 summary spans") + + // Verify TraceState values are preserved correctly + var withTS, withoutTS ptrace.Span + for _, s := range summaries { + if s.TraceState().AsRaw() == "" { + withoutTS = s + } else { + withTS = s + } + } + + assert.Equal(t, "ot=th:fd70a4;rv:12345", withTS.TraceState().AsRaw()) + count1, _ := withTS.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(3), count1.Int()) + + assert.Empty(t, withoutTS.TraceState().AsRaw()) + count2, _ := withoutTS.Attributes().Get("aggregation.span_count") + assert.Equal(t, int64(2), count2.Int()) +} + +func TestLeafSpanPruning_TraceStateGrouping_EmptyTraceState(t *testing.T) { + // Test: spans with empty TraceState should be grouped together + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + 
cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithSameTraceState(t, "") // Empty TraceState + originalSpanCount := countSpans(td) + assert.Equal(t, 4, originalSpanCount) // 1 parent + 3 leaf spans + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // All 3 leaf spans have empty TraceState, should aggregate to 1 + finalSpanCount := countSpans(td) + assert.Equal(t, 2, finalSpanCount) + + summarySpan := findSummarySpan(td) + require.NotNil(t, summarySpan) + assert.Empty(t, summarySpan.TraceState().AsRaw()) + + attrs := summarySpan.Attributes() + spanCount, _ := attrs.Get("aggregation.span_count") + assert.Equal(t, int64(3), spanCount.Int()) +} + +func TestLeafSpanPruning_TraceStateGrouping_DifferentRVValues(t *testing.T) { + // Test: spans with different rv (randomness value) should be in separate groups + factory := NewFactory() + cfg := factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithDifferentRVValues(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 5, originalSpanCount) // 1 parent + 2 spans (rv:11111) + 2 spans (rv:22222) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // 2 spans with rv:11111 -> 1 summary + // 2 spans with rv:22222 -> 1 summary + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) // 1 parent + 2 summaries + + summaries := findAllSummarySpans(td) + assert.Len(t, summaries, 2, "should have 2 summary spans") +} + +func TestLeafSpanPruning_TraceStateGrouping_VendorKeys(t *testing.T) { + // Test: spans with different vendor-specific keys should be in separate groups + factory := NewFactory() + cfg := 
factory.CreateDefaultConfig().(*Config) + cfg.MinSpansToAggregate = 2 + + tp, err := factory.CreateTraces(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, consumertest.NewNop()) + require.NoError(t, err) + + td := createTestTraceWithVendorTraceState(t) + originalSpanCount := countSpans(td) + assert.Equal(t, 5, originalSpanCount) // 1 parent + 2 spans (vendor=a) + 2 spans (vendor=b) + + err = tp.ConsumeTraces(t.Context(), td) + require.NoError(t, err) + + // Different vendor keys should result in separate groups + finalSpanCount := countSpans(td) + assert.Equal(t, 3, finalSpanCount) // 1 parent + 2 summaries + + summaries := findAllSummarySpans(td) + assert.Len(t, summaries, 2, "should have 2 summary spans") +} + +// Helper functions for TraceState tests + +func createTestTraceWithSameTraceState(t *testing.T, traceState string) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 3 leaf spans with identical TraceState + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw(traceState) + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createTestTraceWithDifferentTraceStates(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 3 leaf spans with th:fd70a4 (1% sampling) + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw("ot=th:fd70a4;rv:12345") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2 leaf spans with th:fa00 (2% sampling) + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw("ot=th:fa00;rv:12345") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createTestTraceWithMixedTraceState(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 3 leaf spans with TraceState + for i := range 3 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw("ot=th:fd70a4;rv:12345") + 
span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2 leaf spans WITHOUT TraceState (empty) + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + // No TraceState set (empty) + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createTestTraceWithDifferentRVValues(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 2 leaf spans with rv:11111 + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw("ot=th:fd70a4;rv:11111") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2 leaf spans with rv:22222 + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw("ot=th:fd70a4;rv:22222") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func createTestTraceWithVendorTraceState(t *testing.T) ptrace.Traces { + t.Helper() + td 
:= ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("parent") + + // 2 leaf spans with vendor=a + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw("ot=th:fd70a4,vendor=a") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + // 2 leaf spans with vendor=b + for i := range 2 { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.TraceState().FromRaw("ot=th:fd70a4,vendor=b") + span.SetStartTimestamp(pcommon.Timestamp(1000000000)) + span.SetEndTimestamp(pcommon.Timestamp(1000000100)) + } + + return td +} + +func findAllSummarySpans(td ptrace.Traces) []ptrace.Span { + var result []ptrace.Span + rss := td.ResourceSpans() + for i := 0; i < rss.Len(); i++ { + ilss := rss.At(i).ScopeSpans() + for j := 0; j < ilss.Len(); j++ { + spans := ilss.At(j).Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + isSummary, exists := span.Attributes().Get("aggregation.is_summary") + if exists && isSummary.Bool() { + result = append(result, span) + } + } + } + } + return result +} + +func createTestTraceWithManyOutliers(t *testing.T) ptrace.Traces { + t.Helper() + td := ptrace.NewTraces() + rs := td.ResourceSpans().AppendEmpty() + ss := rs.ScopeSpans().AppendEmpty() + + traceID := pcommon.TraceID([16]byte{1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) + parentSpanID := pcommon.SpanID([8]byte{1, 0, 0, 0, 0, 0, 0, 0}) + + // Parent span + parentSpan := ss.Spans().AppendEmpty() + parentSpan.SetTraceID(traceID) + parentSpan.SetSpanID(parentSpanID) + parentSpan.SetName("handler") + parentSpan.SetStartTimestamp(pcommon.Timestamp(1000000000)) + parentSpan.SetEndTimestamp(pcommon.Timestamp(1001000000)) + + baseTime := int64(1000000000) + ms := int64(1000000) + + // 10 normal spans (tight distribution for small IQR) + normalDurations := []int64{5, 6, 6, 7, 7, 8, 8, 9, 9, 10} + for i, dur := range normalDurations { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{2, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(baseTime)) + span.SetEndTimestamp(pcommon.Timestamp(baseTime + dur*ms)) + span.Attributes().PutStr("db.operation", "SELECT") + } + + // 3 outlier spans (way outside IQR threshold) + outlierDurations := []int64{500, 600, 700} + for i, dur := range outlierDurations { + span := ss.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pcommon.SpanID([8]byte{3, byte(i), 0, 0, 0, 0, 0, 0})) + span.SetParentSpanID(parentSpanID) + span.SetName("SELECT") + span.SetStartTimestamp(pcommon.Timestamp(baseTime)) + span.SetEndTimestamp(pcommon.Timestamp(baseTime + dur*ms)) + span.Attributes().PutStr("db.operation", "SELECT") + } + + return td +} diff --git a/processor/spanpruningprocessor/stats.go b/processor/spanpruningprocessor/stats.go new file mode 100644 index 0000000000000..fa88b5ed5cdf9 --- /dev/null +++ b/processor/spanpruningprocessor/stats.go @@ -0,0 +1,89 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "time" + + 
"go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +// aggregationData tracks statistics and time ranges for a group of spans in +// a single pass, replacing separate calculations for efficiency. +type aggregationData struct { + count int64 + minDuration time.Duration + maxDuration time.Duration + sumDuration time.Duration + bucketCounts []int64 + earliestStart pcommon.Timestamp + latestEnd pcommon.Timestamp +} + +// calculateAggregationData derives span counts, duration stats, and histogram +// bucket counts for the provided nodes in one traversal. +func (p *spanPruningProcessor) calculateAggregationData(nodes []*spanNode) aggregationData { + data := aggregationData{ + count: int64(len(nodes)), + } + + // Initialize histogram bucket counts + if len(p.config.AggregationHistogramBuckets) > 0 { + data.bucketCounts = make([]int64, len(p.config.AggregationHistogramBuckets)+1) + } + + for i, node := range nodes { + span := node.span + data.updateWithSpan(span, i == 0, p.config.AggregationHistogramBuckets) + } + + return data +} + +// updateWithSpan incorporates a single span into the aggregation statistics +// and histogram buckets, tracking min/max times and cumulative counts. 
+func (data *aggregationData) updateWithSpan(span ptrace.Span, isFirst bool, histogramBuckets []time.Duration) { + startTime := span.StartTimestamp().AsTime() + endTime := span.EndTimestamp().AsTime() + duration := endTime.Sub(startTime) + + // Calculate duration statistics + if isFirst { + data.minDuration = duration + data.maxDuration = duration + data.earliestStart = span.StartTimestamp() + data.latestEnd = span.EndTimestamp() + } else { + if duration < data.minDuration { + data.minDuration = duration + } + if duration > data.maxDuration { + data.maxDuration = duration + } + if span.StartTimestamp() < data.earliestStart { + data.earliestStart = span.StartTimestamp() + } + if span.EndTimestamp() > data.latestEnd { + data.latestEnd = span.EndTimestamp() + } + } + data.sumDuration += duration + + // Update histogram bucket counts (cumulative) + if len(histogramBuckets) > 0 { + // Find which bucket this duration belongs to + bucketIndex := len(histogramBuckets) // default to +Inf bucket + for j, bucket := range histogramBuckets { + if duration <= bucket { + bucketIndex = j + break + } + } + // Increment all buckets from bucketIndex to the end (cumulative histogram) + for j := bucketIndex; j < len(data.bucketCounts); j++ { + data.bucketCounts[j]++ + } + } +} diff --git a/processor/spanpruningprocessor/testdata/config.yaml b/processor/spanpruningprocessor/testdata/config.yaml new file mode 100644 index 0000000000000..8109f4f24332b --- /dev/null +++ b/processor/spanpruningprocessor/testdata/config.yaml @@ -0,0 +1,12 @@ +spanpruning: + group_by_attributes: + - "db.operation" + min_spans_to_aggregate: 5 + aggregation_attribute_prefix: "aggregation." + +spanpruning/custom: + group_by_attributes: + - "db.operation" + - "db.name" + min_spans_to_aggregate: 3 + aggregation_attribute_prefix: "batch." 
diff --git a/processor/spanpruningprocessor/tree.go b/processor/spanpruningprocessor/tree.go new file mode 100644 index 0000000000000..e713aac779eec --- /dev/null +++ b/processor/spanpruningprocessor/tree.go @@ -0,0 +1,173 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package spanpruningprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanpruningprocessor" + +import ( + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + "go.uber.org/zap" +) + +// spanNode models a span in the trace tree with cached relationships and +// aggregation bookkeeping. +type spanNode struct { + span ptrace.Span + scopeSpans ptrace.ScopeSpans + parent *spanNode + children []*spanNode + groupKey string // cached group key for leaf spans + isLeaf bool // true if node has no children + markedForRemoval bool // true if node will be aggregated + isPreservedOutlier bool // true if preserved as outlier (not aggregated) +} + +// traceTree holds span nodes indexed by ID plus quick leaf/orphan lists for +// efficient aggregation analysis. +type traceTree struct { + nodeByID map[pcommon.SpanID]*spanNode + leaves []*spanNode // nodes with no children, populated during build + orphans []*spanNode // spans whose parent is not in the trace +} + +// buildTraceTree constructs parent/child links for a trace and records +// leaves, roots, and orphans so aggregation decisions can account for +// incomplete traces. 
+func (p *spanPruningProcessor) buildTraceTree(spans []spanInfo) *traceTree { + tree := &traceTree{ + nodeByID: make(map[pcommon.SpanID]*spanNode, len(spans)), + } + + if len(spans) == 0 { + return tree + } + + // First pass: create nodes for all spans, initially mark all as leaves + for _, info := range spans { + node := &spanNode{ + span: info.span, + scopeSpans: info.scopeSpans, + isLeaf: true, // assume leaf until a child links to it + } + tree.nodeByID[info.span.SpanID()] = node + } + + // Second pass: link parent-child relationships and update leaf status + // Pre-allocate slices with reasonable capacity + tree.orphans = make([]*spanNode, 0, len(spans)/10) + var rootCount int + + for _, node := range tree.nodeByID { + parentID := node.span.ParentSpanID() + if parentID.IsEmpty() { + // This is a root span (no parent) + rootCount++ + } else if parent, exists := tree.nodeByID[parentID]; exists { + // Link to parent and mark parent as non-leaf + node.parent = parent + parent.isLeaf = false + if parent.children == nil { + parent.children = make([]*spanNode, 0, 4) + } + parent.children = append(parent.children, node) + } else { + // Parent not in trace - this is an orphan + tree.orphans = append(tree.orphans, node) + } + } + + // Third pass: collect leaves (nodes still marked as leaf) + tree.leaves = make([]*spanNode, 0, len(spans)/4) + for _, node := range tree.nodeByID { + if node.isLeaf { + tree.leaves = append(tree.leaves, node) + } + } + + // Log warnings for incomplete traces + if rootCount > 1 { + p.logger.Debug("multiple root spans found", + zap.Int("rootCount", rootCount)) + } else if rootCount == 0 && len(tree.orphans) > 0 { + p.logger.Debug("no root span found, trace may be incomplete") + } + + if len(tree.orphans) > 0 { + p.logger.Debug("orphaned spans detected", + zap.Int("orphanCount", len(tree.orphans))) + } + + return tree +} + +// getLeaves returns the pre-computed leaf nodes (spans with no children). 
+func (t *traceTree) getLeaves() []*spanNode { + return t.leaves +} + +// findEligibleParentNodesFromCandidates filters candidate parents to those +// whose children are all marked for aggregation and that are themselves +// aggregate-able. +func (p *spanPruningProcessor) findEligibleParentNodesFromCandidates(candidates []*spanNode) []*spanNode { + if len(candidates) == 0 { + return nil + } + + eligibleParents := make([]*spanNode, 0, len(candidates)/4) + for _, node := range candidates { + if p.isEligibleForParentAggregation(node) { + eligibleParents = append(eligibleParents, node) + } + } + return eligibleParents +} + +// collectParentCandidates returns unique parents of marked nodes for the +// next aggregation depth iteration. +func collectParentCandidates(markedNodes []*spanNode) []*spanNode { + if len(markedNodes) == 0 { + return nil + } + + seen := make(map[*spanNode]struct{}, len(markedNodes)/2) + candidates := make([]*spanNode, 0, len(markedNodes)/2) + + for _, node := range markedNodes { + if node.parent != nil { + if _, exists := seen[node.parent]; !exists { + seen[node.parent] = struct{}{} + candidates = append(candidates, node.parent) + } + } + } + + return candidates +} + +// isEligibleForParentAggregation verifies that a node meets the criteria for +// parent aggregation (not root, all children marked or preserved, not already marked). +func (*spanPruningProcessor) isEligibleForParentAggregation(node *spanNode) bool { + // Must have children (not a leaf) + if node.isLeaf { + return false + } + + // Must have a parent (not root) + if node.parent == nil { + return false + } + + // Must not already be marked for removal + if node.markedForRemoval { + return false + } + + // All children must be either marked for removal OR preserved as outliers + for _, child := range node.children { + if !child.markedForRemoval && !child.isPreservedOutlier { + return false + } + } + + return true +}