diff --git a/docs/sources/reference/components/otelcol/otelcol.connector.servicegraph.md b/docs/sources/reference/components/otelcol/otelcol.connector.servicegraph.md index 307c0e1f1b7..d4d42086391 100644 --- a/docs/sources/reference/components/otelcol/otelcol.connector.servicegraph.md +++ b/docs/sources/reference/components/otelcol/otelcol.connector.servicegraph.md @@ -68,6 +68,8 @@ You can use the following arguments with `otelcol.connector.servicegraph`: | `latency_histogram_buckets` | `list(duration)` | Buckets for latency histogram metrics. | `["2ms", "4ms", "6ms", "8ms", "10ms", "50ms", "100ms", "200ms", "400ms", "800ms", "1s", "1400ms", "2s", "5s", "10s", "15s"]` | no | | `metrics_flush_interval` | `duration` | The interval at which metrics are flushed to downstream components. | `"60s"` | no | | `store_expiration_loop` | `duration` | The time to expire old entries from the store periodically. | `"2s"` | no | +| `virtual_node_extra_label` | `bool` | Adds an extra `virtual_node` label with an optional value of `client` or `server`, indicating which node is the uninstrumented one. | `false` | no | +| `virtual_node_peer_attributes` | `list(string)` | The list of attributes used to identify a virtual node peer. | `["peer.service", "db.name", "db.system"]` | no | Service graphs work by inspecting traces and looking for spans with parent-children relationship that represent a request. `otelcol.connector.servicegraph` uses OpenTelemetry semantic conventions to detect a myriad of requests. @@ -114,6 +116,19 @@ When `metrics_flush_interval` is set to `0s`, metrics will be flushed on every r The attributes in `database_name_attributes` are tried in order, selecting the first match. +`virtual_node_peer_attributes` is useful when an OTel-instrumented client sends a request to a service that isn't OTel-instrumented. +In that case, `otelcol.connector.servicegraph` can't pair the client span with a server span, because the uninstrumented service doesn't emit one. 
+When an edge expires, `otelcol.connector.servicegraph` checks if it has peer attributes listed in `virtual_node_peer_attributes`. +If it finds an attribute, `otelcol.connector.servicegraph` aggregates the metrics with a virtual node. + +If no client span is found and `virtual_node_peer_attributes` is not an empty list, +then the server span is paired with a virtual node called `client="user"`. +This is useful when a client that isn't OTel-instrumented (like a web browser) sends a request to an OTel-instrumented service. +Without a virtual node, the client span is missing, and the server span expires without being paired. + +Attributes configured in the `virtual_node_peer_attributes` argument are ordered by priority, with earlier attributes having higher priority. +An empty list disables the creation of a virtual node. + [Span Kind]: https://opentelemetry.io/docs/concepts/signals/traces/#span-kind ## Blocks diff --git a/internal/component/otelcol/connector/servicegraph/servicegraph.go b/internal/component/otelcol/connector/servicegraph/servicegraph.go index 68d33ceddf2..38a3755dd8e 100644 --- a/internal/component/otelcol/connector/servicegraph/servicegraph.go +++ b/internal/component/otelcol/connector/servicegraph/servicegraph.go @@ -13,6 +13,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/connector/servicegraphconnector" otelcomponent "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/pipeline" + semconv "go.opentelemetry.io/otel/semconv/v1.25.0" ) func init() { @@ -50,9 +51,9 @@ type Arguments struct { // StoreExpirationLoop defines how often to expire old entries from the store. StoreExpirationLoop time.Duration `alloy:"store_expiration_loop,attr,optional"` // VirtualNodePeerAttributes the list of attributes need to match, the higher the front, the higher the priority. - //TODO: Add VirtualNodePeerAttributes when it's no longer controlled by - // the "processor.servicegraph.virtualNode" feature gate. 
- // VirtualNodePeerAttributes []string `alloy:"virtual_node_peer_attributes,attr,optional"` + VirtualNodePeerAttributes []string `alloy:"virtual_node_peer_attributes,attr,optional"` + // VirtualNodeExtraLabel adds an extra `virtual_node` label with a value of `client` or `server`, indicating which node is the uninstrumented one. + VirtualNodeExtraLabel bool `alloy:"virtual_node_extra_label,attr,optional"` // MetricsFlushInterval is the interval at which metrics are flushed to the exporter. // If set to 0, metrics are flushed on every received batch of traces. @@ -115,20 +116,11 @@ func (args *Arguments) SetToDefault() { Dimensions: []string{}, CacheLoop: 1 * time.Minute, StoreExpirationLoop: 2 * time.Second, - DatabaseNameAttributes: []string{"db.name"}, - MetricsFlushInterval: 60 * time.Second, - //TODO: Add VirtualNodePeerAttributes when it's no longer controlled by - // the "processor.servicegraph.virtualNode" feature gate. - // VirtualNodePeerAttributes: []string{ - // semconv.AttributeDBName, - // semconv.AttributeNetSockPeerAddr, - // semconv.AttributeNetPeerName, - // semconv.AttributeRPCService, - // semconv.AttributeNetSockPeerName, - // semconv.AttributeNetPeerName, - // semconv.AttributeHTTPURL, - // semconv.AttributeHTTPTarget, - // }, + DatabaseNameAttributes: []string{string(semconv.DBNameKey)}, + VirtualNodePeerAttributes: []string{ + string(semconv.PeerServiceKey), string(semconv.DBNameKey), string(semconv.DBSystemKey), + }, + MetricsFlushInterval: 60 * time.Second, } args.Store.SetToDefault() args.DebugMetrics.SetToDefault() @@ -170,12 +162,11 @@ func (args Arguments) Convert() (otelcomponent.Config, error) { }, CacheLoop: args.CacheLoop, StoreExpirationLoop: args.StoreExpirationLoop, + VirtualNodePeerAttributes: args.VirtualNodePeerAttributes, + VirtualNodeExtraLabel: args.VirtualNodeExtraLabel, MetricsFlushInterval: &args.MetricsFlushInterval, DatabaseNameAttributes: args.DatabaseNameAttributes, ExponentialHistogramMaxSize: args.ExponentialHistogramMaxSize, - //TODO: Add VirtualNodePeerAttributes when it's 
no longer controlled by - // the "processor.servicegraph.virtualNode" feature gate. - // VirtualNodePeerAttributes: args.VirtualNodePeerAttributes, }, nil } diff --git a/internal/component/otelcol/connector/servicegraph/servicegraph_test.go b/internal/component/otelcol/connector/servicegraph/servicegraph_test.go index 33932182dec..f69e99d1d36 100644 --- a/internal/component/otelcol/connector/servicegraph/servicegraph_test.go +++ b/internal/component/otelcol/connector/servicegraph/servicegraph_test.go @@ -50,22 +50,12 @@ func TestArguments_UnmarshalAlloy(t *testing.T) { MaxItems: 1000, TTL: 2 * time.Second, }, - CacheLoop: 1 * time.Minute, - StoreExpirationLoop: 2 * time.Second, - DatabaseNameAttributes: []string{"db.name"}, - MetricsFlushInterval: ptr(60 * time.Second), - //TODO: Add VirtualNodePeerAttributes when it's no longer controlled by - // the "processor.servicegraph.virtualNode" feature gate. - // VirtualNodePeerAttributes: []string{ - // "db.name", - // "net.sock.peer.addr", - // "net.peer.name", - // "rpc.service", - // "net.sock.peer.name", - // "net.peer.name", - // "http.url", - // "http.target", - // }, + CacheLoop: 1 * time.Minute, + StoreExpirationLoop: 2 * time.Second, + VirtualNodePeerAttributes: []string{"peer.service", "db.name", "db.system"}, + VirtualNodeExtraLabel: false, + DatabaseNameAttributes: []string{"db.name"}, + MetricsFlushInterval: ptr(60 * time.Second), }, }, { @@ -79,6 +69,8 @@ func TestArguments_UnmarshalAlloy(t *testing.T) { } cache_loop = "55m" store_expiration_loop = "77s" + virtual_node_peer_attributes = ["attr1", "attr2"] + virtual_node_extra_label = true metrics_flush_interval = "5s" exponential_histogram_max_size = 160 output {} @@ -96,12 +88,11 @@ func TestArguments_UnmarshalAlloy(t *testing.T) { }, CacheLoop: 55 * time.Minute, StoreExpirationLoop: 77 * time.Second, + VirtualNodePeerAttributes: []string{"attr1", "attr2"}, + VirtualNodeExtraLabel: true, DatabaseNameAttributes: []string{"db.name"}, MetricsFlushInterval: 
ptr(5 * time.Second), ExponentialHistogramMaxSize: 160, - //TODO: Ad VirtualNodePeerAttributes when it's no longer controlled by - // the "processor.servicegraph.virtualNode" feature gate. - // VirtualNodePeerAttributes: []string{"attr1", "attr2"}, }, }, { diff --git a/internal/converter/internal/otelcolconvert/converter_servicegraphconnector.go b/internal/converter/internal/otelcolconvert/converter_servicegraphconnector.go index 552dc603b1d..df1d8d4bc44 100644 --- a/internal/converter/internal/otelcolconvert/converter_servicegraphconnector.go +++ b/internal/converter/internal/otelcolconvert/converter_servicegraphconnector.go @@ -61,6 +61,12 @@ func toServicegraphConnector(state *State, id componentstatus.InstanceID, cfg *s metricsFlushIntervalValue = *metricsFlushInterval } + // TODO: Some default values upstream are not picked up correctly - fix this. + // Change the upstream code to set the default values in createDefaultConfig() in factory.go. + // Currently, some defaults are set in newConnector() in connector.go. + // For example, Alloy thinks the default for virtual_node_peer_attributes should be an empty list because that's what's in factory.go. + // For now the servicegraph converter tests are configured to explicitly set some values so that we don't see the wrong default value. 
+ return &servicegraph.Arguments{ LatencyHistogramBuckets: cfg.LatencyHistogramBuckets, Dimensions: cfg.Dimensions, @@ -68,10 +74,13 @@ func toServicegraphConnector(state *State, id componentstatus.InstanceID, cfg *s MaxItems: cfg.Store.MaxItems, TTL: cfg.Store.TTL, }, - CacheLoop: cfg.CacheLoop, - StoreExpirationLoop: cfg.StoreExpirationLoop, - MetricsFlushInterval: metricsFlushIntervalValue, - DatabaseNameAttributes: cfg.DatabaseNameAttributes, + CacheLoop: cfg.CacheLoop, + StoreExpirationLoop: cfg.StoreExpirationLoop, + MetricsFlushInterval: metricsFlushIntervalValue, + DatabaseNameAttributes: cfg.DatabaseNameAttributes, + VirtualNodeExtraLabel: cfg.VirtualNodeExtraLabel, + VirtualNodePeerAttributes: cfg.VirtualNodePeerAttributes, + ExponentialHistogramMaxSize: cfg.ExponentialHistogramMaxSize, Output: &otelcol.ConsumerArguments{ Metrics: ToTokenizedConsumers(nextMetrics), }, diff --git a/internal/converter/internal/otelcolconvert/testdata/servicegraph.alloy b/internal/converter/internal/otelcolconvert/testdata/servicegraph.alloy index c7d9db0a0db..58d7f704dd1 100644 --- a/internal/converter/internal/otelcolconvert/testdata/servicegraph.alloy +++ b/internal/converter/internal/otelcolconvert/testdata/servicegraph.alloy @@ -30,10 +30,11 @@ otelcol.connector.servicegraph "default" { max_items = 10 ttl = "1s" } - cache_loop = "2m0s" - store_expiration_loop = "5s" - metrics_flush_interval = "3m0s" - database_name_attributes = ["db_name3", "db_name4"] + cache_loop = "2m0s" + store_expiration_loop = "5s" + virtual_node_peer_attributes = ["example_attribute"] + metrics_flush_interval = "3m0s" + database_name_attributes = ["db_name3", "db_name4"] output { metrics = [otelcol.exporter.otlp.default.input] diff --git a/internal/converter/internal/otelcolconvert/testdata/servicegraph.yaml b/internal/converter/internal/otelcolconvert/testdata/servicegraph.yaml index 90346598ab0..b963d67c4c2 100644 --- a/internal/converter/internal/otelcolconvert/testdata/servicegraph.yaml +++ 
b/internal/converter/internal/otelcolconvert/testdata/servicegraph.yaml @@ -17,6 +17,7 @@ connectors: store_expiration_loop: 5s database_name_attributes: [db_name3, db_name4] metrics_flush_interval: 3m + virtual_node_peer_attributes: [example_attribute] exporters: otlp: