From 40d814685fb47f4845f8132e2c08a27ae4683b30 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 00:50:51 +0530 Subject: [PATCH 01/40] feat: event metrics --- demo/pkg/subgraphs/subgraphs.go | 5 +- router-tests/testenv/testenv.go | 3 +- router/core/factoryresolver.go | 3 + router/core/graph_server.go | 21 +++ router/core/router.go | 2 + router/pkg/config/config.go | 2 + router/pkg/config/config.schema.json | 10 ++ router/pkg/metric/config.go | 4 +- router/pkg/metric/event_measurements.go | 156 ++++++++++++++++++ router/pkg/metric/event_metric_store.go | 120 ++++++++++++++ router/pkg/metric/noop_event_metrics.go | 19 +++ router/pkg/metric/oltp_event_metric_store.go | 87 ++++++++++ router/pkg/metric/prom_event_metric_store.go | 84 ++++++++++ router/pkg/pubsub/datasource/mocks.go | 2 +- router/pkg/pubsub/datasource/provider.go | 7 +- router/pkg/pubsub/kafka/adapter.go | 29 ++-- router/pkg/pubsub/kafka/provider_builder.go | 10 +- router/pkg/pubsub/nats/adapter.go | 19 ++- router/pkg/pubsub/nats/provider_builder.go | 8 +- router/pkg/pubsub/pubsub.go | 31 ++-- router/pkg/pubsub/pubsub_test.go | 6 +- router/pkg/pubsub/redis/adapter.go | 29 ++-- router/pkg/pubsub/redis/provider_builder.go | 4 +- 23 files changed, 600 insertions(+), 61 deletions(-) create mode 100644 router/pkg/metric/event_measurements.go create mode 100644 router/pkg/metric/event_metric_store.go create mode 100644 router/pkg/metric/noop_event_metrics.go create mode 100644 router/pkg/metric/oltp_event_metric_store.go create mode 100644 router/pkg/metric/prom_event_metric_store.go diff --git a/demo/pkg/subgraphs/subgraphs.go b/demo/pkg/subgraphs/subgraphs.go index e89e2a5def..7a833b8099 100644 --- a/demo/pkg/subgraphs/subgraphs.go +++ b/demo/pkg/subgraphs/subgraphs.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" "io" "log" "net/http" @@ -210,13 +211,13 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID := map[string]natsPubsub.Adapter{} - defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test") + defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{}) if err != nil { return nil, fmt.Errorf("failed to create default nats adapter: %w", err) } natsPubSubByProviderID["default"] = defaultAdapter - myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test") + myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{}) if err != nil { return nil, fmt.Errorf("failed to create my-nats adapter: %w", err) } diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 9a3d953ac2..b76d67b77d 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -10,6 +10,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" "io" "log" "math/rand" @@ -2817,7 +2818,7 @@ func subgraphOptions(ctx context.Context, t testing.TB, logger *zap.Logger, nats } natsPubSubByProviderID := make(map[string]pubsubNats.Adapter, 
len(DemoNatsProviders)) for _, sourceName := range DemoNatsProviders { - adapter, err := pubsubNats.NewAdapter(ctx, logger, natsData.Params[0].Url, natsData.Params[0].Opts, "hostname", "listenaddr") + adapter, err := pubsubNats.NewAdapter(ctx, logger, natsData.Params[0].Url, natsData.Params[0].Opts, "hostname", "listenaddr", datasource.ProviderOpts{}) require.NoError(t, err) require.NoError(t, adapter.Startup(ctx)) t.Cleanup(func() { diff --git a/router/core/factoryresolver.go b/router/core/factoryresolver.go index 41f0c4d425..6355015c82 100644 --- a/router/core/factoryresolver.go +++ b/router/core/factoryresolver.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "net/http" "net/url" "slices" @@ -211,6 +212,7 @@ type RouterEngineConfiguration struct { Headers *config.HeaderRules Events config.EventsConfiguration SubgraphErrorPropagation config.SubgraphErrorPropagationConfiguration + EventMetricStore *rmetric.EventMetrics } func mapProtoFilterToPlanFilter(input *nodev1.SubscriptionFilterCondition, output *plan.SubscriptionFilterCondition) *plan.SubscriptionFilterCondition { @@ -470,6 +472,7 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod factoryProviders, factoryDataSources, err := pubsub.BuildProvidersAndDataSources( l.ctx, routerEngineConfig.Events, + routerEngineConfig.EventMetricStore, l.logger, pubSubDS, l.resolver.InstanceData().HostName, diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 372bdc03a8..86d65f205d 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -514,6 +514,7 @@ type graphMux struct { metricStore rmetric.Store prometheusCacheMetrics *rmetric.CacheMetrics otelCacheMetrics *rmetric.CacheMetrics + eventMetricStore *rmetric.EventMetrics } // buildOperationCaches creates the caches for the graph mux. 
@@ -755,6 +756,12 @@ func (s *graphMux) Shutdown(ctx context.Context) error { } } + if s.eventMetricStore != nil { + if aErr := s.eventMetricStore.Shutdown(ctx); aErr != nil { + err = errors.Join(err, aErr) + } + } + if err != nil { return fmt.Errorf("shutdown graph mux: %w", err) } @@ -868,6 +875,19 @@ func (s *graphServer) buildGraphMux( } } + if s.metricConfig.OpenTelemetry.EventMetrics || s.metricConfig.Prometheus.EventMetrics { + store, err := rmetric.NewEventMetricStore( + s.logger, + baseMetricAttributes, + s.otlpMeterProvider, + s.promMeterProvider, + s.metricConfig) + if err != nil { + return nil, err + } + gm.eventMetricStore = store + } + subgraphs, err := configureSubgraphOverwrites( opts.EngineConfig, opts.ConfigSubgraphs, @@ -1110,6 +1130,7 @@ func (s *graphServer) buildGraphMux( Headers: s.headerRules, Events: s.eventsConfig, SubgraphErrorPropagation: s.subgraphErrorPropagation, + EventMetricStore: gm.eventMetricStore, } // map[string]*http.Transport cannot be coerced into map[string]http.RoundTripper, unfortunately diff --git a/router/core/router.go b/router/core/router.go index a4c050312b..354dc5da43 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -2239,6 +2239,7 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config { }, Exporters: openTelemetryExporters, CircuitBreaker: cfg.Metrics.OTLP.CircuitBreaker, + EventMetrics: cfg.Metrics.OTLP.EventMetrics, ExcludeMetrics: cfg.Metrics.OTLP.ExcludeMetrics, ExcludeMetricLabels: cfg.Metrics.OTLP.ExcludeMetricLabels, }, @@ -2254,6 +2255,7 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config { CircuitBreaker: cfg.Metrics.Prometheus.CircuitBreaker, ExcludeMetrics: cfg.Metrics.Prometheus.ExcludeMetrics, ExcludeMetricLabels: cfg.Metrics.Prometheus.ExcludeMetricLabels, + EventMetrics: cfg.Metrics.Prometheus.EventMetrics, ExcludeScopeInfo: cfg.Metrics.Prometheus.ExcludeScopeInfo, PromSchemaFieldUsage: rmetric.PrometheusSchemaFieldUsage{ Enabled: 
cfg.Metrics.Prometheus.SchemaFieldUsage.Enabled, diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index 7ce0ecb156..7669abbeb7 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -100,6 +100,7 @@ type Prometheus struct { ListenAddr string `yaml:"listen_addr" envDefault:"127.0.0.1:8088" env:"PROMETHEUS_LISTEN_ADDR"` GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"PROMETHEUS_GRAPHQL_CACHE"` ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"PROMETHEUS_CONNECTION_STATS"` + EventMetrics bool `yaml:"event_metrics" envDefault:"false" env:"PROMETHEUS_EVENT_METRICS"` EngineStats EngineStats `yaml:"engine_stats" envPrefix:"PROMETHEUS_"` CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"PROMETHEUS_CIRCUIT_BREAKER"` ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"PROMETHEUS_EXCLUDE_METRICS"` @@ -137,6 +138,7 @@ type MetricsOTLP struct { ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"METRICS_OTLP_CONNECTION_STATS"` EngineStats EngineStats `yaml:"engine_stats" envPrefix:"METRICS_OTLP_"` CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"METRICS_OTLP_CIRCUIT_BREAKER"` + EventMetrics bool `yaml:"event_metrics" envDefault:"false" env:"METRICS_OTLP_EVENT_METRICS"` ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"METRICS_OTLP_EXCLUDE_METRICS"` ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"METRICS_OTLP_EXCLUDE_METRIC_LABELS"` Exporters []MetricsOTLPExporter `yaml:"exporters"` diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 2cc435b214..3e27ee946d 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -1058,6 +1058,11 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." 
}, + "event_metrics": { + "type": "boolean", + "default": false, + "description": "Enable the collection of event metrics. The default value is false." + }, "circuit_breaker": { "type": "boolean", "default": false, @@ -1163,6 +1168,11 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." }, + "event_metrics": { + "type": "boolean", + "default": false, + "description": "Enable the collection of event metrics. The default value is false." + }, "circuit_breaker": { "type": "boolean", "default": false, diff --git a/router/pkg/metric/config.go b/router/pkg/metric/config.go index 02b8fb87bd..351e59b198 100644 --- a/router/pkg/metric/config.go +++ b/router/pkg/metric/config.go @@ -35,6 +35,7 @@ type PrometheusConfig struct { ExcludeScopeInfo bool // Prometheus schema field usage configuration PromSchemaFieldUsage PrometheusSchemaFieldUsage + EventMetrics bool } type PrometheusSchemaFieldUsage struct { @@ -78,7 +79,8 @@ type OpenTelemetry struct { // Metric labels to exclude from the OTLP exporter. ExcludeMetricLabels []*regexp.Regexp // TestReader is used for testing purposes. If set, the reader will be used instead of the configured exporters. 
- TestReader sdkmetric.Reader + TestReader sdkmetric.Reader + EventMetrics bool } func GetDefaultExporter(cfg *Config) *OpenTelemetryExporter { diff --git a/router/pkg/metric/event_measurements.go b/router/pkg/metric/event_measurements.go new file mode 100644 index 0000000000..9f93ef37d7 --- /dev/null +++ b/router/pkg/metric/event_measurements.go @@ -0,0 +1,156 @@ +package metric + +import ( + "fmt" + + otelmetric "go.opentelemetry.io/otel/metric" +) + +// Event (Kafka/Redis/NATS) metric constants +const ( + kafkaPublishMessages = "router.kafka.publish.messages" + kafkaPublishFailures = "router.kafka.publish.fail" + kafkaMessagesReceived = "router.kafka.messages.received" + + redisPublishMessages = "router.redis.publish.messages" + redisPublishFailures = "router.redis.publish.fail" + redisMessagesReceived = "router.redis.messages.received" + + natsPublishMessages = "router.nats.publish.messages" + natsPublishFailures = "router.nats.publish.fail" + natsMessagesReceived = "router.nats.messages.received" +) + +var ( + kafkaPublishMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of Kafka messages published"), + } + kafkaPublishFailuresOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of Kafka publish failures"), + } + kafkaMessagesReceivedOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of Kafka messages received"), + } + + redisPublishMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of Redis messages published"), + } + redisPublishFailuresOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of Redis publish failures"), + } + redisMessagesReceivedOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of Redis messages received"), + } + + natsPublishMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of NATS messages published"), + } + 
natsPublishFailuresOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of NATS publish failures"), + } + natsMessagesReceivedOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of NATS messages received"), + } +) + +type eventInstruments struct { + kafkaPublishMessages otelmetric.Int64Counter + kafkaPublishFailures otelmetric.Int64Counter + kafkaMessagesReceived otelmetric.Int64Counter + + redisPublishMessages otelmetric.Int64Counter + redisPublishFailures otelmetric.Int64Counter + redisMessagesReceived otelmetric.Int64Counter + + natsPublishMessages otelmetric.Int64Counter + natsPublishFailures otelmetric.Int64Counter + natsMessagesReceived otelmetric.Int64Counter +} + +func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { + kafkaPublishMessagesCounter, err := meter.Int64Counter( + kafkaPublishMessages, + kafkaPublishMessagesOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create kafka publish messages counter: %w", err) + } + + kafkaPublishFailuresCounter, err := meter.Int64Counter( + kafkaPublishFailures, + kafkaPublishFailuresOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create kafka publish failures counter: %w", err) + } + + kafkaMessagesReceivedCounter, err := meter.Int64Counter( + kafkaMessagesReceived, + kafkaMessagesReceivedOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create kafka messages received counter: %w", err) + } + + redisPublishMessagesCounter, err := meter.Int64Counter( + redisPublishMessages, + redisPublishMessagesOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create redis publish messages counter: %w", err) + } + + redisPublishFailuresCounter, err := meter.Int64Counter( + redisPublishFailures, + redisPublishFailuresOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create redis publish failures counter: %w", err) + } + + redisMessagesReceivedCounter, err 
:= meter.Int64Counter( + redisMessagesReceived, + redisMessagesReceivedOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create redis messages received counter: %w", err) + } + + natsPublishMessagesCounter, err := meter.Int64Counter( + natsPublishMessages, + natsPublishMessagesOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create nats publish messages counter: %w", err) + } + + natsPublishFailuresCounter, err := meter.Int64Counter( + natsPublishFailures, + natsPublishFailuresOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create nats publish failures counter: %w", err) + } + + natsMessagesReceivedCounter, err := meter.Int64Counter( + natsMessagesReceived, + natsMessagesReceivedOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create nats messages received counter: %w", err) + } + + return &eventInstruments{ + kafkaPublishMessages: kafkaPublishMessagesCounter, + kafkaPublishFailures: kafkaPublishFailuresCounter, + kafkaMessagesReceived: kafkaMessagesReceivedCounter, + + redisPublishMessages: redisPublishMessagesCounter, + redisPublishFailures: redisPublishFailuresCounter, + redisMessagesReceived: redisMessagesReceivedCounter, + + natsPublishMessages: natsPublishMessagesCounter, + natsPublishFailures: natsPublishFailuresCounter, + natsMessagesReceived: natsMessagesReceivedCounter, + }, nil +} diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go new file mode 100644 index 0000000000..c23c6a808f --- /dev/null +++ b/router/pkg/metric/event_metric_store.go @@ -0,0 +1,120 @@ +package metric + +import ( + "context" + "errors" + "fmt" + + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + "go.uber.org/zap" +) + +// EventBackend represents supported backends +const ( + EventBackendKafka = "kafka" + EventBackendRedis = "redis" + EventBackendNats = "nats" +) + +// 
EventMetricProvider is the interface that wraps the basic Event metric methods. +// We maintain two providers, one for OTEL and one for Prometheus. +type EventMetricProvider interface { + Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) + PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) + MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) + Flush(ctx context.Context) error + Shutdown() error +} + +// EventMetrics is the store for Event (Kafka/Redis/NATS) metrics. +type EventMetrics struct { + baseAttributes []attribute.KeyValue + logger *zap.Logger + + otlpMetrics EventMetricProvider + promMetrics EventMetricProvider +} + +func NewEventMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue, otelProvider, promProvider *metric.MeterProvider, metricsConfig *Config) (*EventMetrics, error) { + store := &EventMetrics{ + baseAttributes: baseAttributes, + logger: logger, + otlpMetrics: &noopEventMetricProvider{}, + promMetrics: &noopEventMetricProvider{}, + } + + if metricsConfig.OpenTelemetry.EventMetrics { + otlpMetrics, err := newOtlpEventMetrics(logger, otelProvider, baseAttributes) + if err != nil { + return nil, fmt.Errorf("failed to create otlp event metrics: %w", err) + } + store.otlpMetrics = otlpMetrics + } + + if metricsConfig.Prometheus.EventMetrics { + promMetrics, err := newPromEventMetrics(logger, promProvider, baseAttributes) + if err != nil { + return nil, fmt.Errorf("failed to create prometheus event metrics: %w", err) + } + store.promMetrics = promMetrics + } + + return store, nil +} + +func (e *EventMetrics) Publish(ctx context.Context, backend string, count int64, attrs ...attribute.KeyValue) { + copied := append([]attribute.KeyValue{}, e.baseAttributes...) + opts := otelmetric.WithAttributes(append(copied, attrs...)...) 
+ e.otlpMetrics.Publish(ctx, backend, count, opts) + e.promMetrics.Publish(ctx, backend, count, opts) +} + +func (e *EventMetrics) PublishFailure(ctx context.Context, backend string, count int64, attrs ...attribute.KeyValue) { + copied := append([]attribute.KeyValue{}, e.baseAttributes...) + opts := otelmetric.WithAttributes(append(copied, attrs...)...) + e.otlpMetrics.PublishFailure(ctx, backend, count, opts) + e.promMetrics.PublishFailure(ctx, backend, count, opts) +} + +func (e *EventMetrics) MessageReceived(ctx context.Context, backend string, count int64, attrs ...attribute.KeyValue) { + copied := append([]attribute.KeyValue{}, e.baseAttributes...) + opts := otelmetric.WithAttributes(append(copied, attrs...)...) + e.otlpMetrics.MessageReceived(ctx, backend, count, opts) + e.promMetrics.MessageReceived(ctx, backend, count, opts) +} + +// Flush flushes the metrics to the backend synchronously. +func (e *EventMetrics) Flush(ctx context.Context) error { + var err error + + if errOtlp := e.otlpMetrics.Flush(ctx); errOtlp != nil { + err = errors.Join(err, fmt.Errorf("failed to flush otlp metrics: %w", errOtlp)) + } + + if errProm := e.promMetrics.Flush(ctx); errProm != nil { + err = errors.Join(err, fmt.Errorf("failed to flush prometheus metrics: %w", errProm)) + } + + return err +} + +// Shutdown flushes the metrics and stops observers if any. 
+func (e *EventMetrics) Shutdown(ctx context.Context) error { + var err error + + if errFlush := e.Flush(ctx); errFlush != nil { + err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errFlush)) + } + + if errProm := e.promMetrics.Shutdown(); errProm != nil { + err = errors.Join(err, fmt.Errorf("failed to shutdown prom metrics: %w", errProm)) + } + + if errOtlp := e.otlpMetrics.Shutdown(); errOtlp != nil { + err = errors.Join(err, fmt.Errorf("failed to shutdown otlp metrics: %w", errOtlp)) + } + + return err +} diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go new file mode 100644 index 0000000000..91b5f6e166 --- /dev/null +++ b/router/pkg/metric/noop_event_metrics.go @@ -0,0 +1,19 @@ +package metric + +import ( + "context" + + otelmetric "go.opentelemetry.io/otel/metric" +) + +// A noop metric provider so we do not need to do nil checks for each provider call from the store +type noopEventMetricProvider struct{} + +func (n *noopEventMetricProvider) Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { +} +func (n *noopEventMetricProvider) PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { +} +func (n *noopEventMetricProvider) MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { +} +func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } +func (n *noopEventMetricProvider) Shutdown() error { return nil } diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_event_metric_store.go new file mode 100644 index 0000000000..ab5761d8db --- /dev/null +++ b/router/pkg/metric/oltp_event_metric_store.go @@ -0,0 +1,87 @@ +package metric + +import ( + "context" + "fmt" + + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + "go.uber.org/zap" +) + +const ( + 
cosmoRouterEventMeterName = "cosmo.router.event" + cosmoRouterEventMeterVersion = "0.0.1" +) + +type otlpEventMetrics struct { + instruments *eventInstruments + meterProvider *metric.MeterProvider + logger *zap.Logger + meter otelmetric.Meter +} + +func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider, baseAttributes []attribute.KeyValue) (*otlpEventMetrics, error) { + meter := meterProvider.Meter( + cosmoRouterEventMeterName, + otelmetric.WithInstrumentationVersion(cosmoRouterEventMeterVersion), + ) + + instruments, err := newEventInstruments(meter) + if err != nil { + return nil, fmt.Errorf("failed to create otlp event instruments: %w", err) + } + + return &otlpEventMetrics{ + instruments: instruments, + meterProvider: meterProvider, + logger: logger, + meter: meter, + }, nil +} + +func (o *otlpEventMetrics) Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { + switch backend { + case EventBackendKafka: + o.instruments.kafkaPublishMessages.Add(ctx, count, opts...) + case EventBackendRedis: + o.instruments.redisPublishMessages.Add(ctx, count, opts...) + case EventBackendNats: + o.instruments.natsPublishMessages.Add(ctx, count, opts...) + default: + o.instruments.kafkaPublishMessages.Add(ctx, count, opts...) + } +} + +func (o *otlpEventMetrics) PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { + switch backend { + case EventBackendKafka: + o.instruments.kafkaPublishFailures.Add(ctx, count, opts...) + case EventBackendRedis: + o.instruments.redisPublishFailures.Add(ctx, count, opts...) + case EventBackendNats: + o.instruments.natsPublishFailures.Add(ctx, count, opts...) + default: + o.instruments.kafkaPublishFailures.Add(ctx, count, opts...) 
+ } +} + +func (o *otlpEventMetrics) MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { + switch backend { + case EventBackendKafka: + o.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) + case EventBackendRedis: + o.instruments.redisMessagesReceived.Add(ctx, count, opts...) + case EventBackendNats: + o.instruments.natsMessagesReceived.Add(ctx, count, opts...) + default: + o.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) + } +} + +func (o *otlpEventMetrics) Flush(ctx context.Context) error { + return o.meterProvider.ForceFlush(ctx) +} + +func (o *otlpEventMetrics) Shutdown() error { return nil } diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_event_metric_store.go new file mode 100644 index 0000000000..828763b69e --- /dev/null +++ b/router/pkg/metric/prom_event_metric_store.go @@ -0,0 +1,84 @@ +package metric + +import ( + "context" + "fmt" + + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + "go.uber.org/zap" +) + +const ( + cosmoRouterEventPromMeterName = "cosmo.router.event.prometheus" + cosmoRouterEventPromMeterVersion = "0.0.1" +) + +type promEventMetrics struct { + instruments *eventInstruments + meterProvider *metric.MeterProvider + logger *zap.Logger + meter otelmetric.Meter +} + +func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider, baseAttributes []attribute.KeyValue) (*promEventMetrics, error) { + meter := meterProvider.Meter( + cosmoRouterEventPromMeterName, + otelmetric.WithInstrumentationVersion(cosmoRouterEventPromMeterVersion), + ) + + instruments, err := newEventInstruments(meter) + if err != nil { + return nil, fmt.Errorf("failed to create prometheus event instruments: %w", err) + } + + return &promEventMetrics{ + instruments: instruments, + meterProvider: meterProvider, + logger: logger, + meter: meter, + }, nil +} + +func (p 
*promEventMetrics) Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { + switch backend { + case EventBackendKafka: + p.instruments.kafkaPublishMessages.Add(ctx, count, opts...) + case EventBackendRedis: + p.instruments.redisPublishMessages.Add(ctx, count, opts...) + case EventBackendNats: + p.instruments.natsPublishMessages.Add(ctx, count, opts...) + default: + p.instruments.kafkaPublishMessages.Add(ctx, count, opts...) + } +} + +func (p *promEventMetrics) PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { + switch backend { + case EventBackendKafka: + p.instruments.kafkaPublishFailures.Add(ctx, count, opts...) + case EventBackendRedis: + p.instruments.redisPublishFailures.Add(ctx, count, opts...) + case EventBackendNats: + p.instruments.natsPublishFailures.Add(ctx, count, opts...) + default: + p.instruments.kafkaPublishFailures.Add(ctx, count, opts...) + } +} + +func (p *promEventMetrics) MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { + switch backend { + case EventBackendKafka: + p.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) + case EventBackendRedis: + p.instruments.redisMessagesReceived.Add(ctx, count, opts...) + case EventBackendNats: + p.instruments.natsMessagesReceived.Add(ctx, count, opts...) + default: + p.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) 
+ } +} + +func (p *promEventMetrics) Flush(ctx context.Context) error { return p.meterProvider.ForceFlush(ctx) } +func (p *promEventMetrics) Shutdown() error { return nil } diff --git a/router/pkg/pubsub/datasource/mocks.go b/router/pkg/pubsub/datasource/mocks.go index 067da9c86c..a6bbb19e18 100644 --- a/router/pkg/pubsub/datasource/mocks.go +++ b/router/pkg/pubsub/datasource/mocks.go @@ -792,7 +792,7 @@ func (_c *MockProviderBuilder_BuildEngineDataSourceFactory_Call[P, E]) RunAndRet } // BuildProvider provides a mock function for the type MockProviderBuilder -func (_mock *MockProviderBuilder[P, E]) BuildProvider(options P) (Provider, error) { -	ret := _mock.Called(options) +func (_mock *MockProviderBuilder[P, E]) BuildProvider(options P, providerOpts ProviderOpts) (Provider, error) { +	ret := _mock.Called(options, providerOpts) if len(ret) == 0 { diff --git a/router/pkg/pubsub/datasource/provider.go b/router/pkg/pubsub/datasource/provider.go index f90446a712..b97a3941b8 100644 --- a/router/pkg/pubsub/datasource/provider.go +++ b/router/pkg/pubsub/datasource/provider.go @@ -2,6 +2,7 @@ package datasource import ( "context" + "github.com/wundergraph/cosmo/router/pkg/metric" ) type ArgumentTemplateCallback func(tpl string) (string, error) @@ -27,7 +28,11 @@ type ProviderBuilder[P, E any] interface { // TypeID Get the provider type id (e.g. 
"kafka", "nats") TypeID() string // BuildProvider Build the provider and the adapter - BuildProvider(options P) (Provider, error) + BuildProvider(options P, providerOpts ProviderOpts) (Provider, error) // BuildEngineDataSourceFactory Build the data source for the given provider and event configuration BuildEngineDataSourceFactory(data E) (EngineDataSourceFactory, error) } + +type ProviderOpts struct { + EventMetricStore *metric.EventMetrics +} diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 503b8f6f37..d6cee25ac8 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "github.com/wundergraph/cosmo/router/pkg/metric" "strings" "sync" "time" @@ -33,12 +34,13 @@ type Adapter interface { // It uses a single write client to produce messages and a client per topic to consume messages. // Each client polls the Kafka topic for new records and updates the subscriptions with the new data. type ProviderAdapter struct { - ctx context.Context - opts []kgo.Opt - logger *zap.Logger - writeClient *kgo.Client - closeWg sync.WaitGroup - cancel context.CancelFunc + ctx context.Context + opts []kgo.Opt + logger *zap.Logger + writeClient *kgo.Client + closeWg sync.WaitGroup + cancel context.CancelFunc + eventMetricStore *metric.EventMetrics } // topicPoller polls the Kafka topic for new records and calls the updateTriggers function. 
@@ -181,6 +183,8 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) } + p.eventMetricStore.Publish(ctx, "nats", 1) + return nil } @@ -222,17 +226,18 @@ func (p *ProviderAdapter) Shutdown(ctx context.Context) error { return nil } -func NewProviderAdapter(ctx context.Context, logger *zap.Logger, opts []kgo.Opt) (*ProviderAdapter, error) { +func NewProviderAdapter(ctx context.Context, logger *zap.Logger, opts []kgo.Opt, providerOpts datasource.ProviderOpts) (*ProviderAdapter, error) { ctx, cancel := context.WithCancel(ctx) if logger == nil { logger = zap.NewNop() } return &ProviderAdapter{ - ctx: ctx, - logger: logger.With(zap.String("pubsub", "kafka")), - opts: opts, - closeWg: sync.WaitGroup{}, - cancel: cancel, + ctx: ctx, + logger: logger.With(zap.String("pubsub", "kafka")), + opts: opts, + closeWg: sync.WaitGroup{}, + cancel: cancel, + eventMetricStore: providerOpts.EventMetricStore, }, nil } diff --git a/router/pkg/pubsub/kafka/provider_builder.go b/router/pkg/pubsub/kafka/provider_builder.go index 3007b1fafe..c88cf814c2 100644 --- a/router/pkg/pubsub/kafka/provider_builder.go +++ b/router/pkg/pubsub/kafka/provider_builder.go @@ -56,8 +56,8 @@ func (p *ProviderBuilder) BuildEngineDataSourceFactory(data *nodev1.KafkaEventCo }, nil } -func (p *ProviderBuilder) BuildProvider(provider config.KafkaEventSource) (datasource.Provider, error) { - adapter, pubSubProvider, err := buildProvider(p.ctx, provider, p.logger) +func (p *ProviderBuilder) BuildProvider(provider config.KafkaEventSource, providerOpts datasource.ProviderOpts) (datasource.Provider, error) { + adapter, pubSubProvider, err := buildProvider(p.ctx, provider, p.logger, providerOpts) if err != nil { return nil, err } @@ -150,12 +150,12 @@ func buildKafkaOptions(eventSource config.KafkaEventSource, logger *zap.Logger) return opts, nil } -func buildProvider(ctx context.Context, 
provider config.KafkaEventSource, logger *zap.Logger) (Adapter, datasource.Provider, error) { - options, err := buildKafkaOptions(provider, logger) +func buildProvider(ctx context.Context, provider config.KafkaEventSource, logger *zap.Logger, providerOpts datasource.ProviderOpts) (Adapter, datasource.Provider, error) { + kafkaOpts, err := buildKafkaOptions(provider, logger) if err != nil { return nil, nil, fmt.Errorf("failed to build options for Kafka provider with ID \"%s\": %w", provider.ID, err) } - adapter, err := NewProviderAdapter(ctx, logger, options) + adapter, err := NewProviderAdapter(ctx, logger, kafkaOpts, providerOpts) if err != nil { return nil, nil, fmt.Errorf("failed to create adapter for Kafka provider with ID \"%s\": %w", provider.ID, err) } diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index a0bef13f45..c8d5b05fb2 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "github.com/wundergraph/cosmo/router/pkg/metric" "io" "sync" "time" @@ -42,6 +43,7 @@ type ProviderAdapter struct { url string opts []nats.Option flushTimeout time.Duration + eventMetricStore *metric.EventMetrics } // getInstanceIdentifier returns an identifier for the current instance. 
@@ -132,6 +134,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent for msg := range msgBatch.Messages() { log.Debug("subscription update", zap.String("message_subject", msg.Subject()), zap.ByteString("data", msg.Data())) + p.eventMetricStore.MessageReceived(p.ctx, "nats", 1) updater.Update(msg.Data()) // Acknowledge the message after it has been processed @@ -169,6 +172,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent select { case msg := <-msgChan: log.Debug("subscription update", zap.String("message_subject", msg.Subject), zap.ByteString("data", msg.Data)) + p.eventMetricStore.MessageReceived(p.ctx, "nats", 1) updater.Update(msg.Data) case <-p.ctx.Done(): // When the application context is done, we stop the subscriptions @@ -197,7 +201,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent return nil } -func (p *ProviderAdapter) Publish(_ context.Context, event PublishAndRequestEventConfiguration) error { +func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEventConfiguration) error { log := p.logger.With( zap.String("provider_id", event.ProviderID), zap.String("method", "publish"), @@ -216,6 +220,8 @@ func (p *ProviderAdapter) Publish(_ context.Context, event PublishAndRequestEven return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) } + p.eventMetricStore.Publish(ctx, "nats", 1) + return nil } @@ -303,7 +309,15 @@ func (p *ProviderAdapter) Shutdown(ctx context.Context) error { return nil } -func NewAdapter(ctx context.Context, logger *zap.Logger, url string, opts []nats.Option, hostName string, routerListenAddr string) (Adapter, error) { +func NewAdapter( + ctx context.Context, + logger *zap.Logger, + url string, + opts []nats.Option, + hostName string, + routerListenAddr string, + providerOpts datasource.ProviderOpts, +) (Adapter, error) { if logger == nil { logger = zap.NewNop() } @@ 
-317,5 +331,6 @@ func NewAdapter(ctx context.Context, logger *zap.Logger, url string, opts []nats url: url, opts: opts, flushTimeout: 10 * time.Second, + eventMetricStore: providerOpts.EventMetricStore, }, nil } diff --git a/router/pkg/pubsub/nats/provider_builder.go b/router/pkg/pubsub/nats/provider_builder.go index e1ae0a1e70..e3ba5f7cb0 100644 --- a/router/pkg/pubsub/nats/provider_builder.go +++ b/router/pkg/pubsub/nats/provider_builder.go @@ -64,8 +64,8 @@ func (p *ProviderBuilder) BuildEngineDataSourceFactory(data *nodev1.NatsEventCon return dataSourceFactory, nil } -func (p *ProviderBuilder) BuildProvider(provider config.NatsEventSource) (datasource.Provider, error) { - adapter, pubSubProvider, err := buildProvider(p.ctx, provider, p.logger, p.hostName, p.routerListenAddr) +func (p *ProviderBuilder) BuildProvider(provider config.NatsEventSource, providerOpts datasource.ProviderOpts) (datasource.Provider, error) { + adapter, pubSubProvider, err := buildProvider(p.ctx, provider, p.logger, p.hostName, p.routerListenAddr, providerOpts) if err != nil { return nil, err } @@ -118,12 +118,12 @@ func buildNatsOptions(eventSource config.NatsEventSource, logger *zap.Logger) ([ return opts, nil } -func buildProvider(ctx context.Context, provider config.NatsEventSource, logger *zap.Logger, hostName string, routerListenAddr string) (Adapter, datasource.Provider, error) { +func buildProvider(ctx context.Context, provider config.NatsEventSource, logger *zap.Logger, hostName string, routerListenAddr string, providerOpts datasource.ProviderOpts) (Adapter, datasource.Provider, error) { options, err := buildNatsOptions(provider, logger) if err != nil { return nil, nil, fmt.Errorf("failed to build options for Nats provider with ID \"%s\": %w", provider.ID, err) } - adapter, err := NewAdapter(ctx, logger, provider.URL, options, hostName, routerListenAddr) + adapter, err := NewAdapter(ctx, logger, provider.URL, options, hostName, routerListenAddr, providerOpts) if err != nil { 
return nil, nil, fmt.Errorf("failed to create adapter for Nats provider with ID \"%s\": %w", provider.ID, err) } diff --git a/router/pkg/pubsub/pubsub.go b/router/pkg/pubsub/pubsub.go index c6ec29be82..f30b77abe7 100644 --- a/router/pkg/pubsub/pubsub.go +++ b/router/pkg/pubsub/pubsub.go @@ -3,6 +3,7 @@ package pubsub import ( "context" "fmt" + "github.com/wundergraph/cosmo/router/pkg/metric" "slices" "strconv" @@ -51,14 +52,7 @@ func (e *ProviderNotDefinedError) Error() string { // BuildProvidersAndDataSources is a generic function that builds providers and data sources for the given // EventsConfiguration and DataSourceConfigurationWithMetadata -func BuildProvidersAndDataSources( - ctx context.Context, - config config.EventsConfiguration, - logger *zap.Logger, - dsConfs []DataSourceConfigurationWithMetadata, - hostName string, - routerListenAddr string, -) ([]pubsub_datasource.Provider, []plan.DataSource, error) { +func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfiguration, store *metric.EventMetrics, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { var pubSubProviders []pubsub_datasource.Provider var outs []plan.DataSource @@ -66,12 +60,13 @@ func BuildProvidersAndDataSources( kafkaBuilder := kafka.NewProviderBuilder(ctx, logger, hostName, routerListenAddr) kafkaDsConfsWithEvents := []dsConfAndEvents[*nodev1.KafkaEventConfiguration]{} for _, dsConf := range dsConfs { + getKafka := dsConf.Configuration.GetCustomEvents().GetKafka() kafkaDsConfsWithEvents = append(kafkaDsConfsWithEvents, dsConfAndEvents[*nodev1.KafkaEventConfiguration]{ dsConf: &dsConf, - events: dsConf.Configuration.GetCustomEvents().GetKafka(), + events: getKafka, }) } - kafkaPubSubProviders, kafkaOuts, err := build(ctx, kafkaBuilder, config.Providers.Kafka, kafkaDsConfsWithEvents) + kafkaPubSubProviders, kafkaOuts, err := build(ctx, kafkaBuilder, 
config.Providers.Kafka, kafkaDsConfsWithEvents, store) if err != nil { return nil, nil, err } @@ -87,7 +82,7 @@ func BuildProvidersAndDataSources( events: dsConf.Configuration.GetCustomEvents().GetNats(), }) } - natsPubSubProviders, natsOuts, err := build(ctx, natsBuilder, config.Providers.Nats, natsDsConfsWithEvents) + natsPubSubProviders, natsOuts, err := build(ctx, natsBuilder, config.Providers.Nats, natsDsConfsWithEvents, store) if err != nil { return nil, nil, err } @@ -103,7 +98,7 @@ func BuildProvidersAndDataSources( events: dsConf.Configuration.GetCustomEvents().GetRedis(), }) } - redisPubSubProviders, redisOuts, err := build(ctx, redisBuilder, config.Providers.Redis, redisDsConfsWithEvents) + redisPubSubProviders, redisOuts, err := build(ctx, redisBuilder, config.Providers.Redis, redisDsConfsWithEvents, store) if err != nil { return nil, nil, err } @@ -113,7 +108,13 @@ func BuildProvidersAndDataSources( return pubSubProviders, outs, nil } -func build[P GetID, E GetEngineEventConfiguration](ctx context.Context, builder pubsub_datasource.ProviderBuilder[P, E], providersData []P, dsConfs []dsConfAndEvents[E]) ([]pubsub_datasource.Provider, []plan.DataSource, error) { +func build[P GetID, E GetEngineEventConfiguration]( + ctx context.Context, + builder pubsub_datasource.ProviderBuilder[P, E], + providersData []P, + dsConfs []dsConfAndEvents[E], + store *metric.EventMetrics, +) ([]pubsub_datasource.Provider, []plan.DataSource, error) { var pubSubProviders []pubsub_datasource.Provider var outs []plan.DataSource @@ -133,7 +134,9 @@ func build[P GetID, E GetEngineEventConfiguration](ctx context.Context, builder if !slices.Contains(usedProviderIds, providerData.GetID()) { continue } - provider, err := builder.BuildProvider(providerData) + provider, err := builder.BuildProvider(providerData, pubsub_datasource.ProviderOpts{ + EventMetricStore: store, + }) if err != nil { return nil, nil, err } diff --git a/router/pkg/pubsub/pubsub_test.go 
b/router/pkg/pubsub/pubsub_test.go index a76194f7c5..65d90cf385 100644 --- a/router/pkg/pubsub/pubsub_test.go +++ b/router/pkg/pubsub/pubsub_test.go @@ -290,7 +290,7 @@ func TestBuildProvidersAndDataSources_Nats_OK(t *testing.T) { {ID: "provider-1"}, }, }, - }, zap.NewNop(), dsConfs, "host", "addr") + }, nil, zap.NewNop(), dsConfs, "host", "addr") // Assertions assert.NoError(t, err) @@ -343,7 +343,7 @@ func TestBuildProvidersAndDataSources_Kafka_OK(t *testing.T) { {ID: "provider-1"}, }, }, - }, zap.NewNop(), dsConfs, "host", "addr") + }, nil, zap.NewNop(), dsConfs, "host", "addr") // Assertions assert.NoError(t, err) @@ -396,7 +396,7 @@ func TestBuildProvidersAndDataSources_Redis_OK(t *testing.T) { {ID: "provider-1"}, }, }, - }, zap.NewNop(), dsConfs, "host", "addr") + }, nil, zap.NewNop(), dsConfs, "host", "addr") // Assertions assert.NoError(t, err) diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index 3efcabbf92..916c6f29d5 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -3,6 +3,7 @@ package redis import ( "context" "fmt" + "github.com/wundergraph/cosmo/router/pkg/metric" "sync" rd "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/redis" @@ -23,25 +24,27 @@ type Adapter interface { Shutdown(ctx context.Context) error } -func NewProviderAdapter(ctx context.Context, logger *zap.Logger, urls []string, clusterEnabled bool) Adapter { +func NewProviderAdapter(ctx context.Context, logger *zap.Logger, urls []string, clusterEnabled bool, opts datasource.ProviderOpts) Adapter { ctx, cancel := context.WithCancel(ctx) return &ProviderAdapter{ - ctx: ctx, - cancel: cancel, - logger: logger, - urls: urls, - clusterEnabled: clusterEnabled, + ctx: ctx, + cancel: cancel, + logger: logger, + urls: urls, + clusterEnabled: clusterEnabled, + eventMetricStore: opts.EventMetricStore, } } type ProviderAdapter struct { - ctx context.Context - cancel context.CancelFunc - conn 
rd.RDCloser - logger *zap.Logger - closeWg sync.WaitGroup - urls []string - clusterEnabled bool + ctx context.Context + cancel context.CancelFunc + conn rd.RDCloser + logger *zap.Logger + closeWg sync.WaitGroup + urls []string + clusterEnabled bool + eventMetricStore *metric.EventMetrics } func (p *ProviderAdapter) Startup(ctx context.Context) error { diff --git a/router/pkg/pubsub/redis/provider_builder.go b/router/pkg/pubsub/redis/provider_builder.go index 415963b885..46340934bd 100644 --- a/router/pkg/pubsub/redis/provider_builder.go +++ b/router/pkg/pubsub/redis/provider_builder.go @@ -66,8 +66,8 @@ func (b *ProviderBuilder) BuildEngineDataSourceFactory(data *nodev1.RedisEventCo } // Providers returns the Redis PubSub providers for the given provider IDs -func (b *ProviderBuilder) BuildProvider(provider config.RedisEventSource) (datasource.Provider, error) { - adapter := NewProviderAdapter(b.ctx, b.logger, provider.URLs, provider.ClusterEnabled) +func (b *ProviderBuilder) BuildProvider(provider config.RedisEventSource, providerOpts datasource.ProviderOpts) (datasource.Provider, error) { + adapter := NewProviderAdapter(b.ctx, b.logger, provider.URLs, provider.ClusterEnabled, providerOpts) pubSubProvider := datasource.NewPubSubProvider(provider.ID, providerTypeID, adapter, b.logger) b.adapters[provider.ID] = adapter From a29ce3bcd77b0b958ca131cc3850f5073670e0ea Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 15:17:32 +0530 Subject: [PATCH 02/40] fix: refactoring --- router/pkg/metric/event_metric_store.go | 81 ++++++++++++++++---- router/pkg/metric/noop_event_metrics.go | 15 +++- router/pkg/metric/oltp_event_metric_store.go | 63 ++++++++------- router/pkg/metric/prom_event_metric_store.go | 63 ++++++++------- router/pkg/pubsub/kafka/adapter.go | 5 +- router/pkg/pubsub/nats/adapter.go | 9 ++- 6 files changed, 144 insertions(+), 92 deletions(-) diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go index 
c23c6a808f..eebab60910 100644 --- a/router/pkg/metric/event_metric_store.go +++ b/router/pkg/metric/event_metric_store.go @@ -21,9 +21,18 @@ const ( // EventMetricProvider is the interface that wraps the basic Event metric methods. // We maintain two providers, one for OTEL and one for Prometheus. type EventMetricProvider interface { - Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) - PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) - MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) + KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) + KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) + KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) + + RedisPublish(ctx context.Context, opts ...otelmetric.AddOption) + RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) + RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) + + NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) + NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) + NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) + Flush(ctx context.Context) error Shutdown() error } @@ -64,25 +73,63 @@ func NewEventMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue return store, nil } -func (e *EventMetrics) Publish(ctx context.Context, backend string, count int64, attrs ...attribute.KeyValue) { +func (e *EventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOption { copied := append([]attribute.KeyValue{}, e.baseAttributes...) - opts := otelmetric.WithAttributes(append(copied, attrs...)...) - e.otlpMetrics.Publish(ctx, backend, count, opts) - e.promMetrics.Publish(ctx, backend, count, opts) + return otelmetric.WithAttributes(append(copied, attrs...)...) 
} -func (e *EventMetrics) PublishFailure(ctx context.Context, backend string, count int64, attrs ...attribute.KeyValue) { - copied := append([]attribute.KeyValue{}, e.baseAttributes...) - opts := otelmetric.WithAttributes(append(copied, attrs...)...) - e.otlpMetrics.PublishFailure(ctx, backend, count, opts) - e.promMetrics.PublishFailure(ctx, backend, count, opts) +func (e *EventMetrics) KafkaPublish(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.KafkaPublish(ctx, opts) + e.promMetrics.KafkaPublish(ctx, opts) } -func (e *EventMetrics) MessageReceived(ctx context.Context, backend string, count int64, attrs ...attribute.KeyValue) { - copied := append([]attribute.KeyValue{}, e.baseAttributes...) - opts := otelmetric.WithAttributes(append(copied, attrs...)...) - e.otlpMetrics.MessageReceived(ctx, backend, count, opts) - e.promMetrics.MessageReceived(ctx, backend, count, opts) +func (e *EventMetrics) KafkaPublishFailure(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.KafkaPublishFailure(ctx, opts) + e.promMetrics.KafkaPublishFailure(ctx, opts) +} + +func (e *EventMetrics) KafkaMessageReceived(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.KafkaMessageReceived(ctx, opts) + e.promMetrics.KafkaMessageReceived(ctx, opts) +} + +func (e *EventMetrics) RedisPublish(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.RedisPublish(ctx, opts) + e.promMetrics.RedisPublish(ctx, opts) +} + +func (e *EventMetrics) RedisPublishFailure(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.RedisPublishFailure(ctx, opts) + e.promMetrics.RedisPublishFailure(ctx, opts) +} + +func (e *EventMetrics) RedisMessageReceived(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) 
+ e.otlpMetrics.RedisMessageReceived(ctx, opts) + e.promMetrics.RedisMessageReceived(ctx, opts) +} + +func (e *EventMetrics) NatsPublish(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.NatsPublish(ctx, opts) + e.promMetrics.NatsPublish(ctx, opts) +} + +func (e *EventMetrics) NatsPublishFailure(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.NatsPublishFailure(ctx, opts) + e.promMetrics.NatsPublishFailure(ctx, opts) +} + +func (e *EventMetrics) NatsMessageReceived(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.NatsMessageReceived(ctx, opts) + e.promMetrics.NatsMessageReceived(ctx, opts) } // Flush flushes the metrics to the backend synchronously. diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go index 91b5f6e166..19d576cfc1 100644 --- a/router/pkg/metric/noop_event_metrics.go +++ b/router/pkg/metric/noop_event_metrics.go @@ -9,11 +9,20 @@ import ( // A noop metric provider so we do not need to do nil checks for each provider call from the store type noopEventMetricProvider struct{} -func (n *noopEventMetricProvider) Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { +func (n *noopEventMetricProvider) KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { } -func (n *noopEventMetricProvider) PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { +func (n *noopEventMetricProvider) KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { } -func (n *noopEventMetricProvider) MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { +func (n *noopEventMetricProvider) RedisPublish(ctx context.Context, opts 
...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { +} +func (n *noopEventMetricProvider) RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { +} +func (n *noopEventMetricProvider) NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { +} +func (n *noopEventMetricProvider) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { } func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } func (n *noopEventMetricProvider) Shutdown() error { return nil } diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_event_metric_store.go index ab5761d8db..1803b9f3be 100644 --- a/router/pkg/metric/oltp_event_metric_store.go +++ b/router/pkg/metric/oltp_event_metric_store.go @@ -41,43 +41,40 @@ func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider }, nil } -func (o *otlpEventMetrics) Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { - switch backend { - case EventBackendKafka: - o.instruments.kafkaPublishMessages.Add(ctx, count, opts...) - case EventBackendRedis: - o.instruments.redisPublishMessages.Add(ctx, count, opts...) - case EventBackendNats: - o.instruments.natsPublishMessages.Add(ctx, count, opts...) - default: - o.instruments.kafkaPublishMessages.Add(ctx, count, opts...) - } +func (o *otlpEventMetrics) KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.kafkaPublishMessages.Add(ctx, 1, opts...) } -func (o *otlpEventMetrics) PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { - switch backend { - case EventBackendKafka: - o.instruments.kafkaPublishFailures.Add(ctx, count, opts...) 
- case EventBackendRedis: - o.instruments.redisPublishFailures.Add(ctx, count, opts...) - case EventBackendNats: - o.instruments.natsPublishFailures.Add(ctx, count, opts...) - default: - o.instruments.kafkaPublishFailures.Add(ctx, count, opts...) - } +func (o *otlpEventMetrics) KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.kafkaPublishFailures.Add(ctx, 1, opts...) } -func (o *otlpEventMetrics) MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { - switch backend { - case EventBackendKafka: - o.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) - case EventBackendRedis: - o.instruments.redisMessagesReceived.Add(ctx, count, opts...) - case EventBackendNats: - o.instruments.natsMessagesReceived.Add(ctx, count, opts...) - default: - o.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) - } +func (o *otlpEventMetrics) KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.kafkaMessagesReceived.Add(ctx, 1, opts...) +} + +func (o *otlpEventMetrics) RedisPublish(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.redisPublishMessages.Add(ctx, 1, opts...) +} + +func (o *otlpEventMetrics) RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.redisPublishFailures.Add(ctx, 1, opts...) +} + +func (o *otlpEventMetrics) RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.redisMessagesReceived.Add(ctx, 1, opts...) +} + +func (o *otlpEventMetrics) NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.natsPublishMessages.Add(ctx, 1, opts...) +} + +func (o *otlpEventMetrics) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.natsPublishFailures.Add(ctx, 1, opts...) 
+} + +func (o *otlpEventMetrics) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.natsMessagesReceived.Add(ctx, 1, opts...) } func (o *otlpEventMetrics) Flush(ctx context.Context) error { diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_event_metric_store.go index 828763b69e..fe1131e1b3 100644 --- a/router/pkg/metric/prom_event_metric_store.go +++ b/router/pkg/metric/prom_event_metric_store.go @@ -41,43 +41,40 @@ func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider }, nil } -func (p *promEventMetrics) Publish(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { - switch backend { - case EventBackendKafka: - p.instruments.kafkaPublishMessages.Add(ctx, count, opts...) - case EventBackendRedis: - p.instruments.redisPublishMessages.Add(ctx, count, opts...) - case EventBackendNats: - p.instruments.natsPublishMessages.Add(ctx, count, opts...) - default: - p.instruments.kafkaPublishMessages.Add(ctx, count, opts...) - } +func (p *promEventMetrics) KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.kafkaPublishMessages.Add(ctx, 1, opts...) } -func (p *promEventMetrics) PublishFailure(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { - switch backend { - case EventBackendKafka: - p.instruments.kafkaPublishFailures.Add(ctx, count, opts...) - case EventBackendRedis: - p.instruments.redisPublishFailures.Add(ctx, count, opts...) - case EventBackendNats: - p.instruments.natsPublishFailures.Add(ctx, count, opts...) - default: - p.instruments.kafkaPublishFailures.Add(ctx, count, opts...) - } +func (p *promEventMetrics) KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.kafkaPublishFailures.Add(ctx, 1, opts...) 
} -func (p *promEventMetrics) MessageReceived(ctx context.Context, backend string, count int64, opts ...otelmetric.AddOption) { - switch backend { - case EventBackendKafka: - p.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) - case EventBackendRedis: - p.instruments.redisMessagesReceived.Add(ctx, count, opts...) - case EventBackendNats: - p.instruments.natsMessagesReceived.Add(ctx, count, opts...) - default: - p.instruments.kafkaMessagesReceived.Add(ctx, count, opts...) - } +func (p *promEventMetrics) KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.kafkaMessagesReceived.Add(ctx, 1, opts...) +} + +func (p *promEventMetrics) RedisPublish(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.redisPublishMessages.Add(ctx, 1, opts...) +} + +func (p *promEventMetrics) RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.redisPublishFailures.Add(ctx, 1, opts...) +} + +func (p *promEventMetrics) RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.redisMessagesReceived.Add(ctx, 1, opts...) +} + +func (p *promEventMetrics) NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.natsPublishMessages.Add(ctx, 1, opts...) +} + +func (p *promEventMetrics) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.natsPublishFailures.Add(ctx, 1, opts...) +} + +func (p *promEventMetrics) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.natsMessagesReceived.Add(ctx, 1, opts...) 
} func (p *promEventMetrics) Flush(ctx context.Context) error { return p.meterProvider.ForceFlush(ctx) } diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index d6cee25ac8..fc786d207a 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -4,11 +4,12 @@ import ( "context" "errors" "fmt" - "github.com/wundergraph/cosmo/router/pkg/metric" "strings" "sync" "time" + "github.com/wundergraph/cosmo/router/pkg/metric" + "github.com/twmb/franz-go/pkg/kerr" "github.com/twmb/franz-go/pkg/kgo" "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" @@ -183,7 +184,7 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) } - p.eventMetricStore.Publish(ctx, "nats", 1) + p.eventMetricStore.KafkaPublish(ctx) return nil } diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index c8d5b05fb2..9ca36971f9 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -4,11 +4,12 @@ import ( "context" "errors" "fmt" - "github.com/wundergraph/cosmo/router/pkg/metric" "io" "sync" "time" + "github.com/wundergraph/cosmo/router/pkg/metric" + "github.com/cespare/xxhash/v2" "github.com/nats-io/nats.go" "github.com/nats-io/nats.go/jetstream" @@ -134,7 +135,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent for msg := range msgBatch.Messages() { log.Debug("subscription update", zap.String("message_subject", msg.Subject()), zap.ByteString("data", msg.Data())) - p.eventMetricStore.MessageReceived(p.ctx, "nats", 1) + p.eventMetricStore.NatsMessageReceived(p.ctx) updater.Update(msg.Data()) // Acknowledge the message after it has been processed @@ -172,7 +173,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent select { case msg := <-msgChan: log.Debug("subscription update", 
zap.String("message_subject", msg.Subject), zap.ByteString("data", msg.Data)) - p.eventMetricStore.MessageReceived(p.ctx, "nats", 1) + p.eventMetricStore.NatsMessageReceived(p.ctx) updater.Update(msg.Data) case <-p.ctx.Done(): // When the application context is done, we stop the subscriptions @@ -220,7 +221,7 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) } - p.eventMetricStore.Publish(ctx, "nats", 1) + p.eventMetricStore.NatsPublish(ctx) return nil } From 7bf5f3e106690a18621e12ce8ac6da41d7c6def7 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 15:28:28 +0530 Subject: [PATCH 03/40] fix: add request --- router/pkg/metric/event_measurements.go | 35 ++++++++++++++++++++ router/pkg/metric/event_metric_store.go | 14 ++++++++ router/pkg/metric/noop_event_metrics.go | 4 +++ router/pkg/metric/oltp_event_metric_store.go | 8 +++++ router/pkg/metric/prom_event_metric_store.go | 8 +++++ router/pkg/pubsub/kafka/adapter.go | 6 ++-- router/pkg/pubsub/nats/adapter.go | 9 +++-- 7 files changed, 80 insertions(+), 4 deletions(-) diff --git a/router/pkg/metric/event_measurements.go b/router/pkg/metric/event_measurements.go index 9f93ef37d7..e12bae8291 100644 --- a/router/pkg/metric/event_measurements.go +++ b/router/pkg/metric/event_measurements.go @@ -19,6 +19,8 @@ const ( natsPublishMessages = "router.nats.publish.messages" natsPublishFailures = "router.nats.publish.fail" natsMessagesReceived = "router.nats.messages.received" + natsRequests = "router.nats.request" + natsRequestFailures = "router.nats.request.fail" ) var ( @@ -51,6 +53,14 @@ var ( natsMessagesReceivedOptions = []otelmetric.Int64CounterOption{ otelmetric.WithDescription("Number of NATS messages received"), } + + // New NATS request counter options + natsRequestsOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of NATS requests"), + } + 
natsRequestFailuresOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of NATS request failures"), + } ) type eventInstruments struct { @@ -65,6 +75,10 @@ type eventInstruments struct { natsPublishMessages otelmetric.Int64Counter natsPublishFailures otelmetric.Int64Counter natsMessagesReceived otelmetric.Int64Counter + + // New NATS request instruments + natsRequests otelmetric.Int64Counter + natsRequestFailures otelmetric.Int64Counter } func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { @@ -140,6 +154,23 @@ func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { return nil, fmt.Errorf("failed to create nats messages received counter: %w", err) } + // New NATS request counters + natsRequestsCounter, err := meter.Int64Counter( + natsRequests, + natsRequestsOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create nats requests counter: %w", err) + } + + natsRequestFailuresCounter, err := meter.Int64Counter( + natsRequestFailures, + natsRequestFailuresOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create nats request failures counter: %w", err) + } + return &eventInstruments{ kafkaPublishMessages: kafkaPublishMessagesCounter, kafkaPublishFailures: kafkaPublishFailuresCounter, @@ -152,5 +183,9 @@ func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { natsPublishMessages: natsPublishMessagesCounter, natsPublishFailures: natsPublishFailuresCounter, natsMessagesReceived: natsMessagesReceivedCounter, + + // NATS request instruments + natsRequests: natsRequestsCounter, + natsRequestFailures: natsRequestFailuresCounter, }, nil } diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go index eebab60910..08fd1bb31f 100644 --- a/router/pkg/metric/event_metric_store.go +++ b/router/pkg/metric/event_metric_store.go @@ -32,6 +32,8 @@ type EventMetricProvider interface { NatsPublish(ctx context.Context, 
opts ...otelmetric.AddOption) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) + NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) + NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) Flush(ctx context.Context) error Shutdown() error @@ -132,6 +134,18 @@ func (e *EventMetrics) NatsMessageReceived(ctx context.Context, attrs ...attribu e.promMetrics.NatsMessageReceived(ctx, opts) } +func (e *EventMetrics) NatsRequest(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.NatsRequest(ctx, opts) + e.promMetrics.NatsRequest(ctx, opts) +} + +func (e *EventMetrics) NatsRequestFailure(ctx context.Context, attrs ...attribute.KeyValue) { + opts := e.withAttrs(attrs...) + e.otlpMetrics.NatsRequestFailure(ctx, opts) + e.promMetrics.NatsRequestFailure(ctx, opts) +} + // Flush flushes the metrics to the backend synchronously. func (e *EventMetrics) Flush(ctx context.Context) error { var err error diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go index 19d576cfc1..82173cfcc9 100644 --- a/router/pkg/metric/noop_event_metrics.go +++ b/router/pkg/metric/noop_event_metrics.go @@ -24,5 +24,9 @@ func (n *noopEventMetricProvider) NatsPublishFailure(ctx context.Context, opts . 
} func (n *noopEventMetricProvider) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { } + +func (n *noopEventMetricProvider) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) { +} func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } func (n *noopEventMetricProvider) Shutdown() error { return nil } diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_event_metric_store.go index 1803b9f3be..a600700076 100644 --- a/router/pkg/metric/oltp_event_metric_store.go +++ b/router/pkg/metric/oltp_event_metric_store.go @@ -77,6 +77,14 @@ func (o *otlpEventMetrics) NatsMessageReceived(ctx context.Context, opts ...otel o.instruments.natsMessagesReceived.Add(ctx, 1, opts...) } +func (o *otlpEventMetrics) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.natsRequests.Add(ctx, 1, opts...) +} + +func (o *otlpEventMetrics) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.natsRequestFailures.Add(ctx, 1, opts...) +} + func (o *otlpEventMetrics) Flush(ctx context.Context) error { return o.meterProvider.ForceFlush(ctx) } diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_event_metric_store.go index fe1131e1b3..32865ef3ac 100644 --- a/router/pkg/metric/prom_event_metric_store.go +++ b/router/pkg/metric/prom_event_metric_store.go @@ -77,5 +77,13 @@ func (p *promEventMetrics) NatsMessageReceived(ctx context.Context, opts ...otel p.instruments.natsMessagesReceived.Add(ctx, 1, opts...) } +func (p *promEventMetrics) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.natsRequests.Add(ctx, 1, opts...) +} + +func (p *promEventMetrics) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.natsRequestFailures.Add(ctx, 1, opts...) 
+} + func (p *promEventMetrics) Flush(ctx context.Context) error { return p.meterProvider.ForceFlush(ctx) } func (p *promEventMetrics) Shutdown() error { return nil } diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index fc786d207a..74c7f5b9cf 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -91,6 +91,7 @@ func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, u r := iter.Next() p.logger.Debug("subscription update", zap.String("topic", r.Topic), zap.ByteString("data", r.Value)) + p.eventMetricStore.KafkaMessageReceived(p.ctx) updater.Update(r.Value) } } @@ -181,11 +182,12 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu if pErr != nil { log.Error("publish error", zap.Error(pErr)) + p.eventMetricStore.KafkaPublishFailure(ctx) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) + } else { + p.eventMetricStore.KafkaPublish(ctx) } - p.eventMetricStore.KafkaPublish(ctx) - return nil } diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index 9ca36971f9..0dba6ed50d 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -218,11 +218,12 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv err := p.client.Publish(event.Subject, event.Data) if err != nil { log.Error("publish error", zap.Error(err)) + p.eventMetricStore.NatsPublishFailure(ctx) return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) + } else { + p.eventMetricStore.NatsPublish(ctx) } - p.eventMetricStore.NatsPublish(ctx) - return nil } @@ -242,9 +243,13 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv msg, err := p.client.RequestWithContext(ctx, event.Subject, event.Data) if err != nil { log.Error("request error", zap.Error(err)) + 
p.eventMetricStore.NatsRequestFailure(ctx) return datasource.NewError(fmt.Sprintf("error requesting from NATS subject %s", event.Subject), err) + } else { + p.eventMetricStore.NatsRequest(ctx) } + // We don't collect metrics on err here as it's an error related to the writer _, err = w.Write(msg.Data) if err != nil { log.Error("error writing response to writer", zap.Error(err)) From 695f5290791e548e074660d0cc53f85f35aa0736 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 17:08:42 +0530 Subject: [PATCH 04/40] fix: refactoring --- router/core/factoryresolver.go | 2 +- router/core/graph_server.go | 5 +- router/pkg/metric/event_metric_store.go | 75 +++++++++++-------- router/pkg/metric/noop_event_metrics.go | 43 +++++++++++ .../metric/oltp_connection_metric_store.go | 6 +- router/pkg/metric/oltp_event_metric_store.go | 3 +- .../metric/prom_connection_metric_store.go | 6 +- router/pkg/metric/prom_event_metric_store.go | 3 +- router/pkg/otel/attributes.go | 11 ++- router/pkg/pubsub/datasource/provider.go | 2 +- router/pkg/pubsub/kafka/adapter.go | 12 +-- router/pkg/pubsub/nats/adapter.go | 14 ++-- router/pkg/pubsub/pubsub.go | 4 +- router/pkg/pubsub/redis/adapter.go | 9 ++- 14 files changed, 137 insertions(+), 58 deletions(-) diff --git a/router/core/factoryresolver.go b/router/core/factoryresolver.go index 6355015c82..75f640c559 100644 --- a/router/core/factoryresolver.go +++ b/router/core/factoryresolver.go @@ -212,7 +212,7 @@ type RouterEngineConfiguration struct { Headers *config.HeaderRules Events config.EventsConfiguration SubgraphErrorPropagation config.SubgraphErrorPropagationConfiguration - EventMetricStore *rmetric.EventMetrics + EventMetricStore rmetric.EventMetricStore } func mapProtoFilterToPlanFilter(input *nodev1.SubscriptionFilterCondition, output *plan.SubscriptionFilterCondition) *plan.SubscriptionFilterCondition { diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 86d65f205d..52fba3cadd 100644 --- 
a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -514,7 +514,7 @@ type graphMux struct { metricStore rmetric.Store prometheusCacheMetrics *rmetric.CacheMetrics otelCacheMetrics *rmetric.CacheMetrics - eventMetricStore *rmetric.EventMetrics + eventMetricStore rmetric.EventMetricStore } // buildOperationCaches creates the caches for the graph mux. @@ -777,7 +777,8 @@ func (s *graphServer) buildGraphMux( opts BuildGraphMuxOptions, ) (*graphMux, error) { gm := &graphMux{ - metricStore: rmetric.NewNoopMetrics(), + metricStore: rmetric.NewNoopMetrics(), + eventMetricStore: rmetric.NewNoopEventMetricStore(), } httpRouter := chi.NewRouter() diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go index 08fd1bb31f..3afef1c173 100644 --- a/router/pkg/metric/event_metric_store.go +++ b/router/pkg/metric/event_metric_store.go @@ -9,13 +9,8 @@ import ( otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/sdk/metric" "go.uber.org/zap" -) -// EventBackend represents supported backends -const ( - EventBackendKafka = "kafka" - EventBackendRedis = "redis" - EventBackendNats = "nats" + otelattrs "github.com/wundergraph/cosmo/router/pkg/otel" ) // EventMetricProvider is the interface that wraps the basic Event metric methods. 
@@ -32,6 +27,7 @@ type EventMetricProvider interface { NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) + NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) @@ -39,6 +35,25 @@ type EventMetricProvider interface { Shutdown() error } +type EventMetricStore interface { + KafkaPublish(ctx context.Context, providerID string, topic string) + KafkaPublishFailure(ctx context.Context, providerID string, topic string) + KafkaMessageReceived(ctx context.Context, providerID string, topic string) + + RedisPublish(ctx context.Context, providerID string, channel string) + RedisPublishFailure(ctx context.Context, providerID string, channel string) + RedisMessageReceived(ctx context.Context, providerID string, channel string) + + NatsPublish(ctx context.Context, providerID string, subject string) + NatsPublishFailure(ctx context.Context, providerID string, subject string) + NatsMessageReceived(ctx context.Context, providerID string, subject string) + NatsRequest(ctx context.Context, providerID string, subject string) + NatsRequestFailure(ctx context.Context, providerID string, subject string) + + Flush(ctx context.Context) error + Shutdown(ctx context.Context) error +} + // EventMetrics is the store for Event (Kafka/Redis/NATS) metrics. 
type EventMetrics struct { baseAttributes []attribute.KeyValue @@ -57,7 +72,7 @@ func NewEventMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue } if metricsConfig.OpenTelemetry.EventMetrics { - otlpMetrics, err := newOtlpEventMetrics(logger, otelProvider, baseAttributes) + otlpMetrics, err := newOtlpEventMetrics(logger, otelProvider) if err != nil { return nil, fmt.Errorf("failed to create otlp event metrics: %w", err) } @@ -65,7 +80,7 @@ func NewEventMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue } if metricsConfig.Prometheus.EventMetrics { - promMetrics, err := newPromEventMetrics(logger, promProvider, baseAttributes) + promMetrics, err := newPromEventMetrics(logger, promProvider) if err != nil { return nil, fmt.Errorf("failed to create prometheus event metrics: %w", err) } @@ -80,68 +95,68 @@ func (e *EventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOpti return otelmetric.WithAttributes(append(copied, attrs...)...) } -func (e *EventMetrics) KafkaPublish(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) KafkaPublish(ctx context.Context, providerID string, topic string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic)) e.otlpMetrics.KafkaPublish(ctx, opts) e.promMetrics.KafkaPublish(ctx, opts) } -func (e *EventMetrics) KafkaPublishFailure(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) KafkaPublishFailure(ctx context.Context, providerID string, topic string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic)) e.otlpMetrics.KafkaPublishFailure(ctx, opts) e.promMetrics.KafkaPublishFailure(ctx, opts) } -func (e *EventMetrics) KafkaMessageReceived(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) 
+func (e *EventMetrics) KafkaMessageReceived(ctx context.Context, providerID string, topic string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic)) e.otlpMetrics.KafkaMessageReceived(ctx, opts) e.promMetrics.KafkaMessageReceived(ctx, opts) } -func (e *EventMetrics) RedisPublish(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) RedisPublish(ctx context.Context, providerID string, channel string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel)) e.otlpMetrics.RedisPublish(ctx, opts) e.promMetrics.RedisPublish(ctx, opts) } -func (e *EventMetrics) RedisPublishFailure(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) RedisPublishFailure(ctx context.Context, providerID string, channel string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel)) e.otlpMetrics.RedisPublishFailure(ctx, opts) e.promMetrics.RedisPublishFailure(ctx, opts) } -func (e *EventMetrics) RedisMessageReceived(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) RedisMessageReceived(ctx context.Context, providerID string, channel string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel)) e.otlpMetrics.RedisMessageReceived(ctx, opts) e.promMetrics.RedisMessageReceived(ctx, opts) } -func (e *EventMetrics) NatsPublish(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) 
+func (e *EventMetrics) NatsPublish(ctx context.Context, providerID string, subject string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) e.otlpMetrics.NatsPublish(ctx, opts) e.promMetrics.NatsPublish(ctx, opts) } -func (e *EventMetrics) NatsPublishFailure(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) NatsPublishFailure(ctx context.Context, providerID string, subject string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) e.otlpMetrics.NatsPublishFailure(ctx, opts) e.promMetrics.NatsPublishFailure(ctx, opts) } -func (e *EventMetrics) NatsMessageReceived(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) NatsMessageReceived(ctx context.Context, providerID string, subject string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) e.otlpMetrics.NatsMessageReceived(ctx, opts) e.promMetrics.NatsMessageReceived(ctx, opts) } -func (e *EventMetrics) NatsRequest(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) +func (e *EventMetrics) NatsRequest(ctx context.Context, providerID string, subject string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) e.otlpMetrics.NatsRequest(ctx, opts) e.promMetrics.NatsRequest(ctx, opts) } -func (e *EventMetrics) NatsRequestFailure(ctx context.Context, attrs ...attribute.KeyValue) { - opts := e.withAttrs(attrs...) 
+func (e *EventMetrics) NatsRequestFailure(ctx context.Context, providerID string, subject string) { + opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) e.otlpMetrics.NatsRequestFailure(ctx, opts) e.promMetrics.NatsRequestFailure(ctx, opts) } diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go index 82173cfcc9..246dd5b28a 100644 --- a/router/pkg/metric/noop_event_metrics.go +++ b/router/pkg/metric/noop_event_metrics.go @@ -30,3 +30,46 @@ func (n *noopEventMetricProvider) NatsRequestFailure(ctx context.Context, opts . } func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } func (n *noopEventMetricProvider) Shutdown() error { return nil } + +type NoopEventMetricStore struct{} + +func (n *NoopEventMetricStore) KafkaPublish(ctx context.Context, providerID string, topic string) {} + +func (n *NoopEventMetricStore) KafkaPublishFailure(ctx context.Context, providerID string, topic string) { +} + +func (n *NoopEventMetricStore) KafkaMessageReceived(ctx context.Context, providerID string, topic string) { +} + +func (n *NoopEventMetricStore) RedisPublish(ctx context.Context, providerID string, channel string) {} + +func (n *NoopEventMetricStore) RedisPublishFailure(ctx context.Context, providerID string, channel string) { +} + +func (n *NoopEventMetricStore) RedisMessageReceived(ctx context.Context, providerID string, channel string) { +} + +func (n *NoopEventMetricStore) NatsPublish(ctx context.Context, providerID string, subject string) {} + +func (n *NoopEventMetricStore) NatsPublishFailure(ctx context.Context, providerID string, subject string) { +} + +func (n *NoopEventMetricStore) NatsMessageReceived(ctx context.Context, providerID string, subject string) { +} + +func (n *NoopEventMetricStore) NatsRequest(ctx context.Context, providerID string, subject string) {} + +func (n *NoopEventMetricStore) NatsRequestFailure(ctx context.Context, 
providerID string, subject string) { +} + +func (n *NoopEventMetricStore) Flush(ctx context.Context) error { + return nil +} + +func (n *NoopEventMetricStore) Shutdown(ctx context.Context) error { + return nil +} + +func NewNoopEventMetricStore() *NoopEventMetricStore { + return &NoopEventMetricStore{} +} diff --git a/router/pkg/metric/oltp_connection_metric_store.go b/router/pkg/metric/oltp_connection_metric_store.go index 0a4b59e27b..b2e9c3e92a 100644 --- a/router/pkg/metric/oltp_connection_metric_store.go +++ b/router/pkg/metric/oltp_connection_metric_store.go @@ -43,7 +43,11 @@ func newOtlpConnectionMetrics(logger *zap.Logger, meterProvider *metric.MeterPro meter: meter, } - metrics.startInitMetrics(stats, baseAttributes) + err = metrics.startInitMetrics(stats, baseAttributes) + if err != nil { + logger.Error("failed to start initial connection metrics", zap.Error(err)) + } + return metrics, nil } diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_event_metric_store.go index a600700076..fd494af0a0 100644 --- a/router/pkg/metric/oltp_event_metric_store.go +++ b/router/pkg/metric/oltp_event_metric_store.go @@ -4,7 +4,6 @@ import ( "context" "fmt" - "go.opentelemetry.io/otel/attribute" otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/sdk/metric" "go.uber.org/zap" @@ -22,7 +21,7 @@ type otlpEventMetrics struct { meter otelmetric.Meter } -func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider, baseAttributes []attribute.KeyValue) (*otlpEventMetrics, error) { +func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*otlpEventMetrics, error) { meter := meterProvider.Meter( cosmoRouterEventMeterName, otelmetric.WithInstrumentationVersion(cosmoRouterEventMeterVersion), diff --git a/router/pkg/metric/prom_connection_metric_store.go b/router/pkg/metric/prom_connection_metric_store.go index 32ecfc342e..24248bb671 100644 --- 
a/router/pkg/metric/prom_connection_metric_store.go +++ b/router/pkg/metric/prom_connection_metric_store.go @@ -43,7 +43,11 @@ func newPromConnectionMetrics(logger *zap.Logger, meterProvider *metric.MeterPro logger: logger, } - metrics.startInitMetrics(stats, attributes) + err = metrics.startInitMetrics(stats, attributes) + if err != nil { + logger.Error("failed to start initial connection metrics", zap.Error(err)) + } + return metrics, nil } diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_event_metric_store.go index 32865ef3ac..5e87561657 100644 --- a/router/pkg/metric/prom_event_metric_store.go +++ b/router/pkg/metric/prom_event_metric_store.go @@ -4,7 +4,6 @@ import ( "context" "fmt" - "go.opentelemetry.io/otel/attribute" otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/sdk/metric" "go.uber.org/zap" @@ -22,7 +21,7 @@ type promEventMetrics struct { meter otelmetric.Meter } -func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider, baseAttributes []attribute.KeyValue) (*promEventMetrics, error) { +func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*promEventMetrics, error) { meter := meterProvider.Meter( cosmoRouterEventPromMeterName, otelmetric.WithInstrumentationVersion(cosmoRouterEventPromMeterVersion), diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index d1ee5db243..f3abcfe015 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -1,8 +1,9 @@ package otel import ( - "go.opentelemetry.io/otel/attribute" "net" + + "go.opentelemetry.io/otel/attribute" ) const ( @@ -59,6 +60,14 @@ const ( WgGraphQLParentType = attribute.Key("wg.graphql.parent_type") ) +// Event metrics attributes +const ( + WgEventProviderID = attribute.Key("wg.event.provider.id") + WgKafkaTopic = attribute.Key("wg.kafka.topic") + WgNatsSubject = attribute.Key("wg.nats.subject") + WgRedisChannel = attribute.Key("wg.redis.channel") 
+) + const ( CacheMetricsOperationTypeAdded = "added" CacheMetricsOperationTypeUpdated = "updated" diff --git a/router/pkg/pubsub/datasource/provider.go b/router/pkg/pubsub/datasource/provider.go index b97a3941b8..8dd3a9b609 100644 --- a/router/pkg/pubsub/datasource/provider.go +++ b/router/pkg/pubsub/datasource/provider.go @@ -34,5 +34,5 @@ type ProviderBuilder[P, E any] interface { } type ProviderOpts struct { - EventMetricStore *metric.EventMetrics + EventMetricStore metric.EventMetricStore } diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 74c7f5b9cf..2f7fca6690 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -41,11 +41,11 @@ type ProviderAdapter struct { writeClient *kgo.Client closeWg sync.WaitGroup cancel context.CancelFunc - eventMetricStore *metric.EventMetrics + eventMetricStore metric.EventMetricStore } // topicPoller polls the Kafka topic for new records and calls the updateTriggers function. 
-func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, updater resolve.SubscriptionUpdater) error { +func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, updater resolve.SubscriptionUpdater, providerId string) error { for { select { case <-p.ctx.Done(): // Close the poller if the application context was canceled @@ -91,7 +91,7 @@ func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, u r := iter.Next() p.logger.Debug("subscription update", zap.String("topic", r.Topic), zap.ByteString("data", r.Value)) - p.eventMetricStore.KafkaMessageReceived(p.ctx) + p.eventMetricStore.KafkaMessageReceived(p.ctx, providerId, r.Topic) updater.Update(r.Value) } } @@ -132,7 +132,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent defer p.closeWg.Done() - err := p.topicPoller(ctx, client, updater) + err := p.topicPoller(ctx, client, updater, event.ProviderID) if err != nil { if errors.Is(err, errClientClosed) || errors.Is(err, context.Canceled) { log.Debug("poller canceled", zap.Error(err)) @@ -182,10 +182,10 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu if pErr != nil { log.Error("publish error", zap.Error(pErr)) - p.eventMetricStore.KafkaPublishFailure(ctx) + p.eventMetricStore.KafkaPublishFailure(ctx, event.ProviderID, event.Topic) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) } else { - p.eventMetricStore.KafkaPublish(ctx) + p.eventMetricStore.KafkaPublish(ctx, event.ProviderID, event.Topic) } return nil diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index 0dba6ed50d..a7e8e2df22 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -44,7 +44,7 @@ type ProviderAdapter struct { url string opts []nats.Option flushTimeout time.Duration - eventMetricStore *metric.EventMetrics + eventMetricStore metric.EventMetricStore } 
// getInstanceIdentifier returns an identifier for the current instance. @@ -135,7 +135,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent for msg := range msgBatch.Messages() { log.Debug("subscription update", zap.String("message_subject", msg.Subject()), zap.ByteString("data", msg.Data())) - p.eventMetricStore.NatsMessageReceived(p.ctx) + p.eventMetricStore.NatsMessageReceived(p.ctx, event.ProviderID, msg.Subject()) updater.Update(msg.Data()) // Acknowledge the message after it has been processed @@ -173,7 +173,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent select { case msg := <-msgChan: log.Debug("subscription update", zap.String("message_subject", msg.Subject), zap.ByteString("data", msg.Data)) - p.eventMetricStore.NatsMessageReceived(p.ctx) + p.eventMetricStore.NatsMessageReceived(p.ctx, event.ProviderID, msg.Subject) updater.Update(msg.Data) case <-p.ctx.Done(): // When the application context is done, we stop the subscriptions @@ -218,10 +218,10 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv err := p.client.Publish(event.Subject, event.Data) if err != nil { log.Error("publish error", zap.Error(err)) - p.eventMetricStore.NatsPublishFailure(ctx) + p.eventMetricStore.NatsPublishFailure(ctx, event.ProviderID, event.Subject) return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) } else { - p.eventMetricStore.NatsPublish(ctx) + p.eventMetricStore.NatsPublish(ctx, event.ProviderID, event.Subject) } return nil @@ -243,10 +243,10 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv msg, err := p.client.RequestWithContext(ctx, event.Subject, event.Data) if err != nil { log.Error("request error", zap.Error(err)) - p.eventMetricStore.NatsRequestFailure(ctx) + p.eventMetricStore.NatsRequestFailure(ctx, event.ProviderID, event.Subject) return datasource.NewError(fmt.Sprintf("error 
requesting from NATS subject %s", event.Subject), err) } else { - p.eventMetricStore.NatsRequest(ctx) + p.eventMetricStore.NatsRequest(ctx, event.ProviderID, event.Subject) } // We don't collect metrics on err here as it's an error related to the writer diff --git a/router/pkg/pubsub/pubsub.go b/router/pkg/pubsub/pubsub.go index f30b77abe7..9fe3be2fbc 100644 --- a/router/pkg/pubsub/pubsub.go +++ b/router/pkg/pubsub/pubsub.go @@ -52,7 +52,7 @@ func (e *ProviderNotDefinedError) Error() string { // BuildProvidersAndDataSources is a generic function that builds providers and data sources for the given // EventsConfiguration and DataSourceConfigurationWithMetadata -func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfiguration, store *metric.EventMetrics, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { +func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfiguration, store metric.EventMetricStore, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { var pubSubProviders []pubsub_datasource.Provider var outs []plan.DataSource @@ -113,7 +113,7 @@ func build[P GetID, E GetEngineEventConfiguration]( builder pubsub_datasource.ProviderBuilder[P, E], providersData []P, dsConfs []dsConfAndEvents[E], - store *metric.EventMetrics, + store metric.EventMetricStore, ) ([]pubsub_datasource.Provider, []plan.DataSource, error) { var pubSubProviders []pubsub_datasource.Provider var outs []plan.DataSource diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index 916c6f29d5..939b5fa109 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -3,9 +3,10 @@ package redis import ( "context" "fmt" - 
"github.com/wundergraph/cosmo/router/pkg/metric" "sync" + "github.com/wundergraph/cosmo/router/pkg/metric" + rd "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/redis" "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" @@ -44,7 +45,7 @@ type ProviderAdapter struct { closeWg sync.WaitGroup urls []string clusterEnabled bool - eventMetricStore *metric.EventMetrics + eventMetricStore metric.EventMetricStore } func (p *ProviderAdapter) Startup(ctx context.Context) error { @@ -110,6 +111,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent return } log.Debug("subscription update", zap.String("message_channel", msg.Channel), zap.String("data", msg.Payload)) + p.eventMetricStore.RedisMessageReceived(ctx, event.ProviderID, msg.Channel) updater.Update([]byte(msg.Payload)) case <-p.ctx.Done(): // When the application context is done, we stop the subscription if it is not already done @@ -148,7 +150,10 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu intCmd := p.conn.Publish(ctx, event.Channel, data) if intCmd.Err() != nil { log.Error("publish error", zap.Error(intCmd.Err())) + p.eventMetricStore.RedisPublishFailure(ctx, event.ProviderID, event.Channel) return datasource.NewError(fmt.Sprintf("error publishing to Redis PubSub channel %s", event.Channel), intCmd.Err()) + } else { + p.eventMetricStore.RedisPublish(ctx, event.ProviderID, event.Channel) } return nil From 192f09537d6fbcfe6090ab0587934ec651189010 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 17:58:33 +0530 Subject: [PATCH 05/40] fix: add tests --- router-tests/event_metrics_prometheus_test.go | 64 +++++++++++++++++++ router-tests/events/events_config_test.go | 2 +- router-tests/events/kafka_events_test.go | 49 +++++--------- router-tests/events/nats_events_test.go | 2 +- router-tests/events/redis_events_test.go | 2 +- 
router-tests/events/util.go | 28 ++++++++ router-tests/testenv/testenv.go | 4 ++ 7 files changed, 114 insertions(+), 37 deletions(-) create mode 100644 router-tests/event_metrics_prometheus_test.go create mode 100644 router-tests/events/util.go diff --git a/router-tests/event_metrics_prometheus_test.go b/router-tests/event_metrics_prometheus_test.go new file mode 100644 index 0000000000..c5307d95b3 --- /dev/null +++ b/router-tests/event_metrics_prometheus_test.go @@ -0,0 +1,64 @@ +package integration + +import ( + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + events_test "github.com/wundergraph/cosmo/router-tests/events" + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/pkg/trace/tracetest" + "go.opentelemetry.io/otel/sdk/metric" +) + +func TestKafkaPublishMetricsPrometheus(t *testing.T) { + t.Run("verify apache kafka publish recorded", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, + EnableKafka: true, + MetricOptions: testenv.MetricOptions{ + EnablePrometheusEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + events_test.EnsureTopicExists(t, xEnv, "employeeUpdated") + + // First Publish + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`, + }) + require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) + + // Second Publish + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`, + }) + 
require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) + + mf, err := promRegistry.Gather() + require.NoError(t, err) + + family := findMetricFamilyByName(mf, "router_kafka_publish_messages_total") + require.NotNil(t, family, "expected router_kafka_publish_messages_total metric family") + + metrics := family.GetMetric() + require.Len(t, metrics, 1) + require.NotEmpty(t, metrics) + + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + topic := findMetricLabelByName(metrics, "wg_kafka_topic") + + require.Equal(t, "my-kafka", eventProvider.GetValue()) + require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) + }) + }) +} diff --git a/router-tests/events/events_config_test.go b/router-tests/events/events_config_test.go index 50d19dbaed..f7e0739e1c 100644 --- a/router-tests/events/events_config_test.go +++ b/router-tests/events/events_config_test.go @@ -1,4 +1,4 @@ -package events_test +package events import ( "testing" diff --git a/router-tests/events/kafka_events_test.go b/router-tests/events/kafka_events_test.go index 05f4250003..2a3619e4e0 100644 --- a/router-tests/events/kafka_events_test.go +++ b/router-tests/events/kafka_events_test.go @@ -1,4 +1,4 @@ -package events_test +package events import ( "bufio" @@ -74,7 +74,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) var subscriptionOne struct { employeeUpdatedMyKafka struct { @@ -130,7 +130,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) 
var subscriptionOne struct { employeeUpdatedMyKafka struct { @@ -204,7 +204,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) var subscriptionOne struct { employeeUpdatedMyKafka struct { @@ -277,7 +277,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) var subscriptionOne struct { employeeUpdatedMyKafka struct { @@ -366,7 +366,7 @@ func TestKafkaEvents(t *testing.T) { engineExecutionConfiguration.WebSocketClientReadTimeout = time.Millisecond * 100 }, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) var subscriptionOne struct { employeeUpdatedMyKafka struct { @@ -431,7 +431,7 @@ func TestKafkaEvents(t *testing.T) { core.WithMultipartHeartbeatInterval(multipartHeartbeatInterval), }, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) subscribePayload := []byte(`{"query":"subscription { employeeUpdatedMyKafka(employeeID: 1) { id details { forename surname } }}"}`) @@ -497,7 +497,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) 
subscribePayload := []byte(`{"query":"subscription { employeeUpdatedMyKafka(employeeID: 1) { id details { forename surname } }}"}`) @@ -562,7 +562,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) subscribePayload := []byte(`{"query":"subscription { employeeUpdatedMyKafka(employeeID: 1) { id details { forename surname } }}"}`) @@ -672,7 +672,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) type subscriptionPayload struct { Data struct { @@ -739,7 +739,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) type subscriptionPayload struct { Data struct { @@ -806,7 +806,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) type subscriptionPayload struct { Data struct { @@ -861,7 +861,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) 
var subscriptionOne struct { employeeUpdatedMyKafka struct { @@ -932,7 +932,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) // Send a mutation to trigger the first subscription resOne := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ @@ -980,7 +980,7 @@ func TestKafkaEvents(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, }, func(t *testing.T, xEnv *testenv.Environment) { - ensureTopicExists(t, xEnv, topics...) + EnsureTopicExists(t, xEnv, topics...) type subscriptionPayload struct { Data struct { @@ -1042,25 +1042,6 @@ func TestKafkaEvents(t *testing.T) { }) } -func ensureTopicExists(t *testing.T, xEnv *testenv.Environment, topics ...string) { - // Delete topic for idempotency - deleteCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - prefixedTopics := make([]string, len(topics)) - for _, topic := range topics { - prefixedTopics = append(prefixedTopics, xEnv.GetPubSubName(topic)) - } - - _, err := xEnv.KafkaAdminClient.DeleteTopics(deleteCtx, prefixedTopics...) - require.NoError(t, err) - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - _, err = xEnv.KafkaAdminClient.CreateTopics(ctx, 1, 1, nil, prefixedTopics...) 
- require.NoError(t, err) -} - func produceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() diff --git a/router-tests/events/nats_events_test.go b/router-tests/events/nats_events_test.go index e442ccf9be..d3235643b4 100644 --- a/router-tests/events/nats_events_test.go +++ b/router-tests/events/nats_events_test.go @@ -1,4 +1,4 @@ -package events_test +package events import ( "bufio" diff --git a/router-tests/events/redis_events_test.go b/router-tests/events/redis_events_test.go index 407e9a9348..aeaa1989be 100644 --- a/router-tests/events/redis_events_test.go +++ b/router-tests/events/redis_events_test.go @@ -1,4 +1,4 @@ -package events_test +package events import ( "bufio" diff --git a/router-tests/events/util.go b/router-tests/events/util.go new file mode 100644 index 0000000000..2bd3d055fe --- /dev/null +++ b/router-tests/events/util.go @@ -0,0 +1,28 @@ +package events + +import ( + "context" + "github.com/stretchr/testify/require" + "github.com/wundergraph/cosmo/router-tests/testenv" + "testing" + "time" +) + +func EnsureTopicExists(t *testing.T, xEnv *testenv.Environment, topics ...string) { + // Delete topic for idempotency + deleteCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + prefixedTopics := make([]string, len(topics)) + for _, topic := range topics { + prefixedTopics = append(prefixedTopics, xEnv.GetPubSubName(topic)) + } + + _, err := xEnv.KafkaAdminClient.DeleteTopics(deleteCtx, prefixedTopics...) + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + _, err = xEnv.KafkaAdminClient.CreateTopics(ctx, 1, 1, nil, prefixedTopics...) 
+ require.NoError(t, err) +} diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index b76d67b77d..3924d08baf 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -270,8 +270,10 @@ type MetricOptions struct { PrometheusSchemaFieldUsage PrometheusSchemaFieldUsage EnableOTLPConnectionMetrics bool EnableOTLPCircuitBreakerMetrics bool + EnableOTLPEventMetrics bool EnablePrometheusConnectionMetrics bool EnablePrometheusCircuitBreakerMetrics bool + EnablePrometheusEventMetrics bool } type PrometheusSchemaFieldUsage struct { @@ -1503,6 +1505,7 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node CircuitBreaker: testConfig.MetricOptions.EnablePrometheusCircuitBreakerMetrics, ExcludeMetrics: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetrics, ExcludeMetricLabels: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetricLabels, + EventMetrics: testConfig.MetricOptions.EnablePrometheusEventMetrics, ExcludeScopeInfo: testConfig.MetricOptions.MetricExclusions.ExcludeScopeInfo, PromSchemaFieldUsage: rmetric.PrometheusSchemaFieldUsage{ Enabled: testConfig.MetricOptions.PrometheusSchemaFieldUsage.Enabled, @@ -1525,6 +1528,7 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node Enabled: true, RouterRuntime: testConfig.MetricOptions.EnableRuntimeMetrics, GraphqlCache: testConfig.MetricOptions.EnableOTLPRouterCache, + EventMetrics: testConfig.MetricOptions.EnableOTLPEventMetrics, ConnectionStats: testConfig.MetricOptions.EnableOTLPConnectionMetrics, EngineStats: config.EngineStats{ Subscriptions: testConfig.MetricOptions.OTLPEngineStatsOptions.EnableSubscription, From c4fddad8a82d3933e9fca0c874bea48dacc59df9 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 18:23:33 +0530 Subject: [PATCH 06/40] fix: kafka tests --- router-tests/event_metrics_prometheus_test.go | 93 ++++++++++++++++++- 
router-tests/events/kafka_events_test.go | 67 +++++-------- router-tests/events/util.go | 24 +++++ 3 files changed, 137 insertions(+), 47 deletions(-) diff --git a/router-tests/event_metrics_prometheus_test.go b/router-tests/event_metrics_prometheus_test.go index c5307d95b3..c322f6de9e 100644 --- a/router-tests/event_metrics_prometheus_test.go +++ b/router-tests/event_metrics_prometheus_test.go @@ -1,12 +1,14 @@ package integration import ( + "github.com/hasura/go-graphql-client" "strings" "testing" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" - events_test "github.com/wundergraph/cosmo/router-tests/events" + "github.com/wundergraph/cosmo/router-tests/events" "github.com/wundergraph/cosmo/router-tests/testenv" "github.com/wundergraph/cosmo/router/pkg/trace/tracetest" "go.opentelemetry.io/otel/sdk/metric" @@ -28,7 +30,7 @@ func TestKafkaPublishMetricsPrometheus(t *testing.T) { EnablePrometheusEventMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { - events_test.EnsureTopicExists(t, xEnv, "employeeUpdated") + events.EnsureTopicExists(t, xEnv, "employeeUpdated") // First Publish res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ @@ -61,4 +63,91 @@ func TestKafkaPublishMetricsPrometheus(t *testing.T) { require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) }) + + t.Run("verify apache kafka subscription received recorded", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + + topic := "employeeUpdated" + + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, + EnableKafka: true, + MetricOptions: testenv.MetricOptions{ + EnablePrometheusEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + events.EnsureTopicExists(t, xEnv, 
"employeeUpdated") + + var subscriptionOne struct { + employeeUpdatedMyKafka struct { + ID float64 `graphql:"id"` + Details struct { + Forename string `graphql:"forename"` + Surname string `graphql:"surname"` + } `graphql:"details"` + } `graphql:"employeeUpdatedMyKafka(employeeID: 3)"` + } + + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) + + subscriptionArgsCh := make(chan kafkaSubscriptionArgs) + subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { + subscriptionArgsCh <- kafkaSubscriptionArgs{ + dataValue: dataValue, + errValue: errValue, + } + return nil + }) + require.NoError(t, err) + require.NotEmpty(t, subscriptionOneID) + + clientRunCh := make(chan error) + go func() { + clientRunCh <- client.Run() + }() + + xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) + + events.ProduceKafkaMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { + require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) + + mf, err := promRegistry.Gather() + require.NoError(t, err) + + family := findMetricFamilyByName(mf, "router_kafka_messages_received_total") + require.NotNil(t, family) + + metrics := family.GetMetric() + require.Len(t, metrics, 1) + require.NotEmpty(t, metrics) + + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + topic := findMetricLabelByName(metrics, "wg_kafka_topic") + + require.Equal(t, "my-kafka", eventProvider.GetValue()) + require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) + }) + + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, clientRunCh, func(t *testing.T, err error) { + require.NoError(t, err) + }, 
"unable to close client before timeout") + }) + }) +} + +type kafkaSubscriptionArgs struct { + dataValue []byte + errValue error } + +const KafkaWaitTimeout = time.Second * 30 diff --git a/router-tests/events/kafka_events_test.go b/router-tests/events/kafka_events_test.go index 2a3619e4e0..8e98b25c11 100644 --- a/router-tests/events/kafka_events_test.go +++ b/router-tests/events/kafka_events_test.go @@ -21,8 +21,6 @@ import ( "github.com/wundergraph/cosmo/router/pkg/config" ) -const KafkaWaitTimeout = time.Second * 30 - func assertKafkaLineEquals(t *testing.T, reader *bufio.Reader, expected string) { t.Helper() line, _, err := reader.ReadLine() @@ -107,7 +105,7 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) @@ -164,23 +162,23 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], ``) // Empty message + ProduceKafkaMessage(t, xEnv, topics[0], ``) // Empty message testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.ErrorContains(t, args.errValue, "Invalid message received") }) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, 
`{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) }) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) // Missing entity = Resolver error + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) // Missing entity = Resolver error testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.ErrorContains(t, args.errValue, "Cannot return null for non-nullable field 'Subscription.employeeUpdatedMyKafka.id'.") }) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) @@ -248,7 +246,7 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(2, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) @@ -321,7 +319,7 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(2, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, 
func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) @@ -333,7 +331,7 @@ func TestKafkaEvents(t *testing.T) { require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) }) - produceKafkaMessage(t, xEnv, topics[1], `{"__typename":"Employee","id": 2,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[1], `{"__typename":"Employee","id": 2,"update":{"name":"foo"}}`) testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) @@ -399,7 +397,7 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) @@ -447,10 +445,10 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) assertKafkaMultipartValueEventually(t, reader, "{\"payload\":{\"data\":{\"employeeUpdatedMyKafka\":{\"id\":1,\"details\":{\"forename\":\"Jens\",\"surname\":\"Neuse\"}}}}}") - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) assertKafkaMultipartValueEventually(t, reader, "{\"payload\":{\"data\":{\"employeeUpdatedMyKafka\":{\"id\":1,\"details\":{\"forename\":\"Jens\",\"surname\":\"Neuse\"}}}}}") }) }) @@ -530,7 +528,7 @@ func 
TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) testenv.AwaitChannelWithT(t, KafkaWaitTimeout, responseCh, func(t *testing.T, response struct { response *http.Response @@ -595,7 +593,7 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) testenv.AwaitChannelWithT(t, KafkaWaitTimeout, responseCh, func(t *testing.T, resp struct { response *http.Response @@ -713,7 +711,7 @@ func TestKafkaEvents(t *testing.T) { // Events 1, 2, 11, and 12 should be included for i := uint32(1); i < 13; i++ { - produceKafkaMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) + ProduceKafkaMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) if i == 1 || i == 2 || i == 11 || i == 12 { conn.SetReadDeadline(time.Now().Add(KafkaWaitTimeout)) gErr := conn.ReadJSON(&msg) @@ -780,7 +778,7 @@ func TestKafkaEvents(t *testing.T) { // Events 1, 2, 11, and 12 should be included for i := uint32(1); i < 13; i++ { - produceKafkaMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) + ProduceKafkaMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) if i == 1 || i == 2 || i == 11 || i == 12 { conn.SetReadDeadline(time.Now().Add(KafkaWaitTimeout)) gErr := conn.ReadJSON(&msg) @@ -835,10 +833,10 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) // The message should be ignored because "1" does not equal 1 - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":1}`) + 
ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":1}`) // This message should be delivered because it matches the filter - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":12}`) + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":12}`) conn.SetReadDeadline(time.Now().Add(KafkaWaitTimeout)) readErr := conn.ReadJSON(&msg) require.NoError(t, readErr) @@ -894,23 +892,23 @@ func TestKafkaEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) - produceKafkaMessage(t, xEnv, topics[0], `{asas`) // Invalid message + ProduceKafkaMessage(t, xEnv, topics[0], `{asas`) // Invalid message testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.ErrorContains(t, args.errValue, "Invalid message received") }) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":1}`) // Correct message + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":1}`) // Correct message testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) }) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) // Missing entity = Resolver error + ProduceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) // Missing entity = Resolver error testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.ErrorContains(t, args.errValue, "Cannot return null for non-nullable field 'Subscription.employeeUpdatedMyKafka.id'.") }) - produceKafkaMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message + ProduceKafkaMessage(t, xEnv, 
topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionOneArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) @@ -1024,7 +1022,7 @@ func TestKafkaEvents(t *testing.T) { // Events 1, 3, 4, 7, and 11 should be included for i := int(MsgCount); i > 0; i-- { - produceKafkaMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) + ProduceKafkaMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) if i == 1 || i == 3 || i == 4 || i == 7 || i == 11 { conn.SetReadDeadline(time.Now().Add(KafkaWaitTimeout)) jsonErr := conn.ReadJSON(&msg) @@ -1042,27 +1040,6 @@ func TestKafkaEvents(t *testing.T) { }) } -func produceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - pErrCh := make(chan error) - - xEnv.KafkaClient.Produce(ctx, &kgo.Record{ - Topic: xEnv.GetPubSubName(topicName), - Value: []byte(message), - }, func(record *kgo.Record, err error) { - pErrCh <- err - }) - - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, pErrCh, func(t *testing.T, pErr error) { - require.NoError(t, pErr) - }) - - fErr := xEnv.KafkaClient.Flush(ctx) - require.NoError(t, fErr) -} - func readKafkaMessages(xEnv *testenv.Environment, topicName string, msgs int) ([]*kgo.Record, error) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() diff --git a/router-tests/events/util.go b/router-tests/events/util.go index 2bd3d055fe..d6eaf1df1c 100644 --- a/router-tests/events/util.go +++ b/router-tests/events/util.go @@ -3,11 +3,35 @@ package events import ( "context" "github.com/stretchr/testify/require" + 
"github.com/twmb/franz-go/pkg/kgo" "github.com/wundergraph/cosmo/router-tests/testenv" "testing" "time" ) +const KafkaWaitTimeout = time.Second * 30 + +func ProduceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + pErrCh := make(chan error) + + xEnv.KafkaClient.Produce(ctx, &kgo.Record{ + Topic: xEnv.GetPubSubName(topicName), + Value: []byte(message), + }, func(record *kgo.Record, err error) { + pErrCh <- err + }) + + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, pErrCh, func(t *testing.T, pErr error) { + require.NoError(t, pErr) + }) + + fErr := xEnv.KafkaClient.Flush(ctx) + require.NoError(t, fErr) +} + func EnsureTopicExists(t *testing.T, xEnv *testenv.Environment, topics ...string) { // Delete topic for idempotency deleteCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) From 50bd6e517af8caaa9bea9c5757589ef7714a60cd Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 18:49:44 +0530 Subject: [PATCH 07/40] fix: updates --- router-tests/event_metrics_prometheus_test.go | 424 +++++++++++++----- router-tests/events/nats_events_test.go | 7 - router-tests/events/util.go | 1 + 3 files changed, 321 insertions(+), 111 deletions(-) diff --git a/router-tests/event_metrics_prometheus_test.go b/router-tests/event_metrics_prometheus_test.go index c322f6de9e..ddce4b388d 100644 --- a/router-tests/event_metrics_prometheus_test.go +++ b/router-tests/event_metrics_prometheus_test.go @@ -1,150 +1,366 @@ package integration import ( - "github.com/hasura/go-graphql-client" + "bufio" + "bytes" + "encoding/json" + "go.uber.org/zap" + "net/http" "strings" "testing" "time" + "github.com/hasura/go-graphql-client" + "github.com/nats-io/nats.go" "github.com/prometheus/client_golang/prometheus" + io_prometheus_client "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" 
"github.com/wundergraph/cosmo/router-tests/events" "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/pkg/config" "github.com/wundergraph/cosmo/router/pkg/trace/tracetest" "go.opentelemetry.io/otel/sdk/metric" ) -func TestKafkaPublishMetricsPrometheus(t *testing.T) { - t.Run("verify apache kafka publish recorded", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) - metricReader := metric.NewManualReader() - promRegistry := prometheus.NewRegistry() - - testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, - MetricReader: metricReader, - PrometheusRegistry: promRegistry, - RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, - EnableKafka: true, - MetricOptions: testenv.MetricOptions{ - EnablePrometheusEventMetrics: true, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - events.EnsureTopicExists(t, xEnv, "employeeUpdated") - - // First Publish - res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ - Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`, +type natsSubscriptionArgs struct { + dataValue []byte + errValue error +} + +func TestEventMetrics(t *testing.T) { + t.Run("kafka", func(t *testing.T) { + t.Run("publish", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, + EnableKafka: true, + MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + events.EnsureTopicExists(t, xEnv, "employeeUpdated") + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`}) + 
require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`}) + require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) + mf, err := promRegistry.Gather() + require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_kafka_publish_messages_total") + require.NotNil(t, family) + metrics := family.GetMetric() + require.Len(t, metrics, 1) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + topic := findMetricLabelByName(metrics, "wg_kafka_topic") + require.Equal(t, "my-kafka", eventProvider.GetValue()) + require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) - require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) + }) - // Second Publish - res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ - Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`, + t.Run("subscribe", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + topic := "employeeUpdated" + + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, + EnableKafka: true, + MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + events.EnsureTopicExists(t, xEnv, topic) + var subscriptionOne struct { + employeeUpdatedMyKafka struct { + ID float64 `graphql:"id"` + Details struct { + Forename string `graphql:"forename"` + Surname string `graphql:"surname"` + } `graphql:"details"` + } 
`graphql:"employeeUpdatedMyKafka(employeeID: 3)"` + } + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) + subscriptionArgsCh := make(chan kafkaSubscriptionArgs) + subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { + subscriptionArgsCh <- kafkaSubscriptionArgs{dataValue: dataValue, errValue: errValue} + return nil + }) + require.NoError(t, err) + require.NotEmpty(t, subscriptionOneID) + clientRunCh := make(chan error) + go func() { clientRunCh <- client.Run() }() + xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) + events.ProduceKafkaMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { + require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) + mf, err := promRegistry.Gather() + require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_kafka_messages_received_total") + require.NotNil(t, family) + metrics := family.GetMetric() + require.Len(t, metrics, 1) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + topic := findMetricLabelByName(metrics, "wg_kafka_topic") + require.Equal(t, "my-kafka", eventProvider.GetValue()) + require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) + }) + // Close the client to allow Run() to exit + require.NoError(t, client.Close()) + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, clientRunCh, func(t *testing.T, err error) { require.NoError(t, err) }, "unable to close client before timeout") }) - require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) + }) + }) - mf, err := promRegistry.Gather() - require.NoError(t, err) + t.Run("nats", func(t 
*testing.T) { + t.Run("publish", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, + EnableNats: true, + MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) { + updateEmployeeMyNats(id: 12, update: $update) {success} + }`, Variables: json.RawMessage(`{"update":{"name":"n1"}}`)}) + require.Equal(t, `{"data":{"updateEmployeeMyNats":{"success":true}}}`, res.Body) + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) { + updateEmployeeMyNats(id: 12, update: $update) {success} + }`, Variables: json.RawMessage(`{"update":{"name":"n2"}}`)}) + require.Equal(t, `{"data":{"updateEmployeeMyNats":{"success":true}}}`, res.Body) + mf, err := promRegistry.Gather() + require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_nats_publish_messages_total") + require.NotNil(t, family) + metrics := family.GetMetric() + require.NotEmpty(t, metrics) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + subject := findMetricLabelByName(metrics, "wg_nats_subject") + require.Equal(t, "my-nats", eventProvider.GetValue()) + require.True(t, strings.HasSuffix(subject.GetValue(), "employeeUpdatedMyNats.12")) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) + }) + }) - family := findMetricFamilyByName(mf, "router_kafka_publish_messages_total") - require.NotNil(t, family, "expected router_kafka_publish_messages_total metric family") + t.Run("request", func(t *testing.T) { + exporter := 
tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, + EnableNats: true, + MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + sub, err := xEnv.NatsConnectionMyNats.Subscribe(xEnv.GetPubSubName("getEmployeeMyNats.12"), func(msg *nats.Msg) { _ = msg.Respond([]byte(`{"id": 12, "__typename": "Employee"}`)) }) + require.NoError(t, err) + require.NoError(t, xEnv.NatsConnectionMyNats.Flush()) + t.Cleanup(func() { _ = sub.Unsubscribe() }) + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `query { employeeFromEventMyNats(employeeID: 12) { id details { forename } }}`}) + require.JSONEq(t, `{"data":{"employeeFromEventMyNats": {"id": 12, "details": {"forename": "David"}}}}`, res.Body) + mf, err := promRegistry.Gather() + require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_nats_request_total") + require.NotNil(t, family) + metrics := family.GetMetric() + require.NotEmpty(t, metrics) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + subject := findMetricLabelByName(metrics, "wg_nats_subject") + require.Equal(t, "my-nats", eventProvider.GetValue()) + require.True(t, strings.HasSuffix(subject.GetValue(), "getEmployeeMyNats.12")) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) + }) + }) - metrics := family.GetMetric() - require.Len(t, metrics, 1) - require.NotEmpty(t, metrics) + t.Run("nats subscribe", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: 
promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, + EnableNats: true, + ModifyEngineExecutionConfiguration: func(ec *config.EngineExecutionConfiguration) { ec.WebSocketClientReadTimeout = time.Second }, + MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + var subscriptionOne struct { + employeeUpdated struct { + ID float64 `graphql:"id"` + Details struct { + Forename string `graphql:"forename"` + Surname string `graphql:"surname"` + } `graphql:"details"` + } `graphql:"employeeUpdated(employeeID: 3)"` + } - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - topic := findMetricLabelByName(metrics, "wg_kafka_topic") + surl := xEnv.GraphQLWebSocketSubscriptionURL() + client := graphql.NewSubscriptionClient(surl) - require.Equal(t, "my-kafka", eventProvider.GetValue()) - require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + subscriptionArgsCh := make(chan natsSubscriptionArgs) + subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { + subscriptionArgsCh <- natsSubscriptionArgs{ + dataValue: dataValue, + errValue: errValue, + } + return nil + }) + require.NoError(t, err) + require.NotEqual(t, "", subscriptionOneID) - require.Equal(t, float64(2), metrics[0].Counter.GetValue()) - }) - }) + clientRunErrCh := make(chan error) + go func() { + clientErr := client.Run() + clientRunErrCh <- clientErr + }() - t.Run("verify apache kafka subscription received recorded", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) - metricReader := metric.NewManualReader() - promRegistry := prometheus.NewRegistry() - - topic := "employeeUpdated" - - testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, - MetricReader: metricReader, - PrometheusRegistry: promRegistry, - RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, - EnableKafka: true, - 
MetricOptions: testenv.MetricOptions{ - EnablePrometheusEventMetrics: true, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - events.EnsureTopicExists(t, xEnv, "employeeUpdated") - - var subscriptionOne struct { - employeeUpdatedMyKafka struct { - ID float64 `graphql:"id"` - Details struct { - Forename string `graphql:"forename"` - Surname string `graphql:"surname"` - } `graphql:"details"` - } `graphql:"employeeUpdatedMyKafka(employeeID: 3)"` - } - - client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) - - subscriptionArgsCh := make(chan kafkaSubscriptionArgs) - subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { - subscriptionArgsCh <- kafkaSubscriptionArgs{ - dataValue: dataValue, - errValue: errValue, - } - return nil - }) - require.NoError(t, err) - require.NotEmpty(t, subscriptionOneID) + xEnv.WaitForSubscriptionCount(1, events.NatsWaitTimeout) - clientRunCh := make(chan error) - go func() { - clientRunCh <- client.Run() - }() + // Send a mutation to trigger the first subscription + resOne := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { updateAvailability(employeeID: 3, isAvailable: true) { id } }`, + }) + require.JSONEq(t, `{"data":{"updateAvailability":{"id":3}}}`, resOne.Body) - xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) + testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, subscriptionArgsCh, func(t *testing.T, args natsSubscriptionArgs) { + require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdated":{"id":3,"details":{"forename":"Stefan","surname":"Avram"}}}`, string(args.dataValue)) + }) + + // Trigger the first subscription via NATS + err = xEnv.NatsConnectionDefault.Publish(xEnv.GetPubSubName("employeeUpdated.3"), []byte(`{"id":3,"__typename": "Employee"}`)) + require.NoError(t, err) - events.ProduceKafkaMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + err = 
xEnv.NatsConnectionDefault.Flush() + require.NoError(t, err) + + testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, subscriptionArgsCh, func(t *testing.T, args natsSubscriptionArgs) { + require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdated":{"id":3,"details":{"forename":"Stefan","surname":"Avram"}}}`, string(args.dataValue)) + }) + + require.NoError(t, client.Close()) + testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, clientRunErrCh, func(t *testing.T, err error) { + require.NoError(t, err) + }, "unable to close client before timeout") + + xEnv.WaitForSubscriptionCount(0, events.NatsWaitTimeout) + xEnv.WaitForConnectionCount(0, events.NatsWaitTimeout) + + natsLogs := xEnv.Observer().FilterMessageSnippet("Nats").All() + require.Len(t, natsLogs, 2) + providerIDFields := xEnv.Observer().FilterField(zap.String("provider_id", "my-nats")).All() + require.Len(t, providerIDFields, 3) + // + //payload := []byte(`{"query":"subscription { employeeUpdatedMyNats(id: 12) { id } }"}`) + //client := http.Client{} + //req := xEnv.MakeGraphQLMultipartRequest(http.MethodPost, bytes.NewReader(payload)) + //resp, gErr := client.Do(req) + //require.NoError(t, gErr) + //require.Equal(t, http.StatusOK, resp.StatusCode) + //defer resp.Body.Close() + //reader := bufio.NewReader(resp.Body) + //xEnv.WaitForSubscriptionCount(1, 30*time.Second) + //require.NoError(t, xEnv.NatsConnectionMyNats.Publish(xEnv.GetPubSubName("employeeUpdatedMyNats.12"), []byte(`{"id":12,"__typename":"Employee"}`))) + //require.NoError(t, xEnv.NatsConnectionMyNats.Flush()) + //_, _, _ = reader.ReadLine() + //_, _, _ = reader.ReadLine() + //_, _, _ = reader.ReadLine() + //_, _, _ = reader.ReadLine() + //_, _, _ = reader.ReadLine() + + }) + }) + }) - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { - require.NoError(t, args.errValue) - require.JSONEq(t, 
`{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) + t.Run("redis", func(t *testing.T) { + t.Run("publish", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + testenv.Run(t, &testenv.Config{TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}}, func(t *testing.T, xEnv *testenv.Environment) { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r1"}) { success } }`}) + require.JSONEq(t, `{"data":{"updateEmployeeMyRedis":{"success":true}}}`, res.Body) + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r2"}) { success } }`}) + require.JSONEq(t, `{"data":{"updateEmployeeMyRedis":{"success":true}}}`, res.Body) mf, err := promRegistry.Gather() require.NoError(t, err) - - family := findMetricFamilyByName(mf, "router_kafka_messages_received_total") + family := findMetricFamilyByName(mf, "router_redis_publish_messages_total") require.NotNil(t, family) - metrics := family.GetMetric() - require.Len(t, metrics, 1) require.NotEmpty(t, metrics) - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - topic := findMetricLabelByName(metrics, "wg_kafka_topic") - - require.Equal(t, "my-kafka", eventProvider.GetValue()) - require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + channel := findMetricLabelByName(metrics, "wg_redis_channel") + require.Equal(t, "my-redis", eventProvider.GetValue()) + require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) + }) + }) + 
t.Run("subscribe", func(t *testing.T) { + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + testenv.Run(t, &testenv.Config{TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}}, func(t *testing.T, xEnv *testenv.Environment) { + payload := []byte(`{"query":"subscription { employeeUpdatedMyRedis(id: 1) { id } }"}`) + client := http.Client{} + req := xEnv.MakeGraphQLMultipartRequest(http.MethodPost, bytes.NewReader(payload)) + resp, gErr := client.Do(req) + require.NoError(t, gErr) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() + reader := bufio.NewReader(resp.Body) + xEnv.WaitForSubscriptionCount(1, 30*time.Second) + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 1, update: {name: "rr"}) { success } }`}) + require.JSONEq(t, `{"data":{"updateEmployeeMyRedis":{"success":true}}}`, res.Body) + _, _, _ = reader.ReadLine() + _, _, _ = reader.ReadLine() + _, _, _ = reader.ReadLine() + _, _, _ = reader.ReadLine() + _, _, _ = reader.ReadLine() + // Poll for metric family to appear + var family *io_prometheus_client.MetricFamily + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + mf, err := promRegistry.Gather() + require.NoError(t, err) + family = findMetricFamilyByName(mf, "router_redis_messages_received_total") + if family != nil && len(family.GetMetric()) > 0 { + break + } + time.Sleep(100 * time.Millisecond) + } + require.NotNil(t, family) + metrics := family.GetMetric() + require.NotEmpty(t, metrics) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + channel := findMetricLabelByName(metrics, "wg_redis_channel") + require.Equal(t, "my-redis", 
eventProvider.GetValue()) + require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) - - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, clientRunCh, func(t *testing.T, err error) { - require.NoError(t, err) - }, "unable to close client before timeout") }) }) } +// helpers reused from kafka test + type kafkaSubscriptionArgs struct { dataValue []byte errValue error diff --git a/router-tests/events/nats_events_test.go b/router-tests/events/nats_events_test.go index d3235643b4..d34c65ca62 100644 --- a/router-tests/events/nats_events_test.go +++ b/router-tests/events/nats_events_test.go @@ -26,8 +26,6 @@ import ( "github.com/wundergraph/cosmo/router-tests/testenv" ) -const NatsWaitTimeout = time.Second * 30 - func assertNatsLineEquals(t *testing.T, reader *bufio.Reader, expected string) { t.Helper() line, _, err := reader.ReadLine() @@ -57,11 +55,6 @@ func assertNatsMultipartValueEventually(t *testing.T, reader *bufio.Reader, expe }, NatsWaitTimeout, time.Millisecond*100) } -type natsSubscriptionArgs struct { - dataValue []byte - errValue error -} - func TestNatsEvents(t *testing.T) { t.Parallel() diff --git a/router-tests/events/util.go b/router-tests/events/util.go index d6eaf1df1c..a0e0a8d9dd 100644 --- a/router-tests/events/util.go +++ b/router-tests/events/util.go @@ -10,6 +10,7 @@ import ( ) const KafkaWaitTimeout = time.Second * 30 +const NatsWaitTimeout = time.Second * 30 func ProduceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) From eb4f235d67e5debb978c59ace4ce7e2cc291233e Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 19:56:33 +0530 Subject: [PATCH 08/40] fix: updates --- router-tests/event_metrics_prometheus_test.go | 290 +++++++++--------- router-tests/events/redis_events_test.go | 63 ++-- router-tests/events/util.go | 26 ++ 
router-tests/testenv/testenv.go | 4 +- 4 files changed, 200 insertions(+), 183 deletions(-) diff --git a/router-tests/event_metrics_prometheus_test.go b/router-tests/event_metrics_prometheus_test.go index ddce4b388d..c7bfbea65d 100644 --- a/router-tests/event_metrics_prometheus_test.go +++ b/router-tests/event_metrics_prometheus_test.go @@ -1,11 +1,7 @@ package integration import ( - "bufio" - "bytes" "encoding/json" - "go.uber.org/zap" - "net/http" "strings" "testing" "time" @@ -13,16 +9,14 @@ import ( "github.com/hasura/go-graphql-client" "github.com/nats-io/nats.go" "github.com/prometheus/client_golang/prometheus" - io_prometheus_client "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" "github.com/wundergraph/cosmo/router-tests/events" "github.com/wundergraph/cosmo/router-tests/testenv" "github.com/wundergraph/cosmo/router/pkg/config" - "github.com/wundergraph/cosmo/router/pkg/trace/tracetest" "go.opentelemetry.io/otel/sdk/metric" ) -type natsSubscriptionArgs struct { +type subscriptionArgs struct { dataValue []byte errValue error } @@ -30,52 +24,55 @@ type natsSubscriptionArgs struct { func TestEventMetrics(t *testing.T) { t.Run("kafka", func(t *testing.T) { t.Run("publish", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, - MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + MetricOptions: testenv.MetricOptions{ + EnablePrometheusEventMetrics: true, + }, }, func(t *testing.T, xEnv *testenv.Environment) { events.EnsureTopicExists(t, xEnv, "employeeUpdated") - res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success 
} }`}) - require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) - res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`}) - require.JSONEq(t, `{"data":{"updateEmployeeMyKafka":{"success":true}}}`, res.Body) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`}) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`}) + mf, err := promRegistry.Gather() require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_kafka_publish_messages_total") - require.NotNil(t, family) metrics := family.GetMetric() require.Len(t, metrics, 1) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - topic := findMetricLabelByName(metrics, "wg_kafka_topic") require.Equal(t, "my-kafka", eventProvider.GetValue()) + + topic := findMetricLabelByName(metrics, "wg_kafka_topic") require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) }) t.Run("subscribe", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() topic := "employeeUpdated" testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, - MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + MetricOptions: testenv.MetricOptions{ + EnablePrometheusEventMetrics: true, + }, }, func(t *testing.T, xEnv *testenv.Environment) { events.EnsureTopicExists(t, xEnv, topic) + var subscriptionOne struct { employeeUpdatedMyKafka struct { ID float64 `graphql:"id"` 
@@ -85,10 +82,11 @@ func TestEventMetrics(t *testing.T) { } `graphql:"details"` } `graphql:"employeeUpdatedMyKafka(employeeID: 3)"` } + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) - subscriptionArgsCh := make(chan kafkaSubscriptionArgs) + subscriptionArgsCh := make(chan subscriptionArgs) subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { - subscriptionArgsCh <- kafkaSubscriptionArgs{dataValue: dataValue, errValue: errValue} + subscriptionArgsCh <- subscriptionArgs{dataValue: dataValue, errValue: errValue} return nil }) require.NoError(t, err) @@ -96,102 +94,113 @@ func TestEventMetrics(t *testing.T) { clientRunCh := make(chan error) go func() { clientRunCh <- client.Run() }() xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) + events.ProduceKafkaMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args kafkaSubscriptionArgs) { + + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) + mf, err := promRegistry.Gather() require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_kafka_messages_received_total") - require.NotNil(t, family) metrics := family.GetMetric() require.Len(t, metrics, 1) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - topic := findMetricLabelByName(metrics, "wg_kafka_topic") require.Equal(t, "my-kafka", eventProvider.GetValue()) + + topic := findMetricLabelByName(metrics, "wg_kafka_topic") require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) - // Close the client to allow Run() to exit + 
require.NoError(t, client.Close()) - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, clientRunCh, func(t *testing.T, err error) { require.NoError(t, err) }, "unable to close client before timeout") + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, clientRunCh, func(t *testing.T, err error) { require.NoError(t, err) }) }) }) }) t.Run("nats", func(t *testing.T) { t.Run("publish", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, - MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + MetricOptions: testenv.MetricOptions{ + EnablePrometheusEventMetrics: true, + }, }, func(t *testing.T, xEnv *testenv.Environment) { - res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) { - updateEmployeeMyNats(id: 12, update: $update) {success} - }`, Variables: json.RawMessage(`{"update":{"name":"n1"}}`)}) - require.Equal(t, `{"data":{"updateEmployeeMyNats":{"success":true}}}`, res.Body) - res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) { - updateEmployeeMyNats(id: 12, update: $update) {success} - }`, Variables: json.RawMessage(`{"update":{"name":"n2"}}`)}) - require.Equal(t, `{"data":{"updateEmployeeMyNats":{"success":true}}}`, res.Body) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) { + updateEmployeeMyNats(id: 12, update: $update) {success} + }`, Variables: json.RawMessage(`{"update":{"name":"n1"}}`)}) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) 
{ + updateEmployeeMyNats(id: 12, update: $update) {success} + }`, Variables: json.RawMessage(`{"update":{"name":"n2"}}`)}) + mf, err := promRegistry.Gather() require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_nats_publish_messages_total") - require.NotNil(t, family) metrics := family.GetMetric() - require.NotEmpty(t, metrics) + require.Len(t, metrics, 1) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - subject := findMetricLabelByName(metrics, "wg_nats_subject") require.Equal(t, "my-nats", eventProvider.GetValue()) + + subject := findMetricLabelByName(metrics, "wg_nats_subject") require.True(t, strings.HasSuffix(subject.GetValue(), "employeeUpdatedMyNats.12")) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) }) t.Run("request", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, - MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + MetricOptions: testenv.MetricOptions{ + EnablePrometheusEventMetrics: true, + }, }, func(t *testing.T, xEnv *testenv.Environment) { sub, err := xEnv.NatsConnectionMyNats.Subscribe(xEnv.GetPubSubName("getEmployeeMyNats.12"), func(msg *nats.Msg) { _ = msg.Respond([]byte(`{"id": 12, "__typename": "Employee"}`)) }) require.NoError(t, err) require.NoError(t, xEnv.NatsConnectionMyNats.Flush()) t.Cleanup(func() { _ = sub.Unsubscribe() }) + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `query { employeeFromEventMyNats(employeeID: 12) { id details { forename } }}`}) require.JSONEq(t, `{"data":{"employeeFromEventMyNats": {"id": 12, "details": {"forename": "David"}}}}`, res.Body) + mf, err := promRegistry.Gather() require.NoError(t, err) + family := 
findMetricFamilyByName(mf, "router_nats_request_total") - require.NotNil(t, family) metrics := family.GetMetric() - require.NotEmpty(t, metrics) + require.Len(t, metrics, 1) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - subject := findMetricLabelByName(metrics, "wg_nats_subject") require.Equal(t, "my-nats", eventProvider.GetValue()) + + subject := findMetricLabelByName(metrics, "wg_nats_subject") require.True(t, strings.HasSuffix(subject.GetValue(), "getEmployeeMyNats.12")) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) }) - t.Run("nats subscribe", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) + t.Run("subscribe", func(t *testing.T) { metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, @@ -209,24 +218,22 @@ func TestEventMetrics(t *testing.T) { } `graphql:"employeeUpdated(employeeID: 3)"` } - surl := xEnv.GraphQLWebSocketSubscriptionURL() - client := graphql.NewSubscriptionClient(surl) + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) - subscriptionArgsCh := make(chan natsSubscriptionArgs) + subscriptionArgsCh := make(chan subscriptionArgs) subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { - subscriptionArgsCh <- natsSubscriptionArgs{ + subscriptionArgsCh <- subscriptionArgs{ dataValue: dataValue, errValue: errValue, } return nil }) require.NoError(t, err) - require.NotEqual(t, "", subscriptionOneID) + require.NotEmpty(t, subscriptionOneID) clientRunErrCh := make(chan error) go func() { - clientErr := client.Run() - clientRunErrCh <- clientErr + clientRunErrCh <- client.Run() }() xEnv.WaitForSubscriptionCount(1, events.NatsWaitTimeout) @@ -237,21 +244,30 @@ func TestEventMetrics(t 
*testing.T) { }) require.JSONEq(t, `{"data":{"updateAvailability":{"id":3}}}`, resOne.Body) - testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, subscriptionArgsCh, func(t *testing.T, args natsSubscriptionArgs) { - require.NoError(t, args.errValue) - require.JSONEq(t, `{"employeeUpdated":{"id":3,"details":{"forename":"Stefan","surname":"Avram"}}}`, string(args.dataValue)) - }) - - // Trigger the first subscription via NATS + // Trigger the second subscription via NATS err = xEnv.NatsConnectionDefault.Publish(xEnv.GetPubSubName("employeeUpdated.3"), []byte(`{"id":3,"__typename": "Employee"}`)) require.NoError(t, err) err = xEnv.NatsConnectionDefault.Flush() require.NoError(t, err) - testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, subscriptionArgsCh, func(t *testing.T, args natsSubscriptionArgs) { + testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdated":{"id":3,"details":{"forename":"Stefan","surname":"Avram"}}}`, string(args.dataValue)) + + mf, err := promRegistry.Gather() + require.NoError(t, err) + + family := findMetricFamilyByName(mf, "router_nats_messages_received_total") + metrics := family.GetMetric() + + eventProviderId := findMetricLabelByName(metrics, "wg_event_provider_id").GetValue() + require.Equal(t, "default", eventProviderId) + + subject := findMetricLabelByName(metrics, "wg_nats_subject") + require.True(t, strings.HasSuffix(subject.GetValue(), "employeeUpdated.3")) + + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) require.NoError(t, client.Close()) @@ -261,109 +277,107 @@ func TestEventMetrics(t *testing.T) { xEnv.WaitForSubscriptionCount(0, events.NatsWaitTimeout) xEnv.WaitForConnectionCount(0, events.NatsWaitTimeout) - - natsLogs := xEnv.Observer().FilterMessageSnippet("Nats").All() - require.Len(t, natsLogs, 2) - providerIDFields := xEnv.Observer().FilterField(zap.String("provider_id", 
"my-nats")).All() - require.Len(t, providerIDFields, 3) - // - //payload := []byte(`{"query":"subscription { employeeUpdatedMyNats(id: 12) { id } }"}`) - //client := http.Client{} - //req := xEnv.MakeGraphQLMultipartRequest(http.MethodPost, bytes.NewReader(payload)) - //resp, gErr := client.Do(req) - //require.NoError(t, gErr) - //require.Equal(t, http.StatusOK, resp.StatusCode) - //defer resp.Body.Close() - //reader := bufio.NewReader(resp.Body) - //xEnv.WaitForSubscriptionCount(1, 30*time.Second) - //require.NoError(t, xEnv.NatsConnectionMyNats.Publish(xEnv.GetPubSubName("employeeUpdatedMyNats.12"), []byte(`{"id":12,"__typename":"Employee"}`))) - //require.NoError(t, xEnv.NatsConnectionMyNats.Flush()) - //_, _, _ = reader.ReadLine() - //_, _, _ = reader.ReadLine() - //_, _, _ = reader.ReadLine() - //_, _, _ = reader.ReadLine() - //_, _, _ = reader.ReadLine() - }) }) }) t.Run("redis", func(t *testing.T) { t.Run("publish", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() - testenv.Run(t, &testenv.Config{TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}}, func(t *testing.T, xEnv *testenv.Environment) { - res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r1"}) { success } }`}) - require.JSONEq(t, `{"data":{"updateEmployeeMyRedis":{"success":true}}}`, res.Body) - res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r2"}) { success } }`}) - require.JSONEq(t, `{"data":{"updateEmployeeMyRedis":{"success":true}}}`, res.Body) + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + 
RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, + EnableRedis: true, + MetricOptions: testenv.MetricOptions{ + EnablePrometheusEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r1"}) { success } }`}) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r2"}) { success } }`}) + mf, err := promRegistry.Gather() require.NoError(t, err) + family := findMetricFamilyByName(mf, "router_redis_publish_messages_total") - require.NotNil(t, family) metrics := family.GetMetric() - require.NotEmpty(t, metrics) + require.Len(t, metrics, 1) + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - channel := findMetricLabelByName(metrics, "wg_redis_channel") require.Equal(t, "my-redis", eventProvider.GetValue()) + + channel := findMetricLabelByName(metrics, "wg_redis_channel") require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) }) t.Run("subscribe", func(t *testing.T) { - exporter := tracetest.NewInMemoryExporter(t) metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() - testenv.Run(t, &testenv.Config{TraceExporter: exporter, MetricReader: metricReader, PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}}, func(t *testing.T, xEnv *testenv.Environment) { - payload := []byte(`{"query":"subscription { employeeUpdatedMyRedis(id: 1) { id } }"}`) - client := http.Client{} - req := xEnv.MakeGraphQLMultipartRequest(http.MethodPost, bytes.NewReader(payload)) - resp, gErr := client.Do(req) - require.NoError(t, gErr) - require.Equal(t, http.StatusOK, resp.StatusCode) - defer resp.Body.Close() - 
reader := bufio.NewReader(resp.Body) - xEnv.WaitForSubscriptionCount(1, 30*time.Second) - res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 1, update: {name: "rr"}) { success } }`}) - require.JSONEq(t, `{"data":{"updateEmployeeMyRedis":{"success":true}}}`, res.Body) - _, _, _ = reader.ReadLine() - _, _, _ = reader.ReadLine() - _, _, _ = reader.ReadLine() - _, _, _ = reader.ReadLine() - _, _, _ = reader.ReadLine() - // Poll for metric family to appear - var family *io_prometheus_client.MetricFamily - deadline := time.Now().Add(5 * time.Second) - for time.Now().Before(deadline) { + + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, + EnableRedis: true, + MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + topic := "employeeUpdatedMyRedis" + + var subscriptionOne struct { + employeeUpdates struct { + ID float64 `graphql:"id"` + Details struct { + Forename string `graphql:"forename"` + Surname string `graphql:"surname"` + } `graphql:"details"` + } `graphql:"employeeUpdates"` + } + + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) + + subscriptionArgsCh := make(chan subscriptionArgs) + subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { + subscriptionArgsCh <- subscriptionArgs{dataValue, errValue} + return nil + }) + require.NoError(t, err) + require.NotEmpty(t, subscriptionOneID) + + runCh := make(chan error) + go func() { runCh <- client.Run() }() + + xEnv.WaitForSubscriptionCount(1, events.RedisWaitTimeout) + events.ProduceRedisMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + + testenv.AwaitChannelWithT(t, events.RedisWaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { + 
require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdates":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) + mf, err := promRegistry.Gather() require.NoError(t, err) - family = findMetricFamilyByName(mf, "router_redis_messages_received_total") - if family != nil && len(family.GetMetric()) > 0 { - break - } - time.Sleep(100 * time.Millisecond) - } - require.NotNil(t, family) - metrics := family.GetMetric() - require.NotEmpty(t, metrics) - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - channel := findMetricLabelByName(metrics, "wg_redis_channel") - require.Equal(t, "my-redis", eventProvider.GetValue()) - require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) - require.Equal(t, float64(1), metrics[0].Counter.GetValue()) + + family := findMetricFamilyByName(mf, "router_redis_messages_received_total") + metrics := family.GetMetric() + require.Len(t, metrics, 1) + + eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") + require.Equal(t, "my-redis", eventProvider.GetValue()) + + channel := findMetricLabelByName(metrics, "wg_redis_channel") + require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) + }) + + require.NoError(t, client.Close()) + testenv.AwaitChannelWithT(t, KafkaWaitTimeout, runCh, func(t *testing.T, err error) { require.NoError(t, err) }) }) }) }) } -// helpers reused from kafka test - -type kafkaSubscriptionArgs struct { - dataValue []byte - errValue error -} - const KafkaWaitTimeout = time.Second * 30 diff --git a/router-tests/events/redis_events_test.go b/router-tests/events/redis_events_test.go index aeaa1989be..a93fd8a11e 100644 --- a/router-tests/events/redis_events_test.go +++ b/router-tests/events/redis_events_test.go @@ -21,8 +21,6 @@ import ( "github.com/wundergraph/cosmo/router/pkg/config" ) -const RedisWaitTimeout = time.Second * 30 - func 
assertRedisLineEquals(t *testing.T, reader *bufio.Reader, expected string) { t.Helper() line, _, err := reader.ReadLine() @@ -104,7 +102,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // process the message select { @@ -170,7 +168,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce an empty message - produceRedisMessage(t, xEnv, topics[0], ``) + ProduceRedisMessage(t, xEnv, topics[0], ``) // process the message select { case subscriptionArgs := <-subscriptionArgsCh: @@ -181,7 +179,7 @@ func TestRedisEvents(t *testing.T) { t.Fatal("timeout waiting for first message error") } - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // Correct message select { case subscriptionArgs := <-subscriptionArgsCh: require.NoError(t, subscriptionArgs.errValue) @@ -191,7 +189,7 @@ func TestRedisEvents(t *testing.T) { } // Missing entity = Resolver error - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) select { case subscriptionArgs := <-subscriptionArgsCh: var gqlErr graphql.Errors @@ -202,7 +200,7 @@ func TestRedisEvents(t *testing.T) { } // Correct message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) select { case subscriptionArgs := <-subscriptionArgsCh: require.NoError(t, 
subscriptionArgs.errValue) @@ -273,7 +271,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(2, RedisWaitTimeout) // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // read the message from the first subscription select { @@ -354,7 +352,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(2, RedisWaitTimeout) // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // read the message from the first subscription select { @@ -375,7 +373,7 @@ func TestRedisEvents(t *testing.T) { } // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 2,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 2,"update":{"name":"foo"}}`) // read the message from the first subscription select { @@ -451,7 +449,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // read the message from the subscription select { @@ -509,12 +507,12 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // read the message from the subscription assertRedisMultipartValueEventually(t, reader, 
"{\"payload\":{\"data\":{\"employeeUpdates\":{\"id\":1,\"details\":{\"forename\":\"Jens\",\"surname\":\"Neuse\"}}}}}") // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // read the message from the subscription assertRedisMultipartValueEventually(t, reader, "{\"payload\":{\"data\":{\"employeeUpdates\":{\"id\":1,\"details\":{\"forename\":\"Jens\",\"surname\":\"Neuse\"}}}}}") }) @@ -590,7 +588,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce a message so that the subscription is triggered - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // get the client response var clientRet struct { @@ -663,7 +661,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce a message so that the subscription is triggered - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // get the client response var clientRet struct { @@ -792,7 +790,7 @@ func TestRedisEvents(t *testing.T) { // Events 1, 3, 4, 7, and 11 should be included for i := MsgCount; i > 0; i-- { - produceRedisMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) + ProduceRedisMessage(t, xEnv, topics[0], fmt.Sprintf(`{"__typename":"Employee","id":%d}`, i)) if i == 11 || i == 7 || i == 4 || i == 3 || i == 1 { gErr := conn.ReadJSON(&msg) @@ -853,7 +851,7 @@ func TestRedisEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce an invalid message - produceRedisMessage(t, xEnv, topics[0], `{asas`) 
+ ProduceRedisMessage(t, xEnv, topics[0], `{asas`) // get the client response select { case args := <-subscriptionOneArgsCh: @@ -865,7 +863,7 @@ func TestRedisEvents(t *testing.T) { } // produce a correct message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":1}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id":1}`) // get the client response select { case args := <-subscriptionOneArgsCh: @@ -876,7 +874,7 @@ func TestRedisEvents(t *testing.T) { } // produce a message with a missing entity - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","update":{"name":"foo"}}`) // get the client response select { case args := <-subscriptionOneArgsCh: @@ -888,7 +886,7 @@ func TestRedisEvents(t *testing.T) { } // produce a correct message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // get the client response select { case args := <-subscriptionOneArgsCh: @@ -991,7 +989,7 @@ func TestRedisClusterEvents(t *testing.T) { xEnv.WaitForSubscriptionCount(1, RedisWaitTimeout) // produce a message - produceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + ProduceRedisMessage(t, xEnv, topics[0], `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) // read the message select { @@ -1047,29 +1045,6 @@ func TestRedisClusterEvents(t *testing.T) { } -func produceRedisMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - parsedURL, err := url.Parse(xEnv.RedisHosts[0]) - if err != nil { - t.Fatalf("Failed to parse Redis URL: %v", err) - } - var redisConn redis.UniversalClient - if 
!xEnv.RedisWithClusterMode { - redisConn = redis.NewClient(&redis.Options{ - Addr: parsedURL.Host, - }) - } else { - redisConn = redis.NewClusterClient(&redis.ClusterOptions{ - Addrs: []string{parsedURL.Host}, - }) - } - - intCmd := redisConn.Publish(ctx, xEnv.GetPubSubName(topicName), message) - require.NoError(t, intCmd.Err()) -} - func readRedisMessages(t *testing.T, xEnv *testenv.Environment, channelName string) (<-chan *redis.Message, error) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() diff --git a/router-tests/events/util.go b/router-tests/events/util.go index a0e0a8d9dd..a0033c06b6 100644 --- a/router-tests/events/util.go +++ b/router-tests/events/util.go @@ -2,15 +2,18 @@ package events import ( "context" + "github.com/redis/go-redis/v9" "github.com/stretchr/testify/require" "github.com/twmb/franz-go/pkg/kgo" "github.com/wundergraph/cosmo/router-tests/testenv" + "net/url" "testing" "time" ) const KafkaWaitTimeout = time.Second * 30 const NatsWaitTimeout = time.Second * 30 +const RedisWaitTimeout = time.Second * 30 func ProduceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) @@ -51,3 +54,26 @@ func EnsureTopicExists(t *testing.T, xEnv *testenv.Environment, topics ...string _, err = xEnv.KafkaAdminClient.CreateTopics(ctx, 1, 1, nil, prefixedTopics...) 
require.NoError(t, err) } + +func ProduceRedisMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + parsedURL, err := url.Parse(xEnv.RedisHosts[0]) + if err != nil { + t.Fatalf("Failed to parse Redis URL: %v", err) + } + var redisConn redis.UniversalClient + if !xEnv.RedisWithClusterMode { + redisConn = redis.NewClient(&redis.Options{ + Addr: parsedURL.Host, + }) + } else { + redisConn = redis.NewClusterClient(&redis.ClusterOptions{ + Addrs: []string{parsedURL.Host}, + }) + } + + intCmd := redisConn.Publish(ctx, xEnv.GetPubSubName(topicName), message) + require.NoError(t, intCmd.Err()) +} diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 3924d08baf..835c48cd53 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -2822,7 +2822,9 @@ func subgraphOptions(ctx context.Context, t testing.TB, logger *zap.Logger, nats } natsPubSubByProviderID := make(map[string]pubsubNats.Adapter, len(DemoNatsProviders)) for _, sourceName := range DemoNatsProviders { - adapter, err := pubsubNats.NewAdapter(ctx, logger, natsData.Params[0].Url, natsData.Params[0].Opts, "hostname", "listenaddr", datasource.ProviderOpts{}) + adapter, err := pubsubNats.NewAdapter(ctx, logger, natsData.Params[0].Url, natsData.Params[0].Opts, "hostname", "listenaddr", datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) require.NoError(t, err) require.NoError(t, adapter.Startup(ctx)) t.Cleanup(func() { From 3f3f5d329999fa0786c370805ce1793ae65359b5 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 20:56:00 +0530 Subject: [PATCH 09/40] fix: tests --- .../events/{util.go => event_helpers.go} | 6 +- router-tests/events/kafka_events_test.go | 2 + router-tests/events/nats_events_test.go | 2 + router-tests/events/redis_events_test.go | 2 + ...st.go => 
prometheus_event_metrics_test.go} | 56 ++++--- router-tests/testenv/testenv.go | 4 +- router/pkg/metric/event_measurements.go | 156 ++++-------------- router/pkg/metric/event_metric_store.go | 106 +++++++----- router/pkg/metric/noop_event_metrics.go | 37 +---- router/pkg/metric/oltp_event_metric_store.go | 41 +---- router/pkg/metric/prom_event_metric_store.go | 41 +---- router/pkg/otel/attributes.go | 9 +- 12 files changed, 178 insertions(+), 284 deletions(-) rename router-tests/events/{util.go => event_helpers.go} (90%) rename router-tests/{event_metrics_prometheus_test.go => prometheus_event_metrics_test.go} (85%) diff --git a/router-tests/events/util.go b/router-tests/events/event_helpers.go similarity index 90% rename from router-tests/events/util.go rename to router-tests/events/event_helpers.go index a0033c06b6..a1bb8386a2 100644 --- a/router-tests/events/util.go +++ b/router-tests/events/event_helpers.go @@ -11,9 +11,7 @@ import ( "time" ) -const KafkaWaitTimeout = time.Second * 30 -const NatsWaitTimeout = time.Second * 30 -const RedisWaitTimeout = time.Second * 30 +const waitTimeout = time.Second * 30 func ProduceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName string, message string) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) @@ -28,7 +26,7 @@ func ProduceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName stri pErrCh <- err }) - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, pErrCh, func(t *testing.T, pErr error) { + testenv.AwaitChannelWithT(t, waitTimeout, pErrCh, func(t *testing.T, pErr error) { require.NoError(t, pErr) }) diff --git a/router-tests/events/kafka_events_test.go b/router-tests/events/kafka_events_test.go index 8e98b25c11..dbc17f870a 100644 --- a/router-tests/events/kafka_events_test.go +++ b/router-tests/events/kafka_events_test.go @@ -21,6 +21,8 @@ import ( "github.com/wundergraph/cosmo/router/pkg/config" ) +const KafkaWaitTimeout = time.Second * 30 + func assertKafkaLineEquals(t 
*testing.T, reader *bufio.Reader, expected string) { t.Helper() line, _, err := reader.ReadLine() diff --git a/router-tests/events/nats_events_test.go b/router-tests/events/nats_events_test.go index d34c65ca62..a679b9fe7c 100644 --- a/router-tests/events/nats_events_test.go +++ b/router-tests/events/nats_events_test.go @@ -26,6 +26,8 @@ import ( "github.com/wundergraph/cosmo/router-tests/testenv" ) +const NatsWaitTimeout = time.Second * 30 + func assertNatsLineEquals(t *testing.T, reader *bufio.Reader, expected string) { t.Helper() line, _, err := reader.ReadLine() diff --git a/router-tests/events/redis_events_test.go b/router-tests/events/redis_events_test.go index a93fd8a11e..1c287f7d6f 100644 --- a/router-tests/events/redis_events_test.go +++ b/router-tests/events/redis_events_test.go @@ -21,6 +21,8 @@ import ( "github.com/wundergraph/cosmo/router/pkg/config" ) +const RedisWaitTimeout = time.Second * 30 + func assertRedisLineEquals(t *testing.T, reader *bufio.Reader, expected string) { t.Helper() line, _, err := reader.ReadLine() diff --git a/router-tests/event_metrics_prometheus_test.go b/router-tests/prometheus_event_metrics_test.go similarity index 85% rename from router-tests/event_metrics_prometheus_test.go rename to router-tests/prometheus_event_metrics_test.go index c7bfbea65d..07b4b7ac76 100644 --- a/router-tests/event_metrics_prometheus_test.go +++ b/router-tests/prometheus_event_metrics_test.go @@ -21,6 +21,8 @@ type subscriptionArgs struct { errValue error } +const WaitTimeout = time.Second * 30 + func TestEventMetrics(t *testing.T) { t.Run("kafka", func(t *testing.T) { t.Run("publish", func(t *testing.T) { @@ -43,13 +45,16 @@ func TestEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_kafka_publish_messages_total") + family := findMetricFamilyByName(mf, "router_events_publish_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) eventProvider := 
findMetricLabelByName(metrics, "wg_event_provider_id") require.Equal(t, "my-kafka", eventProvider.GetValue()) + providerType := findMetricLabelByName(metrics, "wg_event_provider_type") + require.Equal(t, "kafka", providerType.GetValue()) + topic := findMetricLabelByName(metrics, "wg_kafka_topic") require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) @@ -93,24 +98,27 @@ func TestEventMetrics(t *testing.T) { require.NotEmpty(t, subscriptionOneID) clientRunCh := make(chan error) go func() { clientRunCh <- client.Run() }() - xEnv.WaitForSubscriptionCount(1, KafkaWaitTimeout) + xEnv.WaitForSubscriptionCount(1, WaitTimeout) events.ProduceKafkaMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { + testenv.AwaitChannelWithT(t, WaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_kafka_messages_received_total") + family := findMetricFamilyByName(mf, "router_events_messages_received_total") metrics := family.GetMetric() require.Len(t, metrics, 1) eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") require.Equal(t, "my-kafka", eventProvider.GetValue()) + providerType := findMetricLabelByName(metrics, "wg_event_provider_type") + require.Equal(t, "kafka", providerType.GetValue()) + topic := findMetricLabelByName(metrics, "wg_kafka_topic") require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) @@ -118,7 +126,7 @@ func TestEventMetrics(t *testing.T) { }) require.NoError(t, client.Close()) - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, clientRunCh, func(t *testing.T, err error) { 
require.NoError(t, err) }) + testenv.AwaitChannelWithT(t, WaitTimeout, clientRunCh, func(t *testing.T, err error) { require.NoError(t, err) }) }) }) }) @@ -146,13 +154,16 @@ func TestEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_nats_publish_messages_total") + family := findMetricFamilyByName(mf, "router_events_publish_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") require.Equal(t, "my-nats", eventProvider.GetValue()) + providerType := findMetricLabelByName(metrics, "wg_event_provider_type") + require.Equal(t, "nats", providerType.GetValue()) + subject := findMetricLabelByName(metrics, "wg_nats_subject") require.True(t, strings.HasSuffix(subject.GetValue(), "employeeUpdatedMyNats.12")) @@ -236,7 +247,7 @@ func TestEventMetrics(t *testing.T) { clientRunErrCh <- client.Run() }() - xEnv.WaitForSubscriptionCount(1, events.NatsWaitTimeout) + xEnv.WaitForSubscriptionCount(1, WaitTimeout) // Send a mutation to trigger the first subscription resOne := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ @@ -251,19 +262,22 @@ func TestEventMetrics(t *testing.T) { err = xEnv.NatsConnectionDefault.Flush() require.NoError(t, err) - testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { + testenv.AwaitChannelWithT(t, WaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdated":{"id":3,"details":{"forename":"Stefan","surname":"Avram"}}}`, string(args.dataValue)) mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_nats_messages_received_total") + family := findMetricFamilyByName(mf, "router_events_messages_received_total") metrics := family.GetMetric() eventProviderId := findMetricLabelByName(metrics, 
"wg_event_provider_id").GetValue() require.Equal(t, "default", eventProviderId) + providerType := findMetricLabelByName(metrics, "wg_event_provider_type") + require.Equal(t, "nats", providerType.GetValue()) + subject := findMetricLabelByName(metrics, "wg_nats_subject") require.True(t, strings.HasSuffix(subject.GetValue(), "employeeUpdated.3")) @@ -271,12 +285,12 @@ func TestEventMetrics(t *testing.T) { }) require.NoError(t, client.Close()) - testenv.AwaitChannelWithT(t, events.NatsWaitTimeout, clientRunErrCh, func(t *testing.T, err error) { + testenv.AwaitChannelWithT(t, WaitTimeout, clientRunErrCh, func(t *testing.T, err error) { require.NoError(t, err) }, "unable to close client before timeout") - xEnv.WaitForSubscriptionCount(0, events.NatsWaitTimeout) - xEnv.WaitForConnectionCount(0, events.NatsWaitTimeout) + xEnv.WaitForSubscriptionCount(0, WaitTimeout) + xEnv.WaitForConnectionCount(0, WaitTimeout) }) }) }) @@ -301,13 +315,16 @@ func TestEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_redis_publish_messages_total") + family := findMetricFamilyByName(mf, "router_events_publish_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") require.Equal(t, "my-redis", eventProvider.GetValue()) + providerType := findMetricLabelByName(metrics, "wg_event_provider_type") + require.Equal(t, "redis", providerType.GetValue()) + channel := findMetricLabelByName(metrics, "wg_redis_channel") require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) @@ -351,33 +368,34 @@ func TestEventMetrics(t *testing.T) { runCh := make(chan error) go func() { runCh <- client.Run() }() - xEnv.WaitForSubscriptionCount(1, events.RedisWaitTimeout) + xEnv.WaitForSubscriptionCount(1, WaitTimeout) events.ProduceRedisMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) - 
testenv.AwaitChannelWithT(t, events.RedisWaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { + testenv.AwaitChannelWithT(t, WaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { require.NoError(t, args.errValue) require.JSONEq(t, `{"employeeUpdates":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_redis_messages_received_total") + family := findMetricFamilyByName(mf, "router_events_messages_received_total") metrics := family.GetMetric() require.Len(t, metrics, 1) eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") require.Equal(t, "my-redis", eventProvider.GetValue()) + providerType := findMetricLabelByName(metrics, "wg_event_provider_type") + require.Equal(t, "redis", providerType.GetValue()) + channel := findMetricLabelByName(metrics, "wg_redis_channel") require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) require.NoError(t, client.Close()) - testenv.AwaitChannelWithT(t, KafkaWaitTimeout, runCh, func(t *testing.T, err error) { require.NoError(t, err) }) + testenv.AwaitChannelWithT(t, WaitTimeout, runCh, func(t *testing.T, err error) { require.NoError(t, err) }) }) }) }) } - -const KafkaWaitTimeout = time.Second * 30 diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 835c48cd53..7aaedbbee3 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -408,7 +408,7 @@ func CreateTestSupervisorEnv(t testing.TB, cfg *Config) (*Environment, error) { ) if cfg.EnableKafka { - cfg.KafkaSeeds = []string{"localhost:9092"} + cfg.KafkaSeeds = []string{"localhost:9095"} client, err := kgo.NewClient( kgo.SeedBrokers(cfg.KafkaSeeds...), @@ -835,7 +835,7 @@ func CreateTestEnv(t testing.TB, cfg *Config) (*Environment, error) { ) 
if cfg.EnableKafka { - cfg.KafkaSeeds = []string{"localhost:9092"} + cfg.KafkaSeeds = []string{"localhost:9095"} client, err := kgo.NewClient( kgo.SeedBrokers(cfg.KafkaSeeds...), kgo.FetchMaxWait(time.Millisecond*100), diff --git a/router/pkg/metric/event_measurements.go b/router/pkg/metric/event_measurements.go index e12bae8291..c3a5a2733b 100644 --- a/router/pkg/metric/event_measurements.go +++ b/router/pkg/metric/event_measurements.go @@ -8,50 +8,25 @@ import ( // Event (Kafka/Redis/NATS) metric constants const ( - kafkaPublishMessages = "router.kafka.publish.messages" - kafkaPublishFailures = "router.kafka.publish.fail" - kafkaMessagesReceived = "router.kafka.messages.received" - - redisPublishMessages = "router.redis.publish.messages" - redisPublishFailures = "router.redis.publish.fail" - redisMessagesReceived = "router.redis.messages.received" - - natsPublishMessages = "router.nats.publish.messages" - natsPublishFailures = "router.nats.publish.fail" - natsMessagesReceived = "router.nats.messages.received" - natsRequests = "router.nats.request" - natsRequestFailures = "router.nats.request.fail" + // unified counters across providers; provider type captured via attributes + eventsPublishMessages = "router.events.publish.messages" + eventsPublishFailures = "router.events.publish.fail" + eventsMessagesReceived = "router.events.messages.received" + + // keep nats request metrics separate as they are not generic publish/receive + natsRequests = "router.nats.request" + natsRequestFailures = "router.nats.request.fail" ) var ( - kafkaPublishMessagesOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of Kafka messages published"), - } - kafkaPublishFailuresOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of Kafka publish failures"), - } - kafkaMessagesReceivedOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of Kafka messages received"), - } - - redisPublishMessagesOptions = 
[]otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of Redis messages published"), - } - redisPublishFailuresOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of Redis publish failures"), - } - redisMessagesReceivedOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of Redis messages received"), - } - - natsPublishMessagesOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of NATS messages published"), + eventsPublishMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of event messages published"), } - natsPublishFailuresOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of NATS publish failures"), + eventsPublishFailuresOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of event publish failures"), } - natsMessagesReceivedOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of NATS messages received"), + eventsMessagesReceivedOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of event messages received"), } // New NATS request counter options @@ -64,94 +39,39 @@ var ( ) type eventInstruments struct { - kafkaPublishMessages otelmetric.Int64Counter - kafkaPublishFailures otelmetric.Int64Counter - kafkaMessagesReceived otelmetric.Int64Counter + // unified instruments + publishMessages otelmetric.Int64Counter + publishFailures otelmetric.Int64Counter + messagesReceived otelmetric.Int64Counter - redisPublishMessages otelmetric.Int64Counter - redisPublishFailures otelmetric.Int64Counter - redisMessagesReceived otelmetric.Int64Counter - - natsPublishMessages otelmetric.Int64Counter - natsPublishFailures otelmetric.Int64Counter - natsMessagesReceived otelmetric.Int64Counter - - // New NATS request instruments + // NATS request instruments natsRequests otelmetric.Int64Counter natsRequestFailures otelmetric.Int64Counter } 
func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { - kafkaPublishMessagesCounter, err := meter.Int64Counter( - kafkaPublishMessages, - kafkaPublishMessagesOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create kafka publish messages counter: %w", err) - } - - kafkaPublishFailuresCounter, err := meter.Int64Counter( - kafkaPublishFailures, - kafkaPublishFailuresOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create kafka publish failures counter: %w", err) - } - - kafkaMessagesReceivedCounter, err := meter.Int64Counter( - kafkaMessagesReceived, - kafkaMessagesReceivedOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create kafka messages received counter: %w", err) - } - - redisPublishMessagesCounter, err := meter.Int64Counter( - redisPublishMessages, - redisPublishMessagesOptions..., + publishMessagesCounter, err := meter.Int64Counter( + eventsPublishMessages, + eventsPublishMessagesOptions..., ) if err != nil { - return nil, fmt.Errorf("failed to create redis publish messages counter: %w", err) + return nil, fmt.Errorf("failed to create publish messages counter: %w", err) } - redisPublishFailuresCounter, err := meter.Int64Counter( - redisPublishFailures, - redisPublishFailuresOptions..., + publishFailuresCounter, err := meter.Int64Counter( + eventsPublishFailures, + eventsPublishFailuresOptions..., ) if err != nil { - return nil, fmt.Errorf("failed to create redis publish failures counter: %w", err) + return nil, fmt.Errorf("failed to create publish failures counter: %w", err) } - redisMessagesReceivedCounter, err := meter.Int64Counter( - redisMessagesReceived, - redisMessagesReceivedOptions..., + messagesReceivedCounter, err := meter.Int64Counter( + eventsMessagesReceived, + eventsMessagesReceivedOptions..., ) if err != nil { - return nil, fmt.Errorf("failed to create redis messages received counter: %w", err) - } - - natsPublishMessagesCounter, err := meter.Int64Counter( - 
natsPublishMessages, - natsPublishMessagesOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create nats publish messages counter: %w", err) - } - - natsPublishFailuresCounter, err := meter.Int64Counter( - natsPublishFailures, - natsPublishFailuresOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create nats publish failures counter: %w", err) - } - - natsMessagesReceivedCounter, err := meter.Int64Counter( - natsMessagesReceived, - natsMessagesReceivedOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create nats messages received counter: %w", err) + return nil, fmt.Errorf("failed to create messages received counter: %w", err) } // New NATS request counters @@ -172,17 +92,9 @@ func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { } return &eventInstruments{ - kafkaPublishMessages: kafkaPublishMessagesCounter, - kafkaPublishFailures: kafkaPublishFailuresCounter, - kafkaMessagesReceived: kafkaMessagesReceivedCounter, - - redisPublishMessages: redisPublishMessagesCounter, - redisPublishFailures: redisPublishFailuresCounter, - redisMessagesReceived: redisMessagesReceivedCounter, - - natsPublishMessages: natsPublishMessagesCounter, - natsPublishFailures: natsPublishFailuresCounter, - natsMessagesReceived: natsMessagesReceivedCounter, + publishMessages: publishMessagesCounter, + publishFailures: publishFailuresCounter, + messagesReceived: messagesReceivedCounter, // NATS request instruments natsRequests: natsRequestsCounter, diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go index 3afef1c173..6871b00ed8 100644 --- a/router/pkg/metric/event_metric_store.go +++ b/router/pkg/metric/event_metric_store.go @@ -16,18 +16,12 @@ import ( // EventMetricProvider is the interface that wraps the basic Event metric methods. // We maintain two providers, one for OTEL and one for Prometheus. 
type EventMetricProvider interface { - KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) - KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) - KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) - - RedisPublish(ctx context.Context, opts ...otelmetric.AddOption) - RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) - RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) - - NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) - NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) - NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) + // unified publish/receive for brokers (kafka, redis, nats) + Publish(ctx context.Context, opts ...otelmetric.AddOption) + PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) + MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) + // keep NATS request separate NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) @@ -96,57 +90,93 @@ func (e *EventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOpti } func (e *EventMetrics) KafkaPublish(ctx context.Context, providerID string, topic string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic)) - e.otlpMetrics.KafkaPublish(ctx, opts) - e.promMetrics.KafkaPublish(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("kafka"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgKafkaTopic.String(topic), + ) + e.otlpMetrics.Publish(ctx, opts) + e.promMetrics.Publish(ctx, opts) } func (e *EventMetrics) KafkaPublishFailure(ctx context.Context, providerID string, topic string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic)) - e.otlpMetrics.KafkaPublishFailure(ctx, opts) - 
e.promMetrics.KafkaPublishFailure(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("kafka"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgKafkaTopic.String(topic), + ) + e.otlpMetrics.PublishFailure(ctx, opts) + e.promMetrics.PublishFailure(ctx, opts) } func (e *EventMetrics) KafkaMessageReceived(ctx context.Context, providerID string, topic string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic)) - e.otlpMetrics.KafkaMessageReceived(ctx, opts) - e.promMetrics.KafkaMessageReceived(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("kafka"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgKafkaTopic.String(topic), + ) + e.otlpMetrics.MessagesReceived(ctx, opts) + e.promMetrics.MessagesReceived(ctx, opts) } func (e *EventMetrics) RedisPublish(ctx context.Context, providerID string, channel string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel)) - e.otlpMetrics.RedisPublish(ctx, opts) - e.promMetrics.RedisPublish(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("redis"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgRedisChannel.String(channel), + ) + e.otlpMetrics.Publish(ctx, opts) + e.promMetrics.Publish(ctx, opts) } func (e *EventMetrics) RedisPublishFailure(ctx context.Context, providerID string, channel string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel)) - e.otlpMetrics.RedisPublishFailure(ctx, opts) - e.promMetrics.RedisPublishFailure(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("redis"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgRedisChannel.String(channel), + ) + e.otlpMetrics.PublishFailure(ctx, opts) + e.promMetrics.PublishFailure(ctx, opts) } func (e *EventMetrics) RedisMessageReceived(ctx 
context.Context, providerID string, channel string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel)) - e.otlpMetrics.RedisMessageReceived(ctx, opts) - e.promMetrics.RedisMessageReceived(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("redis"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgRedisChannel.String(channel), + ) + e.otlpMetrics.MessagesReceived(ctx, opts) + e.promMetrics.MessagesReceived(ctx, opts) } func (e *EventMetrics) NatsPublish(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) - e.otlpMetrics.NatsPublish(ctx, opts) - e.promMetrics.NatsPublish(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("nats"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgNatsSubject.String(subject), + ) + e.otlpMetrics.Publish(ctx, opts) + e.promMetrics.Publish(ctx, opts) } func (e *EventMetrics) NatsPublishFailure(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) - e.otlpMetrics.NatsPublishFailure(ctx, opts) - e.promMetrics.NatsPublishFailure(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("nats"), + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgNatsSubject.String(subject), + ) + e.otlpMetrics.PublishFailure(ctx, opts) + e.promMetrics.PublishFailure(ctx, opts) } func (e *EventMetrics) NatsMessageReceived(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) - e.otlpMetrics.NatsMessageReceived(ctx, opts) - e.promMetrics.NatsMessageReceived(ctx, opts) + opts := e.withAttrs( + otelattrs.WgEventProviderType.String("nats"), + 
otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgNatsSubject.String(subject), + ) + e.otlpMetrics.MessagesReceived(ctx, opts) + e.promMetrics.MessagesReceived(ctx, opts) } func (e *EventMetrics) NatsRequest(ctx context.Context, providerID string, subject string) { diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go index 246dd5b28a..dd1ee441b7 100644 --- a/router/pkg/metric/noop_event_metrics.go +++ b/router/pkg/metric/noop_event_metrics.go @@ -9,20 +9,9 @@ import ( // A noop metric provider so we do not need to do nil checks for each provider call from the store type noopEventMetricProvider struct{} -func (n *noopEventMetricProvider) KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { -} -func (n *noopEventMetricProvider) KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { -} -func (n *noopEventMetricProvider) RedisPublish(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { -} -func (n *noopEventMetricProvider) RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { -} -func (n *noopEventMetricProvider) NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { -} -func (n *noopEventMetricProvider) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { +func (n *noopEventMetricProvider) Publish(ctx context.Context, opts ...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) { } func (n *noopEventMetricProvider) NatsRequest(ctx 
context.Context, opts ...otelmetric.AddOption) {} @@ -34,42 +23,28 @@ func (n *noopEventMetricProvider) Shutdown() error { return nil type NoopEventMetricStore struct{} func (n *NoopEventMetricStore) KafkaPublish(ctx context.Context, providerID string, topic string) {} - func (n *NoopEventMetricStore) KafkaPublishFailure(ctx context.Context, providerID string, topic string) { } - func (n *NoopEventMetricStore) KafkaMessageReceived(ctx context.Context, providerID string, topic string) { } func (n *NoopEventMetricStore) RedisPublish(ctx context.Context, providerID string, channel string) {} - func (n *NoopEventMetricStore) RedisPublishFailure(ctx context.Context, providerID string, channel string) { } - func (n *NoopEventMetricStore) RedisMessageReceived(ctx context.Context, providerID string, channel string) { } func (n *NoopEventMetricStore) NatsPublish(ctx context.Context, providerID string, subject string) {} - func (n *NoopEventMetricStore) NatsPublishFailure(ctx context.Context, providerID string, subject string) { } - func (n *NoopEventMetricStore) NatsMessageReceived(ctx context.Context, providerID string, subject string) { } func (n *NoopEventMetricStore) NatsRequest(ctx context.Context, providerID string, subject string) {} - func (n *NoopEventMetricStore) NatsRequestFailure(ctx context.Context, providerID string, subject string) { } -func (n *NoopEventMetricStore) Flush(ctx context.Context) error { - return nil -} - -func (n *NoopEventMetricStore) Shutdown(ctx context.Context) error { - return nil -} +func (n *NoopEventMetricStore) Flush(ctx context.Context) error { return nil } +func (n *NoopEventMetricStore) Shutdown(ctx context.Context) error { return nil } -func NewNoopEventMetricStore() *NoopEventMetricStore { - return &NoopEventMetricStore{} -} +func NewNoopEventMetricStore() *NoopEventMetricStore { return &NoopEventMetricStore{} } diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_event_metric_store.go index 
fd494af0a0..59e8cad83f 100644 --- a/router/pkg/metric/oltp_event_metric_store.go +++ b/router/pkg/metric/oltp_event_metric_store.go @@ -2,7 +2,6 @@ package metric import ( "context" - "fmt" otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/sdk/metric" @@ -29,7 +28,7 @@ func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider instruments, err := newEventInstruments(meter) if err != nil { - return nil, fmt.Errorf("failed to create otlp event instruments: %w", err) + return nil, err } return &otlpEventMetrics{ @@ -40,42 +39,20 @@ func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider }, nil } -func (o *otlpEventMetrics) KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.kafkaPublishMessages.Add(ctx, 1, opts...) +// Unified methods +func (o *otlpEventMetrics) Publish(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.publishMessages.Add(ctx, 1, opts...) } -func (o *otlpEventMetrics) KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.kafkaPublishFailures.Add(ctx, 1, opts...) +func (o *otlpEventMetrics) PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.publishFailures.Add(ctx, 1, opts...) } -func (o *otlpEventMetrics) KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.kafkaMessagesReceived.Add(ctx, 1, opts...) -} - -func (o *otlpEventMetrics) RedisPublish(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.redisPublishMessages.Add(ctx, 1, opts...) -} - -func (o *otlpEventMetrics) RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.redisPublishFailures.Add(ctx, 1, opts...) -} - -func (o *otlpEventMetrics) RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.redisMessagesReceived.Add(ctx, 1, opts...) 
-} - -func (o *otlpEventMetrics) NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.natsPublishMessages.Add(ctx, 1, opts...) -} - -func (o *otlpEventMetrics) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.natsPublishFailures.Add(ctx, 1, opts...) -} - -func (o *otlpEventMetrics) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.natsMessagesReceived.Add(ctx, 1, opts...) +func (o *otlpEventMetrics) MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.messagesReceived.Add(ctx, 1, opts...) } +// Keep NATS request methods func (o *otlpEventMetrics) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) { o.instruments.natsRequests.Add(ctx, 1, opts...) } diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_event_metric_store.go index 5e87561657..fae8755c94 100644 --- a/router/pkg/metric/prom_event_metric_store.go +++ b/router/pkg/metric/prom_event_metric_store.go @@ -2,7 +2,6 @@ package metric import ( "context" - "fmt" otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/sdk/metric" @@ -29,7 +28,7 @@ func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider instruments, err := newEventInstruments(meter) if err != nil { - return nil, fmt.Errorf("failed to create prometheus event instruments: %w", err) + return nil, err } return &promEventMetrics{ @@ -40,42 +39,20 @@ func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider }, nil } -func (p *promEventMetrics) KafkaPublish(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.kafkaPublishMessages.Add(ctx, 1, opts...) +// Unified methods +func (p *promEventMetrics) Publish(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.publishMessages.Add(ctx, 1, opts...) 
} -func (p *promEventMetrics) KafkaPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.kafkaPublishFailures.Add(ctx, 1, opts...) +func (p *promEventMetrics) PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.publishFailures.Add(ctx, 1, opts...) } -func (p *promEventMetrics) KafkaMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.kafkaMessagesReceived.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) RedisPublish(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.redisPublishMessages.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) RedisPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.redisPublishFailures.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) RedisMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.redisMessagesReceived.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) NatsPublish(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.natsPublishMessages.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) NatsPublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.natsPublishFailures.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) NatsMessageReceived(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.natsMessagesReceived.Add(ctx, 1, opts...) +func (p *promEventMetrics) MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.messagesReceived.Add(ctx, 1, opts...) } +// NATS request methods remain func (p *promEventMetrics) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) { p.instruments.natsRequests.Add(ctx, 1, opts...) 
} diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index f3abcfe015..7d3e63c1de 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -62,10 +62,11 @@ const ( // Event metrics attributes const ( - WgEventProviderID = attribute.Key("wg.event.provider.id") - WgKafkaTopic = attribute.Key("wg.kafka.topic") - WgNatsSubject = attribute.Key("wg.nats.subject") - WgRedisChannel = attribute.Key("wg.redis.channel") + WgEventProviderID = attribute.Key("wg.event.provider.id") + WgEventProviderType = attribute.Key("wg.event.provider.type") + WgKafkaTopic = attribute.Key("wg.kafka.topic") + WgNatsSubject = attribute.Key("wg.nats.subject") + WgRedisChannel = attribute.Key("wg.redis.channel") ) const ( From 7f8e86994bd528553ad5ed8ebf0bc16972c19196 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 21:17:44 +0530 Subject: [PATCH 10/40] fix: tests --- router-tests/telemetry/event_metrics_test.go | 441 +++++++++++++++++++ router-tests/testenv/testenv.go | 4 +- 2 files changed, 443 insertions(+), 2 deletions(-) create mode 100644 router-tests/telemetry/event_metrics_test.go diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go new file mode 100644 index 0000000000..8abc699d20 --- /dev/null +++ b/router-tests/telemetry/event_metrics_test.go @@ -0,0 +1,441 @@ +package telemetry + +import ( + "context" + "encoding/json" + "strings" + "testing" + "time" + + "github.com/hasura/go-graphql-client" + "github.com/nats-io/nats.go" + "github.com/stretchr/testify/require" + integration "github.com/wundergraph/cosmo/router-tests" + "github.com/wundergraph/cosmo/router-tests/events" + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/pkg/config" + otelattrs "github.com/wundergraph/cosmo/router/pkg/otel" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +type subscriptionArgs struct { + dataValue 
[]byte + errValue error +} + +const WaitTimeout = time.Second * 30 + +func TestOTLEventMetrics(t *testing.T) { + t.Run("kafka", func(t *testing.T) { + t.Run("publish", func(t *testing.T) { + metricReader := metric.NewManualReader() + + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, + EnableKafka: true, + MetricOptions: testenv.MetricOptions{ + EnableOTLPEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + events.EnsureTopicExists(t, xEnv, "employeeUpdated") + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`}) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyKafka(employeeID: 3, update: {name: "name test"}) { success } }`}) + + rm := metricdata.ResourceMetrics{} + require.NoError(t, metricReader.Collect(context.Background(), &rm)) + + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + require.NotNil(t, scope) + metric := integration.GetMetricByName(scope, "router.events.publish.messages") + require.NotNil(t, metric) + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + dataPoint := sum.DataPoints[0] + attrs := dataPoint.Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-kafka", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "kafka", eventProviderType.AsString()) + + kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) + require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + + require.Equal(t, int64(2), dataPoint.Value) + }) + }) + + t.Run("subscribe", func(t *testing.T) { + metricReader := metric.NewManualReader() + topic := "employeeUpdated" + + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + 
RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, + EnableKafka: true, + MetricOptions: testenv.MetricOptions{ + EnableOTLPEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + events.EnsureTopicExists(t, xEnv, topic) + + var subscriptionOne struct { + employeeUpdatedMyKafka struct { + ID float64 `graphql:"id"` + Details struct { + Forename string `graphql:"forename"` + Surname string `graphql:"surname"` + } `graphql:"details"` + } `graphql:"employeeUpdatedMyKafka(employeeID: 3)"` + } + + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) + subscriptionArgsCh := make(chan subscriptionArgs) + subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { + subscriptionArgsCh <- subscriptionArgs{dataValue: dataValue, errValue: errValue} + return nil + }) + require.NoError(t, err) + require.NotEmpty(t, subscriptionOneID) + clientRunCh := make(chan error) + go func() { clientRunCh <- client.Run() }() + xEnv.WaitForSubscriptionCount(1, WaitTimeout) + + events.ProduceKafkaMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + + testenv.AwaitChannelWithT(t, WaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { + require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdatedMyKafka":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) + + rm := metricdata.ResourceMetrics{} + require.NoError(t, metricReader.Collect(context.Background(), &rm)) + + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + require.NotNil(t, scope) + metric := integration.GetMetricByName(scope, "router.events.messages.received") + require.NotNil(t, metric) + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + dataPoint := sum.DataPoints[0] + attrs := dataPoint.Attributes + + eventProviderId, _ := 
attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-kafka", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "kafka", eventProviderType.AsString()) + + kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) + require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + + require.Equal(t, int64(1), dataPoint.Value) + }) + + require.NoError(t, client.Close()) + testenv.AwaitChannelWithT(t, WaitTimeout, clientRunCh, func(t *testing.T, err error) { require.NoError(t, err) }) + }) + }) + }) + + t.Run("nats", func(t *testing.T) { + t.Run("publish", func(t *testing.T) { + metricReader := metric.NewManualReader() + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, + EnableNats: true, + MetricOptions: testenv.MetricOptions{ + EnableOTLPEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) { + updateEmployeeMyNats(id: 12, update: $update) {success} + }`, Variables: json.RawMessage(`{"update":{"name":"n1"}}`)}) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) 
{ + updateEmployeeMyNats(id: 12, update: $update) {success} + }`, Variables: json.RawMessage(`{"update":{"name":"n2"}}`)}) + + rm := metricdata.ResourceMetrics{} + require.NoError(t, metricReader.Collect(context.Background(), &rm)) + + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + require.NotNil(t, scope) + metric := integration.GetMetricByName(scope, "router.events.publish.messages") + require.NotNil(t, metric) + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + dataPoint := sum.DataPoints[0] + attrs := dataPoint.Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-nats", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "nats", eventProviderType.AsString()) + + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) + require.True(t, strings.HasSuffix(natsSubject.AsString(), "employeeUpdatedMyNats.12")) + + require.Equal(t, int64(2), dataPoint.Value) + }) + }) + + t.Run("request", func(t *testing.T) { + metricReader := metric.NewManualReader() + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, + EnableNats: true, + MetricOptions: testenv.MetricOptions{ + EnableOTLPEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + sub, err := xEnv.NatsConnectionMyNats.Subscribe(xEnv.GetPubSubName("getEmployeeMyNats.12"), func(msg *nats.Msg) { _ = msg.Respond([]byte(`{"id": 12, "__typename": "Employee"}`)) }) + require.NoError(t, err) + require.NoError(t, xEnv.NatsConnectionMyNats.Flush()) + t.Cleanup(func() { _ = sub.Unsubscribe() }) + + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `query { employeeFromEventMyNats(employeeID: 12) { id details { forename } }}`}) + require.JSONEq(t, `{"data":{"employeeFromEventMyNats": {"id": 12, "details": {"forename": "David"}}}}`, res.Body) + + 
rm := metricdata.ResourceMetrics{} + require.NoError(t, metricReader.Collect(context.Background(), &rm)) + + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + require.NotNil(t, scope) + metric := integration.GetMetricByName(scope, "router.nats.request") + require.NotNil(t, metric) + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + dataPoint := sum.DataPoints[0] + attrs := dataPoint.Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-nats", eventProviderId.AsString()) + + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) + require.True(t, strings.HasSuffix(natsSubject.AsString(), "getEmployeeMyNats.12")) + + require.Equal(t, int64(1), dataPoint.Value) + }) + }) + + t.Run("subscribe", func(t *testing.T) { + metricReader := metric.NewManualReader() + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, + EnableNats: true, + ModifyEngineExecutionConfiguration: func(ec *config.EngineExecutionConfiguration) { ec.WebSocketClientReadTimeout = time.Second }, + MetricOptions: testenv.MetricOptions{EnableOTLPEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + var subscriptionOne struct { + employeeUpdated struct { + ID float64 `graphql:"id"` + Details struct { + Forename string `graphql:"forename"` + Surname string `graphql:"surname"` + } `graphql:"details"` + } `graphql:"employeeUpdated(employeeID: 3)"` + } + + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) + + subscriptionArgsCh := make(chan subscriptionArgs) + subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { + subscriptionArgsCh <- subscriptionArgs{ + dataValue: dataValue, + errValue: errValue, + } + return nil + }) + require.NoError(t, err) + require.NotEmpty(t, subscriptionOneID) + + clientRunErrCh := make(chan 
error) + go func() { + clientRunErrCh <- client.Run() + }() + + xEnv.WaitForSubscriptionCount(1, WaitTimeout) + + // Send a mutation to trigger the first subscription + resOne := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Query: `mutation { updateAvailability(employeeID: 3, isAvailable: true) { id } }`, + }) + require.JSONEq(t, `{"data":{"updateAvailability":{"id":3}}}`, resOne.Body) + + // Trigger the second subscription via NATS + err = xEnv.NatsConnectionDefault.Publish(xEnv.GetPubSubName("employeeUpdated.3"), []byte(`{"id":3,"__typename": "Employee"}`)) + require.NoError(t, err) + + err = xEnv.NatsConnectionDefault.Flush() + require.NoError(t, err) + + testenv.AwaitChannelWithT(t, WaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { + require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdated":{"id":3,"details":{"forename":"Stefan","surname":"Avram"}}}`, string(args.dataValue)) + + rm := metricdata.ResourceMetrics{} + require.NoError(t, metricReader.Collect(context.Background(), &rm)) + + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + require.NotNil(t, scope) + metric := integration.GetMetricByName(scope, "router.events.messages.received") + require.NotNil(t, metric) + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + dataPoint := sum.DataPoints[0] + attrs := dataPoint.Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "default", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "nats", eventProviderType.AsString()) + + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) + require.True(t, strings.HasSuffix(natsSubject.AsString(), "employeeUpdated.3")) + + require.Equal(t, int64(2), dataPoint.Value) + }) + + require.NoError(t, client.Close()) + testenv.AwaitChannelWithT(t, WaitTimeout, clientRunErrCh, func(t *testing.T, err error) { + 
require.NoError(t, err) + }, "unable to close client before timeout") + + xEnv.WaitForSubscriptionCount(0, WaitTimeout) + xEnv.WaitForConnectionCount(0, WaitTimeout) + }) + }) + }) + + t.Run("redis", func(t *testing.T) { + t.Run("publish", func(t *testing.T) { + metricReader := metric.NewManualReader() + + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, + EnableRedis: true, + MetricOptions: testenv.MetricOptions{ + EnableOTLPEventMetrics: true, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r1"}) { success } }`}) + xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r2"}) { success } }`}) + + rm := metricdata.ResourceMetrics{} + require.NoError(t, metricReader.Collect(context.Background(), &rm)) + + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + require.NotNil(t, scope) + metric := integration.GetMetricByName(scope, "router.events.publish.messages") + require.NotNil(t, metric) + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + dataPoint := sum.DataPoints[0] + attrs := dataPoint.Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-redis", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "redis", eventProviderType.AsString()) + + redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) + require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) + + require.Equal(t, int64(2), dataPoint.Value) + }) + }) + + t.Run("subscribe", func(t *testing.T) { + metricReader := metric.NewManualReader() + + testenv.Run(t, &testenv.Config{ + MetricReader: metricReader, + RouterConfigJSONTemplate: 
testenv.ConfigWithEdfsRedisJSONTemplate, + EnableRedis: true, + MetricOptions: testenv.MetricOptions{EnableOTLPEventMetrics: true}, + }, func(t *testing.T, xEnv *testenv.Environment) { + topic := "employeeUpdatedMyRedis" + + var subscriptionOne struct { + employeeUpdates struct { + ID float64 `graphql:"id"` + Details struct { + Forename string `graphql:"forename"` + Surname string `graphql:"surname"` + } `graphql:"details"` + } `graphql:"employeeUpdates"` + } + + client := graphql.NewSubscriptionClient(xEnv.GraphQLWebSocketSubscriptionURL()) + + subscriptionArgsCh := make(chan subscriptionArgs) + subscriptionOneID, err := client.Subscribe(&subscriptionOne, nil, func(dataValue []byte, errValue error) error { + subscriptionArgsCh <- subscriptionArgs{dataValue, errValue} + return nil + }) + require.NoError(t, err) + require.NotEmpty(t, subscriptionOneID) + + runCh := make(chan error) + go func() { runCh <- client.Run() }() + + xEnv.WaitForSubscriptionCount(1, WaitTimeout) + events.ProduceRedisMessage(t, xEnv, topic, `{"__typename":"Employee","id": 1,"update":{"name":"foo"}}`) + + testenv.AwaitChannelWithT(t, WaitTimeout, subscriptionArgsCh, func(t *testing.T, args subscriptionArgs) { + require.NoError(t, args.errValue) + require.JSONEq(t, `{"employeeUpdates":{"id":1,"details":{"forename":"Jens","surname":"Neuse"}}}`, string(args.dataValue)) + + rm := metricdata.ResourceMetrics{} + require.NoError(t, metricReader.Collect(context.Background(), &rm)) + + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + require.NotNil(t, scope) + metric := integration.GetMetricByName(scope, "router.events.messages.received") + require.NotNil(t, metric) + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + dataPoint := sum.DataPoints[0] + attrs := dataPoint.Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-redis", eventProviderId.AsString()) + + eventProviderType, _ := 
attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "redis", eventProviderType.AsString()) + + redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) + require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) + + require.Equal(t, int64(1), dataPoint.Value) + }) + + require.NoError(t, client.Close()) + testenv.AwaitChannelWithT(t, WaitTimeout, runCh, func(t *testing.T, err error) { require.NoError(t, err) }) + }) + }) + }) +} diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 7aaedbbee3..835c48cd53 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -408,7 +408,7 @@ func CreateTestSupervisorEnv(t testing.TB, cfg *Config) (*Environment, error) { ) if cfg.EnableKafka { - cfg.KafkaSeeds = []string{"localhost:9095"} + cfg.KafkaSeeds = []string{"localhost:9092"} client, err := kgo.NewClient( kgo.SeedBrokers(cfg.KafkaSeeds...), @@ -835,7 +835,7 @@ func CreateTestEnv(t testing.TB, cfg *Config) (*Environment, error) { ) if cfg.EnableKafka { - cfg.KafkaSeeds = []string{"localhost:9095"} + cfg.KafkaSeeds = []string{"localhost:9092"} client, err := kgo.NewClient( kgo.SeedBrokers(cfg.KafkaSeeds...), kgo.FetchMaxWait(time.Millisecond*100), From 3cd12349c444a6dbe13649fba7288c89233987aa Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 21:30:55 +0530 Subject: [PATCH 11/40] fix: review comments --- .../employees/subgraph/schema.resolvers.go | 5 - demo/pkg/subgraphs/subgraphs.go | 9 +- router-tests/events/nats_events_test.go | 5 + router-tests/prometheus_event_metrics_test.go | 2 +- router-tests/telemetry/event_metrics_test.go | 179 ++++++++++-------- router/pkg/pubsub/pubsub.go | 3 +- 6 files changed, 118 insertions(+), 85 deletions(-) diff --git a/demo/pkg/subgraphs/employees/subgraph/schema.resolvers.go b/demo/pkg/subgraphs/employees/subgraph/schema.resolvers.go index 497fc42fb8..4e78eb6c9f 100644 --- 
a/demo/pkg/subgraphs/employees/subgraph/schema.resolvers.go +++ b/demo/pkg/subgraphs/employees/subgraph/schema.resolvers.go @@ -255,16 +255,11 @@ func (r *subscriptionResolver) CountEmp2(ctx context.Context, max int, intervalM defer close(ch) for i := 0; i <= max; i++ { - fmt.Println("EE") select { case <-ctx.Done(): return case ch <- i: time.Sleep(time.Duration(intervalMilliseconds) * time.Millisecond) - fmt.Println("Dobne") - if i == 2 { - //panic("panicing") - } } } }() diff --git a/demo/pkg/subgraphs/subgraphs.go b/demo/pkg/subgraphs/subgraphs.go index 7a833b8099..90bfcf8dd3 100644 --- a/demo/pkg/subgraphs/subgraphs.go +++ b/demo/pkg/subgraphs/subgraphs.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" "io" "log" @@ -211,13 +212,17 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID := map[string]natsPubsub.Adapter{} - defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{}) + defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) if err != nil { return nil, fmt.Errorf("failed to create default nats adapter: %w", err) } natsPubSubByProviderID["default"] = defaultAdapter - myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{}) + myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) if err != nil { return nil, fmt.Errorf("failed to create my-nats adapter: %w", err) } diff --git a/router-tests/events/nats_events_test.go b/router-tests/events/nats_events_test.go index 
a679b9fe7c..d3235643b4 100644 --- a/router-tests/events/nats_events_test.go +++ b/router-tests/events/nats_events_test.go @@ -57,6 +57,11 @@ func assertNatsMultipartValueEventually(t *testing.T, reader *bufio.Reader, expe }, NatsWaitTimeout, time.Millisecond*100) } +type natsSubscriptionArgs struct { + dataValue []byte + errValue error +} + func TestNatsEvents(t *testing.T) { t.Parallel() diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_event_metrics_test.go index 07b4b7ac76..3f89f5f1ba 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ b/router-tests/prometheus_event_metrics_test.go @@ -23,7 +23,7 @@ type subscriptionArgs struct { const WaitTimeout = time.Second * 30 -func TestEventMetrics(t *testing.T) { +func TestFlakyEventMetrics(t *testing.T) { t.Run("kafka", func(t *testing.T) { t.Run("publish", func(t *testing.T) { metricReader := metric.NewManualReader() diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 8abc699d20..840287f926 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -26,7 +26,7 @@ type subscriptionArgs struct { const WaitTimeout = time.Second * 30 -func TestOTLEventMetrics(t *testing.T) { +func TestFlakyEventMetrics(t *testing.T) { t.Run("kafka", func(t *testing.T) { t.Run("publish", func(t *testing.T) { metricReader := metric.NewManualReader() @@ -54,19 +54,23 @@ func TestOTLEventMetrics(t *testing.T) { sum, ok := metric.Data.(metricdata.Sum[int64]) require.True(t, ok) - dataPoint := sum.DataPoints[0] - attrs := dataPoint.Attributes + var matched *metricdata.DataPoint[int64] + for i := range sum.DataPoints { + attrs := sum.DataPoints[i].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-kafka", eventProviderId.AsString()) - - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "kafka", 
eventProviderType.AsString()) - - kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) - require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) - require.Equal(t, int64(2), dataPoint.Value) + if eventProviderId.AsString() == "my-kafka" && + eventProviderType.AsString() == "kafka" && + strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated") { + matched = &sum.DataPoints[i] + break + } + } + require.NotNil(t, matched) + require.Equal(t, int64(2), matched.Value) }) }) @@ -123,19 +127,23 @@ func TestOTLEventMetrics(t *testing.T) { sum, ok := metric.Data.(metricdata.Sum[int64]) require.True(t, ok) - dataPoint := sum.DataPoints[0] - attrs := dataPoint.Attributes - - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-kafka", eventProviderId.AsString()) - - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "kafka", eventProviderType.AsString()) + var matched *metricdata.DataPoint[int64] + for i := range sum.DataPoints { + attrs := sum.DataPoints[i].Attributes - kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) - require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) - require.Equal(t, int64(1), dataPoint.Value) + if eventProviderId.AsString() == "my-kafka" && + eventProviderType.AsString() == "kafka" && + strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated") { + matched = &sum.DataPoints[i] + break + } + } + require.NotNil(t, matched) + require.Equal(t, int64(1), matched.Value) }) require.NoError(t, client.Close()) @@ -173,19 +181,23 @@ func TestOTLEventMetrics(t *testing.T) { 
sum, ok := metric.Data.(metricdata.Sum[int64]) require.True(t, ok) - dataPoint := sum.DataPoints[0] - attrs := dataPoint.Attributes - - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-nats", eventProviderId.AsString()) + var matched *metricdata.DataPoint[int64] + for i := range sum.DataPoints { + attrs := sum.DataPoints[i].Attributes - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "nats", eventProviderType.AsString()) - - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.True(t, strings.HasSuffix(natsSubject.AsString(), "employeeUpdatedMyNats.12")) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.Equal(t, int64(2), dataPoint.Value) + if eventProviderId.AsString() == "my-nats" && + eventProviderType.AsString() == "nats" && + strings.HasSuffix(natsSubject.AsString(), "employeeUpdatedMyNats.12") { + matched = &sum.DataPoints[i] + break + } + } + require.NotNil(t, matched) + require.Equal(t, int64(2), matched.Value) }) }) @@ -218,16 +230,21 @@ func TestOTLEventMetrics(t *testing.T) { sum, ok := metric.Data.(metricdata.Sum[int64]) require.True(t, ok) - dataPoint := sum.DataPoints[0] - attrs := dataPoint.Attributes + var matched *metricdata.DataPoint[int64] + for i := range sum.DataPoints { + attrs := sum.DataPoints[i].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-nats", eventProviderId.AsString()) - - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.True(t, strings.HasSuffix(natsSubject.AsString(), "getEmployeeMyNats.12")) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.Equal(t, int64(1), dataPoint.Value) + if eventProviderId.AsString() == "my-nats" && + 
strings.HasSuffix(natsSubject.AsString(), "getEmployeeMyNats.12") { + matched = &sum.DataPoints[i] + break + } + } + require.NotNil(t, matched) + require.Equal(t, int64(1), matched.Value) }) }) @@ -298,19 +315,23 @@ func TestOTLEventMetrics(t *testing.T) { sum, ok := metric.Data.(metricdata.Sum[int64]) require.True(t, ok) - dataPoint := sum.DataPoints[0] - attrs := dataPoint.Attributes + var matched *metricdata.DataPoint[int64] + for i := range sum.DataPoints { + attrs := sum.DataPoints[i].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "default", eventProviderId.AsString()) - - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "nats", eventProviderType.AsString()) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.True(t, strings.HasSuffix(natsSubject.AsString(), "employeeUpdated.3")) - - require.Equal(t, int64(2), dataPoint.Value) + if eventProviderId.AsString() == "default" && + eventProviderType.AsString() == "nats" && + strings.HasSuffix(natsSubject.AsString(), "employeeUpdated.3") { + matched = &sum.DataPoints[i] + break + } + } + require.NotNil(t, matched) + require.Equal(t, int64(2), matched.Value) }) require.NoError(t, client.Close()) @@ -350,19 +371,23 @@ func TestOTLEventMetrics(t *testing.T) { sum, ok := metric.Data.(metricdata.Sum[int64]) require.True(t, ok) - dataPoint := sum.DataPoints[0] - attrs := dataPoint.Attributes - - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-redis", eventProviderId.AsString()) - - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "redis", eventProviderType.AsString()) + var matched *metricdata.DataPoint[int64] + for i := range sum.DataPoints { + attrs := 
sum.DataPoints[i].Attributes - redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) - require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) - require.Equal(t, int64(2), dataPoint.Value) + if eventProviderId.AsString() == "my-redis" && + eventProviderType.AsString() == "redis" && + strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis") { + matched = &sum.DataPoints[i] + break + } + } + require.NotNil(t, matched) + require.Equal(t, int64(2), matched.Value) }) }) @@ -418,19 +443,23 @@ func TestOTLEventMetrics(t *testing.T) { sum, ok := metric.Data.(metricdata.Sum[int64]) require.True(t, ok) - dataPoint := sum.DataPoints[0] - attrs := dataPoint.Attributes + var matched *metricdata.DataPoint[int64] + for i := range sum.DataPoints { + attrs := sum.DataPoints[i].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-redis", eventProviderId.AsString()) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "redis", eventProviderType.AsString()) - - redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) - require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) - - require.Equal(t, int64(1), dataPoint.Value) + if eventProviderId.AsString() == "my-redis" && + eventProviderType.AsString() == "redis" && + strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis") { + matched = &sum.DataPoints[i] + break + } + } + require.NotNil(t, matched) + require.Equal(t, int64(1), matched.Value) }) require.NoError(t, client.Close()) diff --git 
a/router/pkg/pubsub/pubsub.go b/router/pkg/pubsub/pubsub.go index 9fe3be2fbc..6a1f5d3c26 100644 --- a/router/pkg/pubsub/pubsub.go +++ b/router/pkg/pubsub/pubsub.go @@ -60,10 +60,9 @@ func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfi kafkaBuilder := kafka.NewProviderBuilder(ctx, logger, hostName, routerListenAddr) kafkaDsConfsWithEvents := []dsConfAndEvents[*nodev1.KafkaEventConfiguration]{} for _, dsConf := range dsConfs { - getKafka := dsConf.Configuration.GetCustomEvents().GetKafka() kafkaDsConfsWithEvents = append(kafkaDsConfsWithEvents, dsConfAndEvents[*nodev1.KafkaEventConfiguration]{ dsConf: &dsConf, - events: getKafka, + events: dsConf.Configuration.GetCustomEvents().GetKafka(), }) } kafkaPubSubProviders, kafkaOuts, err := build(ctx, kafkaBuilder, config.Providers.Kafka, kafkaDsConfsWithEvents, store) From bbd5f062c199ed4cc683f53d91a2a0a9601ca437 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 21:43:41 +0530 Subject: [PATCH 12/40] fix: tests --- router-tests/events/event_helpers.go | 2 +- router-tests/prometheus_event_metrics_test.go | 22 ++++++++ router-tests/telemetry/event_metrics_test.go | 54 ++++++++++++------- router/pkg/pubsub/kafka/adapter.go | 3 +- .../pkg/pubsub/kafka/provider_builder_test.go | 5 +- router/pkg/pubsub/nats/adapter.go | 4 +- .../pkg/pubsub/nats/provider_builder_test.go | 5 +- router/pkg/pubsub/pubsub_test.go | 9 ++-- router/pkg/pubsub/redis/adapter.go | 5 +- .../pkg/pubsub/redis/provider_builder_test.go | 13 +++-- 10 files changed, 86 insertions(+), 36 deletions(-) diff --git a/router-tests/events/event_helpers.go b/router-tests/events/event_helpers.go index a1bb8386a2..30e99fddba 100644 --- a/router-tests/events/event_helpers.go +++ b/router-tests/events/event_helpers.go @@ -22,7 +22,7 @@ func ProduceKafkaMessage(t *testing.T, xEnv *testenv.Environment, topicName stri xEnv.KafkaClient.Produce(ctx, &kgo.Record{ Topic: xEnv.GetPubSubName(topicName), Value: []byte(message), - }, 
func(record *kgo.Record, err error) { + }, func(_ *kgo.Record, err error) { pErrCh <- err }) diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_event_metrics_test.go index 3f89f5f1ba..e166ec80d0 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ b/router-tests/prometheus_event_metrics_test.go @@ -24,8 +24,14 @@ type subscriptionArgs struct { const WaitTimeout = time.Second * 30 func TestFlakyEventMetrics(t *testing.T) { + t.Parallel() + t.Run("kafka", func(t *testing.T) { + t.Parallel() + t.Run("publish", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() @@ -63,6 +69,8 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("subscribe", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() topic := "employeeUpdated" @@ -132,7 +140,11 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("nats", func(t *testing.T) { + t.Parallel() + t.Run("publish", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() testenv.Run(t, &testenv.Config{ @@ -172,6 +184,8 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("request", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() testenv.Run(t, &testenv.Config{ @@ -209,6 +223,8 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("subscribe", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() testenv.Run(t, &testenv.Config{ @@ -296,7 +312,11 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("redis", func(t *testing.T) { + t.Parallel() + t.Run("publish", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() @@ -333,6 +353,8 @@ func TestFlakyEventMetrics(t *testing.T) { }) 
t.Run("subscribe", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 840287f926..92ddbf925e 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -27,8 +27,14 @@ type subscriptionArgs struct { const WaitTimeout = time.Second * 30 func TestFlakyEventMetrics(t *testing.T) { + t.Parallel() + t.Run("kafka", func(t *testing.T) { + t.Parallel() + t.Run("publish", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() testenv.Run(t, &testenv.Config{ @@ -48,33 +54,29 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metric := integration.GetMetricByName(scope, "router.events.publish.messages") - require.NotNil(t, metric) + metricEntry := integration.GetMetricByName(scope, "router.events.publish.messages") + require.NotNil(t, metricEntry) - sum, ok := metric.Data.(metricdata.Sum[int64]) - require.True(t, ok) + sum, _ := metricEntry.Data.(metricdata.Sum[int64]) + require.Len(t, sum.DataPoints, 1) - var matched *metricdata.DataPoint[int64] - for i := range sum.DataPoints { - attrs := sum.DataPoints[i].Attributes + attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) - if eventProviderId.AsString() == "my-kafka" && - eventProviderType.AsString() == "kafka" && - strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated") { - matched = &sum.DataPoints[i] - break 
- } - } - require.NotNil(t, matched) - require.Equal(t, int64(2), matched.Value) + require.Equal(t, "my-kafka", eventProviderId.AsString()) + require.Equal(t, "kafka", eventProviderType.AsString()) + require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + + require.Equal(t, int64(2), sum.DataPoints[0].Value) }) }) t.Run("subscribe", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() topic := "employeeUpdated" @@ -153,7 +155,11 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("nats", func(t *testing.T) { + t.Parallel() + t.Run("publish", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() testenv.Run(t, &testenv.Config{ MetricReader: metricReader, @@ -202,6 +208,8 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("request", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() testenv.Run(t, &testenv.Config{ MetricReader: metricReader, @@ -249,6 +257,8 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("subscribe", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() testenv.Run(t, &testenv.Config{ MetricReader: metricReader, @@ -346,7 +356,11 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("redis", func(t *testing.T) { + t.Parallel() + t.Run("publish", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() testenv.Run(t, &testenv.Config{ @@ -392,6 +406,8 @@ func TestFlakyEventMetrics(t *testing.T) { }) t.Run("subscribe", func(t *testing.T) { + t.Parallel() + metricReader := metric.NewManualReader() testenv.Run(t, &testenv.Config{ diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 2f7fca6690..fdea2c200b 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -184,10 +184,9 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu log.Error("publish error", zap.Error(pErr)) 
p.eventMetricStore.KafkaPublishFailure(ctx, event.ProviderID, event.Topic) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) - } else { - p.eventMetricStore.KafkaPublish(ctx, event.ProviderID, event.Topic) } + p.eventMetricStore.KafkaPublish(ctx, event.ProviderID, event.Topic) return nil } diff --git a/router/pkg/pubsub/kafka/provider_builder_test.go b/router/pkg/pubsub/kafka/provider_builder_test.go index 5d72d28d91..99d9eb937a 100644 --- a/router/pkg/pubsub/kafka/provider_builder_test.go +++ b/router/pkg/pubsub/kafka/provider_builder_test.go @@ -2,6 +2,7 @@ package kafka import ( "context" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" "github.com/stretchr/testify/assert" @@ -93,7 +94,9 @@ func TestPubSubProviderBuilderFactory(t *testing.T) { builder := NewProviderBuilder(ctx, logger, "host", "addr") require.NotNil(t, builder) - provider, err := builder.BuildProvider(cfg) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) require.NoError(t, err) // Check the returned provider diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index a7e8e2df22..49fb916c0e 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -245,10 +245,10 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv log.Error("request error", zap.Error(err)) p.eventMetricStore.NatsRequestFailure(ctx, event.ProviderID, event.Subject) return datasource.NewError(fmt.Sprintf("error requesting from NATS subject %s", event.Subject), err) - } else { - p.eventMetricStore.NatsRequest(ctx, event.ProviderID, event.Subject) } + p.eventMetricStore.NatsRequest(ctx, event.ProviderID, event.Subject) + // We don't collect metrics on err here as it's an error related to the writer _, err = w.Write(msg.Data) if err != nil { diff --git 
a/router/pkg/pubsub/nats/provider_builder_test.go b/router/pkg/pubsub/nats/provider_builder_test.go index d2646b1941..683a205937 100644 --- a/router/pkg/pubsub/nats/provider_builder_test.go +++ b/router/pkg/pubsub/nats/provider_builder_test.go @@ -2,6 +2,7 @@ package nats import ( "context" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" "github.com/stretchr/testify/assert" @@ -82,7 +83,9 @@ func TestPubSubProviderBuilderFactory(t *testing.T) { builder := NewProviderBuilder(ctx, logger, "host", "addr") require.NotNil(t, builder) - provider, err := builder.BuildProvider(cfg) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) require.NoError(t, err) // Check the returned provider diff --git a/router/pkg/pubsub/pubsub_test.go b/router/pkg/pubsub/pubsub_test.go index 65d90cf385..568e941fd3 100644 --- a/router/pkg/pubsub/pubsub_test.go +++ b/router/pkg/pubsub/pubsub_test.go @@ -3,6 +3,7 @@ package pubsub import ( "context" "errors" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" "github.com/stretchr/testify/assert" @@ -65,7 +66,7 @@ func TestBuild_OK(t *testing.T) { // ctx, kafkaBuilder, config.Providers.Kafka, kafkaDsConfsWithEvents // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) // Assertions assert.NoError(t, err) @@ -121,7 +122,7 @@ func TestBuild_ProviderError(t *testing.T) { mockBuilder.On("BuildProvider", natsEventSources[0]).Return(nil, errors.New("provider error")) // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) // Assertions assert.Error(t, err) @@ -176,7 +177,7 @@ func 
TestBuild_ShouldGetAnErrorIfProviderIsNotDefined(t *testing.T) { mockBuilder.On("TypeID").Return("nats") // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) // Assertions assert.Error(t, err) @@ -239,7 +240,7 @@ func TestBuild_ShouldNotInitializeProviderIfNotUsed(t *testing.T) { mockBuilder.On("BuildProvider", natsEventSources[1]).Return(mockPubSubUsedProvider, nil) // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) // Assertions assert.NoError(t, err) diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index 939b5fa109..fad0388482 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -152,9 +152,8 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu log.Error("publish error", zap.Error(intCmd.Err())) p.eventMetricStore.RedisPublishFailure(ctx, event.ProviderID, event.Channel) return datasource.NewError(fmt.Sprintf("error publishing to Redis PubSub channel %s", event.Channel), intCmd.Err()) - } else { - p.eventMetricStore.RedisPublish(ctx, event.ProviderID, event.Channel) } - + + p.eventMetricStore.RedisPublish(ctx, event.ProviderID, event.Channel) return nil } diff --git a/router/pkg/pubsub/redis/provider_builder_test.go b/router/pkg/pubsub/redis/provider_builder_test.go index 2cbaad10b9..351b8a16d8 100644 --- a/router/pkg/pubsub/redis/provider_builder_test.go +++ b/router/pkg/pubsub/redis/provider_builder_test.go @@ -2,6 +2,7 @@ package redis import ( "context" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" "github.com/stretchr/testify/assert" @@ -21,7 +22,9 @@ func TestBuildRedisOptions(t 
*testing.T) { logger := zaptest.NewLogger(t) ctx := context.Background() builder := NewProviderBuilder(ctx, logger, "host", "addr") - provider, err := builder.BuildProvider(cfg) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) require.NoError(t, err) require.NotNil(t, provider) @@ -39,7 +42,9 @@ func TestBuildRedisOptions(t *testing.T) { logger := zaptest.NewLogger(t) ctx := context.Background() builder := NewProviderBuilder(ctx, logger, "host", "addr") - provider, err := builder.BuildProvider(cfg) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) require.NoError(t, err) require.NotNil(t, provider) @@ -63,7 +68,9 @@ func TestPubSubProviderBuilderFactory(t *testing.T) { builder := NewProviderBuilder(ctx, logger, "host", "addr") require.NotNil(t, builder) - provider, err := builder.BuildProvider(cfg) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ + EventMetricStore: rmetric.NewNoopEventMetricStore(), + }) require.NoError(t, err) // Check the returned provider From 0c1c9c000d33d55ec7130a83d2c294f6b44b2cce Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 21:50:21 +0530 Subject: [PATCH 13/40] fix: refactoring --- router-tests/telemetry/event_metrics_test.go | 214 ++++++++----------- 1 file changed, 90 insertions(+), 124 deletions(-) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 92ddbf925e..f86c2c503c 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -123,29 +123,24 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metric := integration.GetMetricByName(scope, "router.events.messages.received") - require.NotNil(t, metric) - - sum, ok := 
metric.Data.(metricdata.Sum[int64]) - require.True(t, ok) - - var matched *metricdata.DataPoint[int64] - for i := range sum.DataPoints { - attrs := sum.DataPoints[i].Attributes - - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) - - if eventProviderId.AsString() == "my-kafka" && - eventProviderType.AsString() == "kafka" && - strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated") { - matched = &sum.DataPoints[i] - break - } - } - require.NotNil(t, matched) - require.Equal(t, int64(1), matched.Value) + metricEntry := integration.GetMetricByName(scope, "router.events.messages.received") + require.NotNil(t, metricEntry) + + sum, _ := metricEntry.Data.(metricdata.Sum[int64]) + + require.Len(t, sum.DataPoints, 1) + attrs := sum.DataPoints[0].Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-kafka", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "kafka", eventProviderType.AsString()) + + kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) + require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + + require.Equal(t, int64(1), sum.DataPoints[0].Value) }) require.NoError(t, client.Close()) @@ -181,29 +176,23 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metric := integration.GetMetricByName(scope, "router.events.publish.messages") - require.NotNil(t, metric) + metricEntry := integration.GetMetricByName(scope, "router.events.publish.messages") + require.NotNil(t, metricEntry) - sum, ok := metric.Data.(metricdata.Sum[int64]) - require.True(t, ok) + sum, _ := metricEntry.Data.(metricdata.Sum[int64]) + require.Len(t, sum.DataPoints, 1) + attrs := sum.DataPoints[0].Attributes - var matched 
*metricdata.DataPoint[int64] - for i := range sum.DataPoints { - attrs := sum.DataPoints[i].Attributes + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-nats", eventProviderId.AsString()) - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "nats", eventProviderType.AsString()) - if eventProviderId.AsString() == "my-nats" && - eventProviderType.AsString() == "nats" && - strings.HasSuffix(natsSubject.AsString(), "employeeUpdatedMyNats.12") { - matched = &sum.DataPoints[i] - break - } - } - require.NotNil(t, matched) - require.Equal(t, int64(2), matched.Value) + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) + require.True(t, strings.HasSuffix(natsSubject.AsString(), "employeeUpdatedMyNats.12")) + + require.Equal(t, int64(2), sum.DataPoints[0].Value) }) }) @@ -232,27 +221,20 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metric := integration.GetMetricByName(scope, "router.nats.request") - require.NotNil(t, metric) + metricEntry := integration.GetMetricByName(scope, "router.nats.request") + require.NotNil(t, metricEntry) - sum, ok := metric.Data.(metricdata.Sum[int64]) - require.True(t, ok) + sum, _ := metricEntry.Data.(metricdata.Sum[int64]) + require.Len(t, sum.DataPoints, 1) + attrs := sum.DataPoints[0].Attributes - var matched *metricdata.DataPoint[int64] - for i := range sum.DataPoints { - attrs := sum.DataPoints[i].Attributes + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-nats", eventProviderId.AsString()) - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) + natsSubject, _ := 
attrs.Value(otelattrs.WgNatsSubject) + require.True(t, strings.HasSuffix(natsSubject.AsString(), "getEmployeeMyNats.12")) - if eventProviderId.AsString() == "my-nats" && - strings.HasSuffix(natsSubject.AsString(), "getEmployeeMyNats.12") { - matched = &sum.DataPoints[i] - break - } - } - require.NotNil(t, matched) - require.Equal(t, int64(1), matched.Value) + require.Equal(t, int64(1), sum.DataPoints[0].Value) }) }) @@ -319,29 +301,24 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metric := integration.GetMetricByName(scope, "router.events.messages.received") - require.NotNil(t, metric) - - sum, ok := metric.Data.(metricdata.Sum[int64]) - require.True(t, ok) - - var matched *metricdata.DataPoint[int64] - for i := range sum.DataPoints { - attrs := sum.DataPoints[i].Attributes - - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - - if eventProviderId.AsString() == "default" && - eventProviderType.AsString() == "nats" && - strings.HasSuffix(natsSubject.AsString(), "employeeUpdated.3") { - matched = &sum.DataPoints[i] - break - } - } - require.NotNil(t, matched) - require.Equal(t, int64(2), matched.Value) + metricEntry := integration.GetMetricByName(scope, "router.events.messages.received") + require.NotNil(t, metricEntry) + + sum, _ := metricEntry.Data.(metricdata.Sum[int64]) + + require.Len(t, sum.DataPoints, 1) + attrs := sum.DataPoints[0].Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "default", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "nats", eventProviderType.AsString()) + + natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) + require.True(t, strings.HasSuffix(natsSubject.AsString(), 
"employeeUpdated.3")) + + require.Equal(t, int64(2), sum.DataPoints[0].Value) }) require.NoError(t, client.Close()) @@ -379,29 +356,24 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metric := integration.GetMetricByName(scope, "router.events.publish.messages") - require.NotNil(t, metric) + metricEntry := integration.GetMetricByName(scope, "router.events.publish.messages") + require.NotNil(t, metricEntry) - sum, ok := metric.Data.(metricdata.Sum[int64]) - require.True(t, ok) + sum, _ := metricEntry.Data.(metricdata.Sum[int64]) - var matched *metricdata.DataPoint[int64] - for i := range sum.DataPoints { - attrs := sum.DataPoints[i].Attributes + require.Len(t, sum.DataPoints, 1) + attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-redis", eventProviderId.AsString()) - if eventProviderId.AsString() == "my-redis" && - eventProviderType.AsString() == "redis" && - strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis") { - matched = &sum.DataPoints[i] - break - } - } - require.NotNil(t, matched) - require.Equal(t, int64(2), matched.Value) + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "redis", eventProviderType.AsString()) + + redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) + require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) + + require.Equal(t, int64(2), sum.DataPoints[0].Value) }) }) @@ -453,29 +425,23 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metric := integration.GetMetricByName(scope, 
"router.events.messages.received") - require.NotNil(t, metric) - - sum, ok := metric.Data.(metricdata.Sum[int64]) - require.True(t, ok) - - var matched *metricdata.DataPoint[int64] - for i := range sum.DataPoints { - attrs := sum.DataPoints[i].Attributes - - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) - - if eventProviderId.AsString() == "my-redis" && - eventProviderType.AsString() == "redis" && - strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis") { - matched = &sum.DataPoints[i] - break - } - } - require.NotNil(t, matched) - require.Equal(t, int64(1), matched.Value) + metricEntry := integration.GetMetricByName(scope, "router.events.messages.received") + require.NotNil(t, metricEntry) + + sum, _ := metricEntry.Data.(metricdata.Sum[int64]) + + require.Len(t, sum.DataPoints, 1) + attrs := sum.DataPoints[0].Attributes + + eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) + require.Equal(t, "my-redis", eventProviderId.AsString()) + + eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) + require.Equal(t, "redis", eventProviderType.AsString()) + + redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) + require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) + require.Equal(t, int64(1), sum.DataPoints[0].Value) }) require.NoError(t, client.Close()) From b3690547655d2edcb93b43da854dde61386d9c3a Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 14 Aug 2025 22:56:43 +0530 Subject: [PATCH 14/40] fix: review comments --- router-tests/events/event_helpers.go | 6 +++- router/pkg/config/config.go | 2 +- .../pkg/config/testdata/config_defaults.json | 2 ++ router/pkg/config/testdata/config_full.json | 2 ++ router/pkg/metric/event_metric_store.go | 36 +++++++++++++------ router/pkg/pubsub/kafka/adapter.go | 9 ++++- 
.../pkg/pubsub/kafka/provider_builder_test.go | 5 +-- .../pkg/pubsub/nats/provider_builder_test.go | 5 +-- router/pkg/pubsub/pubsub.go | 4 +++ .../pkg/pubsub/redis/provider_builder_test.go | 13 ++----- 10 files changed, 52 insertions(+), 32 deletions(-) diff --git a/router-tests/events/event_helpers.go b/router-tests/events/event_helpers.go index 30e99fddba..48d97e90c4 100644 --- a/router-tests/events/event_helpers.go +++ b/router-tests/events/event_helpers.go @@ -38,7 +38,7 @@ func EnsureTopicExists(t *testing.T, xEnv *testenv.Environment, topics ...string // Delete topic for idempotency deleteCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - prefixedTopics := make([]string, len(topics)) + prefixedTopics := make([]string, 0, len(topics)) for _, topic := range topics { prefixedTopics = append(prefixedTopics, xEnv.GetPubSubName(topic)) } @@ -72,6 +72,10 @@ func ProduceRedisMessage(t *testing.T, xEnv *testenv.Environment, topicName stri }) } + defer func() { + _ = redisConn.Close() + }() + intCmd := redisConn.Publish(ctx, xEnv.GetPubSubName(topicName), message) require.NoError(t, intCmd.Err()) } diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index 7669abbeb7..43b71262ac 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -100,7 +100,7 @@ type Prometheus struct { ListenAddr string `yaml:"listen_addr" envDefault:"127.0.0.1:8088" env:"PROMETHEUS_LISTEN_ADDR"` GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"PROMETHEUS_GRAPHQL_CACHE"` ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"PROMETHEUS_CONNECTION_STATS"` - EventMetrics bool `yaml:"event_metrics" envDefault:"false" env:"METRICS_OTLP_EVENT_METRICS"` + EventMetrics bool `yaml:"event_metrics" envDefault:"false" env:"PROMETHEUS_EVENT_METRICS"` EngineStats EngineStats `yaml:"engine_stats" envPrefix:"PROMETHEUS_"` CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" 
env:"PROMETHEUS_CIRCUIT_BREAKER"` ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"PROMETHEUS_EXCLUDE_METRICS"` diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 25919658e4..45febd1140 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -40,6 +40,7 @@ "Subscriptions": false }, "CircuitBreaker": false, + "EventMetrics": false, "ExcludeMetrics": null, "ExcludeMetricLabels": null, "Exporters": null @@ -50,6 +51,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": false, "ConnectionStats": false, + "EventMetrics": false, "EngineStats": { "Subscriptions": false }, diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 583709b097..3292f32bff 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -61,6 +61,7 @@ "Subscriptions": true }, "CircuitBreaker": false, + "EventMetrics": false, "ExcludeMetrics": null, "ExcludeMetricLabels": null, "Exporters": [ @@ -80,6 +81,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": true, "ConnectionStats": true, + "EventMetrics": false, "EngineStats": { "Subscriptions": true }, diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go index 6871b00ed8..cd042a9f71 100644 --- a/router/pkg/metric/event_metric_store.go +++ b/router/pkg/metric/event_metric_store.go @@ -13,6 +13,12 @@ import ( otelattrs "github.com/wundergraph/cosmo/router/pkg/otel" ) +const ( + ProviderTypeKafka = "kafka" + ProviderTypeNats = "nats" + ProviderTypeRedis = "redis" +) + // EventMetricProvider is the interface that wraps the basic Event metric methods. // We maintain two providers, one for OTEL and one for Prometheus. 
type EventMetricProvider interface { @@ -91,7 +97,7 @@ func (e *EventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOpti func (e *EventMetrics) KafkaPublish(ctx context.Context, providerID string, topic string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("kafka"), + otelattrs.WgEventProviderType.String(ProviderTypeKafka), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic), ) @@ -101,7 +107,7 @@ func (e *EventMetrics) KafkaPublish(ctx context.Context, providerID string, topi func (e *EventMetrics) KafkaPublishFailure(ctx context.Context, providerID string, topic string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("kafka"), + otelattrs.WgEventProviderType.String(ProviderTypeKafka), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic), ) @@ -111,7 +117,7 @@ func (e *EventMetrics) KafkaPublishFailure(ctx context.Context, providerID strin func (e *EventMetrics) KafkaMessageReceived(ctx context.Context, providerID string, topic string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("kafka"), + otelattrs.WgEventProviderType.String(ProviderTypeKafka), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgKafkaTopic.String(topic), ) @@ -121,7 +127,7 @@ func (e *EventMetrics) KafkaMessageReceived(ctx context.Context, providerID stri func (e *EventMetrics) RedisPublish(ctx context.Context, providerID string, channel string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("redis"), + otelattrs.WgEventProviderType.String(ProviderTypeRedis), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel), ) @@ -131,7 +137,7 @@ func (e *EventMetrics) RedisPublish(ctx context.Context, providerID string, chan func (e *EventMetrics) RedisPublishFailure(ctx context.Context, providerID string, channel string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("redis"), + 
otelattrs.WgEventProviderType.String(ProviderTypeRedis), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel), ) @@ -141,7 +147,7 @@ func (e *EventMetrics) RedisPublishFailure(ctx context.Context, providerID strin func (e *EventMetrics) RedisMessageReceived(ctx context.Context, providerID string, channel string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("redis"), + otelattrs.WgEventProviderType.String(ProviderTypeRedis), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgRedisChannel.String(channel), ) @@ -151,7 +157,7 @@ func (e *EventMetrics) RedisMessageReceived(ctx context.Context, providerID stri func (e *EventMetrics) NatsPublish(ctx context.Context, providerID string, subject string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("nats"), + otelattrs.WgEventProviderType.String(ProviderTypeNats), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject), ) @@ -161,7 +167,7 @@ func (e *EventMetrics) NatsPublish(ctx context.Context, providerID string, subje func (e *EventMetrics) NatsPublishFailure(ctx context.Context, providerID string, subject string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("nats"), + otelattrs.WgEventProviderType.String(ProviderTypeNats), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject), ) @@ -171,7 +177,7 @@ func (e *EventMetrics) NatsPublishFailure(ctx context.Context, providerID string func (e *EventMetrics) NatsMessageReceived(ctx context.Context, providerID string, subject string) { opts := e.withAttrs( - otelattrs.WgEventProviderType.String("nats"), + otelattrs.WgEventProviderType.String(ProviderTypeNats), otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject), ) @@ -180,13 +186,21 @@ func (e *EventMetrics) NatsMessageReceived(ctx context.Context, providerID strin } func (e *EventMetrics) NatsRequest(ctx context.Context, providerID 
string, subject string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) + opts := e.withAttrs( + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgEventProviderType.String(ProviderTypeNats), + otelattrs.WgNatsSubject.String(subject), + ) e.otlpMetrics.NatsRequest(ctx, opts) e.promMetrics.NatsRequest(ctx, opts) } func (e *EventMetrics) NatsRequestFailure(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs(otelattrs.WgEventProviderID.String(providerID), otelattrs.WgNatsSubject.String(subject)) + opts := e.withAttrs( + otelattrs.WgEventProviderID.String(providerID), + otelattrs.WgEventProviderType.String(ProviderTypeNats), + otelattrs.WgNatsSubject.String(subject), + ) e.otlpMetrics.NatsRequestFailure(ctx, opts) e.promMetrics.NatsRequestFailure(ctx, opts) } diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index fdea2c200b..5777dc6d8b 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -234,12 +234,19 @@ func NewProviderAdapter(ctx context.Context, logger *zap.Logger, opts []kgo.Opt, logger = zap.NewNop() } + var store metric.EventMetricStore + if providerOpts.EventMetricStore != nil { + store = providerOpts.EventMetricStore + } else { + store = metric.NewNoopEventMetricStore() + } + return &ProviderAdapter{ ctx: ctx, logger: logger.With(zap.String("pubsub", "kafka")), opts: opts, closeWg: sync.WaitGroup{}, cancel: cancel, - eventMetricStore: providerOpts.EventMetricStore, + eventMetricStore: store, }, nil } diff --git a/router/pkg/pubsub/kafka/provider_builder_test.go b/router/pkg/pubsub/kafka/provider_builder_test.go index 99d9eb937a..6afa7612e4 100644 --- a/router/pkg/pubsub/kafka/provider_builder_test.go +++ b/router/pkg/pubsub/kafka/provider_builder_test.go @@ -2,7 +2,6 @@ package kafka import ( "context" - rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" 
"github.com/stretchr/testify/assert" @@ -94,9 +93,7 @@ func TestPubSubProviderBuilderFactory(t *testing.T) { builder := NewProviderBuilder(ctx, logger, "host", "addr") require.NotNil(t, builder) - provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), - }) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{}) require.NoError(t, err) // Check the returned provider diff --git a/router/pkg/pubsub/nats/provider_builder_test.go b/router/pkg/pubsub/nats/provider_builder_test.go index 683a205937..407e6bde3c 100644 --- a/router/pkg/pubsub/nats/provider_builder_test.go +++ b/router/pkg/pubsub/nats/provider_builder_test.go @@ -2,7 +2,6 @@ package nats import ( "context" - rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" "github.com/stretchr/testify/assert" @@ -83,9 +82,7 @@ func TestPubSubProviderBuilderFactory(t *testing.T) { builder := NewProviderBuilder(ctx, logger, "host", "addr") require.NotNil(t, builder) - provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), - }) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{}) require.NoError(t, err) // Check the returned provider diff --git a/router/pkg/pubsub/pubsub.go b/router/pkg/pubsub/pubsub.go index 6a1f5d3c26..81705dbeab 100644 --- a/router/pkg/pubsub/pubsub.go +++ b/router/pkg/pubsub/pubsub.go @@ -53,6 +53,10 @@ func (e *ProviderNotDefinedError) Error() string { // BuildProvidersAndDataSources is a generic function that builds providers and data sources for the given // EventsConfiguration and DataSourceConfigurationWithMetadata func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfiguration, store metric.EventMetricStore, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { + if store == 
nil { + store = metric.NewNoopEventMetricStore() + } + var pubSubProviders []pubsub_datasource.Provider var outs []plan.DataSource diff --git a/router/pkg/pubsub/redis/provider_builder_test.go b/router/pkg/pubsub/redis/provider_builder_test.go index 351b8a16d8..58963dbbc6 100644 --- a/router/pkg/pubsub/redis/provider_builder_test.go +++ b/router/pkg/pubsub/redis/provider_builder_test.go @@ -2,7 +2,6 @@ package redis import ( "context" - rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" "github.com/stretchr/testify/assert" @@ -22,9 +21,7 @@ func TestBuildRedisOptions(t *testing.T) { logger := zaptest.NewLogger(t) ctx := context.Background() builder := NewProviderBuilder(ctx, logger, "host", "addr") - provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), - }) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{}) require.NoError(t, err) require.NotNil(t, provider) @@ -42,9 +39,7 @@ func TestBuildRedisOptions(t *testing.T) { logger := zaptest.NewLogger(t) ctx := context.Background() builder := NewProviderBuilder(ctx, logger, "host", "addr") - provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), - }) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{}) require.NoError(t, err) require.NotNil(t, provider) @@ -68,9 +63,7 @@ func TestPubSubProviderBuilderFactory(t *testing.T) { builder := NewProviderBuilder(ctx, logger, "host", "addr") require.NotNil(t, builder) - provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), - }) + provider, err := builder.BuildProvider(cfg, datasource.ProviderOpts{}) require.NoError(t, err) // Check the returned provider From 7d50c7ddb9e75f67c739157f0affa8e14a5fd8c3 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Fri, 15 Aug 2025 02:21:29 +0530 Subject: [PATCH 15/40] fix: review 
comments --- router/pkg/metric/oltp_connection_metric_store.go | 2 +- router/pkg/metric/prom_connection_metric_store.go | 2 +- router/pkg/pubsub/pubsub_test.go | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/router/pkg/metric/oltp_connection_metric_store.go b/router/pkg/metric/oltp_connection_metric_store.go index b2e9c3e92a..e92f43b72b 100644 --- a/router/pkg/metric/oltp_connection_metric_store.go +++ b/router/pkg/metric/oltp_connection_metric_store.go @@ -45,7 +45,7 @@ func newOtlpConnectionMetrics(logger *zap.Logger, meterProvider *metric.MeterPro err = metrics.startInitMetrics(stats, baseAttributes) if err != nil { - logger.Error("failed to start initial connection metrics", zap.Error(err)) + return nil, err } return metrics, nil diff --git a/router/pkg/metric/prom_connection_metric_store.go b/router/pkg/metric/prom_connection_metric_store.go index 24248bb671..a4a58b14fa 100644 --- a/router/pkg/metric/prom_connection_metric_store.go +++ b/router/pkg/metric/prom_connection_metric_store.go @@ -45,7 +45,7 @@ func newPromConnectionMetrics(logger *zap.Logger, meterProvider *metric.MeterPro err = metrics.startInitMetrics(stats, attributes) if err != nil { - logger.Error("failed to start initial connection metrics", zap.Error(err)) + return nil, err } return metrics, nil diff --git a/router/pkg/pubsub/pubsub_test.go b/router/pkg/pubsub/pubsub_test.go index 568e941fd3..5b2ba9f72e 100644 --- a/router/pkg/pubsub/pubsub_test.go +++ b/router/pkg/pubsub/pubsub_test.go @@ -3,6 +3,7 @@ package pubsub import ( "context" "errors" + "github.com/stretchr/testify/mock" rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "testing" @@ -119,7 +120,7 @@ func TestBuild_ProviderError(t *testing.T) { {ID: "provider-1"}, } - mockBuilder.On("BuildProvider", natsEventSources[0]).Return(nil, errors.New("provider error")) + mockBuilder.On("BuildProvider", natsEventSources[0], mock.Anything).Return(nil, errors.New("provider error")) // Execute the function providers, 
dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) From a8141ff4fe238474bf95fbacf785e5d6c9ca872f Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Fri, 15 Aug 2025 14:04:57 +0530 Subject: [PATCH 16/40] fix: tests --- router/pkg/pubsub/pubsub_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/router/pkg/pubsub/pubsub_test.go b/router/pkg/pubsub/pubsub_test.go index 5b2ba9f72e..b232084e19 100644 --- a/router/pkg/pubsub/pubsub_test.go +++ b/router/pkg/pubsub/pubsub_test.go @@ -238,7 +238,8 @@ func TestBuild_ShouldNotInitializeProviderIfNotUsed(t *testing.T) { mockPubSubUsedProvider.On("ID").Return("provider-2") mockBuilder.On("TypeID").Return("nats") - mockBuilder.On("BuildProvider", natsEventSources[1]).Return(mockPubSubUsedProvider, nil) + mockBuilder.On("BuildProvider", natsEventSources[1], mock.Anything). + Return(mockPubSubUsedProvider, nil) // Execute the function providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) From 89e96b29941e6a3266e1519b50ae3244739d5f6c Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 01:47:42 +0530 Subject: [PATCH 17/40] fix: refactoring --- router/pkg/metric/event_measurements.go | 91 +++------- router/pkg/metric/event_metric_store.go | 171 +++++-------------- router/pkg/metric/noop_event_metrics.go | 37 +--- router/pkg/metric/oltp_event_metric_store.go | 21 +-- router/pkg/metric/prom_event_metric_store.go | 21 +-- router/pkg/otel/attributes.go | 8 + router/pkg/pubsub/kafka/adapter.go | 7 +- router/pkg/pubsub/nats/adapter.go | 16 +- router/pkg/pubsub/redis/adapter.go | 8 +- 9 files changed, 103 insertions(+), 277 deletions(-) diff --git a/router/pkg/metric/event_measurements.go b/router/pkg/metric/event_measurements.go index c3a5a2733b..87bf347606 100644 --- a/router/pkg/metric/event_measurements.go +++ b/router/pkg/metric/event_measurements.go @@ -8,96 +8,45 @@ import ( // 
Event (Kafka/Redis/NATS) metric constants const ( - // unified counters across providers; provider type captured via attributes - eventsPublishMessages = "router.events.publish.messages" - eventsPublishFailures = "router.events.publish.fail" - eventsMessagesReceived = "router.events.messages.received" - - // keep nats request metrics separate as they are not generic publish/receive - natsRequests = "router.nats.request" - natsRequestFailures = "router.nats.request.fail" + // unified counters across providers per messaging semantic conventions + messagingClientSentMessages = "messaging.client.sent.messages" + messagingClientConsumedMessages = "messaging.client.consumed.messages" ) var ( - eventsPublishMessagesOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of event messages published"), - } - eventsPublishFailuresOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of event publish failures"), - } - eventsMessagesReceivedOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of event messages received"), - } - - // New NATS request counter options - natsRequestsOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of NATS requests"), + messagingClientSentMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of messaging client sent messages"), } - natsRequestFailuresOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of NATS request failures"), + messagingClientConsumedMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of messaging client consumed messages"), } ) type eventInstruments struct { - // unified instruments - publishMessages otelmetric.Int64Counter - publishFailures otelmetric.Int64Counter - messagesReceived otelmetric.Int64Counter - - // NATS request instruments - natsRequests otelmetric.Int64Counter - natsRequestFailures otelmetric.Int64Counter + 
// instruments following messaging semantic conventions + sentMessages otelmetric.Int64Counter + consumedMessages otelmetric.Int64Counter } func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { - publishMessagesCounter, err := meter.Int64Counter( - eventsPublishMessages, - eventsPublishMessagesOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create publish messages counter: %w", err) - } - - publishFailuresCounter, err := meter.Int64Counter( - eventsPublishFailures, - eventsPublishFailuresOptions..., + sentCounter, err := meter.Int64Counter( + messagingClientSentMessages, + messagingClientSentMessagesOptions..., ) if err != nil { - return nil, fmt.Errorf("failed to create publish failures counter: %w", err) + return nil, fmt.Errorf("failed to create sent messages counter: %w", err) } - messagesReceivedCounter, err := meter.Int64Counter( - eventsMessagesReceived, - eventsMessagesReceivedOptions..., + consumedCounter, err := meter.Int64Counter( + messagingClientConsumedMessages, + messagingClientConsumedMessagesOptions..., ) if err != nil { - return nil, fmt.Errorf("failed to create messages received counter: %w", err) - } - - // New NATS request counters - natsRequestsCounter, err := meter.Int64Counter( - natsRequests, - natsRequestsOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create nats requests counter: %w", err) - } - - natsRequestFailuresCounter, err := meter.Int64Counter( - natsRequestFailures, - natsRequestFailuresOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create nats request failures counter: %w", err) + return nil, fmt.Errorf("failed to create consumed messages counter: %w", err) } return &eventInstruments{ - publishMessages: publishMessagesCounter, - publishFailures: publishFailuresCounter, - messagesReceived: messagesReceivedCounter, - - // NATS request instruments - natsRequests: natsRequestsCounter, - natsRequestFailures: natsRequestFailuresCounter, + 
sentMessages: sentCounter, + consumedMessages: consumedCounter, }, nil } diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go index cd042a9f71..e7db7980d5 100644 --- a/router/pkg/metric/event_metric_store.go +++ b/router/pkg/metric/event_metric_store.go @@ -10,7 +10,7 @@ import ( "go.opentelemetry.io/otel/sdk/metric" "go.uber.org/zap" - otelattrs "github.com/wundergraph/cosmo/router/pkg/otel" + otel "github.com/wundergraph/cosmo/router/pkg/otel" ) const ( @@ -19,36 +19,29 @@ const ( ProviderTypeRedis = "redis" ) +// MessagingEvent carries the values for messaging metrics attributes. +type MessagingEvent struct { + OperationName string + MessagingSystem string + ErrorType string + DestinationName string +} + // EventMetricProvider is the interface that wraps the basic Event metric methods. // We maintain two providers, one for OTEL and one for Prometheus. type EventMetricProvider interface { - // unified publish/receive for brokers (kafka, redis, nats) - Publish(ctx context.Context, opts ...otelmetric.AddOption) - PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) - MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) - - // keep NATS request separate - NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) - NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) + // unified produce/consume for brokers (kafka, redis, nats) + Produce(ctx context.Context, opts ...otelmetric.AddOption) + Consume(ctx context.Context, opts ...otelmetric.AddOption) Flush(ctx context.Context) error Shutdown() error } type EventMetricStore interface { - KafkaPublish(ctx context.Context, providerID string, topic string) - KafkaPublishFailure(ctx context.Context, providerID string, topic string) - KafkaMessageReceived(ctx context.Context, providerID string, topic string) - - RedisPublish(ctx context.Context, providerID string, channel string) - RedisPublishFailure(ctx context.Context, providerID 
string, channel string) - RedisMessageReceived(ctx context.Context, providerID string, channel string) - - NatsPublish(ctx context.Context, providerID string, subject string) - NatsPublishFailure(ctx context.Context, providerID string, subject string) - NatsMessageReceived(ctx context.Context, providerID string, subject string) - NatsRequest(ctx context.Context, providerID string, subject string) - NatsRequestFailure(ctx context.Context, providerID string, subject string) + // Generic produce/consume with explicit parameters per semantic conventions + Produce(ctx context.Context, event MessagingEvent) + Consume(ctx context.Context, event MessagingEvent) Flush(ctx context.Context) error Shutdown(ctx context.Context) error @@ -95,114 +88,36 @@ func (e *EventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOpti return otelmetric.WithAttributes(append(copied, attrs...)...) } -func (e *EventMetrics) KafkaPublish(ctx context.Context, providerID string, topic string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeKafka), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgKafkaTopic.String(topic), - ) - e.otlpMetrics.Publish(ctx, opts) - e.promMetrics.Publish(ctx, opts) -} - -func (e *EventMetrics) KafkaPublishFailure(ctx context.Context, providerID string, topic string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeKafka), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgKafkaTopic.String(topic), - ) - e.otlpMetrics.PublishFailure(ctx, opts) - e.promMetrics.PublishFailure(ctx, opts) -} - -func (e *EventMetrics) KafkaMessageReceived(ctx context.Context, providerID string, topic string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeKafka), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgKafkaTopic.String(topic), - ) - e.otlpMetrics.MessagesReceived(ctx, opts) - e.promMetrics.MessagesReceived(ctx, opts) -} - -func (e *EventMetrics) 
RedisPublish(ctx context.Context, providerID string, channel string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeRedis), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgRedisChannel.String(channel), - ) - e.otlpMetrics.Publish(ctx, opts) - e.promMetrics.Publish(ctx, opts) -} - -func (e *EventMetrics) RedisPublishFailure(ctx context.Context, providerID string, channel string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeRedis), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgRedisChannel.String(channel), - ) - e.otlpMetrics.PublishFailure(ctx, opts) - e.promMetrics.PublishFailure(ctx, opts) -} - -func (e *EventMetrics) RedisMessageReceived(ctx context.Context, providerID string, channel string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeRedis), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgRedisChannel.String(channel), - ) - e.otlpMetrics.MessagesReceived(ctx, opts) - e.promMetrics.MessagesReceived(ctx, opts) -} - -func (e *EventMetrics) NatsPublish(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeNats), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgNatsSubject.String(subject), - ) - e.otlpMetrics.Publish(ctx, opts) - e.promMetrics.Publish(ctx, opts) -} - -func (e *EventMetrics) NatsPublishFailure(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeNats), - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgNatsSubject.String(subject), - ) - e.otlpMetrics.PublishFailure(ctx, opts) - e.promMetrics.PublishFailure(ctx, opts) -} - -func (e *EventMetrics) NatsMessageReceived(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs( - otelattrs.WgEventProviderType.String(ProviderTypeNats), - 
otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgNatsSubject.String(subject), - ) - e.otlpMetrics.MessagesReceived(ctx, opts) - e.promMetrics.MessagesReceived(ctx, opts) -} - -func (e *EventMetrics) NatsRequest(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs( - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgEventProviderType.String(ProviderTypeNats), - otelattrs.WgNatsSubject.String(subject), - ) - e.otlpMetrics.NatsRequest(ctx, opts) - e.promMetrics.NatsRequest(ctx, opts) +func (e *EventMetrics) Produce(ctx context.Context, event MessagingEvent) { + attrs := []attribute.KeyValue{ + otel.MessagingOperationName.String(event.OperationName), + otel.MessagingSystem.String(event.MessagingSystem), + } + if event.ErrorType != "" { + attrs = append(attrs, otel.MessagingErrorType.String(event.ErrorType)) + } + if event.DestinationName != "" { + attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) + } + opt := e.withAttrs(attrs...) + e.otlpMetrics.Produce(ctx, opt) + e.promMetrics.Produce(ctx, opt) } -func (e *EventMetrics) NatsRequestFailure(ctx context.Context, providerID string, subject string) { - opts := e.withAttrs( - otelattrs.WgEventProviderID.String(providerID), - otelattrs.WgEventProviderType.String(ProviderTypeNats), - otelattrs.WgNatsSubject.String(subject), - ) - e.otlpMetrics.NatsRequestFailure(ctx, opts) - e.promMetrics.NatsRequestFailure(ctx, opts) +func (e *EventMetrics) Consume(ctx context.Context, event MessagingEvent) { + attrs := []attribute.KeyValue{ + otel.MessagingOperationName.String(event.OperationName), + otel.MessagingSystem.String(event.MessagingSystem), + } + if event.ErrorType != "" { + attrs = append(attrs, otel.MessagingErrorType.String(event.ErrorType)) + } + if event.DestinationName != "" { + attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) + } + opt := e.withAttrs(attrs...) 
+ e.otlpMetrics.Consume(ctx, opt) + e.promMetrics.Consume(ctx, opt) } // Flush flushes the metrics to the backend synchronously. diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go index dd1ee441b7..824088277e 100644 --- a/router/pkg/metric/noop_event_metrics.go +++ b/router/pkg/metric/noop_event_metrics.go @@ -9,40 +9,15 @@ import ( // A noop metric provider so we do not need to do nil checks for each provider call from the store type noopEventMetricProvider struct{} -func (n *noopEventMetricProvider) Publish(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) { -} - -func (n *noopEventMetricProvider) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) { -} -func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } -func (n *noopEventMetricProvider) Shutdown() error { return nil } +func (n *noopEventMetricProvider) Produce(ctx context.Context, opts ...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) Consume(ctx context.Context, opts ...otelmetric.AddOption) {} +func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } +func (n *noopEventMetricProvider) Shutdown() error { return nil } type NoopEventMetricStore struct{} -func (n *NoopEventMetricStore) KafkaPublish(ctx context.Context, providerID string, topic string) {} -func (n *NoopEventMetricStore) KafkaPublishFailure(ctx context.Context, providerID string, topic string) { -} -func (n *NoopEventMetricStore) KafkaMessageReceived(ctx context.Context, providerID string, topic string) { -} - -func (n *NoopEventMetricStore) RedisPublish(ctx context.Context, providerID string, channel string) {} 
-func (n *NoopEventMetricStore) RedisPublishFailure(ctx context.Context, providerID string, channel string) { -} -func (n *NoopEventMetricStore) RedisMessageReceived(ctx context.Context, providerID string, channel string) { -} - -func (n *NoopEventMetricStore) NatsPublish(ctx context.Context, providerID string, subject string) {} -func (n *NoopEventMetricStore) NatsPublishFailure(ctx context.Context, providerID string, subject string) { -} -func (n *NoopEventMetricStore) NatsMessageReceived(ctx context.Context, providerID string, subject string) { -} - -func (n *NoopEventMetricStore) NatsRequest(ctx context.Context, providerID string, subject string) {} -func (n *NoopEventMetricStore) NatsRequestFailure(ctx context.Context, providerID string, subject string) { -} +func (n *NoopEventMetricStore) Produce(ctx context.Context, event MessagingEvent) {} +func (n *NoopEventMetricStore) Consume(ctx context.Context, event MessagingEvent) {} func (n *NoopEventMetricStore) Flush(ctx context.Context) error { return nil } func (n *NoopEventMetricStore) Shutdown(ctx context.Context) error { return nil } diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_event_metric_store.go index 59e8cad83f..e331161c18 100644 --- a/router/pkg/metric/oltp_event_metric_store.go +++ b/router/pkg/metric/oltp_event_metric_store.go @@ -40,25 +40,12 @@ func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider } // Unified methods -func (o *otlpEventMetrics) Publish(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.publishMessages.Add(ctx, 1, opts...) +func (o *otlpEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.sentMessages.Add(ctx, 1, opts...) } -func (o *otlpEventMetrics) PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.publishFailures.Add(ctx, 1, opts...) 
-} - -func (o *otlpEventMetrics) MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.messagesReceived.Add(ctx, 1, opts...) -} - -// Keep NATS request methods -func (o *otlpEventMetrics) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.natsRequests.Add(ctx, 1, opts...) -} - -func (o *otlpEventMetrics) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.natsRequestFailures.Add(ctx, 1, opts...) +func (o *otlpEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.consumedMessages.Add(ctx, 1, opts...) } func (o *otlpEventMetrics) Flush(ctx context.Context) error { diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_event_metric_store.go index fae8755c94..5d7182faf4 100644 --- a/router/pkg/metric/prom_event_metric_store.go +++ b/router/pkg/metric/prom_event_metric_store.go @@ -40,25 +40,12 @@ func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider } // Unified methods -func (p *promEventMetrics) Publish(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.publishMessages.Add(ctx, 1, opts...) +func (p *promEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.sentMessages.Add(ctx, 1, opts...) } -func (p *promEventMetrics) PublishFailure(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.publishFailures.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) MessagesReceived(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.messagesReceived.Add(ctx, 1, opts...) -} - -// NATS request methods remain -func (p *promEventMetrics) NatsRequest(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.natsRequests.Add(ctx, 1, opts...) -} - -func (p *promEventMetrics) NatsRequestFailure(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.natsRequestFailures.Add(ctx, 1, opts...) 
+func (p *promEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { + p.instruments.consumedMessages.Add(ctx, 1, opts...) } func (p *promEventMetrics) Flush(ctx context.Context) error { return p.meterProvider.ForceFlush(ctx) } diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index 7d3e63c1de..35ff62cfc8 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -69,6 +69,14 @@ const ( WgRedisChannel = attribute.Key("wg.redis.channel") ) +// Messaging metrics attributes +const ( + MessagingOperationName = attribute.Key("messaging.operation.name") + MessagingSystem = attribute.Key("messaging.system") + MessagingErrorType = attribute.Key("error.type") + MessagingDestinationName = attribute.Key("messaging.destination.name") +) + const ( CacheMetricsOperationTypeAdded = "added" CacheMetricsOperationTypeUpdated = "updated" diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 5777dc6d8b..8a71fdc86c 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -91,7 +91,7 @@ func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, u r := iter.Next() p.logger.Debug("subscription update", zap.String("topic", r.Topic), zap.ByteString("data", r.Value)) - p.eventMetricStore.KafkaMessageReceived(p.ctx, providerId, r.Topic) + p.eventMetricStore.Consume(p.ctx, metric.MessagingEvent{OperationName: "receive", MessagingSystem: metric.ProviderTypeKafka, DestinationName: r.Topic}) updater.Update(r.Value) } } @@ -182,11 +182,12 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu if pErr != nil { log.Error("publish error", zap.Error(pErr)) - p.eventMetricStore.KafkaPublishFailure(ctx, event.ProviderID, event.Topic) + // failure emission: include error.type generic + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeKafka, ErrorType: 
"error", DestinationName: event.Topic}) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) } - p.eventMetricStore.KafkaPublish(ctx, event.ProviderID, event.Topic) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeKafka, DestinationName: event.Topic}) return nil } diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index 49fb916c0e..a449a70097 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -135,7 +135,11 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent for msg := range msgBatch.Messages() { log.Debug("subscription update", zap.String("message_subject", msg.Subject()), zap.ByteString("data", msg.Data())) - p.eventMetricStore.NatsMessageReceived(p.ctx, event.ProviderID, msg.Subject()) + p.eventMetricStore.Consume(p.ctx, metric.MessagingEvent{ + OperationName: "receive", + MessagingSystem: metric.ProviderTypeNats, + DestinationName: msg.Subject(), + }) updater.Update(msg.Data()) // Acknowledge the message after it has been processed @@ -173,7 +177,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent select { case msg := <-msgChan: log.Debug("subscription update", zap.String("message_subject", msg.Subject), zap.ByteString("data", msg.Data)) - p.eventMetricStore.NatsMessageReceived(p.ctx, event.ProviderID, msg.Subject) + p.eventMetricStore.Consume(p.ctx, metric.MessagingEvent{OperationName: "receive", MessagingSystem: metric.ProviderTypeNats, DestinationName: msg.Subject}) updater.Update(msg.Data) case <-p.ctx.Done(): // When the application context is done, we stop the subscriptions @@ -218,10 +222,10 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv err := p.client.Publish(event.Subject, event.Data) if err != nil { log.Error("publish error", zap.Error(err)) - 
p.eventMetricStore.NatsPublishFailure(ctx, event.ProviderID, event.Subject) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeNats, ErrorType: "error", DestinationName: event.Subject}) return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) } else { - p.eventMetricStore.NatsPublish(ctx, event.ProviderID, event.Subject) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeNats, DestinationName: event.Subject}) } return nil @@ -243,11 +247,11 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv msg, err := p.client.RequestWithContext(ctx, event.Subject, event.Data) if err != nil { log.Error("request error", zap.Error(err)) - p.eventMetricStore.NatsRequestFailure(ctx, event.ProviderID, event.Subject) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "request", MessagingSystem: metric.ProviderTypeNats, ErrorType: "error", DestinationName: event.Subject}) return datasource.NewError(fmt.Sprintf("error requesting from NATS subject %s", event.Subject), err) } - p.eventMetricStore.NatsRequest(ctx, event.ProviderID, event.Subject) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "request", MessagingSystem: metric.ProviderTypeNats, DestinationName: event.Subject}) // We don't collect metrics on err here as it's an error related to the writer _, err = w.Write(msg.Data) diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index fad0388482..59003da3c2 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -111,7 +111,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent return } log.Debug("subscription update", zap.String("message_channel", msg.Channel), zap.String("data", msg.Payload)) - p.eventMetricStore.RedisMessageReceived(ctx, 
event.ProviderID, msg.Channel) + p.eventMetricStore.Consume(ctx, metric.MessagingEvent{OperationName: "receive", MessagingSystem: metric.ProviderTypeRedis, DestinationName: msg.Channel}) updater.Update([]byte(msg.Payload)) case <-p.ctx.Done(): // When the application context is done, we stop the subscription if it is not already done @@ -150,10 +150,10 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu intCmd := p.conn.Publish(ctx, event.Channel, data) if intCmd.Err() != nil { log.Error("publish error", zap.Error(intCmd.Err())) - p.eventMetricStore.RedisPublishFailure(ctx, event.ProviderID, event.Channel) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeRedis, ErrorType: "error", DestinationName: event.Channel}) return datasource.NewError(fmt.Sprintf("error publishing to Redis PubSub channel %s", event.Channel), intCmd.Err()) } - - p.eventMetricStore.RedisPublish(ctx, event.ProviderID, event.Channel) + + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeRedis, DestinationName: event.Channel}) return nil } From 6c7e0a9305f6ac7e3a2c4328911f03f5a1b8e213 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 02:35:34 +0530 Subject: [PATCH 18/40] fix: tests --- router-tests/prometheus_event_metrics_test.go | 95 +++++++++---------- router-tests/telemetry/event_metrics_test.go | 93 ++++++++---------- router/pkg/metric/event_measurements.go | 6 +- router/pkg/metric/event_metric_store.go | 9 -- router/pkg/metric/noop_event_metrics.go | 1 - router/pkg/metric/oltp_event_metric_store.go | 4 +- router/pkg/metric/prom_event_metric_store.go | 3 +- 7 files changed, 89 insertions(+), 122 deletions(-) diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_event_metrics_test.go index e166ec80d0..a1de0167a0 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ 
b/router-tests/prometheus_event_metrics_test.go @@ -51,18 +51,18 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_events_publish_messages_total") + family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - require.Equal(t, "my-kafka", eventProvider.GetValue()) + operation := findMetricLabelByName(metrics, "messaging_operation_name") + require.Equal(t, "send", operation.GetValue()) - providerType := findMetricLabelByName(metrics, "wg_event_provider_type") - require.Equal(t, "kafka", providerType.GetValue()) + system := findMetricLabelByName(metrics, "messaging_system") + require.Equal(t, "kafka", system.GetValue()) - topic := findMetricLabelByName(metrics, "wg_kafka_topic") - require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + destination := findMetricLabelByName(metrics, "messaging_destination_name") + require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) @@ -117,18 +117,18 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_events_messages_received_total") + family := findMetricFamilyByName(mf, "messaging_client_consumed_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - require.Equal(t, "my-kafka", eventProvider.GetValue()) + operation := findMetricLabelByName(metrics, "messaging_operation_name") + require.Equal(t, "receive", operation.GetValue()) - providerType := findMetricLabelByName(metrics, "wg_event_provider_type") - require.Equal(t, "kafka", providerType.GetValue()) + system := findMetricLabelByName(metrics, 
"messaging_system") + require.Equal(t, "kafka", system.GetValue()) - topic := findMetricLabelByName(metrics, "wg_kafka_topic") - require.True(t, strings.HasSuffix(topic.GetValue(), "employeeUpdated")) + destination := findMetricLabelByName(metrics, "messaging_destination_name") + require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) @@ -166,18 +166,15 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_events_publish_messages_total") + family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - require.Equal(t, "my-nats", eventProvider.GetValue()) + system := findMetricLabelByName(metrics, "messaging_system") + require.Equal(t, "nats", system.GetValue()) - providerType := findMetricLabelByName(metrics, "wg_event_provider_type") - require.Equal(t, "nats", providerType.GetValue()) - - subject := findMetricLabelByName(metrics, "wg_nats_subject") - require.True(t, strings.HasSuffix(subject.GetValue(), "employeeUpdatedMyNats.12")) + destination := findMetricLabelByName(metrics, "messaging_destination_name") + require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyNats.12")) require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) @@ -208,15 +205,18 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_nats_request_total") + family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - require.Equal(t, "my-nats", eventProvider.GetValue()) + operation := 
findMetricLabelByName(metrics, "messaging_operation_name") + require.Equal(t, "request", operation.GetValue()) + + system := findMetricLabelByName(metrics, "messaging_system") + require.Equal(t, "nats", system.GetValue()) - subject := findMetricLabelByName(metrics, "wg_nats_subject") - require.True(t, strings.HasSuffix(subject.GetValue(), "getEmployeeMyNats.12")) + destination := findMetricLabelByName(metrics, "messaging_destination_name") + require.True(t, strings.HasSuffix(destination.GetValue(), "getEmployeeMyNats.12")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) @@ -285,17 +285,14 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_events_messages_received_total") + family := findMetricFamilyByName(mf, "messaging_client_consumed_messages_total") metrics := family.GetMetric() - eventProviderId := findMetricLabelByName(metrics, "wg_event_provider_id").GetValue() - require.Equal(t, "default", eventProviderId) - - providerType := findMetricLabelByName(metrics, "wg_event_provider_type") - require.Equal(t, "nats", providerType.GetValue()) + system := findMetricLabelByName(metrics, "messaging_system") + require.Equal(t, "nats", system.GetValue()) - subject := findMetricLabelByName(metrics, "wg_nats_subject") - require.True(t, strings.HasSuffix(subject.GetValue(), "employeeUpdated.3")) + destination := findMetricLabelByName(metrics, "messaging_destination_name") + require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated.3")) require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) @@ -335,18 +332,15 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_events_publish_messages_total") + family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - eventProvider := 
findMetricLabelByName(metrics, "wg_event_provider_id") - require.Equal(t, "my-redis", eventProvider.GetValue()) + system := findMetricLabelByName(metrics, "messaging_system") + require.Equal(t, "redis", system.GetValue()) - providerType := findMetricLabelByName(metrics, "wg_event_provider_type") - require.Equal(t, "redis", providerType.GetValue()) - - channel := findMetricLabelByName(metrics, "wg_redis_channel") - require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) + destination := findMetricLabelByName(metrics, "messaging_destination_name") + require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) @@ -354,7 +348,7 @@ func TestFlakyEventMetrics(t *testing.T) { t.Run("subscribe", func(t *testing.T) { t.Parallel() - + metricReader := metric.NewManualReader() promRegistry := prometheus.NewRegistry() @@ -400,18 +394,15 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "router_events_messages_received_total") + family := findMetricFamilyByName(mf, "messaging_client_consumed_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - eventProvider := findMetricLabelByName(metrics, "wg_event_provider_id") - require.Equal(t, "my-redis", eventProvider.GetValue()) - - providerType := findMetricLabelByName(metrics, "wg_event_provider_type") - require.Equal(t, "redis", providerType.GetValue()) + system := findMetricLabelByName(metrics, "messaging_system") + require.Equal(t, "redis", system.GetValue()) - channel := findMetricLabelByName(metrics, "wg_redis_channel") - require.True(t, strings.HasSuffix(channel.GetValue(), "employeeUpdatedMyRedis")) + destination := findMetricLabelByName(metrics, "messaging_destination_name") + require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) require.Equal(t, float64(1), 
metrics[0].Counter.GetValue()) }) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index f86c2c503c..30c6dd3e95 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -54,7 +54,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "router.events.publish.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -62,13 +62,14 @@ func TestFlakyEventMetrics(t *testing.T) { attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) + operation, _ := attrs.Value(otelattrs.MessagingOperationName) + require.Equal(t, "send", operation.AsString()) - require.Equal(t, "my-kafka", eventProviderId.AsString()) - require.Equal(t, "kafka", eventProviderType.AsString()) - require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + system, _ := attrs.Value(otelattrs.MessagingSystem) + require.Equal(t, "kafka", system.AsString()) + + destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -123,7 +124,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "router.events.messages.received") + metricEntry := integration.GetMetricByName(scope, "messaging.client.consumed.messages") require.NotNil(t, metricEntry) sum, _ := 
metricEntry.Data.(metricdata.Sum[int64]) @@ -131,14 +132,11 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-kafka", eventProviderId.AsString()) - - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "kafka", eventProviderType.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) + require.Equal(t, "kafka", system.AsString()) - kafkaTopic, _ := attrs.Value(otelattrs.WgKafkaTopic) - require.True(t, strings.HasSuffix(kafkaTopic.AsString(), "employeeUpdated")) + destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -176,21 +174,18 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "router.events.publish.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-nats", eventProviderId.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) + require.Equal(t, "nats", system.AsString()) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "nats", eventProviderType.AsString()) - - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.True(t, strings.HasSuffix(natsSubject.AsString(), "employeeUpdatedMyNats.12")) + destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + require.True(t, strings.HasSuffix(destination.AsString(), 
"employeeUpdatedMyNats.12")) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -221,18 +216,21 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "router.nats.request") + metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-nats", eventProviderId.AsString()) + operation, _ := attrs.Value(otelattrs.MessagingOperationName) + require.Equal(t, "request", operation.AsString()) + + system, _ := attrs.Value(otelattrs.MessagingSystem) + require.Equal(t, "nats", system.AsString()) - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.True(t, strings.HasSuffix(natsSubject.AsString(), "getEmployeeMyNats.12")) + destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + require.True(t, strings.HasSuffix(destination.AsString(), "getEmployeeMyNats.12")) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -301,7 +299,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "router.events.messages.received") + metricEntry := integration.GetMetricByName(scope, "messaging.client.consumed.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -309,14 +307,11 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "default", eventProviderId.AsString()) + system, _ := 
attrs.Value(otelattrs.MessagingSystem) + require.Equal(t, "nats", system.AsString()) - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "nats", eventProviderType.AsString()) - - natsSubject, _ := attrs.Value(otelattrs.WgNatsSubject) - require.True(t, strings.HasSuffix(natsSubject.AsString(), "employeeUpdated.3")) + destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated.3")) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -356,7 +351,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "router.events.publish.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -364,14 +359,11 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-redis", eventProviderId.AsString()) - - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "redis", eventProviderType.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) + require.Equal(t, "redis", system.AsString()) - redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) - require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) + destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -425,7 +417,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") require.NotNil(t, scope) - 
metricEntry := integration.GetMetricByName(scope, "router.events.messages.received") + metricEntry := integration.GetMetricByName(scope, "messaging.client.consumed.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -433,14 +425,11 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - eventProviderId, _ := attrs.Value(otelattrs.WgEventProviderID) - require.Equal(t, "my-redis", eventProviderId.AsString()) - - eventProviderType, _ := attrs.Value(otelattrs.WgEventProviderType) - require.Equal(t, "redis", eventProviderType.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) + require.Equal(t, "redis", system.AsString()) - redisChannel, _ := attrs.Value(otelattrs.WgRedisChannel) - require.True(t, strings.HasSuffix(redisChannel.AsString(), "employeeUpdatedMyRedis")) + destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) diff --git a/router/pkg/metric/event_measurements.go b/router/pkg/metric/event_measurements.go index 87bf347606..8bc97bad5f 100644 --- a/router/pkg/metric/event_measurements.go +++ b/router/pkg/metric/event_measurements.go @@ -24,12 +24,12 @@ var ( type eventInstruments struct { // instruments following messaging semantic conventions - sentMessages otelmetric.Int64Counter + producedMessages otelmetric.Int64Counter consumedMessages otelmetric.Int64Counter } func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { - sentCounter, err := meter.Int64Counter( + producedCounter, err := meter.Int64Counter( messagingClientSentMessages, messagingClientSentMessagesOptions..., ) @@ -46,7 +46,7 @@ func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { } return &eventInstruments{ - sentMessages: sentCounter, + producedMessages: producedCounter, 
consumedMessages: consumedCounter, }, nil } diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go index e7db7980d5..ef1c793007 100644 --- a/router/pkg/metric/event_metric_store.go +++ b/router/pkg/metric/event_metric_store.go @@ -35,7 +35,6 @@ type EventMetricProvider interface { Consume(ctx context.Context, opts ...otelmetric.AddOption) Flush(ctx context.Context) error - Shutdown() error } type EventMetricStore interface { @@ -143,13 +142,5 @@ func (e *EventMetrics) Shutdown(ctx context.Context) error { err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errFlush)) } - if errProm := e.promMetrics.Shutdown(); errProm != nil { - err = errors.Join(err, fmt.Errorf("failed to shutdown prom metrics: %w", errProm)) - } - - if errOtlp := e.otlpMetrics.Shutdown(); errOtlp != nil { - err = errors.Join(err, fmt.Errorf("failed to shutdown otlp metrics: %w", errOtlp)) - } - return err } diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go index 824088277e..6a42cecdd5 100644 --- a/router/pkg/metric/noop_event_metrics.go +++ b/router/pkg/metric/noop_event_metrics.go @@ -12,7 +12,6 @@ type noopEventMetricProvider struct{} func (n *noopEventMetricProvider) Produce(ctx context.Context, opts ...otelmetric.AddOption) {} func (n *noopEventMetricProvider) Consume(ctx context.Context, opts ...otelmetric.AddOption) {} func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } -func (n *noopEventMetricProvider) Shutdown() error { return nil } type NoopEventMetricStore struct{} diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_event_metric_store.go index e331161c18..ad255fa290 100644 --- a/router/pkg/metric/oltp_event_metric_store.go +++ b/router/pkg/metric/oltp_event_metric_store.go @@ -41,7 +41,7 @@ func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider // Unified methods func (o *otlpEventMetrics) Produce(ctx 
context.Context, opts ...otelmetric.AddOption) { - o.instruments.sentMessages.Add(ctx, 1, opts...) + o.instruments.producedMessages.Add(ctx, 1, opts...) } func (o *otlpEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { @@ -51,5 +51,3 @@ func (o *otlpEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOp func (o *otlpEventMetrics) Flush(ctx context.Context) error { return o.meterProvider.ForceFlush(ctx) } - -func (o *otlpEventMetrics) Shutdown() error { return nil } diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_event_metric_store.go index 5d7182faf4..5c221637c4 100644 --- a/router/pkg/metric/prom_event_metric_store.go +++ b/router/pkg/metric/prom_event_metric_store.go @@ -41,7 +41,7 @@ func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider // Unified methods func (p *promEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { - p.instruments.sentMessages.Add(ctx, 1, opts...) + p.instruments.producedMessages.Add(ctx, 1, opts...) 
} func (p *promEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { @@ -49,4 +49,3 @@ func (p *promEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOp } func (p *promEventMetrics) Flush(ctx context.Context) error { return p.meterProvider.ForceFlush(ctx) } -func (p *promEventMetrics) Shutdown() error { return nil } From b4f9a09954ef0b35f0da5da16a4027335fe34080 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 02:56:47 +0530 Subject: [PATCH 19/40] fix: tests --- router-tests/prometheus_event_metrics_test.go | 17 +++++++++++++ router-tests/telemetry/event_metrics_test.go | 25 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_event_metrics_test.go index a1de0167a0..462c80a38b 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ b/router-tests/prometheus_event_metrics_test.go @@ -57,6 +57,7 @@ func TestFlakyEventMetrics(t *testing.T) { operation := findMetricLabelByName(metrics, "messaging_operation_name") require.Equal(t, "send", operation.GetValue()) + require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "kafka", system.GetValue()) @@ -123,6 +124,7 @@ func TestFlakyEventMetrics(t *testing.T) { operation := findMetricLabelByName(metrics, "messaging_operation_name") require.Equal(t, "receive", operation.GetValue()) + require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "kafka", system.GetValue()) @@ -170,6 +172,9 @@ func TestFlakyEventMetrics(t *testing.T) { metrics := family.GetMetric() require.Len(t, metrics, 1) + operation := findMetricLabelByName(metrics, "messaging_operation_name") + require.Equal(t, "send", operation.GetValue()) + require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, 
"messaging_system") require.Equal(t, "nats", system.GetValue()) @@ -211,6 +216,7 @@ func TestFlakyEventMetrics(t *testing.T) { operation := findMetricLabelByName(metrics, "messaging_operation_name") require.Equal(t, "request", operation.GetValue()) + require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "nats", system.GetValue()) @@ -288,6 +294,10 @@ func TestFlakyEventMetrics(t *testing.T) { family := findMetricFamilyByName(mf, "messaging_client_consumed_messages_total") metrics := family.GetMetric() + require.Nil(t, findMetricLabelByName(metrics, "error_type")) + operation := findMetricLabelByName(metrics, "messaging_operation_name") + require.Equal(t, "receive", operation.GetValue()) + system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "nats", system.GetValue()) @@ -336,6 +346,9 @@ func TestFlakyEventMetrics(t *testing.T) { metrics := family.GetMetric() require.Len(t, metrics, 1) + operation := findMetricLabelByName(metrics, "messaging_operation_name") + require.Equal(t, "send", operation.GetValue()) + require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "redis", system.GetValue()) @@ -398,6 +411,10 @@ func TestFlakyEventMetrics(t *testing.T) { metrics := family.GetMetric() require.Len(t, metrics, 1) + require.Nil(t, findMetricLabelByName(metrics, "error_type")) + operation := findMetricLabelByName(metrics, "messaging_operation_name") + require.Equal(t, "receive", operation.GetValue()) + system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "redis", system.GetValue()) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 30c6dd3e95..4c6a341cb8 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -71,6 +71,9 @@ func 
TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) + _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + require.False(t, hasErr) + require.Equal(t, int64(2), sum.DataPoints[0].Value) }) }) @@ -132,12 +135,18 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes + operation, _ := attrs.Value(otelattrs.MessagingOperationName) + require.Equal(t, "receive", operation.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "kafka", system.AsString()) destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) + _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + require.False(t, hasErr) + require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -187,6 +196,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyNats.12")) + _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + require.False(t, hasErr) + require.Equal(t, int64(2), sum.DataPoints[0].Value) }) }) @@ -232,6 +244,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "getEmployeeMyNats.12")) + _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + require.False(t, hasErr) + require.Equal(t, int64(1), sum.DataPoints[0].Value) }) }) @@ -313,6 +328,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated.3")) + _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + require.False(t, hasErr) + require.Equal(t, 
int64(2), sum.DataPoints[0].Value) }) @@ -365,6 +383,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) + _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + require.False(t, hasErr) + require.Equal(t, int64(2), sum.DataPoints[0].Value) }) }) @@ -430,6 +451,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) + + _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + require.False(t, hasErr) + require.Equal(t, int64(1), sum.DataPoints[0].Value) }) From 77065eeb8da7ebbfa1a63371c5276b5146ff3c15 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 02:59:02 +0530 Subject: [PATCH 20/40] fix: tests --- router-tests/telemetry/event_metrics_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 4c6a341cb8..52b29491d7 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -190,6 +190,9 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes + operation, _ := attrs.Value(otelattrs.MessagingOperationName) + require.Equal(t, "send", operation.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "nats", system.AsString()) @@ -322,6 +325,9 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes + operation, _ := attrs.Value(otelattrs.MessagingOperationName) + require.Equal(t, "receive", operation.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "nats", system.AsString()) @@ -377,6 +383,9 @@ func 
TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes + operation, _ := attrs.Value(otelattrs.MessagingOperationName) + require.Equal(t, "send", operation.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "redis", system.AsString()) @@ -446,6 +455,9 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes + operation, _ := attrs.Value(otelattrs.MessagingOperationName) + require.Equal(t, "receive", operation.AsString()) + system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "redis", system.AsString()) From 933cab45289e12ba33fb7bbba860ed067d21f045 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 14:01:00 +0530 Subject: [PATCH 21/40] fix: naming operation --- router-tests/prometheus_event_metrics_test.go | 6 +++--- router-tests/telemetry/event_metrics_test.go | 6 +++--- router/pkg/pubsub/kafka/adapter.go | 4 ++-- router/pkg/pubsub/nats/adapter.go | 4 ++-- router/pkg/pubsub/redis/adapter.go | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_event_metrics_test.go index 462c80a38b..6d77390903 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ b/router-tests/prometheus_event_metrics_test.go @@ -56,7 +56,7 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "messaging_operation_name") - require.Equal(t, "send", operation.GetValue()) + require.Equal(t, "produce", operation.GetValue()) require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, "messaging_system") @@ -173,7 +173,7 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "messaging_operation_name") - require.Equal(t, "send", operation.GetValue()) + 
require.Equal(t, "publish", operation.GetValue()) require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "nats", system.GetValue()) @@ -347,7 +347,7 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "messaging_operation_name") - require.Equal(t, "send", operation.GetValue()) + require.Equal(t, "publish", operation.GetValue()) require.Nil(t, findMetricLabelByName(metrics, "error_type")) system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "redis", system.GetValue()) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 52b29491d7..2ce9fea88d 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -63,7 +63,7 @@ func TestFlakyEventMetrics(t *testing.T) { attrs := sum.DataPoints[0].Attributes operation, _ := attrs.Value(otelattrs.MessagingOperationName) - require.Equal(t, "send", operation.AsString()) + require.Equal(t, "produce", operation.AsString()) system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "kafka", system.AsString()) @@ -191,7 +191,7 @@ func TestFlakyEventMetrics(t *testing.T) { attrs := sum.DataPoints[0].Attributes operation, _ := attrs.Value(otelattrs.MessagingOperationName) - require.Equal(t, "send", operation.AsString()) + require.Equal(t, "publish", operation.AsString()) system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "nats", system.AsString()) @@ -384,7 +384,7 @@ func TestFlakyEventMetrics(t *testing.T) { attrs := sum.DataPoints[0].Attributes operation, _ := attrs.Value(otelattrs.MessagingOperationName) - require.Equal(t, "send", operation.AsString()) + require.Equal(t, "publish", operation.AsString()) system, _ := attrs.Value(otelattrs.MessagingSystem) require.Equal(t, "redis", system.AsString()) diff --git 
a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 8a71fdc86c..5e28fd40a3 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -183,11 +183,11 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu if pErr != nil { log.Error("publish error", zap.Error(pErr)) // failure emission: include error.type generic - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeKafka, ErrorType: "error", DestinationName: event.Topic}) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "produce", MessagingSystem: metric.ProviderTypeKafka, ErrorType: "error", DestinationName: event.Topic}) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) } - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeKafka, DestinationName: event.Topic}) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "produce", MessagingSystem: metric.ProviderTypeKafka, DestinationName: event.Topic}) return nil } diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index a449a70097..903ea7e767 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -222,10 +222,10 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv err := p.client.Publish(event.Subject, event.Data) if err != nil { log.Error("publish error", zap.Error(err)) - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeNats, ErrorType: "error", DestinationName: event.Subject}) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeNats, ErrorType: "error", DestinationName: event.Subject}) return datasource.NewError(fmt.Sprintf("error publishing to 
NATS subject %s", event.Subject), err) } else { - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeNats, DestinationName: event.Subject}) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeNats, DestinationName: event.Subject}) } return nil diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index 59003da3c2..b5e727f45f 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -150,10 +150,10 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu intCmd := p.conn.Publish(ctx, event.Channel, data) if intCmd.Err() != nil { log.Error("publish error", zap.Error(intCmd.Err())) - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeRedis, ErrorType: "error", DestinationName: event.Channel}) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeRedis, ErrorType: "error", DestinationName: event.Channel}) return datasource.NewError(fmt.Sprintf("error publishing to Redis PubSub channel %s", event.Channel), intCmd.Err()) } - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "send", MessagingSystem: metric.ProviderTypeRedis, DestinationName: event.Channel}) + p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeRedis, DestinationName: event.Channel}) return nil } From dbc50e55438601f3745bda0a556998acdb46afd7 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 14:10:40 +0530 Subject: [PATCH 22/40] fix: cleanup --- router/pkg/otel/attributes.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index 35ff62cfc8..c030233496 100644 --- a/router/pkg/otel/attributes.go +++ 
b/router/pkg/otel/attributes.go @@ -60,15 +60,6 @@ const ( WgGraphQLParentType = attribute.Key("wg.graphql.parent_type") ) -// Event metrics attributes -const ( - WgEventProviderID = attribute.Key("wg.event.provider.id") - WgEventProviderType = attribute.Key("wg.event.provider.type") - WgKafkaTopic = attribute.Key("wg.kafka.topic") - WgNatsSubject = attribute.Key("wg.nats.subject") - WgRedisChannel = attribute.Key("wg.redis.channel") -) - // Messaging metrics attributes const ( MessagingOperationName = attribute.Key("messaging.operation.name") From b56c97f1a51245e8a2553cf3eeff39c65a969929 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 18:41:31 +0530 Subject: [PATCH 23/40] fix: improve tests --- router-tests/prometheus_event_metrics_test.go | 7 ++----- router-tests/telemetry/event_metrics_test.go | 6 +----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_event_metrics_test.go index 6d77390903..1efe622e3e 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ b/router-tests/prometheus_event_metrics_test.go @@ -277,10 +277,6 @@ func TestFlakyEventMetrics(t *testing.T) { }) require.JSONEq(t, `{"data":{"updateAvailability":{"id":3}}}`, resOne.Body) - // Trigger the second subscription via NATS - err = xEnv.NatsConnectionDefault.Publish(xEnv.GetPubSubName("employeeUpdated.3"), []byte(`{"id":3,"__typename": "Employee"}`)) - require.NoError(t, err) - err = xEnv.NatsConnectionDefault.Flush() require.NoError(t, err) @@ -304,7 +300,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated.3")) - require.Equal(t, float64(2), metrics[0].Counter.GetValue()) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) require.NoError(t, client.Close()) @@ -349,6 +345,7 @@ func TestFlakyEventMetrics(t *testing.T) { operation 
:= findMetricLabelByName(metrics, "messaging_operation_name") require.Equal(t, "publish", operation.GetValue()) require.Nil(t, findMetricLabelByName(metrics, "error_type")) + system := findMetricLabelByName(metrics, "messaging_system") require.Equal(t, "redis", system.GetValue()) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 2ce9fea88d..08e5a5bba6 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -301,10 +301,6 @@ func TestFlakyEventMetrics(t *testing.T) { }) require.JSONEq(t, `{"data":{"updateAvailability":{"id":3}}}`, resOne.Body) - // Trigger the second subscription via NATS - err = xEnv.NatsConnectionDefault.Publish(xEnv.GetPubSubName("employeeUpdated.3"), []byte(`{"id":3,"__typename": "Employee"}`)) - require.NoError(t, err) - err = xEnv.NatsConnectionDefault.Flush() require.NoError(t, err) @@ -337,7 +333,7 @@ func TestFlakyEventMetrics(t *testing.T) { _, hasErr := attrs.Value(otelattrs.MessagingErrorType) require.False(t, hasErr) - require.Equal(t, int64(2), sum.DataPoints[0].Value) + require.Equal(t, int64(1), sum.DataPoints[0].Value) }) require.NoError(t, client.Close()) From e72b9ae15c870dbcdf9b55efd61ec0638d2f1686 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 18:57:52 +0530 Subject: [PATCH 24/40] fix: attempt to fix subgraph tests --- router-tests/structured_logging_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router-tests/structured_logging_test.go b/router-tests/structured_logging_test.go index 82bfd1d9ff..aab02ef998 100644 --- a/router-tests/structured_logging_test.go +++ b/router-tests/structured_logging_test.go @@ -3054,7 +3054,7 @@ func TestFlakyAccessLogs(t *testing.T) { actual2 := requestLog.All()[1].ContextMap()["response_body"].(string) require.Equal(t, - 
`{"data":{"_entities":[{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false}]}}`, + `{"data":{"_entities":[{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":true},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false}]}}`, actual2) actual3 := requestLog.All()[2].ContextMap()["response_body"].(string) From 775ea72567cb6dd86660421de01bd96ccf42e817 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 19:54:48 +0530 Subject: [PATCH 25/40] fix: revert false positive --- router-tests/structured_logging_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router-tests/structured_logging_test.go b/router-tests/structured_logging_test.go index aab02ef998..82bfd1d9ff 100644 --- a/router-tests/structured_logging_test.go +++ b/router-tests/structured_logging_test.go @@ -3054,7 +3054,7 @@ func TestFlakyAccessLogs(t *testing.T) { actual2 := requestLog.All()[1].ContextMap()["response_body"].(string) require.Equal(t, - 
`{"data":{"_entities":[{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":true},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false}]}}`, + `{"data":{"_entities":[{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false},{"__typename":"Employee","isAvailable":false}]}}`, actual2) actual3 := requestLog.All()[2].ContextMap()["response_body"].(string) From e3d63d415644fb9be32b7e05d33acdc8fe09dab2 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 22:43:52 +0530 Subject: [PATCH 26/40] fix: tests --- router-tests/prometheus_event_metrics_test.go | 7 ++----- router-tests/telemetry/event_metrics_test.go | 6 ++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_event_metrics_test.go index 1efe622e3e..a7b3bffb7d 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ b/router-tests/prometheus_event_metrics_test.go @@ -271,11 +271,8 @@ func TestFlakyEventMetrics(t *testing.T) { xEnv.WaitForSubscriptionCount(1, WaitTimeout) - // Send a mutation to trigger the first subscription - resOne := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ - Query: `mutation { updateAvailability(employeeID: 3, isAvailable: true) { id } }`, - }) - require.JSONEq(t, 
`{"data":{"updateAvailability":{"id":3}}}`, resOne.Body) + err = xEnv.NatsConnectionDefault.Publish(xEnv.GetPubSubName("employeeUpdated.3"), []byte(`{"id":3,"__typename":"Employee"}`)) + require.NoError(t, err) err = xEnv.NatsConnectionDefault.Flush() require.NoError(t, err) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index 08e5a5bba6..de9f321adc 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -296,10 +296,8 @@ func TestFlakyEventMetrics(t *testing.T) { xEnv.WaitForSubscriptionCount(1, WaitTimeout) // Send a mutation to trigger the first subscription - resOne := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ - Query: `mutation { updateAvailability(employeeID: 3, isAvailable: true) { id } }`, - }) - require.JSONEq(t, `{"data":{"updateAvailability":{"id":3}}}`, resOne.Body) + err = xEnv.NatsConnectionDefault.Publish(xEnv.GetPubSubName("employeeUpdated.3"), []byte(`{"id":3,"__typename":"Employee"}`)) + require.NoError(t, err) err = xEnv.NatsConnectionDefault.Flush() require.NoError(t, err) From 2ac4afa89e10801f24fd1729ca33d1c47b684688 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 18 Aug 2025 22:45:26 +0530 Subject: [PATCH 27/40] fix: linting --- router/pkg/pubsub/nats/adapter.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index 903ea7e767..c73aecd081 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -319,15 +319,7 @@ func (p *ProviderAdapter) Shutdown(ctx context.Context) error { return nil } -func NewAdapter( - ctx context.Context, - logger *zap.Logger, - url string, - opts []nats.Option, - hostName string, - routerListenAddr string, - providerOpts datasource.ProviderOpts, -) (Adapter, error) { +func NewAdapter(ctx context.Context, logger *zap.Logger, url string, opts []nats.Option, hostName 
string, routerListenAddr string, providerOpts datasource.ProviderOpts) (Adapter, error) { if logger == nil { logger = zap.NewNop() } From fb2c05ca50264c6f1fd9d1cfd6673242e8f23321 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 01:01:49 +0530 Subject: [PATCH 28/40] fix: review comments --- demo/pkg/subgraphs/subgraphs.go | 4 +- router-tests/telemetry/event_metrics_test.go | 14 +- router-tests/testenv/testenv.go | 22 +-- router/core/factoryresolver.go | 12 +- router/core/graph_server.go | 26 +-- router/core/router.go | 20 +-- router/pkg/config/config.go | 42 ++--- router/pkg/config/config.schema.json | 4 +- .../pkg/config/testdata/config_defaults.json | 4 +- router/pkg/config/testdata/config_full.json | 4 +- router/pkg/metric/config.go | 8 +- router/pkg/metric/event_measurements.go | 52 ------ router/pkg/metric/event_metric_store.go | 146 ----------------- .../metric/messaging_event_measurements.go | 49 ++++++ .../metric/messaging_event_metric_store.go | 149 ++++++++++++++++++ router/pkg/metric/noop_event_metrics.go | 9 -- ...o => oltp_messaging_event_metric_store.go} | 17 +- ...o => prom_messaging_event_metric_store.go} | 19 +-- router/pkg/otel/attributes.go | 9 +- router/pkg/pubsub/datasource/provider.go | 2 +- router/pkg/pubsub/kafka/adapter.go | 67 +++++--- router/pkg/pubsub/nats/adapter.go | 88 +++++++---- router/pkg/pubsub/pubsub.go | 6 +- router/pkg/pubsub/redis/adapter.go | 55 +++++-- 24 files changed, 450 insertions(+), 378 deletions(-) delete mode 100644 router/pkg/metric/event_measurements.go delete mode 100644 router/pkg/metric/event_metric_store.go create mode 100644 router/pkg/metric/messaging_event_measurements.go create mode 100644 router/pkg/metric/messaging_event_metric_store.go rename router/pkg/metric/{oltp_event_metric_store.go => oltp_messaging_event_metric_store.go} (56%) rename router/pkg/metric/{prom_event_metric_store.go => prom_messaging_event_metric_store.go} (53%) diff --git a/demo/pkg/subgraphs/subgraphs.go 
b/demo/pkg/subgraphs/subgraphs.go index 90bfcf8dd3..7e2c750b41 100644 --- a/demo/pkg/subgraphs/subgraphs.go +++ b/demo/pkg/subgraphs/subgraphs.go @@ -213,7 +213,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID := map[string]natsPubsub.Adapter{} defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), + MessagingEventMetricStore: rmetric.NewNoopEventMetricStore(), }) if err != nil { return nil, fmt.Errorf("failed to create default nats adapter: %w", err) @@ -221,7 +221,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID["default"] = defaultAdapter myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), + MessagingEventMetricStore: rmetric.NewNoopEventMetricStore(), }) if err != nil { return nil, fmt.Errorf("failed to create my-nats adapter: %w", err) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/event_metrics_test.go index de9f321adc..a03ef52ddf 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ b/router-tests/telemetry/event_metrics_test.go @@ -71,7 +71,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) - _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + _, hasErr := attrs.Value(otelattrs.MessagingError) require.False(t, hasErr) require.Equal(t, int64(2), sum.DataPoints[0].Value) @@ -144,7 +144,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) - _, hasErr := 
attrs.Value(otelattrs.MessagingErrorType) + _, hasErr := attrs.Value(otelattrs.MessagingError) require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) @@ -199,7 +199,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyNats.12")) - _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + _, hasErr := attrs.Value(otelattrs.MessagingError) require.False(t, hasErr) require.Equal(t, int64(2), sum.DataPoints[0].Value) @@ -247,7 +247,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "getEmployeeMyNats.12")) - _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + _, hasErr := attrs.Value(otelattrs.MessagingError) require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) @@ -328,7 +328,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated.3")) - _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + _, hasErr := attrs.Value(otelattrs.MessagingError) require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) @@ -386,7 +386,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) - _, hasErr := attrs.Value(otelattrs.MessagingErrorType) + _, hasErr := attrs.Value(otelattrs.MessagingError) require.False(t, hasErr) require.Equal(t, int64(2), sum.DataPoints[0].Value) @@ -458,7 +458,7 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) - _, hasErr := 
attrs.Value(otelattrs.MessagingErrorType) + _, hasErr := attrs.Value(otelattrs.MessagingError) require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index d44491f3e2..89f9d9aa2e 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -1504,11 +1504,11 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node EngineStats: rmetric.EngineStatsConfig{ Subscription: testConfig.MetricOptions.PrometheusEngineStatsOptions.EnableSubscription, }, - CircuitBreaker: testConfig.MetricOptions.EnablePrometheusCircuitBreakerMetrics, - ExcludeMetrics: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetrics, - ExcludeMetricLabels: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetricLabels, - EventMetrics: testConfig.MetricOptions.EnablePrometheusEventMetrics, - ExcludeScopeInfo: testConfig.MetricOptions.MetricExclusions.ExcludeScopeInfo, + CircuitBreaker: testConfig.MetricOptions.EnablePrometheusCircuitBreakerMetrics, + ExcludeMetrics: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetrics, + ExcludeMetricLabels: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetricLabels, + MessagingEventMetrics: testConfig.MetricOptions.EnablePrometheusEventMetrics, + ExcludeScopeInfo: testConfig.MetricOptions.MetricExclusions.ExcludeScopeInfo, PromSchemaFieldUsage: rmetric.PrometheusSchemaFieldUsage{ Enabled: testConfig.MetricOptions.PrometheusSchemaFieldUsage.Enabled, IncludeOperationSha: testConfig.MetricOptions.PrometheusSchemaFieldUsage.IncludeOperationSha, @@ -1527,11 +1527,11 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node Enabled: true, }, OTLP: config.MetricsOTLP{ - Enabled: true, - RouterRuntime: testConfig.MetricOptions.EnableRuntimeMetrics, - GraphqlCache: testConfig.MetricOptions.EnableOTLPRouterCache, - EventMetrics: 
testConfig.MetricOptions.EnableOTLPEventMetrics, - ConnectionStats: testConfig.MetricOptions.EnableOTLPConnectionMetrics, + Enabled: true, + RouterRuntime: testConfig.MetricOptions.EnableRuntimeMetrics, + GraphqlCache: testConfig.MetricOptions.EnableOTLPRouterCache, + MessagingEventMetrics: testConfig.MetricOptions.EnableOTLPEventMetrics, + ConnectionStats: testConfig.MetricOptions.EnableOTLPConnectionMetrics, EngineStats: config.EngineStats{ Subscriptions: testConfig.MetricOptions.OTLPEngineStatsOptions.EnableSubscription, }, @@ -2825,7 +2825,7 @@ func subgraphOptions(ctx context.Context, t testing.TB, logger *zap.Logger, nats natsPubSubByProviderID := make(map[string]pubsubNats.Adapter, len(DemoNatsProviders)) for _, sourceName := range DemoNatsProviders { adapter, err := pubsubNats.NewAdapter(ctx, logger, natsData.Params[0].Url, natsData.Params[0].Opts, "hostname", "listenaddr", datasource.ProviderOpts{ - EventMetricStore: rmetric.NewNoopEventMetricStore(), + MessagingEventMetricStore: rmetric.NewNoopEventMetricStore(), }) require.NoError(t, err) require.NoError(t, adapter.Startup(ctx)) diff --git a/router/core/factoryresolver.go b/router/core/factoryresolver.go index 75f640c559..96226c3eee 100644 --- a/router/core/factoryresolver.go +++ b/router/core/factoryresolver.go @@ -208,11 +208,11 @@ func (l *Loader) LoadInternedString(engineConfig *nodev1.EngineConfiguration, st } type RouterEngineConfiguration struct { - Execution config.EngineExecutionConfiguration - Headers *config.HeaderRules - Events config.EventsConfiguration - SubgraphErrorPropagation config.SubgraphErrorPropagationConfiguration - EventMetricStore rmetric.EventMetricStore + Execution config.EngineExecutionConfiguration + Headers *config.HeaderRules + Events config.EventsConfiguration + SubgraphErrorPropagation config.SubgraphErrorPropagationConfiguration + MessagingEventMetricStore rmetric.MessagingEventMetricStore } func mapProtoFilterToPlanFilter(input *nodev1.SubscriptionFilterCondition, 
output *plan.SubscriptionFilterCondition) *plan.SubscriptionFilterCondition { @@ -472,7 +472,7 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod factoryProviders, factoryDataSources, err := pubsub.BuildProvidersAndDataSources( l.ctx, routerEngineConfig.Events, - routerEngineConfig.EventMetricStore, + routerEngineConfig.MessagingEventMetricStore, l.logger, pubSubDS, l.resolver.InstanceData().HostName, diff --git a/router/core/graph_server.go b/router/core/graph_server.go index a3f41cd91c..2276be69ce 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -518,7 +518,7 @@ type graphMux struct { metricStore rmetric.Store prometheusCacheMetrics *rmetric.CacheMetrics otelCacheMetrics *rmetric.CacheMetrics - eventMetricStore rmetric.EventMetricStore + messagingEventMetricStore rmetric.MessagingEventMetricStore } // buildOperationCaches creates the caches for the graph mux. @@ -760,8 +760,8 @@ func (s *graphMux) Shutdown(ctx context.Context) error { } } - if s.eventMetricStore != nil { - if aErr := s.eventMetricStore.Shutdown(ctx); aErr != nil { + if s.messagingEventMetricStore != nil { + if aErr := s.messagingEventMetricStore.Shutdown(ctx); aErr != nil { err = errors.Join(err, aErr) } } @@ -781,8 +781,8 @@ func (s *graphServer) buildGraphMux( opts BuildGraphMuxOptions, ) (*graphMux, error) { gm := &graphMux{ - metricStore: rmetric.NewNoopMetrics(), - eventMetricStore: rmetric.NewNoopEventMetricStore(), + metricStore: rmetric.NewNoopMetrics(), + messagingEventMetricStore: rmetric.NewNoopEventMetricStore(), } httpRouter := chi.NewRouter() @@ -880,8 +880,8 @@ func (s *graphServer) buildGraphMux( } } - if s.metricConfig.OpenTelemetry.EventMetrics || s.metricConfig.Prometheus.EventMetrics { - store, err := rmetric.NewEventMetricStore( + if s.metricConfig.OpenTelemetry.MessagingEventMetrics || s.metricConfig.Prometheus.MessagingEventMetrics { + store, err := rmetric.NewMessagingEventMetricStore( s.logger, 
baseMetricAttributes, s.otlpMeterProvider, @@ -890,7 +890,7 @@ func (s *graphServer) buildGraphMux( if err != nil { return nil, err } - gm.eventMetricStore = store + gm.messagingEventMetricStore = store } subgraphs, err := configureSubgraphOverwrites( @@ -1132,11 +1132,11 @@ func (s *graphServer) buildGraphMux( } routerEngineConfig := &RouterEngineConfiguration{ - Execution: s.engineExecutionConfiguration, - Headers: s.headerRules, - Events: s.eventsConfig, - SubgraphErrorPropagation: s.subgraphErrorPropagation, - EventMetricStore: gm.eventMetricStore, + Execution: s.engineExecutionConfiguration, + Headers: s.headerRules, + Events: s.eventsConfig, + SubgraphErrorPropagation: s.subgraphErrorPropagation, + MessagingEventMetricStore: gm.messagingEventMetricStore, } // map[string]*http.Transport cannot be coerced into map[string]http.RoundTripper, unfortunately diff --git a/router/core/router.go b/router/core/router.go index db71af8f1f..3c5697b7cf 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -2238,11 +2238,11 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config { EngineStats: rmetric.EngineStatsConfig{ Subscription: cfg.Metrics.OTLP.EngineStats.Subscriptions, }, - Exporters: openTelemetryExporters, - CircuitBreaker: cfg.Metrics.OTLP.CircuitBreaker, - EventMetrics: cfg.Metrics.OTLP.EventMetrics, - ExcludeMetrics: cfg.Metrics.OTLP.ExcludeMetrics, - ExcludeMetricLabels: cfg.Metrics.OTLP.ExcludeMetricLabels, + Exporters: openTelemetryExporters, + CircuitBreaker: cfg.Metrics.OTLP.CircuitBreaker, + MessagingEventMetrics: cfg.Metrics.OTLP.MessagingEventMetrics, + ExcludeMetrics: cfg.Metrics.OTLP.ExcludeMetrics, + ExcludeMetricLabels: cfg.Metrics.OTLP.ExcludeMetricLabels, }, Prometheus: rmetric.PrometheusConfig{ Enabled: cfg.Metrics.Prometheus.Enabled, @@ -2253,11 +2253,11 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config { EngineStats: rmetric.EngineStatsConfig{ Subscription: 
cfg.Metrics.Prometheus.EngineStats.Subscriptions, }, - CircuitBreaker: cfg.Metrics.Prometheus.CircuitBreaker, - ExcludeMetrics: cfg.Metrics.Prometheus.ExcludeMetrics, - ExcludeMetricLabels: cfg.Metrics.Prometheus.ExcludeMetricLabels, - EventMetrics: cfg.Metrics.Prometheus.EventMetrics, - ExcludeScopeInfo: cfg.Metrics.Prometheus.ExcludeScopeInfo, + CircuitBreaker: cfg.Metrics.Prometheus.CircuitBreaker, + ExcludeMetrics: cfg.Metrics.Prometheus.ExcludeMetrics, + ExcludeMetricLabels: cfg.Metrics.Prometheus.ExcludeMetricLabels, + MessagingEventMetrics: cfg.Metrics.Prometheus.MessagingEventMetrics, + ExcludeScopeInfo: cfg.Metrics.Prometheus.ExcludeScopeInfo, PromSchemaFieldUsage: rmetric.PrometheusSchemaFieldUsage{ Enabled: cfg.Metrics.Prometheus.SchemaFieldUsage.Enabled, IncludeOperationSha: cfg.Metrics.Prometheus.SchemaFieldUsage.IncludeOperationSha, diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index 5ed94f028b..9e4cb4715a 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -95,17 +95,17 @@ type EngineStats struct { } type Prometheus struct { - Enabled bool `yaml:"enabled" envDefault:"true" env:"PROMETHEUS_ENABLED"` - Path string `yaml:"path" envDefault:"/metrics" env:"PROMETHEUS_HTTP_PATH"` - ListenAddr string `yaml:"listen_addr" envDefault:"127.0.0.1:8088" env:"PROMETHEUS_LISTEN_ADDR"` - GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"PROMETHEUS_GRAPHQL_CACHE"` - ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"PROMETHEUS_CONNECTION_STATS"` - EventMetrics bool `yaml:"event_metrics" envDefault:"false" env:"PROMETHEUS_EVENT_METRICS"` - EngineStats EngineStats `yaml:"engine_stats" envPrefix:"PROMETHEUS_"` - CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"PROMETHEUS_CIRCUIT_BREAKER"` - ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"PROMETHEUS_EXCLUDE_METRICS"` - ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" 
env:"PROMETHEUS_EXCLUDE_METRIC_LABELS"` - ExcludeScopeInfo bool `yaml:"exclude_scope_info" envDefault:"false" env:"PROMETHEUS_EXCLUDE_SCOPE_INFO"` + Enabled bool `yaml:"enabled" envDefault:"true" env:"PROMETHEUS_ENABLED"` + Path string `yaml:"path" envDefault:"/metrics" env:"PROMETHEUS_HTTP_PATH"` + ListenAddr string `yaml:"listen_addr" envDefault:"127.0.0.1:8088" env:"PROMETHEUS_LISTEN_ADDR"` + GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"PROMETHEUS_GRAPHQL_CACHE"` + ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"PROMETHEUS_CONNECTION_STATS"` + MessagingEventMetrics bool `yaml:"messaging_event_metrics" envDefault:"false" env:"PROMETHEUS_MESSAGING_EVENT_METRICS"` + EngineStats EngineStats `yaml:"engine_stats" envPrefix:"PROMETHEUS_"` + CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"PROMETHEUS_CIRCUIT_BREAKER"` + ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"PROMETHEUS_EXCLUDE_METRICS"` + ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"PROMETHEUS_EXCLUDE_METRIC_LABELS"` + ExcludeScopeInfo bool `yaml:"exclude_scope_info" envDefault:"false" env:"PROMETHEUS_EXCLUDE_SCOPE_INFO"` SchemaFieldUsage PrometheusSchemaFieldUsage `yaml:"schema_usage" envPrefix:"PROMETHEUS_SCHEMA_FIELD_USAGE_"` } @@ -132,16 +132,16 @@ type Metrics struct { } type MetricsOTLP struct { - Enabled bool `yaml:"enabled" envDefault:"true" env:"METRICS_OTLP_ENABLED"` - RouterRuntime bool `yaml:"router_runtime" envDefault:"true" env:"METRICS_OTLP_ROUTER_RUNTIME"` - GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"METRICS_OTLP_GRAPHQL_CACHE"` - ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"METRICS_OTLP_CONNECTION_STATS"` - EngineStats EngineStats `yaml:"engine_stats" envPrefix:"METRICS_OTLP_"` - CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"METRICS_OTLP_CIRCUIT_BREAKER"` - EventMetrics bool `yaml:"event_metrics" envDefault:"false" 
env:"METRICS_OTLP_EVENT_METRICS"` - ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"METRICS_OTLP_EXCLUDE_METRICS"` - ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"METRICS_OTLP_EXCLUDE_METRIC_LABELS"` - Exporters []MetricsOTLPExporter `yaml:"exporters"` + Enabled bool `yaml:"enabled" envDefault:"true" env:"METRICS_OTLP_ENABLED"` + RouterRuntime bool `yaml:"router_runtime" envDefault:"true" env:"METRICS_OTLP_ROUTER_RUNTIME"` + GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"METRICS_OTLP_GRAPHQL_CACHE"` + ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"METRICS_OTLP_CONNECTION_STATS"` + EngineStats EngineStats `yaml:"engine_stats" envPrefix:"METRICS_OTLP_"` + CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"METRICS_OTLP_CIRCUIT_BREAKER"` + MessagingEventMetrics bool `yaml:"messaging_event_metrics" envDefault:"false" env:"METRICS_OTLP_MESSAGING_EVENT_METRICS"` + ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"METRICS_OTLP_EXCLUDE_METRICS"` + ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"METRICS_OTLP_EXCLUDE_METRIC_LABELS"` + Exporters []MetricsOTLPExporter `yaml:"exporters"` } type Telemetry struct { diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index f5e084479b..4df91c44fc 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -1066,10 +1066,10 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." }, - "event_metrics": { + "messaging_event_metrics": { "type": "boolean", "default": false, - "description": "Enable the collection of event metrics. The default value is false." + "description": "Enable the collection of messaging event metrics. The default value is false." 
}, "circuit_breaker": { "type": "boolean", diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 1f27870e9d..590d8206df 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -40,7 +40,7 @@ "Subscriptions": false }, "CircuitBreaker": false, - "EventMetrics": false, + "MessagingEventMetrics": false, "ExcludeMetrics": null, "ExcludeMetricLabels": null, "Exporters": null @@ -51,7 +51,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": false, "ConnectionStats": false, - "EventMetrics": false, + "MessagingEventMetrics": false, "EngineStats": { "Subscriptions": false }, diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 7f4d81fd38..7a1bbeba61 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -61,7 +61,7 @@ "Subscriptions": true }, "CircuitBreaker": false, - "EventMetrics": false, + "MessagingEventMetrics": false, "ExcludeMetrics": null, "ExcludeMetricLabels": null, "Exporters": [ @@ -81,7 +81,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": true, "ConnectionStats": true, - "EventMetrics": false, + "MessagingEventMetrics": false, "EngineStats": { "Subscriptions": true }, diff --git a/router/pkg/metric/config.go b/router/pkg/metric/config.go index 351e59b198..8c34bc0b70 100644 --- a/router/pkg/metric/config.go +++ b/router/pkg/metric/config.go @@ -34,8 +34,8 @@ type PrometheusConfig struct { // Whether or not to exclude scope info ExcludeScopeInfo bool // Prometheus schema field usage configuration - PromSchemaFieldUsage PrometheusSchemaFieldUsage - EventMetrics bool + PromSchemaFieldUsage PrometheusSchemaFieldUsage + MessagingEventMetrics bool } type PrometheusSchemaFieldUsage struct { @@ -79,8 +79,8 @@ type OpenTelemetry struct { // Metric labels to exclude from the OTLP exporter. 
ExcludeMetricLabels []*regexp.Regexp // TestReader is used for testing purposes. If set, the reader will be used instead of the configured exporters. - TestReader sdkmetric.Reader - EventMetrics bool + TestReader sdkmetric.Reader + MessagingEventMetrics bool } func GetDefaultExporter(cfg *Config) *OpenTelemetryExporter { diff --git a/router/pkg/metric/event_measurements.go b/router/pkg/metric/event_measurements.go deleted file mode 100644 index 8bc97bad5f..0000000000 --- a/router/pkg/metric/event_measurements.go +++ /dev/null @@ -1,52 +0,0 @@ -package metric - -import ( - "fmt" - - otelmetric "go.opentelemetry.io/otel/metric" -) - -// Event (Kafka/Redis/NATS) metric constants -const ( - // unified counters across providers per messaging semantic conventions - messagingClientSentMessages = "messaging.client.sent.messages" - messagingClientConsumedMessages = "messaging.client.consumed.messages" -) - -var ( - messagingClientSentMessagesOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of messaging client sent messages"), - } - messagingClientConsumedMessagesOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of messaging client consumed messages"), - } -) - -type eventInstruments struct { - // instruments following messaging semantic conventions - producedMessages otelmetric.Int64Counter - consumedMessages otelmetric.Int64Counter -} - -func newEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { - producedCounter, err := meter.Int64Counter( - messagingClientSentMessages, - messagingClientSentMessagesOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create sent messages counter: %w", err) - } - - consumedCounter, err := meter.Int64Counter( - messagingClientConsumedMessages, - messagingClientConsumedMessagesOptions..., - ) - if err != nil { - return nil, fmt.Errorf("failed to create consumed messages counter: %w", err) - } - - return &eventInstruments{ - producedMessages: 
producedCounter, - consumedMessages: consumedCounter, - }, nil -} diff --git a/router/pkg/metric/event_metric_store.go b/router/pkg/metric/event_metric_store.go deleted file mode 100644 index ef1c793007..0000000000 --- a/router/pkg/metric/event_metric_store.go +++ /dev/null @@ -1,146 +0,0 @@ -package metric - -import ( - "context" - "errors" - "fmt" - - "go.opentelemetry.io/otel/attribute" - otelmetric "go.opentelemetry.io/otel/metric" - "go.opentelemetry.io/otel/sdk/metric" - "go.uber.org/zap" - - otel "github.com/wundergraph/cosmo/router/pkg/otel" -) - -const ( - ProviderTypeKafka = "kafka" - ProviderTypeNats = "nats" - ProviderTypeRedis = "redis" -) - -// MessagingEvent carries the values for messaging metrics attributes. -type MessagingEvent struct { - OperationName string - MessagingSystem string - ErrorType string - DestinationName string -} - -// EventMetricProvider is the interface that wraps the basic Event metric methods. -// We maintain two providers, one for OTEL and one for Prometheus. -type EventMetricProvider interface { - // unified produce/consume for brokers (kafka, redis, nats) - Produce(ctx context.Context, opts ...otelmetric.AddOption) - Consume(ctx context.Context, opts ...otelmetric.AddOption) - - Flush(ctx context.Context) error -} - -type EventMetricStore interface { - // Generic produce/consume with explicit parameters per semantic conventions - Produce(ctx context.Context, event MessagingEvent) - Consume(ctx context.Context, event MessagingEvent) - - Flush(ctx context.Context) error - Shutdown(ctx context.Context) error -} - -// EventMetrics is the store for Event (Kafka/Redis/NATS) metrics. 
-type EventMetrics struct { - baseAttributes []attribute.KeyValue - logger *zap.Logger - - otlpMetrics EventMetricProvider - promMetrics EventMetricProvider -} - -func NewEventMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue, otelProvider, promProvider *metric.MeterProvider, metricsConfig *Config) (*EventMetrics, error) { - store := &EventMetrics{ - baseAttributes: baseAttributes, - logger: logger, - otlpMetrics: &noopEventMetricProvider{}, - promMetrics: &noopEventMetricProvider{}, - } - - if metricsConfig.OpenTelemetry.EventMetrics { - otlpMetrics, err := newOtlpEventMetrics(logger, otelProvider) - if err != nil { - return nil, fmt.Errorf("failed to create otlp event metrics: %w", err) - } - store.otlpMetrics = otlpMetrics - } - - if metricsConfig.Prometheus.EventMetrics { - promMetrics, err := newPromEventMetrics(logger, promProvider) - if err != nil { - return nil, fmt.Errorf("failed to create prometheus event metrics: %w", err) - } - store.promMetrics = promMetrics - } - - return store, nil -} - -func (e *EventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOption { - copied := append([]attribute.KeyValue{}, e.baseAttributes...) - return otelmetric.WithAttributes(append(copied, attrs...)...) -} - -func (e *EventMetrics) Produce(ctx context.Context, event MessagingEvent) { - attrs := []attribute.KeyValue{ - otel.MessagingOperationName.String(event.OperationName), - otel.MessagingSystem.String(event.MessagingSystem), - } - if event.ErrorType != "" { - attrs = append(attrs, otel.MessagingErrorType.String(event.ErrorType)) - } - if event.DestinationName != "" { - attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) - } - opt := e.withAttrs(attrs...) 
- e.otlpMetrics.Produce(ctx, opt) - e.promMetrics.Produce(ctx, opt) -} - -func (e *EventMetrics) Consume(ctx context.Context, event MessagingEvent) { - attrs := []attribute.KeyValue{ - otel.MessagingOperationName.String(event.OperationName), - otel.MessagingSystem.String(event.MessagingSystem), - } - if event.ErrorType != "" { - attrs = append(attrs, otel.MessagingErrorType.String(event.ErrorType)) - } - if event.DestinationName != "" { - attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) - } - opt := e.withAttrs(attrs...) - e.otlpMetrics.Consume(ctx, opt) - e.promMetrics.Consume(ctx, opt) -} - -// Flush flushes the metrics to the backend synchronously. -func (e *EventMetrics) Flush(ctx context.Context) error { - var err error - - if errOtlp := e.otlpMetrics.Flush(ctx); errOtlp != nil { - err = errors.Join(err, fmt.Errorf("failed to flush otlp metrics: %w", errOtlp)) - } - - if errProm := e.promMetrics.Flush(ctx); errProm != nil { - err = errors.Join(err, fmt.Errorf("failed to flush prometheus metrics: %w", errProm)) - } - - return err -} - -// Shutdown flushes the metrics and stops observers if any. 
-func (e *EventMetrics) Shutdown(ctx context.Context) error { - var err error - - if errFlush := e.Flush(ctx); errFlush != nil { - err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errFlush)) - } - - return err -} diff --git a/router/pkg/metric/messaging_event_measurements.go b/router/pkg/metric/messaging_event_measurements.go new file mode 100644 index 0000000000..ead75303ea --- /dev/null +++ b/router/pkg/metric/messaging_event_measurements.go @@ -0,0 +1,49 @@ +package metric + +import ( + "fmt" + + otelmetric "go.opentelemetry.io/otel/metric" +) + +const ( + messagingSentMessages = "messaging.event.sent.messages" + messagingConsumedMessages = "messaging.event.received.messages" +) + +var ( + messagingSentMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of messaging event sent messages"), + } + messagingConsumedMessagesOptions = []otelmetric.Int64CounterOption{ + otelmetric.WithDescription("Number of messaging event consumed messages"), + } +) + +type eventInstruments struct { + producedMessages otelmetric.Int64Counter + consumedMessages otelmetric.Int64Counter +} + +func newMessagingEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { + producedCounter, err := meter.Int64Counter( + messagingSentMessages, + messagingSentMessagesOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create sent messages counter: %w", err) + } + + consumedCounter, err := meter.Int64Counter( + messagingConsumedMessages, + messagingConsumedMessagesOptions..., + ) + if err != nil { + return nil, fmt.Errorf("failed to create received messages counter: %w", err) + } + + return &eventInstruments{ + producedMessages: producedCounter, + consumedMessages: consumedCounter, + }, nil +} diff --git a/router/pkg/metric/messaging_event_metric_store.go b/router/pkg/metric/messaging_event_metric_store.go new file mode 100644 index 0000000000..d797d35554 --- /dev/null +++ 
b/router/pkg/metric/messaging_event_metric_store.go @@ -0,0 +1,149 @@ +package metric + +import ( + "context" + "errors" + "fmt" + + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + "go.uber.org/zap" + + otel "github.com/wundergraph/cosmo/router/pkg/otel" +) + +type ProviderType string + +const ( + ProviderTypeKafka ProviderType = "kafka" + ProviderTypeNats ProviderType = "nats" + ProviderTypeRedis ProviderType = "redis" +) + +// MessagingEvent carries the values for messaging metrics attributes. +type MessagingEvent struct { + ProviderId string // The id of the provider defined in the configuration + OperationName string // The operation name that is specific to the messaging system + MessagingSystem ProviderType // The messaging system type that is supported + Error bool // Indicates if the operation resulted in an error or not (true or false) + DestinationName string // The name of the destination queue / topic / channel +} + +// MessagingEventMetricProvider is the interface that wraps the basic Event metric methods. +type MessagingEventMetricProvider interface { + Produce(ctx context.Context, opts ...otelmetric.AddOption) + Consume(ctx context.Context, opts ...otelmetric.AddOption) + + Flush(ctx context.Context) error +} + +type MessagingEventMetricStore interface { + Produce(ctx context.Context, event MessagingEvent) + Consume(ctx context.Context, event MessagingEvent) + + Flush(ctx context.Context) error + Shutdown(ctx context.Context) error +} + +// MessagingEventMetrics is the store for Event (Kafka/Redis/NATS) metrics.
+type MessagingEventMetrics struct { + baseAttributes []attribute.KeyValue + logger *zap.Logger + providers []MessagingEventMetricProvider +} + +func NewMessagingEventMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue, otelProvider, promProvider *metric.MeterProvider, metricsConfig *Config) (*MessagingEventMetrics, error) { + providers := make([]MessagingEventMetricProvider, 0) + + if metricsConfig.OpenTelemetry.MessagingEventMetrics { + otlpMetrics, err := newOtlpMessagingEventMetrics(logger, otelProvider) + if err != nil { + return nil, fmt.Errorf("failed to create otlp event metrics: %w", err) + } + providers = append(providers, otlpMetrics) + } + + if metricsConfig.Prometheus.MessagingEventMetrics { + promMetrics, err := newPromMessagingEventMetrics(logger, promProvider) + if err != nil { + return nil, fmt.Errorf("failed to create prometheus event metrics: %w", err) + } + providers = append(providers, promMetrics) + } + + store := &MessagingEventMetrics{ + baseAttributes: baseAttributes, + logger: logger, + providers: providers, + } + return store, nil +} + +func (e *MessagingEventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOption { + copied := append([]attribute.KeyValue{}, e.baseAttributes...) + return otelmetric.WithAttributes(append(copied, attrs...)...) +} + +func (e *MessagingEventMetrics) Produce(ctx context.Context, event MessagingEvent) { + attrs := []attribute.KeyValue{ + otel.MessagingOperationName.String(event.OperationName), + otel.MessagingSystem.String(string(event.MessagingSystem)), + otel.MessagingError.Bool(event.Error), + } + if event.ProviderId != "" { + attrs = append(attrs, otel.ProviderId.String(event.ProviderId)) + } + if event.DestinationName != "" { + attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) + } + opt := e.withAttrs(attrs...) 
+ + for _, provider := range e.providers { + provider.Produce(ctx, opt) + } +} + +func (e *MessagingEventMetrics) Consume(ctx context.Context, event MessagingEvent) { + attrs := []attribute.KeyValue{ + otel.MessagingOperationName.String(event.OperationName), + otel.MessagingSystem.String(string(event.MessagingSystem)), + otel.MessagingError.Bool(event.Error), + } + if event.ProviderId != "" { + attrs = append(attrs, otel.ProviderId.String(event.ProviderId)) + } + if event.DestinationName != "" { + attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) + } + + opt := e.withAttrs(attrs...) + + for _, provider := range e.providers { + provider.Consume(ctx, opt) + } +} + +// Flush flushes the metrics to the backend synchronously. +func (e *MessagingEventMetrics) Flush(ctx context.Context) error { + var err error + + for _, provider := range e.providers { + if errOtlp := provider.Flush(ctx); errOtlp != nil { + err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errOtlp)) + } + } + + return err +} + +// Shutdown flushes the metrics and stops observers if any. 
+func (e *MessagingEventMetrics) Shutdown(ctx context.Context) error { + var err error + + if errFlush := e.Flush(ctx); errFlush != nil { + err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errFlush)) + } + + return err +} diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go index 6a42cecdd5..5a7d12ee13 100644 --- a/router/pkg/metric/noop_event_metrics.go +++ b/router/pkg/metric/noop_event_metrics.go @@ -2,17 +2,8 @@ package metric import ( "context" - - otelmetric "go.opentelemetry.io/otel/metric" ) -// A noop metric provider so we do not need to do nil checks for each provider call from the store -type noopEventMetricProvider struct{} - -func (n *noopEventMetricProvider) Produce(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) Consume(ctx context.Context, opts ...otelmetric.AddOption) {} -func (n *noopEventMetricProvider) Flush(ctx context.Context) error { return nil } - type NoopEventMetricStore struct{} func (n *NoopEventMetricStore) Produce(ctx context.Context, event MessagingEvent) {} diff --git a/router/pkg/metric/oltp_event_metric_store.go b/router/pkg/metric/oltp_messaging_event_metric_store.go similarity index 56% rename from router/pkg/metric/oltp_event_metric_store.go rename to router/pkg/metric/oltp_messaging_event_metric_store.go index ad255fa290..5358030271 100644 --- a/router/pkg/metric/oltp_event_metric_store.go +++ b/router/pkg/metric/oltp_messaging_event_metric_store.go @@ -9,29 +9,29 @@ import ( ) const ( - cosmoRouterEventMeterName = "cosmo.router.event" + cosmoRouterEventMeterName = "cosmo.router.messaging.event" cosmoRouterEventMeterVersion = "0.0.1" ) -type otlpEventMetrics struct { +type otlpMessagingEventMetrics struct { instruments *eventInstruments meterProvider *metric.MeterProvider logger *zap.Logger meter otelmetric.Meter } -func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*otlpEventMetrics, error) { +func 
newOtlpMessagingEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*otlpMessagingEventMetrics, error) { meter := meterProvider.Meter( cosmoRouterEventMeterName, otelmetric.WithInstrumentationVersion(cosmoRouterEventMeterVersion), ) - instruments, err := newEventInstruments(meter) + instruments, err := newMessagingEventInstruments(meter) if err != nil { return nil, err } - return &otlpEventMetrics{ + return &otlpMessagingEventMetrics{ instruments: instruments, meterProvider: meterProvider, logger: logger, @@ -39,15 +39,14 @@ func newOtlpEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider }, nil } -// Unified methods -func (o *otlpEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { +func (o *otlpMessagingEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { o.instruments.producedMessages.Add(ctx, 1, opts...) } -func (o *otlpEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { +func (o *otlpMessagingEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { o.instruments.consumedMessages.Add(ctx, 1, opts...) 
} -func (o *otlpEventMetrics) Flush(ctx context.Context) error { +func (o *otlpMessagingEventMetrics) Flush(ctx context.Context) error { return o.meterProvider.ForceFlush(ctx) } diff --git a/router/pkg/metric/prom_event_metric_store.go b/router/pkg/metric/prom_messaging_event_metric_store.go similarity index 53% rename from router/pkg/metric/prom_event_metric_store.go rename to router/pkg/metric/prom_messaging_event_metric_store.go index 5c221637c4..544ab82398 100644 --- a/router/pkg/metric/prom_event_metric_store.go +++ b/router/pkg/metric/prom_messaging_event_metric_store.go @@ -9,29 +9,29 @@ import ( ) const ( - cosmoRouterEventPromMeterName = "cosmo.router.event.prometheus" + cosmoRouterEventPromMeterName = "cosmo.router.messaging.event.prometheus" cosmoRouterEventPromMeterVersion = "0.0.1" ) -type promEventMetrics struct { +type promMessagingEventMetrics struct { instruments *eventInstruments meterProvider *metric.MeterProvider logger *zap.Logger meter otelmetric.Meter } -func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*promEventMetrics, error) { +func newPromMessagingEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*promMessagingEventMetrics, error) { meter := meterProvider.Meter( cosmoRouterEventPromMeterName, otelmetric.WithInstrumentationVersion(cosmoRouterEventPromMeterVersion), ) - instruments, err := newEventInstruments(meter) + instruments, err := newMessagingEventInstruments(meter) if err != nil { return nil, err } - return &promEventMetrics{ + return &promMessagingEventMetrics{ instruments: instruments, meterProvider: meterProvider, logger: logger, @@ -39,13 +39,14 @@ func newPromEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider }, nil } -// Unified methods -func (p *promEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { +func (p *promMessagingEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { 
p.instruments.producedMessages.Add(ctx, 1, opts...) } -func (p *promEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { +func (p *promMessagingEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { p.instruments.consumedMessages.Add(ctx, 1, opts...) } -func (p *promEventMetrics) Flush(ctx context.Context) error { return p.meterProvider.ForceFlush(ctx) } +func (p *promMessagingEventMetrics) Flush(ctx context.Context) error { + return p.meterProvider.ForceFlush(ctx) +} diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index c030233496..3c73c3f1fa 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -62,10 +62,11 @@ const ( // Messaging metrics attributes const ( - MessagingOperationName = attribute.Key("messaging.operation.name") - MessagingSystem = attribute.Key("messaging.system") - MessagingErrorType = attribute.Key("error.type") - MessagingDestinationName = attribute.Key("messaging.destination.name") + MessagingOperationName = attribute.Key("wg.messaging.operation.name") + MessagingSystem = attribute.Key("wg.messaging.system") + MessagingError = attribute.Key("wg.messaging.error") + MessagingDestinationName = attribute.Key("wg.messaging.destination.name") + ProviderId = attribute.Key("wg.provider.id") ) const ( diff --git a/router/pkg/pubsub/datasource/provider.go b/router/pkg/pubsub/datasource/provider.go index 8dd3a9b609..8f92de92a0 100644 --- a/router/pkg/pubsub/datasource/provider.go +++ b/router/pkg/pubsub/datasource/provider.go @@ -34,5 +34,5 @@ type ProviderBuilder[P, E any] interface { } type ProviderOpts struct { - EventMetricStore metric.EventMetricStore + MessagingEventMetricStore metric.MessagingEventMetricStore } diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 5e28fd40a3..540b461d83 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -21,6 +21,11 @@ var ( errClientClosed = 
errors.New("client closed") ) +const ( + kafkaReceive = "receive" + kafkaProduce = "produce" +) + // Adapter defines the interface for Kafka adapter operations type Adapter interface { Subscribe(ctx context.Context, event SubscriptionEventConfiguration, updater resolve.SubscriptionUpdater) error @@ -35,17 +40,21 @@ type Adapter interface { // It uses a single write client to produce messages and a client per topic to consume messages. // Each client polls the Kafka topic for new records and updates the subscriptions with the new data. type ProviderAdapter struct { - ctx context.Context - opts []kgo.Opt - logger *zap.Logger - writeClient *kgo.Client - closeWg sync.WaitGroup - cancel context.CancelFunc - eventMetricStore metric.EventMetricStore + ctx context.Context + opts []kgo.Opt + logger *zap.Logger + writeClient *kgo.Client + closeWg sync.WaitGroup + cancel context.CancelFunc + messagingEventMetricStore metric.MessagingEventMetricStore +} + +type PollerOpts struct { + providerId string } // topicPoller polls the Kafka topic for new records and calls the updateTriggers function. 
-func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, updater resolve.SubscriptionUpdater, providerId string) error { +func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, updater resolve.SubscriptionUpdater, pollerOpts PollerOpts) error { for { select { case <-p.ctx.Done(): // Close the poller if the application context was canceled @@ -91,7 +100,12 @@ func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, u r := iter.Next() p.logger.Debug("subscription update", zap.String("topic", r.Topic), zap.ByteString("data", r.Value)) - p.eventMetricStore.Consume(p.ctx, metric.MessagingEvent{OperationName: "receive", MessagingSystem: metric.ProviderTypeKafka, DestinationName: r.Topic}) + p.messagingEventMetricStore.Consume(p.ctx, metric.MessagingEvent{ + ProviderId: pollerOpts.providerId, + OperationName: kafkaReceive, + MessagingSystem: metric.ProviderTypeKafka, + DestinationName: r.Topic, + }) updater.Update(r.Value) } } @@ -132,7 +146,7 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent defer p.closeWg.Done() - err := p.topicPoller(ctx, client, updater, event.ProviderID) + err := p.topicPoller(ctx, client, updater, PollerOpts{providerId: event.ProviderID}) if err != nil { if errors.Is(err, errClientClosed) || errors.Is(err, context.Canceled) { log.Debug("poller canceled", zap.Error(err)) @@ -183,11 +197,22 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu if pErr != nil { log.Error("publish error", zap.Error(pErr)) // failure emission: include error.type generic - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "produce", MessagingSystem: metric.ProviderTypeKafka, ErrorType: "error", DestinationName: event.Topic}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: kafkaProduce, + MessagingSystem: metric.ProviderTypeKafka, + Error: true, + 
DestinationName: event.Topic, + }) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) } - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "produce", MessagingSystem: metric.ProviderTypeKafka, DestinationName: event.Topic}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: kafkaProduce, + MessagingSystem: metric.ProviderTypeKafka, + DestinationName: event.Topic, + }) return nil } @@ -235,19 +260,19 @@ func NewProviderAdapter(ctx context.Context, logger *zap.Logger, opts []kgo.Opt, logger = zap.NewNop() } - var store metric.EventMetricStore - if providerOpts.EventMetricStore != nil { - store = providerOpts.EventMetricStore + var store metric.MessagingEventMetricStore + if providerOpts.MessagingEventMetricStore != nil { + store = providerOpts.MessagingEventMetricStore } else { store = metric.NewNoopEventMetricStore() } return &ProviderAdapter{ - ctx: ctx, - logger: logger.With(zap.String("pubsub", "kafka")), - opts: opts, - closeWg: sync.WaitGroup{}, - cancel: cancel, - eventMetricStore: store, + ctx: ctx, + logger: logger.With(zap.String("pubsub", "kafka")), + opts: opts, + closeWg: sync.WaitGroup{}, + cancel: cancel, + messagingEventMetricStore: store, }, nil } diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index c73aecd081..edaacd1b92 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -18,6 +18,12 @@ import ( "go.uber.org/zap" ) +const ( + natsRequest = "request" + natsPublish = "publish" + natsReceive = "receive" +) + // Adapter defines the methods that a NATS adapter should implement type Adapter interface { // Subscribe subscribes to the given events and sends updates to the updater @@ -34,17 +40,17 @@ type Adapter interface { // ProviderAdapter implements the AdapterInterface for NATS pub/sub type ProviderAdapter struct { - ctx context.Context - client 
*nats.Conn - js jetstream.JetStream - logger *zap.Logger - closeWg sync.WaitGroup - hostName string - routerListenAddr string - url string - opts []nats.Option - flushTimeout time.Duration - eventMetricStore metric.EventMetricStore + ctx context.Context + client *nats.Conn + js jetstream.JetStream + logger *zap.Logger + closeWg sync.WaitGroup + hostName string + routerListenAddr string + url string + opts []nats.Option + flushTimeout time.Duration + messagingEventMetricStore metric.MessagingEventMetricStore } // getInstanceIdentifier returns an identifier for the current instance. @@ -135,8 +141,9 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent for msg := range msgBatch.Messages() { log.Debug("subscription update", zap.String("message_subject", msg.Subject()), zap.ByteString("data", msg.Data())) - p.eventMetricStore.Consume(p.ctx, metric.MessagingEvent{ - OperationName: "receive", + p.messagingEventMetricStore.Consume(p.ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: natsReceive, MessagingSystem: metric.ProviderTypeNats, DestinationName: msg.Subject(), }) @@ -177,7 +184,12 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent select { case msg := <-msgChan: log.Debug("subscription update", zap.String("message_subject", msg.Subject), zap.ByteString("data", msg.Data)) - p.eventMetricStore.Consume(p.ctx, metric.MessagingEvent{OperationName: "receive", MessagingSystem: metric.ProviderTypeNats, DestinationName: msg.Subject}) + p.messagingEventMetricStore.Consume(p.ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: natsReceive, + MessagingSystem: metric.ProviderTypeNats, + DestinationName: msg.Subject, + }) updater.Update(msg.Data) case <-p.ctx.Done(): // When the application context is done, we stop the subscriptions @@ -222,10 +234,21 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv err := 
p.client.Publish(event.Subject, event.Data) if err != nil { log.Error("publish error", zap.Error(err)) - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeNats, ErrorType: "error", DestinationName: event.Subject}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: natsPublish, + MessagingSystem: metric.ProviderTypeNats, + Error: true, + DestinationName: event.Subject, + }) return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) } else { - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeNats, DestinationName: event.Subject}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: natsPublish, + MessagingSystem: metric.ProviderTypeNats, + DestinationName: event.Subject, + }) } return nil @@ -247,11 +270,22 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv msg, err := p.client.RequestWithContext(ctx, event.Subject, event.Data) if err != nil { log.Error("request error", zap.Error(err)) - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "request", MessagingSystem: metric.ProviderTypeNats, ErrorType: "error", DestinationName: event.Subject}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: natsRequest, + MessagingSystem: metric.ProviderTypeNats, + Error: true, + DestinationName: event.Subject, + }) return datasource.NewError(fmt.Sprintf("error requesting from NATS subject %s", event.Subject), err) } - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "request", MessagingSystem: metric.ProviderTypeNats, DestinationName: event.Subject}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + 
OperationName: natsRequest, + MessagingSystem: metric.ProviderTypeNats, + DestinationName: event.Subject, + }) // We don't collect metrics on err here as it's an error related to the writer _, err = w.Write(msg.Data) @@ -325,14 +359,14 @@ func NewAdapter(ctx context.Context, logger *zap.Logger, url string, opts []nats } return &ProviderAdapter{ - ctx: ctx, - logger: logger.With(zap.String("pubsub", "nats")), - closeWg: sync.WaitGroup{}, - hostName: hostName, - routerListenAddr: routerListenAddr, - url: url, - opts: opts, - flushTimeout: 10 * time.Second, - eventMetricStore: providerOpts.EventMetricStore, + ctx: ctx, + logger: logger.With(zap.String("pubsub", "nats")), + closeWg: sync.WaitGroup{}, + hostName: hostName, + routerListenAddr: routerListenAddr, + url: url, + opts: opts, + flushTimeout: 10 * time.Second, + messagingEventMetricStore: providerOpts.MessagingEventMetricStore, }, nil } diff --git a/router/pkg/pubsub/pubsub.go b/router/pkg/pubsub/pubsub.go index 81705dbeab..a2e3b16a99 100644 --- a/router/pkg/pubsub/pubsub.go +++ b/router/pkg/pubsub/pubsub.go @@ -52,7 +52,7 @@ func (e *ProviderNotDefinedError) Error() string { // BuildProvidersAndDataSources is a generic function that builds providers and data sources for the given // EventsConfiguration and DataSourceConfigurationWithMetadata -func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfiguration, store metric.EventMetricStore, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { +func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfiguration, store metric.MessagingEventMetricStore, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { if store == nil { store = metric.NewNoopEventMetricStore() } @@ -116,7 +116,7 @@ func 
build[P GetID, E GetEngineEventConfiguration]( builder pubsub_datasource.ProviderBuilder[P, E], providersData []P, dsConfs []dsConfAndEvents[E], - store metric.EventMetricStore, + store metric.MessagingEventMetricStore, ) ([]pubsub_datasource.Provider, []plan.DataSource, error) { var pubSubProviders []pubsub_datasource.Provider var outs []plan.DataSource @@ -138,7 +138,7 @@ func build[P GetID, E GetEngineEventConfiguration]( continue } provider, err := builder.BuildProvider(providerData, pubsub_datasource.ProviderOpts{ - EventMetricStore: store, + MessagingEventMetricStore: store, }) if err != nil { return nil, nil, err diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index b5e727f45f..af63161499 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -13,6 +13,11 @@ import ( "go.uber.org/zap" ) +const ( + redisPublish = "publish" + redisReceive = "receive" +) + // Adapter defines the methods that a Redis adapter should implement type Adapter interface { // Subscribe subscribes to the given events and sends updates to the updater @@ -28,24 +33,24 @@ type Adapter interface { func NewProviderAdapter(ctx context.Context, logger *zap.Logger, urls []string, clusterEnabled bool, opts datasource.ProviderOpts) Adapter { ctx, cancel := context.WithCancel(ctx) return &ProviderAdapter{ - ctx: ctx, - cancel: cancel, - logger: logger, - urls: urls, - clusterEnabled: clusterEnabled, - eventMetricStore: opts.EventMetricStore, + ctx: ctx, + cancel: cancel, + logger: logger, + urls: urls, + clusterEnabled: clusterEnabled, + messagingEventMetricStore: opts.MessagingEventMetricStore, } } type ProviderAdapter struct { - ctx context.Context - cancel context.CancelFunc - conn rd.RDCloser - logger *zap.Logger - closeWg sync.WaitGroup - urls []string - clusterEnabled bool - eventMetricStore metric.EventMetricStore + ctx context.Context + cancel context.CancelFunc + conn rd.RDCloser + logger *zap.Logger + closeWg 
sync.WaitGroup + urls []string + clusterEnabled bool + messagingEventMetricStore metric.MessagingEventMetricStore } func (p *ProviderAdapter) Startup(ctx context.Context) error { @@ -111,7 +116,12 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent return } log.Debug("subscription update", zap.String("message_channel", msg.Channel), zap.String("data", msg.Payload)) - p.eventMetricStore.Consume(ctx, metric.MessagingEvent{OperationName: "receive", MessagingSystem: metric.ProviderTypeRedis, DestinationName: msg.Channel}) + p.messagingEventMetricStore.Consume(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: redisReceive, + MessagingSystem: metric.ProviderTypeRedis, + DestinationName: msg.Channel, + }) updater.Update([]byte(msg.Payload)) case <-p.ctx.Done(): // When the application context is done, we stop the subscription if it is not already done @@ -150,10 +160,21 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu intCmd := p.conn.Publish(ctx, event.Channel, data) if intCmd.Err() != nil { log.Error("publish error", zap.Error(intCmd.Err())) - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeRedis, ErrorType: "error", DestinationName: event.Channel}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: redisPublish, + MessagingSystem: metric.ProviderTypeRedis, + Error: true, + DestinationName: event.Channel, + }) return datasource.NewError(fmt.Sprintf("error publishing to Redis PubSub channel %s", event.Channel), intCmd.Err()) } - p.eventMetricStore.Produce(ctx, metric.MessagingEvent{OperationName: "publish", MessagingSystem: metric.ProviderTypeRedis, DestinationName: event.Channel}) + p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ + ProviderId: event.ProviderID, + OperationName: redisPublish, + MessagingSystem: 
metric.ProviderTypeRedis, + DestinationName: event.Channel, + }) return nil } From 176e960870465eaa571dccd01682777ddf654579 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 01:31:12 +0530 Subject: [PATCH 29/40] fix: tests --- ...rometheus_messaging_event_metrics_test.go} | 84 +++++++++++-------- ...est.go => messaging_event_metrics_test.go} | 63 +++++++------- 2 files changed, 84 insertions(+), 63 deletions(-) rename router-tests/{prometheus_event_metrics_test.go => prometheus_messaging_event_metrics_test.go} (80%) rename router-tests/telemetry/{event_metrics_test.go => messaging_event_metrics_test.go} (92%) diff --git a/router-tests/prometheus_event_metrics_test.go b/router-tests/prometheus_messaging_event_metrics_test.go similarity index 80% rename from router-tests/prometheus_event_metrics_test.go rename to router-tests/prometheus_messaging_event_metrics_test.go index a7b3bffb7d..35223bc015 100644 --- a/router-tests/prometheus_event_metrics_test.go +++ b/router-tests/prometheus_messaging_event_metrics_test.go @@ -51,18 +51,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "produce", operation.GetValue()) - require.Nil(t, findMetricLabelByName(metrics, "error_type")) + errLabel := findMetricLabelByName(metrics, "wg_messaging_error") + require.NotNil(t, errLabel) + require.Equal(t, "false", errLabel.GetValue()) - system := findMetricLabelByName(metrics, "messaging_system") + system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "kafka", system.GetValue()) - destination := 
findMetricLabelByName(metrics, "messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) require.Equal(t, float64(2), metrics[0].Counter.GetValue()) @@ -118,18 +120,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_client_consumed_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "receive", operation.GetValue()) - require.Nil(t, findMetricLabelByName(metrics, "error_type")) + errLabel := findMetricLabelByName(metrics, "wg_messaging_error") + require.NotNil(t, errLabel) + require.Equal(t, "false", errLabel.GetValue()) - system := findMetricLabelByName(metrics, "messaging_system") + system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "kafka", system.GetValue()) - destination := findMetricLabelByName(metrics, "messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) @@ -168,17 +172,19 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") 
require.Equal(t, "publish", operation.GetValue()) - require.Nil(t, findMetricLabelByName(metrics, "error_type")) - system := findMetricLabelByName(metrics, "messaging_system") + errLabel := findMetricLabelByName(metrics, "wg_messaging_error") + require.NotNil(t, errLabel) + require.Equal(t, "false", errLabel.GetValue()) + system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "nats", system.GetValue()) - destination := findMetricLabelByName(metrics, "messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyNats.12")) require.Equal(t, float64(2), metrics[0].Counter.GetValue()) @@ -210,18 +216,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "request", operation.GetValue()) - require.Nil(t, findMetricLabelByName(metrics, "error_type")) + errLabel := findMetricLabelByName(metrics, "wg_messaging_error") + require.NotNil(t, errLabel) + require.Equal(t, "false", errLabel.GetValue()) - system := findMetricLabelByName(metrics, "messaging_system") + system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "nats", system.GetValue()) - destination := findMetricLabelByName(metrics, "messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "getEmployeeMyNats.12")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) @@ -284,17 +292,19 @@ func 
TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_client_consumed_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") metrics := family.GetMetric() - require.Nil(t, findMetricLabelByName(metrics, "error_type")) - operation := findMetricLabelByName(metrics, "messaging_operation_name") + errLabel := findMetricLabelByName(metrics, "wg_messaging_error") + require.NotNil(t, errLabel) + require.Equal(t, "false", errLabel.GetValue()) + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "receive", operation.GetValue()) - system := findMetricLabelByName(metrics, "messaging_system") + system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "nats", system.GetValue()) - destination := findMetricLabelByName(metrics, "messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated.3")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) @@ -335,18 +345,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_client_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "publish", operation.GetValue()) - require.Nil(t, findMetricLabelByName(metrics, "error_type")) + errLabel := findMetricLabelByName(metrics, "wg_messaging_error") + require.NotNil(t, errLabel) + require.Equal(t, "false", errLabel.GetValue()) - system := findMetricLabelByName(metrics, "messaging_system") + system := 
findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "redis", system.GetValue()) - destination := findMetricLabelByName(metrics, "messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) require.Equal(t, float64(2), metrics[0].Counter.GetValue()) @@ -401,18 +413,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_client_consumed_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - require.Nil(t, findMetricLabelByName(metrics, "error_type")) - operation := findMetricLabelByName(metrics, "messaging_operation_name") + errLabel := findMetricLabelByName(metrics, "wg_messaging_error") + require.NotNil(t, errLabel) + require.Equal(t, "false", errLabel.GetValue()) + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "receive", operation.GetValue()) - system := findMetricLabelByName(metrics, "messaging_system") + system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "redis", system.GetValue()) - destination := findMetricLabelByName(metrics, "messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) diff --git a/router-tests/telemetry/event_metrics_test.go b/router-tests/telemetry/messaging_event_metrics_test.go similarity index 92% rename from router-tests/telemetry/event_metrics_test.go rename to router-tests/telemetry/messaging_event_metrics_test.go index a03ef52ddf..7d8aff9d38 100644 --- a/router-tests/telemetry/event_metrics_test.go +++ 
b/router-tests/telemetry/messaging_event_metrics_test.go @@ -52,9 +52,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -71,8 +71,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) - _, hasErr := attrs.Value(otelattrs.MessagingError) - require.False(t, hasErr) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) + require.True(t, hasErr) + require.False(t, errVal.AsBool()) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -125,9 +126,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.client.consumed.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -144,8 +145,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) - _, hasErr := 
attrs.Value(otelattrs.MessagingError) - require.False(t, hasErr) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) + require.True(t, hasErr) + require.False(t, errVal.AsBool()) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -181,9 +183,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -199,8 +201,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyNats.12")) - _, hasErr := attrs.Value(otelattrs.MessagingError) - require.False(t, hasErr) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) + require.True(t, hasErr) + require.False(t, errVal.AsBool()) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -229,9 +232,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -247,8 +250,9 @@ func TestFlakyEventMetrics(t 
*testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "getEmployeeMyNats.12")) - _, hasErr := attrs.Value(otelattrs.MessagingError) - require.False(t, hasErr) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) + require.True(t, hasErr) + require.False(t, errVal.AsBool()) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -309,9 +313,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.client.consumed.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -328,8 +332,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated.3")) - _, hasErr := attrs.Value(otelattrs.MessagingError) - require.False(t, hasErr) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) + require.True(t, hasErr) + require.False(t, errVal.AsBool()) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -367,9 +372,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.client.sent.messages") + metricEntry := 
integration.GetMetricByName(scope, "messaging.event.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -386,8 +391,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) - _, hasErr := attrs.Value(otelattrs.MessagingError) - require.False(t, hasErr) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) + require.True(t, hasErr) + require.False(t, errVal.AsBool()) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -439,9 +445,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.client.consumed.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -458,8 +464,9 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) - _, hasErr := attrs.Value(otelattrs.MessagingError) - require.False(t, hasErr) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) + require.True(t, hasErr) + require.False(t, errVal.AsBool()) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) From 8a412052e7937966c858c48ab1db1209d36075fd Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 01:38:22 +0530 Subject: [PATCH 30/40] fix: tests --- ...prometheus_messaging_event_metrics_test.go | 28 +++++++++++++++++++ .../telemetry/messaging_event_metrics_test.go 
| 28 +++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/router-tests/prometheus_messaging_event_metrics_test.go b/router-tests/prometheus_messaging_event_metrics_test.go index 35223bc015..77ba0095af 100644 --- a/router-tests/prometheus_messaging_event_metrics_test.go +++ b/router-tests/prometheus_messaging_event_metrics_test.go @@ -67,6 +67,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) + provider := findMetricLabelByName(metrics, "wg_provider_id") + require.NotNil(t, provider) + require.Equal(t, "my-kafka", provider.GetValue()) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) }) @@ -136,6 +140,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) + provider := findMetricLabelByName(metrics, "wg_provider_id") + require.NotNil(t, provider) + require.Equal(t, "my-kafka", provider.GetValue()) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) @@ -187,6 +195,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyNats.12")) + provider := findMetricLabelByName(metrics, "wg_provider_id") + require.NotNil(t, provider) + require.Equal(t, "my-nats", provider.GetValue()) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) }) @@ -232,6 +244,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "getEmployeeMyNats.12")) + provider := findMetricLabelByName(metrics, "wg_provider_id") + require.NotNil(t, provider) + require.Equal(t, "my-nats", 
provider.GetValue()) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) }) @@ -307,6 +323,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated.3")) + provider := findMetricLabelByName(metrics, "wg_provider_id") + require.NotNil(t, provider) + require.Equal(t, "my-nats", provider.GetValue()) + require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) @@ -361,6 +381,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) + provider := findMetricLabelByName(metrics, "wg_provider_id") + require.NotNil(t, provider) + require.Equal(t, "my-redis", provider.GetValue()) + require.Equal(t, float64(2), metrics[0].Counter.GetValue()) }) }) @@ -428,6 +452,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) + + provider := findMetricLabelByName(metrics, "wg_provider_id") + require.NotNil(t, provider) + require.Equal(t, "my-redis", provider.GetValue()) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) diff --git a/router-tests/telemetry/messaging_event_metrics_test.go b/router-tests/telemetry/messaging_event_metrics_test.go index 7d8aff9d38..476c34d6ee 100644 --- a/router-tests/telemetry/messaging_event_metrics_test.go +++ b/router-tests/telemetry/messaging_event_metrics_test.go @@ -71,6 +71,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) + provider, hasProvider := attrs.Value(otelattrs.ProviderId) + require.True(t, hasProvider) + require.Equal(t, 
"my-kafka", provider.AsString()) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -145,6 +149,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) + provider, hasProvider := attrs.Value(otelattrs.ProviderId) + require.True(t, hasProvider) + require.Equal(t, "my-kafka", provider.AsString()) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -201,6 +209,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyNats.12")) + provider, hasProvider := attrs.Value(otelattrs.ProviderId) + require.True(t, hasProvider) + require.Equal(t, "my-nats", provider.AsString()) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -250,6 +262,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "getEmployeeMyNats.12")) + provider, hasProvider := attrs.Value(otelattrs.ProviderId) + require.True(t, hasProvider) + require.Equal(t, "my-nats", provider.AsString()) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -332,6 +348,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated.3")) + provider, hasProvider := attrs.Value(otelattrs.ProviderId) + require.True(t, hasProvider) + require.Equal(t, "my-nats", provider.AsString()) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) 
require.False(t, errVal.AsBool()) @@ -391,6 +411,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) + provider, hasProvider := attrs.Value(otelattrs.ProviderId) + require.True(t, hasProvider) + require.Equal(t, "my-redis", provider.AsString()) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -464,6 +488,10 @@ func TestFlakyEventMetrics(t *testing.T) { destination, _ := attrs.Value(otelattrs.MessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) + provider, hasProvider := attrs.Value(otelattrs.ProviderId) + require.True(t, hasProvider) + require.Equal(t, "my-redis", provider.AsString()) + errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) From fdbe1f869c091a76dc8378df3c86ea086dc319b1 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 01:40:18 +0530 Subject: [PATCH 31/40] fix: tests --- router-tests/prometheus_messaging_event_metrics_test.go | 2 +- router-tests/telemetry/messaging_event_metrics_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/router-tests/prometheus_messaging_event_metrics_test.go b/router-tests/prometheus_messaging_event_metrics_test.go index 77ba0095af..566aa49efa 100644 --- a/router-tests/prometheus_messaging_event_metrics_test.go +++ b/router-tests/prometheus_messaging_event_metrics_test.go @@ -325,7 +325,7 @@ func TestFlakyEventMetrics(t *testing.T) { provider := findMetricLabelByName(metrics, "wg_provider_id") require.NotNil(t, provider) - require.Equal(t, "my-nats", provider.GetValue()) + require.Equal(t, "default", provider.GetValue()) require.Equal(t, float64(1), metrics[0].Counter.GetValue()) }) diff --git a/router-tests/telemetry/messaging_event_metrics_test.go 
b/router-tests/telemetry/messaging_event_metrics_test.go index 476c34d6ee..7c421dd108 100644 --- a/router-tests/telemetry/messaging_event_metrics_test.go +++ b/router-tests/telemetry/messaging_event_metrics_test.go @@ -350,7 +350,7 @@ func TestFlakyEventMetrics(t *testing.T) { provider, hasProvider := attrs.Value(otelattrs.ProviderId) require.True(t, hasProvider) - require.Equal(t, "my-nats", provider.AsString()) + require.Equal(t, "default", provider.AsString()) errVal, hasErr := attrs.Value(otelattrs.MessagingError) require.True(t, hasErr) From c56b7c1a0cf1b1939ffedb622e6efde4268dd7bf Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 02:41:09 +0530 Subject: [PATCH 32/40] fix: tests --- .../telemetry/messaging_event_metrics_test.go | 70 +++++++++---------- router/core/plan_generator.go | 6 +- router/pkg/config/config.schema.json | 4 +- .../metric/messaging_event_metric_store.go | 20 +++--- router/pkg/otel/attributes.go | 10 +-- 5 files changed, 57 insertions(+), 53 deletions(-) diff --git a/router-tests/telemetry/messaging_event_metrics_test.go b/router-tests/telemetry/messaging_event_metrics_test.go index 7c421dd108..67b7acc72c 100644 --- a/router-tests/telemetry/messaging_event_metrics_test.go +++ b/router-tests/telemetry/messaging_event_metrics_test.go @@ -62,20 +62,20 @@ func TestFlakyEventMetrics(t *testing.T) { attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.MessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) require.Equal(t, "produce", operation.AsString()) - system, _ := attrs.Value(otelattrs.MessagingSystem) + system, _ := attrs.Value(otelattrs.WgMessagingSystem) require.Equal(t, "kafka", system.AsString()) - destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) - provider, hasProvider := 
attrs.Value(otelattrs.ProviderId) + provider, hasProvider := attrs.Value(otelattrs.WgProviderId) require.True(t, hasProvider) require.Equal(t, "my-kafka", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.MessagingError) + errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -140,20 +140,20 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.MessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) require.Equal(t, "receive", operation.AsString()) - system, _ := attrs.Value(otelattrs.MessagingSystem) + system, _ := attrs.Value(otelattrs.WgMessagingSystem) require.Equal(t, "kafka", system.AsString()) - destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) - provider, hasProvider := attrs.Value(otelattrs.ProviderId) + provider, hasProvider := attrs.Value(otelattrs.WgProviderId) require.True(t, hasProvider) require.Equal(t, "my-kafka", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.MessagingError) + errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -200,20 +200,20 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.MessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) require.Equal(t, "publish", operation.AsString()) - system, _ := attrs.Value(otelattrs.MessagingSystem) + system, _ := attrs.Value(otelattrs.WgMessagingSystem) require.Equal(t, "nats", system.AsString()) - destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + destination, _ := 
attrs.Value(otelattrs.WgMessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyNats.12")) - provider, hasProvider := attrs.Value(otelattrs.ProviderId) + provider, hasProvider := attrs.Value(otelattrs.WgProviderId) require.True(t, hasProvider) require.Equal(t, "my-nats", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.MessagingError) + errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -253,20 +253,20 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.MessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) require.Equal(t, "request", operation.AsString()) - system, _ := attrs.Value(otelattrs.MessagingSystem) + system, _ := attrs.Value(otelattrs.WgMessagingSystem) require.Equal(t, "nats", system.AsString()) - destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "getEmployeeMyNats.12")) - provider, hasProvider := attrs.Value(otelattrs.ProviderId) + provider, hasProvider := attrs.Value(otelattrs.WgProviderId) require.True(t, hasProvider) require.Equal(t, "my-nats", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.MessagingError) + errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -339,20 +339,20 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.MessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) require.Equal(t, "receive", operation.AsString()) - system, _ := attrs.Value(otelattrs.MessagingSystem) + system, _ := 
attrs.Value(otelattrs.WgMessagingSystem) require.Equal(t, "nats", system.AsString()) - destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated.3")) - provider, hasProvider := attrs.Value(otelattrs.ProviderId) + provider, hasProvider := attrs.Value(otelattrs.WgProviderId) require.True(t, hasProvider) require.Equal(t, "default", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.MessagingError) + errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -402,20 +402,20 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.MessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) require.Equal(t, "publish", operation.AsString()) - system, _ := attrs.Value(otelattrs.MessagingSystem) + system, _ := attrs.Value(otelattrs.WgMessagingSystem) require.Equal(t, "redis", system.AsString()) - destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) - provider, hasProvider := attrs.Value(otelattrs.ProviderId) + provider, hasProvider := attrs.Value(otelattrs.WgProviderId) require.True(t, hasProvider) require.Equal(t, "my-redis", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.MessagingError) + errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) @@ -479,20 +479,20 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.MessagingOperationName) + operation, _ := 
attrs.Value(otelattrs.WgMessagingOperationName) require.Equal(t, "receive", operation.AsString()) - system, _ := attrs.Value(otelattrs.MessagingSystem) + system, _ := attrs.Value(otelattrs.WgMessagingSystem) require.Equal(t, "redis", system.AsString()) - destination, _ := attrs.Value(otelattrs.MessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) - provider, hasProvider := attrs.Value(otelattrs.ProviderId) + provider, hasProvider := attrs.Value(otelattrs.WgProviderId) require.True(t, hasProvider) require.Equal(t, "my-redis", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.MessagingError) + errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) require.True(t, hasErr) require.False(t, errVal.AsBool()) diff --git a/router/core/plan_generator.go b/router/core/plan_generator.go index 7a05ec8e7c..7f5119f823 100644 --- a/router/core/plan_generator.go +++ b/router/core/plan_generator.go @@ -7,6 +7,8 @@ import ( "net/http" "os" + "github.com/wundergraph/cosmo/router/pkg/metric" + log "github.com/jensneuse/abstractlogger" "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" "github.com/wundergraph/graphql-go-tools/v2/pkg/astnormalization" @@ -253,7 +255,9 @@ func (pg *PlanGenerator) buildRouterConfig(configFilePath string) (*nodev1.Route } func (pg *PlanGenerator) loadConfiguration(routerConfig *nodev1.RouterConfig, logger *zap.Logger, maxDataSourceCollectorsConcurrency uint) error { - routerEngineConfig := RouterEngineConfiguration{} + routerEngineConfig := RouterEngineConfiguration{ + MessagingEventMetricStore: metric.NewNoopEventMetricStore(), + } natSources := map[string]*nats.ProviderAdapter{} kafkaSources := map[string]*kafka.ProviderAdapter{} for _, ds := range routerConfig.GetEngineConfig().GetDatasourceConfigurations() { diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 
4df91c44fc..f68d53ab46 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -1176,10 +1176,10 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." }, - "event_metrics": { + "messaging_event_metrics": { "type": "boolean", "default": false, - "description": "Enable the collection of event metrics. The default value is false." + "description": "Enable the collection of messaging event metrics. The default value is false." }, "circuit_breaker": { "type": "boolean", diff --git a/router/pkg/metric/messaging_event_metric_store.go b/router/pkg/metric/messaging_event_metric_store.go index d797d35554..3ac0a621fc 100644 --- a/router/pkg/metric/messaging_event_metric_store.go +++ b/router/pkg/metric/messaging_event_metric_store.go @@ -87,15 +87,15 @@ func (e *MessagingEventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetri func (e *MessagingEventMetrics) Produce(ctx context.Context, event MessagingEvent) { attrs := []attribute.KeyValue{ - otel.MessagingOperationName.String(event.OperationName), - otel.MessagingSystem.String(string(event.MessagingSystem)), - otel.MessagingError.Bool(event.Error), + otel.WgMessagingOperationName.String(event.OperationName), + otel.WgMessagingSystem.String(string(event.MessagingSystem)), + otel.WgMessagingError.Bool(event.Error), } if event.ProviderId != "" { - attrs = append(attrs, otel.ProviderId.String(event.ProviderId)) + attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) } if event.DestinationName != "" { - attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) + attrs = append(attrs, otel.WgMessagingDestinationName.String(event.DestinationName)) } opt := e.withAttrs(attrs...) 
@@ -106,15 +106,15 @@ func (e *MessagingEventMetrics) Produce(ctx context.Context, event MessagingEven func (e *MessagingEventMetrics) Consume(ctx context.Context, event MessagingEvent) { attrs := []attribute.KeyValue{ - otel.MessagingOperationName.String(event.OperationName), - otel.MessagingSystem.String(string(event.MessagingSystem)), - otel.MessagingError.Bool(event.Error), + otel.WgMessagingOperationName.String(event.OperationName), + otel.WgMessagingSystem.String(string(event.MessagingSystem)), + otel.WgMessagingError.Bool(event.Error), } if event.ProviderId != "" { - attrs = append(attrs, otel.ProviderId.String(event.ProviderId)) + attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) } if event.DestinationName != "" { - attrs = append(attrs, otel.MessagingDestinationName.String(event.DestinationName)) + attrs = append(attrs, otel.WgMessagingDestinationName.String(event.DestinationName)) } opt := e.withAttrs(attrs...) diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index 3c73c3f1fa..ee173dd923 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -62,11 +62,11 @@ const ( // Messaging metrics attributes const ( - MessagingOperationName = attribute.Key("wg.messaging.operation.name") - MessagingSystem = attribute.Key("wg.messaging.system") - MessagingError = attribute.Key("wg.messaging.error") - MessagingDestinationName = attribute.Key("wg.messaging.destination.name") - ProviderId = attribute.Key("wg.provider.id") + WgMessagingOperationName = attribute.Key("wg.messaging.operation.name") + WgMessagingSystem = attribute.Key("wg.messaging.system") + WgMessagingError = attribute.Key("wg.messaging.error") + WgMessagingDestinationName = attribute.Key("wg.messaging.destination.name") + WgProviderId = attribute.Key("wg.provider.id") ) const ( From e945f7d34e2a6729f2edef6b07e92163d05e9fc4 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 02:54:17 +0530 Subject: [PATCH 33/40] fix: tests 
--- demo/pkg/subgraphs/subgraphs.go | 4 +-- ...prometheus_messaging_event_metrics_test.go | 14 +++++----- .../telemetry/messaging_event_metrics_test.go | 28 +++++++++---------- router-tests/testenv/testenv.go | 2 +- .../metric/messaging_event_measurements.go | 4 +-- .../oltp_messaging_event_metric_store.go | 2 +- .../prom_messaging_event_metric_store.go | 2 +- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/demo/pkg/subgraphs/subgraphs.go b/demo/pkg/subgraphs/subgraphs.go index 7e2c750b41..6fa8ecbacf 100644 --- a/demo/pkg/subgraphs/subgraphs.go +++ b/demo/pkg/subgraphs/subgraphs.go @@ -6,8 +6,6 @@ import ( "encoding/json" "errors" "fmt" - rmetric "github.com/wundergraph/cosmo/router/pkg/metric" - "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" "io" "log" "net/http" @@ -23,6 +21,8 @@ import ( "github.com/99designs/gqlgen/graphql/playground" "github.com/nats-io/nats.go" "github.com/nats-io/nats.go/jetstream" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" + "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" natsPubsub "github.com/wundergraph/cosmo/router/pkg/pubsub/nats" "golang.org/x/sync/errgroup" diff --git a/router-tests/prometheus_messaging_event_metrics_test.go b/router-tests/prometheus_messaging_event_metrics_test.go index 566aa49efa..07b5c1460e 100644 --- a/router-tests/prometheus_messaging_event_metrics_test.go +++ b/router-tests/prometheus_messaging_event_metrics_test.go @@ -51,7 +51,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -124,7 +124,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") + family := 
findMetricFamilyByName(mf, "messaging_events_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -180,7 +180,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -228,7 +228,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -308,7 +308,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") + family := findMetricFamilyByName(mf, "messaging_events_received_messages_total") metrics := family.GetMetric() errLabel := findMetricLabelByName(metrics, "wg_messaging_error") @@ -365,7 +365,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -437,7 +437,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") + family := findMetricFamilyByName(mf, "messaging_events_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) diff --git a/router-tests/telemetry/messaging_event_metrics_test.go b/router-tests/telemetry/messaging_event_metrics_test.go index 
67b7acc72c..f5982678f2 100644 --- a/router-tests/telemetry/messaging_event_metrics_test.go +++ b/router-tests/telemetry/messaging_event_metrics_test.go @@ -52,9 +52,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -130,9 +130,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.events.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -191,9 +191,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") require.NotNil(t, metricEntry) 
sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -244,9 +244,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -329,9 +329,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.events.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -392,9 +392,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -469,9 +469,9 @@ func 
TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.event") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.events.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 89f9d9aa2e..3d45434518 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -10,7 +10,6 @@ import ( "encoding/json" "errors" "fmt" - "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" "io" "log" "math/rand" @@ -64,6 +63,7 @@ import ( "github.com/wundergraph/cosmo/router/pkg/controlplane/configpoller" "github.com/wundergraph/cosmo/router/pkg/logging" rmetric "github.com/wundergraph/cosmo/router/pkg/metric" + "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" pubsubNats "github.com/wundergraph/cosmo/router/pkg/pubsub/nats" ) diff --git a/router/pkg/metric/messaging_event_measurements.go b/router/pkg/metric/messaging_event_measurements.go index ead75303ea..e521e88642 100644 --- a/router/pkg/metric/messaging_event_measurements.go +++ b/router/pkg/metric/messaging_event_measurements.go @@ -7,8 +7,8 @@ import ( ) const ( - messagingSentMessages = "messaging.event.sent.messages" - messagingConsumedMessages = "messaging.event.received.messages" + messagingSentMessages = "messaging.events.sent.messages" + messagingConsumedMessages = "messaging.events.received.messages" ) var ( diff --git a/router/pkg/metric/oltp_messaging_event_metric_store.go b/router/pkg/metric/oltp_messaging_event_metric_store.go index 5358030271..222ee8f3aa 100644 --- 
a/router/pkg/metric/oltp_messaging_event_metric_store.go +++ b/router/pkg/metric/oltp_messaging_event_metric_store.go @@ -9,7 +9,7 @@ import ( ) const ( - cosmoRouterEventMeterName = "cosmo.router.messaging.event" + cosmoRouterEventMeterName = "cosmo.router.messaging.events" cosmoRouterEventMeterVersion = "0.0.1" ) diff --git a/router/pkg/metric/prom_messaging_event_metric_store.go b/router/pkg/metric/prom_messaging_event_metric_store.go index 544ab82398..355f7dc351 100644 --- a/router/pkg/metric/prom_messaging_event_metric_store.go +++ b/router/pkg/metric/prom_messaging_event_metric_store.go @@ -9,7 +9,7 @@ import ( ) const ( - cosmoRouterEventPromMeterName = "cosmo.router.messaging.event.prometheus" + cosmoRouterEventPromMeterName = "cosmo.router.messaging.events.prometheus" cosmoRouterEventPromMeterVersion = "0.0.1" ) From 87c67fc6f8438103c1abe53d071b0364bb9a47fb Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 14:48:28 +0530 Subject: [PATCH 34/40] fix: review comments --- ...prometheus_messaging_event_metrics_test.go | 57 ++++++++++--------- .../telemetry/messaging_event_metrics_test.go | 49 +++++++--------- .../metric/messaging_event_measurements.go | 4 +- .../metric/messaging_event_metric_store.go | 10 +++- router/pkg/otel/attributes.go | 1 + router/pkg/pubsub/kafka/adapter.go | 2 +- router/pkg/pubsub/nats/adapter.go | 13 ++++- router/pkg/pubsub/redis/adapter.go | 15 ++++- 8 files changed, 84 insertions(+), 67 deletions(-) diff --git a/router-tests/prometheus_messaging_event_metrics_test.go b/router-tests/prometheus_messaging_event_metrics_test.go index 07b5c1460e..e3580d1f3d 100644 --- a/router-tests/prometheus_messaging_event_metrics_test.go +++ b/router-tests/prometheus_messaging_event_metrics_test.go @@ -51,15 +51,15 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") + family := findMetricFamilyByName(mf, 
"messaging_event_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "produce", operation.GetValue()) - errLabel := findMetricLabelByName(metrics, "wg_messaging_error") - require.NotNil(t, errLabel) - require.Equal(t, "false", errLabel.GetValue()) + + errLabel := findMetricLabelByName(metrics, "wg_error_type") + require.Nil(t, errLabel) system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "kafka", system.GetValue()) @@ -124,15 +124,15 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_events_received_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "receive", operation.GetValue()) - errLabel := findMetricLabelByName(metrics, "wg_messaging_error") - require.NotNil(t, errLabel) - require.Equal(t, "false", errLabel.GetValue()) + + errLabel := findMetricLabelByName(metrics, "wg_error_type") + require.Nil(t, errLabel) system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "kafka", system.GetValue()) @@ -180,15 +180,16 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "publish", operation.GetValue()) - errLabel := findMetricLabelByName(metrics, "wg_messaging_error") - require.NotNil(t, errLabel) - require.Equal(t, "false", errLabel.GetValue()) + + 
errLabel := findMetricLabelByName(metrics, "wg_error_type") + require.Nil(t, errLabel) + system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "nats", system.GetValue()) @@ -228,15 +229,15 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "request", operation.GetValue()) - errLabel := findMetricLabelByName(metrics, "wg_messaging_error") - require.NotNil(t, errLabel) - require.Equal(t, "false", errLabel.GetValue()) + + errLabel := findMetricLabelByName(metrics, "wg_error_type") + require.Nil(t, errLabel) system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "nats", system.GetValue()) @@ -308,12 +309,12 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_events_received_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") metrics := family.GetMetric() - errLabel := findMetricLabelByName(metrics, "wg_messaging_error") - require.NotNil(t, errLabel) - require.Equal(t, "false", errLabel.GetValue()) + errLabel := findMetricLabelByName(metrics, "wg_error_type") + require.Nil(t, errLabel) + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "receive", operation.GetValue()) @@ -365,15 +366,15 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_events_sent_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") metrics := family.GetMetric() 
require.Len(t, metrics, 1) operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "publish", operation.GetValue()) - errLabel := findMetricLabelByName(metrics, "wg_messaging_error") - require.NotNil(t, errLabel) - require.Equal(t, "false", errLabel.GetValue()) + + errLabel := findMetricLabelByName(metrics, "wg_error_type") + require.Nil(t, errLabel) system := findMetricLabelByName(metrics, "wg_messaging_system") require.Equal(t, "redis", system.GetValue()) @@ -437,13 +438,13 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_events_received_messages_total") + family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - errLabel := findMetricLabelByName(metrics, "wg_messaging_error") - require.NotNil(t, errLabel) - require.Equal(t, "false", errLabel.GetValue()) + errLabel := findMetricLabelByName(metrics, "wg_error_type") + require.Nil(t, errLabel) + operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") require.Equal(t, "receive", operation.GetValue()) diff --git a/router-tests/telemetry/messaging_event_metrics_test.go b/router-tests/telemetry/messaging_event_metrics_test.go index f5982678f2..402023c842 100644 --- a/router-tests/telemetry/messaging_event_metrics_test.go +++ b/router-tests/telemetry/messaging_event_metrics_test.go @@ -54,7 +54,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -75,9 +75,8 @@ func TestFlakyEventMetrics(t *testing.T) { require.True(t, 
hasProvider) require.Equal(t, "my-kafka", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) - require.True(t, hasErr) - require.False(t, errVal.AsBool()) + _, hasErr := attrs.Value(otelattrs.WgErrorType) + require.False(t, hasErr) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -132,7 +131,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.events.received.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -153,9 +152,8 @@ func TestFlakyEventMetrics(t *testing.T) { require.True(t, hasProvider) require.Equal(t, "my-kafka", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) - require.True(t, hasErr) - require.False(t, errVal.AsBool()) + _, hasErr := attrs.Value(otelattrs.WgErrorType) + require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -193,7 +191,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -213,9 +211,8 @@ func TestFlakyEventMetrics(t *testing.T) { require.True(t, hasProvider) require.Equal(t, "my-nats", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) - require.True(t, hasErr) - require.False(t, errVal.AsBool()) + _, hasErr := attrs.Value(otelattrs.WgErrorType) + require.False(t, hasErr) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -246,7 +243,7 
@@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -266,9 +263,8 @@ func TestFlakyEventMetrics(t *testing.T) { require.True(t, hasProvider) require.Equal(t, "my-nats", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) - require.True(t, hasErr) - require.False(t, errVal.AsBool()) + _, hasErr := attrs.Value(otelattrs.WgErrorType) + require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -331,7 +327,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.events.received.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -352,9 +348,8 @@ func TestFlakyEventMetrics(t *testing.T) { require.True(t, hasProvider) require.Equal(t, "default", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) - require.True(t, hasErr) - require.False(t, errVal.AsBool()) + _, hasErr := attrs.Value(otelattrs.WgErrorType) + require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) @@ -394,7 +389,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.events.sent.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") 
require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -415,9 +410,8 @@ func TestFlakyEventMetrics(t *testing.T) { require.True(t, hasProvider) require.Equal(t, "my-redis", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) - require.True(t, hasErr) - require.False(t, errVal.AsBool()) + _, hasErr := attrs.Value(otelattrs.WgErrorType) + require.False(t, hasErr) require.Equal(t, int64(2), sum.DataPoints[0].Value) }) @@ -471,7 +465,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.events.received.messages") + metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -492,9 +486,8 @@ func TestFlakyEventMetrics(t *testing.T) { require.True(t, hasProvider) require.Equal(t, "my-redis", provider.AsString()) - errVal, hasErr := attrs.Value(otelattrs.WgMessagingError) - require.True(t, hasErr) - require.False(t, errVal.AsBool()) + _, hasErr := attrs.Value(otelattrs.WgErrorType) + require.False(t, hasErr) require.Equal(t, int64(1), sum.DataPoints[0].Value) }) diff --git a/router/pkg/metric/messaging_event_measurements.go b/router/pkg/metric/messaging_event_measurements.go index e521e88642..ead75303ea 100644 --- a/router/pkg/metric/messaging_event_measurements.go +++ b/router/pkg/metric/messaging_event_measurements.go @@ -7,8 +7,8 @@ import ( ) const ( - messagingSentMessages = "messaging.events.sent.messages" - messagingConsumedMessages = "messaging.events.received.messages" + messagingSentMessages = "messaging.event.sent.messages" + messagingConsumedMessages = "messaging.event.received.messages" ) var ( diff --git a/router/pkg/metric/messaging_event_metric_store.go b/router/pkg/metric/messaging_event_metric_store.go index 
3ac0a621fc..807095f66b 100644 --- a/router/pkg/metric/messaging_event_metric_store.go +++ b/router/pkg/metric/messaging_event_metric_store.go @@ -26,7 +26,7 @@ type MessagingEvent struct { ProviderId string // The id of the provider defined in the configuration OperationName string // The operation name that is specific to the messaging system MessagingSystem ProviderType // The messaging system type that are supported - Error bool // Indicates if the operation resulted in an error or not (true or false) + ErrorType string // Optional error type, e.g., "publish_error" or "receive_error". If empty, the attribute is not set DestinationName string // The name of the destination queue / topic / channel } @@ -89,7 +89,9 @@ func (e *MessagingEventMetrics) Produce(ctx context.Context, event MessagingEven attrs := []attribute.KeyValue{ otel.WgMessagingOperationName.String(event.OperationName), otel.WgMessagingSystem.String(string(event.MessagingSystem)), - otel.WgMessagingError.Bool(event.Error), + } + if event.ErrorType != "" { + attrs = append(attrs, otel.WgErrorType.String(event.ErrorType)) } if event.ProviderId != "" { attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) @@ -108,7 +110,9 @@ func (e *MessagingEventMetrics) Consume(ctx context.Context, event MessagingEven attrs := []attribute.KeyValue{ otel.WgMessagingOperationName.String(event.OperationName), otel.WgMessagingSystem.String(string(event.MessagingSystem)), - otel.WgMessagingError.Bool(event.Error), + } + if event.ErrorType != "" { + attrs = append(attrs, otel.WgErrorType.String(event.ErrorType)) } if event.ProviderId != "" { attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index ee173dd923..156792f5c3 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -67,6 +67,7 @@ const ( WgMessagingError = attribute.Key("wg.messaging.error") WgMessagingDestinationName = 
attribute.Key("wg.messaging.destination.name") WgProviderId = attribute.Key("wg.provider.id") + WgErrorType = attribute.Key("wg.error.type") ) const ( diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 540b461d83..89a66262d2 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -201,7 +201,7 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu ProviderId: event.ProviderID, OperationName: kafkaProduce, MessagingSystem: metric.ProviderTypeKafka, - Error: true, + ErrorType: "publish_error", DestinationName: event.Topic, }) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index edaacd1b92..3e2b118a14 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -238,7 +238,7 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv ProviderId: event.ProviderID, OperationName: natsPublish, MessagingSystem: metric.ProviderTypeNats, - Error: true, + ErrorType: "publish_error", DestinationName: event.Subject, }) return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) @@ -274,7 +274,7 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv ProviderId: event.ProviderID, OperationName: natsRequest, MessagingSystem: metric.ProviderTypeNats, - Error: true, + ErrorType: "publish_error", DestinationName: event.Subject, }) return datasource.NewError(fmt.Sprintf("error requesting from NATS subject %s", event.Subject), err) @@ -358,6 +358,13 @@ func NewAdapter(ctx context.Context, logger *zap.Logger, url string, opts []nats logger = zap.NewNop() } + var store metric.MessagingEventMetricStore + if providerOpts.MessagingEventMetricStore != nil { + store = providerOpts.MessagingEventMetricStore + } else { + store = 
metric.NewNoopEventMetricStore() + } + return &ProviderAdapter{ ctx: ctx, logger: logger.With(zap.String("pubsub", "nats")), @@ -367,6 +374,6 @@ func NewAdapter(ctx context.Context, logger *zap.Logger, url string, opts []nats url: url, opts: opts, flushTimeout: 10 * time.Second, - messagingEventMetricStore: providerOpts.MessagingEventMetricStore, + messagingEventMetricStore: store, }, nil } diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index af63161499..e2fffd0661 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -32,13 +32,24 @@ type Adapter interface { func NewProviderAdapter(ctx context.Context, logger *zap.Logger, urls []string, clusterEnabled bool, opts datasource.ProviderOpts) Adapter { ctx, cancel := context.WithCancel(ctx) + if logger == nil { + logger = zap.NewNop() + } + + var store metric.MessagingEventMetricStore + if opts.MessagingEventMetricStore != nil { + store = opts.MessagingEventMetricStore + } else { + store = metric.NewNoopEventMetricStore() + } + return &ProviderAdapter{ ctx: ctx, cancel: cancel, logger: logger, urls: urls, clusterEnabled: clusterEnabled, - messagingEventMetricStore: opts.MessagingEventMetricStore, + messagingEventMetricStore: store, } } @@ -164,7 +175,7 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu ProviderId: event.ProviderID, OperationName: redisPublish, MessagingSystem: metric.ProviderTypeRedis, - Error: true, + ErrorType: "publish_error", DestinationName: event.Channel, }) return datasource.NewError(fmt.Sprintf("error publishing to Redis PubSub channel %s", event.Channel), intCmd.Err()) From eed45fc7091c079818346f9d18a43cd00464e146 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 14:49:47 +0530 Subject: [PATCH 35/40] fix: error entry --- router/pkg/pubsub/nats/adapter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router/pkg/pubsub/nats/adapter.go 
b/router/pkg/pubsub/nats/adapter.go index 3e2b118a14..059a4e9ee6 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -274,7 +274,7 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv ProviderId: event.ProviderID, OperationName: natsRequest, MessagingSystem: metric.ProviderTypeNats, - ErrorType: "publish_error", + ErrorType: "request_error", DestinationName: event.Subject, }) return datasource.NewError(fmt.Sprintf("error requesting from NATS subject %s", event.Subject), err) From 430c421654081938c90ace78c112c1bad7535f29 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 18:30:53 +0530 Subject: [PATCH 36/40] refactor: naming to use stream metrics --- demo/pkg/subgraphs/subgraphs.go | 4 +- ...t.go => prometheus_stream_metrics_test.go} | 56 +++---- ...metrics_test.go => stream_metrics_test.go} | 70 ++++---- router-tests/testenv/testenv.go | 22 +-- router/core/factoryresolver.go | 15 +- router/core/graph_server.go | 26 +-- router/core/plan_generator.go | 4 +- router/core/router.go | 20 +-- router/pkg/config/config.go | 42 ++--- router/pkg/config/config.schema.json | 8 +- .../pkg/config/testdata/config_defaults.json | 4 +- router/pkg/config/testdata/config_full.json | 4 +- router/pkg/metric/config.go | 8 +- .../metric/messaging_event_metric_store.go | 153 ------------------ router/pkg/metric/noop_event_metrics.go | 15 -- router/pkg/metric/noop_stream_metrics.go | 15 ++ .../oltp_messaging_event_metric_store.go | 52 ------ router/pkg/metric/oltp_stream_metric_store.go | 52 ++++++ ...c_store.go => prom_stream_metric_store.go} | 16 +- ...measurements.go => stream_measurements.go} | 10 +- router/pkg/metric/stream_metric_store.go | 153 ++++++++++++++++++ router/pkg/otel/attributes.go | 11 +- router/pkg/pubsub/datasource/provider.go | 3 +- router/pkg/pubsub/kafka/adapter.go | 66 ++++---- router/pkg/pubsub/nats/adapter.go | 112 ++++++------- router/pkg/pubsub/pubsub.go | 11 +- 
router/pkg/pubsub/pubsub_test.go | 8 +- router/pkg/pubsub/redis/adapter.go | 68 ++++---- 28 files changed, 515 insertions(+), 513 deletions(-) rename router-tests/{prometheus_messaging_event_metrics_test.go => prometheus_stream_metrics_test.go} (87%) rename router-tests/telemetry/{messaging_event_metrics_test.go => stream_metrics_test.go} (88%) delete mode 100644 router/pkg/metric/messaging_event_metric_store.go delete mode 100644 router/pkg/metric/noop_event_metrics.go create mode 100644 router/pkg/metric/noop_stream_metrics.go delete mode 100644 router/pkg/metric/oltp_messaging_event_metric_store.go create mode 100644 router/pkg/metric/oltp_stream_metric_store.go rename router/pkg/metric/{prom_messaging_event_metric_store.go => prom_stream_metric_store.go} (56%) rename router/pkg/metric/{messaging_event_measurements.go => stream_measurements.go} (71%) create mode 100644 router/pkg/metric/stream_metric_store.go diff --git a/demo/pkg/subgraphs/subgraphs.go b/demo/pkg/subgraphs/subgraphs.go index 6fa8ecbacf..323fb46292 100644 --- a/demo/pkg/subgraphs/subgraphs.go +++ b/demo/pkg/subgraphs/subgraphs.go @@ -213,7 +213,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID := map[string]natsPubsub.Adapter{} defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ - MessagingEventMetricStore: rmetric.NewNoopEventMetricStore(), + StreamMetricStore: rmetric.NewNoopStreamMetricStore(), }) if err != nil { return nil, fmt.Errorf("failed to create default nats adapter: %w", err) @@ -221,7 +221,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID["default"] = defaultAdapter myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ - MessagingEventMetricStore: rmetric.NewNoopEventMetricStore(), + StreamMetricStore: rmetric.NewNoopStreamMetricStore(), }) 
if err != nil { return nil, fmt.Errorf("failed to create my-nats adapter: %w", err) diff --git a/router-tests/prometheus_messaging_event_metrics_test.go b/router-tests/prometheus_stream_metrics_test.go similarity index 87% rename from router-tests/prometheus_messaging_event_metrics_test.go rename to router-tests/prometheus_stream_metrics_test.go index e3580d1f3d..3d8db02830 100644 --- a/router-tests/prometheus_messaging_event_metrics_test.go +++ b/router-tests/prometheus_stream_metrics_test.go @@ -51,20 +51,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, "streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_stream_operation_name") require.Equal(t, "produce", operation.GetValue()) errLabel := findMetricLabelByName(metrics, "wg_error_type") require.Nil(t, errLabel) - system := findMetricLabelByName(metrics, "wg_messaging_system") + system := findMetricLabelByName(metrics, "wg_provider_type") require.Equal(t, "kafka", system.GetValue()) - destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) provider := findMetricLabelByName(metrics, "wg_provider_id") @@ -124,20 +124,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") + family := findMetricFamilyByName(mf, "streams_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") + operation 
:= findMetricLabelByName(metrics, "wg_stream_operation_name") require.Equal(t, "receive", operation.GetValue()) errLabel := findMetricLabelByName(metrics, "wg_error_type") require.Nil(t, errLabel) - system := findMetricLabelByName(metrics, "wg_messaging_system") + system := findMetricLabelByName(metrics, "wg_provider_type") require.Equal(t, "kafka", system.GetValue()) - destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated")) provider := findMetricLabelByName(metrics, "wg_provider_id") @@ -180,20 +180,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, "streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_stream_operation_name") require.Equal(t, "publish", operation.GetValue()) errLabel := findMetricLabelByName(metrics, "wg_error_type") require.Nil(t, errLabel) - system := findMetricLabelByName(metrics, "wg_messaging_system") + system := findMetricLabelByName(metrics, "wg_provider_type") require.Equal(t, "nats", system.GetValue()) - destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyNats.12")) provider := findMetricLabelByName(metrics, "wg_provider_id") @@ -229,20 +229,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, 
"streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_stream_operation_name") require.Equal(t, "request", operation.GetValue()) errLabel := findMetricLabelByName(metrics, "wg_error_type") require.Nil(t, errLabel) - system := findMetricLabelByName(metrics, "wg_messaging_system") + system := findMetricLabelByName(metrics, "wg_provider_type") require.Equal(t, "nats", system.GetValue()) - destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "getEmployeeMyNats.12")) provider := findMetricLabelByName(metrics, "wg_provider_id") @@ -309,19 +309,19 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") + family := findMetricFamilyByName(mf, "streams_received_messages_total") metrics := family.GetMetric() errLabel := findMetricLabelByName(metrics, "wg_error_type") require.Nil(t, errLabel) - operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_stream_operation_name") require.Equal(t, "receive", operation.GetValue()) - system := findMetricLabelByName(metrics, "wg_messaging_system") + system := findMetricLabelByName(metrics, "wg_provider_type") require.Equal(t, "nats", system.GetValue()) - destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdated.3")) provider := findMetricLabelByName(metrics, "wg_provider_id") @@ -366,20 +366,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := 
promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_sent_messages_total") + family := findMetricFamilyByName(mf, "streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) - operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_stream_operation_name") require.Equal(t, "publish", operation.GetValue()) errLabel := findMetricLabelByName(metrics, "wg_error_type") require.Nil(t, errLabel) - system := findMetricLabelByName(metrics, "wg_messaging_system") + system := findMetricLabelByName(metrics, "wg_provider_type") require.Equal(t, "redis", system.GetValue()) - destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_destination_name") require.True(t, strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) provider := findMetricLabelByName(metrics, "wg_provider_id") @@ -438,20 +438,20 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "messaging_event_received_messages_total") + family := findMetricFamilyByName(mf, "streams_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) errLabel := findMetricLabelByName(metrics, "wg_error_type") require.Nil(t, errLabel) - operation := findMetricLabelByName(metrics, "wg_messaging_operation_name") + operation := findMetricLabelByName(metrics, "wg_stream_operation_name") require.Equal(t, "receive", operation.GetValue()) - system := findMetricLabelByName(metrics, "wg_messaging_system") + system := findMetricLabelByName(metrics, "wg_provider_type") require.Equal(t, "redis", system.GetValue()) - destination := findMetricLabelByName(metrics, "wg_messaging_destination_name") + destination := findMetricLabelByName(metrics, "wg_destination_name") require.True(t, 
strings.HasSuffix(destination.GetValue(), "employeeUpdatedMyRedis")) provider := findMetricLabelByName(metrics, "wg_provider_id") diff --git a/router-tests/telemetry/messaging_event_metrics_test.go b/router-tests/telemetry/stream_metrics_test.go similarity index 88% rename from router-tests/telemetry/messaging_event_metrics_test.go rename to router-tests/telemetry/stream_metrics_test.go index 402023c842..580131bdc3 100644 --- a/router-tests/telemetry/messaging_event_metrics_test.go +++ b/router-tests/telemetry/stream_metrics_test.go @@ -52,9 +52,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -62,13 +62,13 @@ func TestFlakyEventMetrics(t *testing.T) { attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgStreamOperationName) require.Equal(t, "produce", operation.AsString()) - system, _ := attrs.Value(otelattrs.WgMessagingSystem) + system, _ := attrs.Value(otelattrs.WgProviderType) require.Equal(t, "kafka", system.AsString()) - destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) provider, hasProvider := attrs.Value(otelattrs.WgProviderId) @@ -129,9 +129,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, 
metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") + metricEntry := integration.GetMetricByName(scope, "streams.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -139,13 +139,13 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgStreamOperationName) require.Equal(t, "receive", operation.AsString()) - system, _ := attrs.Value(otelattrs.WgMessagingSystem) + system, _ := attrs.Value(otelattrs.WgProviderType) require.Equal(t, "kafka", system.AsString()) - destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated")) provider, hasProvider := attrs.Value(otelattrs.WgProviderId) @@ -189,22 +189,22 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) 
+ operation, _ := attrs.Value(otelattrs.WgStreamOperationName) require.Equal(t, "publish", operation.AsString()) - system, _ := attrs.Value(otelattrs.WgMessagingSystem) + system, _ := attrs.Value(otelattrs.WgProviderType) require.Equal(t, "nats", system.AsString()) - destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyNats.12")) provider, hasProvider := attrs.Value(otelattrs.WgProviderId) @@ -241,22 +241,22 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgStreamOperationName) require.Equal(t, "request", operation.AsString()) - system, _ := attrs.Value(otelattrs.WgMessagingSystem) + system, _ := attrs.Value(otelattrs.WgProviderType) require.Equal(t, "nats", system.AsString()) - destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "getEmployeeMyNats.12")) provider, hasProvider := attrs.Value(otelattrs.WgProviderId) @@ -325,9 +325,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), 
&rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") + metricEntry := integration.GetMetricByName(scope, "streams.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -335,13 +335,13 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgStreamOperationName) require.Equal(t, "receive", operation.AsString()) - system, _ := attrs.Value(otelattrs.WgMessagingSystem) + system, _ := attrs.Value(otelattrs.WgProviderType) require.Equal(t, "nats", system.AsString()) - destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdated.3")) provider, hasProvider := attrs.Value(otelattrs.WgProviderId) @@ -387,9 +387,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.sent.messages") + metricEntry := integration.GetMetricByName(scope, "streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -397,13 +397,13 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := 
attrs.Value(otelattrs.WgMessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgStreamOperationName) require.Equal(t, "publish", operation.AsString()) - system, _ := attrs.Value(otelattrs.WgMessagingSystem) + system, _ := attrs.Value(otelattrs.WgProviderType) require.Equal(t, "redis", system.AsString()) - destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) provider, hasProvider := attrs.Value(otelattrs.WgProviderId) @@ -463,9 +463,9 @@ func TestFlakyEventMetrics(t *testing.T) { rm := metricdata.ResourceMetrics{} require.NoError(t, metricReader.Collect(context.Background(), &rm)) - scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.messaging.events") + scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "messaging.event.received.messages") + metricEntry := integration.GetMetricByName(scope, "streams.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -473,13 +473,13 @@ func TestFlakyEventMetrics(t *testing.T) { require.Len(t, sum.DataPoints, 1) attrs := sum.DataPoints[0].Attributes - operation, _ := attrs.Value(otelattrs.WgMessagingOperationName) + operation, _ := attrs.Value(otelattrs.WgStreamOperationName) require.Equal(t, "receive", operation.AsString()) - system, _ := attrs.Value(otelattrs.WgMessagingSystem) + system, _ := attrs.Value(otelattrs.WgProviderType) require.Equal(t, "redis", system.AsString()) - destination, _ := attrs.Value(otelattrs.WgMessagingDestinationName) + destination, _ := attrs.Value(otelattrs.WgDestinationName) require.True(t, strings.HasSuffix(destination.AsString(), "employeeUpdatedMyRedis")) provider, hasProvider := attrs.Value(otelattrs.WgProviderId) diff --git 
a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 3d45434518..0f5e4beb3f 100644 --- a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -1504,11 +1504,11 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node EngineStats: rmetric.EngineStatsConfig{ Subscription: testConfig.MetricOptions.PrometheusEngineStatsOptions.EnableSubscription, }, - CircuitBreaker: testConfig.MetricOptions.EnablePrometheusCircuitBreakerMetrics, - ExcludeMetrics: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetrics, - ExcludeMetricLabels: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetricLabels, - MessagingEventMetrics: testConfig.MetricOptions.EnablePrometheusEventMetrics, - ExcludeScopeInfo: testConfig.MetricOptions.MetricExclusions.ExcludeScopeInfo, + CircuitBreaker: testConfig.MetricOptions.EnablePrometheusCircuitBreakerMetrics, + ExcludeMetrics: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetrics, + ExcludeMetricLabels: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetricLabels, + Streams: testConfig.MetricOptions.EnablePrometheusEventMetrics, + ExcludeScopeInfo: testConfig.MetricOptions.MetricExclusions.ExcludeScopeInfo, PromSchemaFieldUsage: rmetric.PrometheusSchemaFieldUsage{ Enabled: testConfig.MetricOptions.PrometheusSchemaFieldUsage.Enabled, IncludeOperationSha: testConfig.MetricOptions.PrometheusSchemaFieldUsage.IncludeOperationSha, @@ -1527,11 +1527,11 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node Enabled: true, }, OTLP: config.MetricsOTLP{ - Enabled: true, - RouterRuntime: testConfig.MetricOptions.EnableRuntimeMetrics, - GraphqlCache: testConfig.MetricOptions.EnableOTLPRouterCache, - MessagingEventMetrics: testConfig.MetricOptions.EnableOTLPEventMetrics, - ConnectionStats: testConfig.MetricOptions.EnableOTLPConnectionMetrics, + Enabled: true, + RouterRuntime: testConfig.MetricOptions.EnableRuntimeMetrics, + 
GraphqlCache: testConfig.MetricOptions.EnableOTLPRouterCache, + Streams: testConfig.MetricOptions.EnableOTLPEventMetrics, + ConnectionStats: testConfig.MetricOptions.EnableOTLPConnectionMetrics, EngineStats: config.EngineStats{ Subscriptions: testConfig.MetricOptions.OTLPEngineStatsOptions.EnableSubscription, }, @@ -2825,7 +2825,7 @@ func subgraphOptions(ctx context.Context, t testing.TB, logger *zap.Logger, nats natsPubSubByProviderID := make(map[string]pubsubNats.Adapter, len(DemoNatsProviders)) for _, sourceName := range DemoNatsProviders { adapter, err := pubsubNats.NewAdapter(ctx, logger, natsData.Params[0].Url, natsData.Params[0].Opts, "hostname", "listenaddr", datasource.ProviderOpts{ - MessagingEventMetricStore: rmetric.NewNoopEventMetricStore(), + StreamMetricStore: rmetric.NewNoopStreamMetricStore(), }) require.NoError(t, err) require.NoError(t, adapter.Startup(ctx)) diff --git a/router/core/factoryresolver.go b/router/core/factoryresolver.go index 96226c3eee..57a3bd172f 100644 --- a/router/core/factoryresolver.go +++ b/router/core/factoryresolver.go @@ -4,11 +4,12 @@ import ( "context" "encoding/json" "fmt" - rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "net/http" "net/url" "slices" + rmetric "github.com/wundergraph/cosmo/router/pkg/metric" + "github.com/buger/jsonparser" "github.com/wundergraph/cosmo/router/pkg/grpcconnector" "github.com/wundergraph/cosmo/router/pkg/pubsub" @@ -208,11 +209,11 @@ func (l *Loader) LoadInternedString(engineConfig *nodev1.EngineConfiguration, st } type RouterEngineConfiguration struct { - Execution config.EngineExecutionConfiguration - Headers *config.HeaderRules - Events config.EventsConfiguration - SubgraphErrorPropagation config.SubgraphErrorPropagationConfiguration - MessagingEventMetricStore rmetric.MessagingEventMetricStore + Execution config.EngineExecutionConfiguration + Headers *config.HeaderRules + Events config.EventsConfiguration + SubgraphErrorPropagation config.SubgraphErrorPropagationConfiguration 
+ StreamMetricStore rmetric.StreamMetricStore } func mapProtoFilterToPlanFilter(input *nodev1.SubscriptionFilterCondition, output *plan.SubscriptionFilterCondition) *plan.SubscriptionFilterCondition { @@ -472,7 +473,7 @@ func (l *Loader) Load(engineConfig *nodev1.EngineConfiguration, subgraphs []*nod factoryProviders, factoryDataSources, err := pubsub.BuildProvidersAndDataSources( l.ctx, routerEngineConfig.Events, - routerEngineConfig.MessagingEventMetricStore, + routerEngineConfig.StreamMetricStore, l.logger, pubSubDS, l.resolver.InstanceData().HostName, diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 2276be69ce..020ed4fd7a 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -518,7 +518,7 @@ type graphMux struct { metricStore rmetric.Store prometheusCacheMetrics *rmetric.CacheMetrics otelCacheMetrics *rmetric.CacheMetrics - messagingEventMetricStore rmetric.MessagingEventMetricStore + streamMetricStore rmetric.StreamMetricStore } // buildOperationCaches creates the caches for the graph mux. 
@@ -760,8 +760,8 @@ func (s *graphMux) Shutdown(ctx context.Context) error { } } - if s.messagingEventMetricStore != nil { - if aErr := s.messagingEventMetricStore.Shutdown(ctx); aErr != nil { + if s.streamMetricStore != nil { + if aErr := s.streamMetricStore.Shutdown(ctx); aErr != nil { err = errors.Join(err, aErr) } } @@ -781,8 +781,8 @@ func (s *graphServer) buildGraphMux( opts BuildGraphMuxOptions, ) (*graphMux, error) { gm := &graphMux{ - metricStore: rmetric.NewNoopMetrics(), - messagingEventMetricStore: rmetric.NewNoopEventMetricStore(), + metricStore: rmetric.NewNoopMetrics(), + streamMetricStore: rmetric.NewNoopStreamMetricStore(), } httpRouter := chi.NewRouter() @@ -880,8 +880,8 @@ func (s *graphServer) buildGraphMux( } } - if s.metricConfig.OpenTelemetry.MessagingEventMetrics || s.metricConfig.Prometheus.MessagingEventMetrics { - store, err := rmetric.NewMessagingEventMetricStore( + if s.metricConfig.OpenTelemetry.Stream || s.metricConfig.Prometheus.Streams { + store, err := rmetric.NewStreamMetricStore( s.logger, baseMetricAttributes, s.otlpMeterProvider, @@ -890,7 +890,7 @@ func (s *graphServer) buildGraphMux( if err != nil { return nil, err } - gm.messagingEventMetricStore = store + gm.streamMetricStore = store } subgraphs, err := configureSubgraphOverwrites( @@ -1132,11 +1132,11 @@ func (s *graphServer) buildGraphMux( } routerEngineConfig := &RouterEngineConfiguration{ - Execution: s.engineExecutionConfiguration, - Headers: s.headerRules, - Events: s.eventsConfig, - SubgraphErrorPropagation: s.subgraphErrorPropagation, - MessagingEventMetricStore: gm.messagingEventMetricStore, + Execution: s.engineExecutionConfiguration, + Headers: s.headerRules, + Events: s.eventsConfig, + SubgraphErrorPropagation: s.subgraphErrorPropagation, + StreamMetricStore: gm.streamMetricStore, } // map[string]*http.Transport cannot be coerced into map[string]http.RoundTripper, unfortunately diff --git a/router/core/plan_generator.go b/router/core/plan_generator.go index 
7f5119f823..3297a4a3bf 100644 --- a/router/core/plan_generator.go +++ b/router/core/plan_generator.go @@ -256,7 +256,7 @@ func (pg *PlanGenerator) buildRouterConfig(configFilePath string) (*nodev1.Route func (pg *PlanGenerator) loadConfiguration(routerConfig *nodev1.RouterConfig, logger *zap.Logger, maxDataSourceCollectorsConcurrency uint) error { routerEngineConfig := RouterEngineConfiguration{ - MessagingEventMetricStore: metric.NewNoopEventMetricStore(), + StreamMetricStore: metric.NewNoopStreamMetricStore(), } natSources := map[string]*nats.ProviderAdapter{} kafkaSources := map[string]*kafka.ProviderAdapter{} @@ -391,4 +391,4 @@ func findOperationName(operation *ast.Document) (operationName []byte) { } } return nil -} \ No newline at end of file +} diff --git a/router/core/router.go b/router/core/router.go index 3c5697b7cf..4bb749fd64 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -2238,11 +2238,11 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config { EngineStats: rmetric.EngineStatsConfig{ Subscription: cfg.Metrics.OTLP.EngineStats.Subscriptions, }, - Exporters: openTelemetryExporters, - CircuitBreaker: cfg.Metrics.OTLP.CircuitBreaker, - MessagingEventMetrics: cfg.Metrics.OTLP.MessagingEventMetrics, - ExcludeMetrics: cfg.Metrics.OTLP.ExcludeMetrics, - ExcludeMetricLabels: cfg.Metrics.OTLP.ExcludeMetricLabels, + Exporters: openTelemetryExporters, + CircuitBreaker: cfg.Metrics.OTLP.CircuitBreaker, + Stream: cfg.Metrics.OTLP.Streams, + ExcludeMetrics: cfg.Metrics.OTLP.ExcludeMetrics, + ExcludeMetricLabels: cfg.Metrics.OTLP.ExcludeMetricLabels, }, Prometheus: rmetric.PrometheusConfig{ Enabled: cfg.Metrics.Prometheus.Enabled, @@ -2253,11 +2253,11 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config { EngineStats: rmetric.EngineStatsConfig{ Subscription: cfg.Metrics.Prometheus.EngineStats.Subscriptions, }, - CircuitBreaker: cfg.Metrics.Prometheus.CircuitBreaker, - ExcludeMetrics: 
cfg.Metrics.Prometheus.ExcludeMetrics, - ExcludeMetricLabels: cfg.Metrics.Prometheus.ExcludeMetricLabels, - MessagingEventMetrics: cfg.Metrics.Prometheus.MessagingEventMetrics, - ExcludeScopeInfo: cfg.Metrics.Prometheus.ExcludeScopeInfo, + CircuitBreaker: cfg.Metrics.Prometheus.CircuitBreaker, + ExcludeMetrics: cfg.Metrics.Prometheus.ExcludeMetrics, + ExcludeMetricLabels: cfg.Metrics.Prometheus.ExcludeMetricLabels, + Streams: cfg.Metrics.Prometheus.Streams, + ExcludeScopeInfo: cfg.Metrics.Prometheus.ExcludeScopeInfo, PromSchemaFieldUsage: rmetric.PrometheusSchemaFieldUsage{ Enabled: cfg.Metrics.Prometheus.SchemaFieldUsage.Enabled, IncludeOperationSha: cfg.Metrics.Prometheus.SchemaFieldUsage.IncludeOperationSha, diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index 9e4cb4715a..cfd10de0c6 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -95,17 +95,17 @@ type EngineStats struct { } type Prometheus struct { - Enabled bool `yaml:"enabled" envDefault:"true" env:"PROMETHEUS_ENABLED"` - Path string `yaml:"path" envDefault:"/metrics" env:"PROMETHEUS_HTTP_PATH"` - ListenAddr string `yaml:"listen_addr" envDefault:"127.0.0.1:8088" env:"PROMETHEUS_LISTEN_ADDR"` - GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"PROMETHEUS_GRAPHQL_CACHE"` - ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"PROMETHEUS_CONNECTION_STATS"` - MessagingEventMetrics bool `yaml:"messaging_event_metrics" envDefault:"false" env:"PROMETHEUS_MESSAGING_EVENT_METRICS"` - EngineStats EngineStats `yaml:"engine_stats" envPrefix:"PROMETHEUS_"` - CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"PROMETHEUS_CIRCUIT_BREAKER"` - ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"PROMETHEUS_EXCLUDE_METRICS"` - ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"PROMETHEUS_EXCLUDE_METRIC_LABELS"` - ExcludeScopeInfo bool `yaml:"exclude_scope_info" envDefault:"false" 
env:"PROMETHEUS_EXCLUDE_SCOPE_INFO"` + Enabled bool `yaml:"enabled" envDefault:"true" env:"PROMETHEUS_ENABLED"` + Path string `yaml:"path" envDefault:"/metrics" env:"PROMETHEUS_HTTP_PATH"` + ListenAddr string `yaml:"listen_addr" envDefault:"127.0.0.1:8088" env:"PROMETHEUS_LISTEN_ADDR"` + GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"PROMETHEUS_GRAPHQL_CACHE"` + ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"PROMETHEUS_CONNECTION_STATS"` + Streams bool `yaml:"streams" envDefault:"false" env:"PROMETHEUS_STREAM"` + EngineStats EngineStats `yaml:"engine_stats" envPrefix:"PROMETHEUS_"` + CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"PROMETHEUS_CIRCUIT_BREAKER"` + ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"PROMETHEUS_EXCLUDE_METRICS"` + ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"PROMETHEUS_EXCLUDE_METRIC_LABELS"` + ExcludeScopeInfo bool `yaml:"exclude_scope_info" envDefault:"false" env:"PROMETHEUS_EXCLUDE_SCOPE_INFO"` SchemaFieldUsage PrometheusSchemaFieldUsage `yaml:"schema_usage" envPrefix:"PROMETHEUS_SCHEMA_FIELD_USAGE_"` } @@ -132,16 +132,16 @@ type Metrics struct { } type MetricsOTLP struct { - Enabled bool `yaml:"enabled" envDefault:"true" env:"METRICS_OTLP_ENABLED"` - RouterRuntime bool `yaml:"router_runtime" envDefault:"true" env:"METRICS_OTLP_ROUTER_RUNTIME"` - GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"METRICS_OTLP_GRAPHQL_CACHE"` - ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"METRICS_OTLP_CONNECTION_STATS"` - EngineStats EngineStats `yaml:"engine_stats" envPrefix:"METRICS_OTLP_"` - CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"METRICS_OTLP_CIRCUIT_BREAKER"` - MessagingEventMetrics bool `yaml:"messaging_event_metrics" envDefault:"false" env:"METRICS_OTLP_MESSAGING_EVENT_METRICS"` - ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"METRICS_OTLP_EXCLUDE_METRICS"` - 
ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"METRICS_OTLP_EXCLUDE_METRIC_LABELS"` - Exporters []MetricsOTLPExporter `yaml:"exporters"` + Enabled bool `yaml:"enabled" envDefault:"true" env:"METRICS_OTLP_ENABLED"` + RouterRuntime bool `yaml:"router_runtime" envDefault:"true" env:"METRICS_OTLP_ROUTER_RUNTIME"` + GraphqlCache bool `yaml:"graphql_cache" envDefault:"false" env:"METRICS_OTLP_GRAPHQL_CACHE"` + ConnectionStats bool `yaml:"connection_stats" envDefault:"false" env:"METRICS_OTLP_CONNECTION_STATS"` + EngineStats EngineStats `yaml:"engine_stats" envPrefix:"METRICS_OTLP_"` + CircuitBreaker bool `yaml:"circuit_breaker" envDefault:"false" env:"METRICS_OTLP_CIRCUIT_BREAKER"` + Streams bool `yaml:"streams" envDefault:"false" env:"METRICS_OTLP_STREAM"` + ExcludeMetrics RegExArray `yaml:"exclude_metrics,omitempty" env:"METRICS_OTLP_EXCLUDE_METRICS"` + ExcludeMetricLabels RegExArray `yaml:"exclude_metric_labels,omitempty" env:"METRICS_OTLP_EXCLUDE_METRIC_LABELS"` + Exporters []MetricsOTLPExporter `yaml:"exporters"` } type Telemetry struct { diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index f68d53ab46..43cb1cb65c 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -1066,10 +1066,10 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." }, - "messaging_event_metrics": { + "stream": { "type": "boolean", "default": false, - "description": "Enable the collection of messaging event metrics. The default value is false." + "description": "Enable the collection of stream metrics. The default value is false." }, "circuit_breaker": { "type": "boolean", @@ -1176,10 +1176,10 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." 
}, - "messaging_event_metrics": { + "stream": { "type": "boolean", "default": false, - "description": "Enable the collection of messaging event metrics. The default value is false." + "description": "Enable the collection of stream metrics. The default value is false." }, "circuit_breaker": { "type": "boolean", diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 590d8206df..3d941ac45e 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -40,7 +40,7 @@ "Subscriptions": false }, "CircuitBreaker": false, - "MessagingEventMetrics": false, + "StreamsMetrics": false, "ExcludeMetrics": null, "ExcludeMetricLabels": null, "Exporters": null @@ -51,7 +51,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": false, "ConnectionStats": false, - "MessagingEventMetrics": false, + "StreamsMetrics": false, "EngineStats": { "Subscriptions": false }, diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 7a1bbeba61..cf32cd50e5 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -61,7 +61,7 @@ "Subscriptions": true }, "CircuitBreaker": false, - "MessagingEventMetrics": false, + "Streams": false, "ExcludeMetrics": null, "ExcludeMetricLabels": null, "Exporters": [ @@ -81,7 +81,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": true, "ConnectionStats": true, - "MessagingEventMetrics": false, + "StreamsMetrics": false, "EngineStats": { "Subscriptions": true }, diff --git a/router/pkg/metric/config.go b/router/pkg/metric/config.go index 8c34bc0b70..8b76b609da 100644 --- a/router/pkg/metric/config.go +++ b/router/pkg/metric/config.go @@ -34,8 +34,8 @@ type PrometheusConfig struct { // Whether or not to exclude scope info ExcludeScopeInfo bool // Prometheus schema field usage configuration - PromSchemaFieldUsage PrometheusSchemaFieldUsage - 
MessagingEventMetrics bool + PromSchemaFieldUsage PrometheusSchemaFieldUsage + Streams bool } type PrometheusSchemaFieldUsage struct { @@ -79,8 +79,8 @@ type OpenTelemetry struct { // Metric labels to exclude from the OTLP exporter. ExcludeMetricLabels []*regexp.Regexp // TestReader is used for testing purposes. If set, the reader will be used instead of the configured exporters. - TestReader sdkmetric.Reader - MessagingEventMetrics bool + TestReader sdkmetric.Reader + Stream bool } func GetDefaultExporter(cfg *Config) *OpenTelemetryExporter { diff --git a/router/pkg/metric/messaging_event_metric_store.go b/router/pkg/metric/messaging_event_metric_store.go deleted file mode 100644 index 807095f66b..0000000000 --- a/router/pkg/metric/messaging_event_metric_store.go +++ /dev/null @@ -1,153 +0,0 @@ -package metric - -import ( - "context" - "errors" - "fmt" - - "go.opentelemetry.io/otel/attribute" - otelmetric "go.opentelemetry.io/otel/metric" - "go.opentelemetry.io/otel/sdk/metric" - "go.uber.org/zap" - - otel "github.com/wundergraph/cosmo/router/pkg/otel" -) - -type ProviderType string - -const ( - ProviderTypeKafka ProviderType = "kafka" - ProviderTypeNats ProviderType = "nats" - ProviderTypeRedis ProviderType = "redis" -) - -// MessagingEvent carries the values for messaging metrics attributes. -type MessagingEvent struct { - ProviderId string // The id of the provider defined in the configuration - OperationName string // The operation name that is specific to the messaging system - MessagingSystem ProviderType // The messaging system type that are supported - ErrorType string // Optional error type, e.g., "publish_error" or "receive_error". If empty, the attribute is not set - DestinationName string // The name of the destination queue / topic / channel -} - -// MessagingEventMetricProvider is the interface that wraps the basic Event metric methods. 
-type MessagingEventMetricProvider interface { - Produce(ctx context.Context, opts ...otelmetric.AddOption) - Consume(ctx context.Context, opts ...otelmetric.AddOption) - - Flush(ctx context.Context) error -} - -type MessagingEventMetricStore interface { - Produce(ctx context.Context, event MessagingEvent) - Consume(ctx context.Context, event MessagingEvent) - - Flush(ctx context.Context) error - Shutdown(ctx context.Context) error -} - -// MessagingEventMetrics is the store for Event (Kafka/Redis/NATS) metrics. -type MessagingEventMetrics struct { - baseAttributes []attribute.KeyValue - logger *zap.Logger - providers []MessagingEventMetricProvider -} - -func NewMessagingEventMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue, otelProvider, promProvider *metric.MeterProvider, metricsConfig *Config) (*MessagingEventMetrics, error) { - providers := make([]MessagingEventMetricProvider, 0) - - if metricsConfig.OpenTelemetry.MessagingEventMetrics { - otlpMetrics, err := newOtlpMessagingEventMetrics(logger, otelProvider) - if err != nil { - return nil, fmt.Errorf("failed to create otlp event metrics: %w", err) - } - providers = append(providers, otlpMetrics) - } - - if metricsConfig.Prometheus.MessagingEventMetrics { - promMetrics, err := newPromMessagingEventMetrics(logger, promProvider) - if err != nil { - return nil, fmt.Errorf("failed to create prometheus event metrics: %w", err) - } - providers = append(providers, promMetrics) - } - - store := &MessagingEventMetrics{ - baseAttributes: baseAttributes, - logger: logger, - providers: providers, - } - return store, nil -} - -func (e *MessagingEventMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOption { - copied := append([]attribute.KeyValue{}, e.baseAttributes...) - return otelmetric.WithAttributes(append(copied, attrs...)...) 
-} - -func (e *MessagingEventMetrics) Produce(ctx context.Context, event MessagingEvent) { - attrs := []attribute.KeyValue{ - otel.WgMessagingOperationName.String(event.OperationName), - otel.WgMessagingSystem.String(string(event.MessagingSystem)), - } - if event.ErrorType != "" { - attrs = append(attrs, otel.WgErrorType.String(event.ErrorType)) - } - if event.ProviderId != "" { - attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) - } - if event.DestinationName != "" { - attrs = append(attrs, otel.WgMessagingDestinationName.String(event.DestinationName)) - } - opt := e.withAttrs(attrs...) - - for _, provider := range e.providers { - provider.Produce(ctx, opt) - } -} - -func (e *MessagingEventMetrics) Consume(ctx context.Context, event MessagingEvent) { - attrs := []attribute.KeyValue{ - otel.WgMessagingOperationName.String(event.OperationName), - otel.WgMessagingSystem.String(string(event.MessagingSystem)), - } - if event.ErrorType != "" { - attrs = append(attrs, otel.WgErrorType.String(event.ErrorType)) - } - if event.ProviderId != "" { - attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) - } - if event.DestinationName != "" { - attrs = append(attrs, otel.WgMessagingDestinationName.String(event.DestinationName)) - } - - opt := e.withAttrs(attrs...) - - for _, provider := range e.providers { - provider.Consume(ctx, opt) - } -} - -// Flush flushes the metrics to the backend synchronously. -func (e *MessagingEventMetrics) Flush(ctx context.Context) error { - var err error - - for _, provider := range e.providers { - if errOtlp := provider.Flush(ctx); errOtlp != nil { - err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errOtlp)) - } - } - - return err -} - -// Shutdown flushes the metrics and stops observers if any. 
-func (e *MessagingEventMetrics) Shutdown(ctx context.Context) error { - var err error - - if errFlush := e.Flush(ctx); errFlush != nil { - err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errFlush)) - } - - return err -} diff --git a/router/pkg/metric/noop_event_metrics.go b/router/pkg/metric/noop_event_metrics.go deleted file mode 100644 index 5a7d12ee13..0000000000 --- a/router/pkg/metric/noop_event_metrics.go +++ /dev/null @@ -1,15 +0,0 @@ -package metric - -import ( - "context" -) - -type NoopEventMetricStore struct{} - -func (n *NoopEventMetricStore) Produce(ctx context.Context, event MessagingEvent) {} -func (n *NoopEventMetricStore) Consume(ctx context.Context, event MessagingEvent) {} - -func (n *NoopEventMetricStore) Flush(ctx context.Context) error { return nil } -func (n *NoopEventMetricStore) Shutdown(ctx context.Context) error { return nil } - -func NewNoopEventMetricStore() *NoopEventMetricStore { return &NoopEventMetricStore{} } diff --git a/router/pkg/metric/noop_stream_metrics.go b/router/pkg/metric/noop_stream_metrics.go new file mode 100644 index 0000000000..c312cc2472 --- /dev/null +++ b/router/pkg/metric/noop_stream_metrics.go @@ -0,0 +1,15 @@ +package metric + +import ( + "context" +) + +type NoopStreamMetricStore struct{} + +func (n *NoopStreamMetricStore) Produce(ctx context.Context, event StreamsEvent) {} +func (n *NoopStreamMetricStore) Consume(ctx context.Context, event StreamsEvent) {} + +func (n *NoopStreamMetricStore) Flush(ctx context.Context) error { return nil } +func (n *NoopStreamMetricStore) Shutdown(ctx context.Context) error { return nil } + +func NewNoopStreamMetricStore() *NoopStreamMetricStore { return &NoopStreamMetricStore{} } diff --git a/router/pkg/metric/oltp_messaging_event_metric_store.go b/router/pkg/metric/oltp_messaging_event_metric_store.go deleted file mode 100644 index 222ee8f3aa..0000000000 --- a/router/pkg/metric/oltp_messaging_event_metric_store.go +++ /dev/null @@ -1,52 +0,0 @@ -package 
metric - -import ( - "context" - - otelmetric "go.opentelemetry.io/otel/metric" - "go.opentelemetry.io/otel/sdk/metric" - "go.uber.org/zap" -) - -const ( - cosmoRouterEventMeterName = "cosmo.router.messaging.events" - cosmoRouterEventMeterVersion = "0.0.1" -) - -type otlpMessagingEventMetrics struct { - instruments *eventInstruments - meterProvider *metric.MeterProvider - logger *zap.Logger - meter otelmetric.Meter -} - -func newOtlpMessagingEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*otlpMessagingEventMetrics, error) { - meter := meterProvider.Meter( - cosmoRouterEventMeterName, - otelmetric.WithInstrumentationVersion(cosmoRouterEventMeterVersion), - ) - - instruments, err := newMessagingEventInstruments(meter) - if err != nil { - return nil, err - } - - return &otlpMessagingEventMetrics{ - instruments: instruments, - meterProvider: meterProvider, - logger: logger, - meter: meter, - }, nil -} - -func (o *otlpMessagingEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.producedMessages.Add(ctx, 1, opts...) -} - -func (o *otlpMessagingEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { - o.instruments.consumedMessages.Add(ctx, 1, opts...) 
-} - -func (o *otlpMessagingEventMetrics) Flush(ctx context.Context) error { - return o.meterProvider.ForceFlush(ctx) -} diff --git a/router/pkg/metric/oltp_stream_metric_store.go b/router/pkg/metric/oltp_stream_metric_store.go new file mode 100644 index 0000000000..3d7a8573e9 --- /dev/null +++ b/router/pkg/metric/oltp_stream_metric_store.go @@ -0,0 +1,52 @@ +package metric + +import ( + "context" + + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + "go.uber.org/zap" +) + +const ( + cosmoRouterStreamEventMeterName = "cosmo.router.streams" + cosmoRouterStreamEventMeterVersion = "0.0.1" +) + +type otlpStreamEventMetrics struct { + instruments *eventInstruments + meterProvider *metric.MeterProvider + logger *zap.Logger + meter otelmetric.Meter +} + +func newOtlpStreamEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*otlpStreamEventMetrics, error) { + meter := meterProvider.Meter( + cosmoRouterStreamEventMeterName, + otelmetric.WithInstrumentationVersion(cosmoRouterStreamEventMeterVersion), + ) + + instruments, err := newStreamEventInstruments(meter) + if err != nil { + return nil, err + } + + return &otlpStreamEventMetrics{ + instruments: instruments, + meterProvider: meterProvider, + logger: logger, + meter: meter, + }, nil +} + +func (o *otlpStreamEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.producedMessages.Add(ctx, 1, opts...) +} + +func (o *otlpStreamEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { + o.instruments.consumedMessages.Add(ctx, 1, opts...) 
+} + +func (o *otlpStreamEventMetrics) Flush(ctx context.Context) error { + return o.meterProvider.ForceFlush(ctx) +} diff --git a/router/pkg/metric/prom_messaging_event_metric_store.go b/router/pkg/metric/prom_stream_metric_store.go similarity index 56% rename from router/pkg/metric/prom_messaging_event_metric_store.go rename to router/pkg/metric/prom_stream_metric_store.go index 355f7dc351..30309f2444 100644 --- a/router/pkg/metric/prom_messaging_event_metric_store.go +++ b/router/pkg/metric/prom_stream_metric_store.go @@ -9,29 +9,29 @@ import ( ) const ( - cosmoRouterEventPromMeterName = "cosmo.router.messaging.events.prometheus" + cosmoRouterEventPromMeterName = "cosmo.router.streams.prometheus" cosmoRouterEventPromMeterVersion = "0.0.1" ) -type promMessagingEventMetrics struct { +type promStreamEventMetrics struct { instruments *eventInstruments meterProvider *metric.MeterProvider logger *zap.Logger meter otelmetric.Meter } -func newPromMessagingEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*promMessagingEventMetrics, error) { +func newPromStreamEventMetrics(logger *zap.Logger, meterProvider *metric.MeterProvider) (*promStreamEventMetrics, error) { meter := meterProvider.Meter( cosmoRouterEventPromMeterName, otelmetric.WithInstrumentationVersion(cosmoRouterEventPromMeterVersion), ) - instruments, err := newMessagingEventInstruments(meter) + instruments, err := newStreamEventInstruments(meter) if err != nil { return nil, err } - return &promMessagingEventMetrics{ + return &promStreamEventMetrics{ instruments: instruments, meterProvider: meterProvider, logger: logger, @@ -39,14 +39,14 @@ func newPromMessagingEventMetrics(logger *zap.Logger, meterProvider *metric.Mete }, nil } -func (p *promMessagingEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { +func (p *promStreamEventMetrics) Produce(ctx context.Context, opts ...otelmetric.AddOption) { p.instruments.producedMessages.Add(ctx, 1, opts...) 
} -func (p *promMessagingEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { +func (p *promStreamEventMetrics) Consume(ctx context.Context, opts ...otelmetric.AddOption) { p.instruments.consumedMessages.Add(ctx, 1, opts...) } -func (p *promMessagingEventMetrics) Flush(ctx context.Context) error { +func (p *promStreamEventMetrics) Flush(ctx context.Context) error { return p.meterProvider.ForceFlush(ctx) } diff --git a/router/pkg/metric/messaging_event_measurements.go b/router/pkg/metric/stream_measurements.go similarity index 71% rename from router/pkg/metric/messaging_event_measurements.go rename to router/pkg/metric/stream_measurements.go index ead75303ea..db76ae93c2 100644 --- a/router/pkg/metric/messaging_event_measurements.go +++ b/router/pkg/metric/stream_measurements.go @@ -7,16 +7,16 @@ import ( ) const ( - messagingSentMessages = "messaging.event.sent.messages" - messagingConsumedMessages = "messaging.event.received.messages" + messagingSentMessages = "streams.sent.messages" + messagingConsumedMessages = "streams.received.messages" ) var ( messagingSentMessagesOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of messaging event sent messages"), + otelmetric.WithDescription("Number of stream sent messages"), } messagingConsumedMessagesOptions = []otelmetric.Int64CounterOption{ - otelmetric.WithDescription("Number of messaging event consumed messages"), + otelmetric.WithDescription("Number of stream consumed messages"), } ) @@ -25,7 +25,7 @@ type eventInstruments struct { consumedMessages otelmetric.Int64Counter } -func newMessagingEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { +func newStreamEventInstruments(meter otelmetric.Meter) (*eventInstruments, error) { producedCounter, err := meter.Int64Counter( messagingSentMessages, messagingSentMessagesOptions..., diff --git a/router/pkg/metric/stream_metric_store.go b/router/pkg/metric/stream_metric_store.go new file mode 100644 index 
0000000000..f157816455 --- /dev/null +++ b/router/pkg/metric/stream_metric_store.go @@ -0,0 +1,153 @@ +package metric + +import ( + "context" + "errors" + "fmt" + + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + "go.uber.org/zap" + + otel "github.com/wundergraph/cosmo/router/pkg/otel" +) + +type ProviderType string + +const ( + ProviderTypeKafka ProviderType = "kafka" + ProviderTypeNats ProviderType = "nats" + ProviderTypeRedis ProviderType = "redis" +) + +// StreamsEvent carries the values for stream metrics attributes. +type StreamsEvent struct { + ProviderId string // The id of the provider defined in the configuration + StreamOperationName string // The stream operation name that is specific to the messaging system + ProviderType ProviderType // The messaging system type that are supported + ErrorType string // Optional error type, e.g., "publish_error" or "receive_error". If empty, the attribute is not set + DestinationName string // The name of the destination queue / topic / channel +} + +// StreamMetricProvider is the interface that wraps the basic Event metric methods. +type StreamMetricProvider interface { + Produce(ctx context.Context, opts ...otelmetric.AddOption) + Consume(ctx context.Context, opts ...otelmetric.AddOption) + + Flush(ctx context.Context) error +} + +type StreamMetricStore interface { + Produce(ctx context.Context, event StreamsEvent) + Consume(ctx context.Context, event StreamsEvent) + + Flush(ctx context.Context) error + Shutdown(ctx context.Context) error +} + +// StreamMetrics is the store for Event (Kafka/Redis/NATS) metrics. 
+type StreamMetrics struct { + baseAttributes []attribute.KeyValue + logger *zap.Logger + providers []StreamMetricProvider +} + +func NewStreamMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue, otelProvider, promProvider *metric.MeterProvider, metricsConfig *Config) (*StreamMetrics, error) { + providers := make([]StreamMetricProvider, 0) + + if metricsConfig.OpenTelemetry.Stream { + otlpMetrics, err := newOtlpStreamEventMetrics(logger, otelProvider) + if err != nil { + return nil, fmt.Errorf("failed to create otlp stream event metrics: %w", err) + } + providers = append(providers, otlpMetrics) + } + + if metricsConfig.Prometheus.Streams { + promMetrics, err := newPromStreamEventMetrics(logger, promProvider) + if err != nil { + return nil, fmt.Errorf("failed to create prometheus stream event metrics: %w", err) + } + providers = append(providers, promMetrics) + } + + store := &StreamMetrics{ + baseAttributes: baseAttributes, + logger: logger, + providers: providers, + } + return store, nil +} + +func (e *StreamMetrics) withAttrs(attrs ...attribute.KeyValue) otelmetric.AddOption { + copied := append([]attribute.KeyValue{}, e.baseAttributes...) + return otelmetric.WithAttributes(append(copied, attrs...)...) +} + +func (e *StreamMetrics) Produce(ctx context.Context, event StreamsEvent) { + attrs := []attribute.KeyValue{ + otel.WgStreamOperationName.String(event.StreamOperationName), + otel.WgProviderType.String(string(event.ProviderType)), + } + if event.ErrorType != "" { + attrs = append(attrs, otel.WgErrorType.String(event.ErrorType)) + } + if event.ProviderId != "" { + attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) + } + if event.DestinationName != "" { + attrs = append(attrs, otel.WgDestinationName.String(event.DestinationName)) + } + opt := e.withAttrs(attrs...) 
+ + for _, provider := range e.providers { + provider.Produce(ctx, opt) + } +} + +func (e *StreamMetrics) Consume(ctx context.Context, event StreamsEvent) { + attrs := []attribute.KeyValue{ + otel.WgStreamOperationName.String(event.StreamOperationName), + otel.WgProviderType.String(string(event.ProviderType)), + } + if event.ErrorType != "" { + attrs = append(attrs, otel.WgErrorType.String(event.ErrorType)) + } + if event.ProviderId != "" { + attrs = append(attrs, otel.WgProviderId.String(event.ProviderId)) + } + if event.DestinationName != "" { + attrs = append(attrs, otel.WgDestinationName.String(event.DestinationName)) + } + + opt := e.withAttrs(attrs...) + + for _, provider := range e.providers { + provider.Consume(ctx, opt) + } +} + +// Flush flushes the metrics to the backend synchronously. +func (e *StreamMetrics) Flush(ctx context.Context) error { + var err error + + for _, provider := range e.providers { + if errOtlp := provider.Flush(ctx); errOtlp != nil { + err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errOtlp)) + } + } + + return err +} + +// Shutdown flushes the metrics and stops observers if any. 
+func (e *StreamMetrics) Shutdown(ctx context.Context) error { + var err error + + if errFlush := e.Flush(ctx); errFlush != nil { + err = errors.Join(err, fmt.Errorf("failed to flush metrics: %w", errFlush)) + } + + return err +} diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index 156792f5c3..08b3b96d95 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -62,12 +62,11 @@ const ( // Messaging metrics attributes const ( - WgMessagingOperationName = attribute.Key("wg.messaging.operation.name") - WgMessagingSystem = attribute.Key("wg.messaging.system") - WgMessagingError = attribute.Key("wg.messaging.error") - WgMessagingDestinationName = attribute.Key("wg.messaging.destination.name") - WgProviderId = attribute.Key("wg.provider.id") - WgErrorType = attribute.Key("wg.error.type") + WgStreamOperationName = attribute.Key("wg.stream.operation.name") + WgProviderType = attribute.Key("wg.provider.type") + WgDestinationName = attribute.Key("wg.destination.name") + WgProviderId = attribute.Key("wg.provider.id") + WgErrorType = attribute.Key("wg.error.type") ) const ( diff --git a/router/pkg/pubsub/datasource/provider.go b/router/pkg/pubsub/datasource/provider.go index 8f92de92a0..d9138630ca 100644 --- a/router/pkg/pubsub/datasource/provider.go +++ b/router/pkg/pubsub/datasource/provider.go @@ -2,6 +2,7 @@ package datasource import ( "context" + "github.com/wundergraph/cosmo/router/pkg/metric" ) @@ -34,5 +35,5 @@ type ProviderBuilder[P, E any] interface { } type ProviderOpts struct { - MessagingEventMetricStore metric.MessagingEventMetricStore + StreamMetricStore metric.StreamMetricStore } diff --git a/router/pkg/pubsub/kafka/adapter.go b/router/pkg/pubsub/kafka/adapter.go index 89a66262d2..e11993b668 100644 --- a/router/pkg/pubsub/kafka/adapter.go +++ b/router/pkg/pubsub/kafka/adapter.go @@ -40,13 +40,13 @@ type Adapter interface { // It uses a single write client to produce messages and a client per topic to consume 
messages. // Each client polls the Kafka topic for new records and updates the subscriptions with the new data. type ProviderAdapter struct { - ctx context.Context - opts []kgo.Opt - logger *zap.Logger - writeClient *kgo.Client - closeWg sync.WaitGroup - cancel context.CancelFunc - messagingEventMetricStore metric.MessagingEventMetricStore + ctx context.Context + opts []kgo.Opt + logger *zap.Logger + writeClient *kgo.Client + closeWg sync.WaitGroup + cancel context.CancelFunc + streamMetricStore metric.StreamMetricStore } type PollerOpts struct { @@ -100,11 +100,11 @@ func (p *ProviderAdapter) topicPoller(ctx context.Context, client *kgo.Client, u r := iter.Next() p.logger.Debug("subscription update", zap.String("topic", r.Topic), zap.ByteString("data", r.Value)) - p.messagingEventMetricStore.Consume(p.ctx, metric.MessagingEvent{ - ProviderId: pollerOpts.providerId, - OperationName: kafkaReceive, - MessagingSystem: metric.ProviderTypeKafka, - DestinationName: r.Topic, + p.streamMetricStore.Consume(p.ctx, metric.StreamsEvent{ + ProviderId: pollerOpts.providerId, + StreamOperationName: kafkaReceive, + ProviderType: metric.ProviderTypeKafka, + DestinationName: r.Topic, }) updater.Update(r.Value) } @@ -197,21 +197,21 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu if pErr != nil { log.Error("publish error", zap.Error(pErr)) // failure emission: include error.type generic - p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: kafkaProduce, - MessagingSystem: metric.ProviderTypeKafka, - ErrorType: "publish_error", - DestinationName: event.Topic, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: kafkaProduce, + ProviderType: metric.ProviderTypeKafka, + ErrorType: "publish_error", + DestinationName: event.Topic, }) return datasource.NewError(fmt.Sprintf("error publishing to Kafka topic %s", event.Topic), pErr) } - 
p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: kafkaProduce, - MessagingSystem: metric.ProviderTypeKafka, - DestinationName: event.Topic, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: kafkaProduce, + ProviderType: metric.ProviderTypeKafka, + DestinationName: event.Topic, }) return nil } @@ -260,19 +260,19 @@ func NewProviderAdapter(ctx context.Context, logger *zap.Logger, opts []kgo.Opt, logger = zap.NewNop() } - var store metric.MessagingEventMetricStore - if providerOpts.MessagingEventMetricStore != nil { - store = providerOpts.MessagingEventMetricStore + var store metric.StreamMetricStore + if providerOpts.StreamMetricStore != nil { + store = providerOpts.StreamMetricStore } else { - store = metric.NewNoopEventMetricStore() + store = metric.NewNoopStreamMetricStore() } return &ProviderAdapter{ - ctx: ctx, - logger: logger.With(zap.String("pubsub", "kafka")), - opts: opts, - closeWg: sync.WaitGroup{}, - cancel: cancel, - messagingEventMetricStore: store, + ctx: ctx, + logger: logger.With(zap.String("pubsub", "kafka")), + opts: opts, + closeWg: sync.WaitGroup{}, + cancel: cancel, + streamMetricStore: store, }, nil } diff --git a/router/pkg/pubsub/nats/adapter.go b/router/pkg/pubsub/nats/adapter.go index 059a4e9ee6..d10f8cf93d 100644 --- a/router/pkg/pubsub/nats/adapter.go +++ b/router/pkg/pubsub/nats/adapter.go @@ -40,17 +40,17 @@ type Adapter interface { // ProviderAdapter implements the AdapterInterface for NATS pub/sub type ProviderAdapter struct { - ctx context.Context - client *nats.Conn - js jetstream.JetStream - logger *zap.Logger - closeWg sync.WaitGroup - hostName string - routerListenAddr string - url string - opts []nats.Option - flushTimeout time.Duration - messagingEventMetricStore metric.MessagingEventMetricStore + ctx context.Context + client *nats.Conn + js jetstream.JetStream + logger *zap.Logger + closeWg 
sync.WaitGroup + hostName string + routerListenAddr string + url string + opts []nats.Option + flushTimeout time.Duration + streamMetricStore metric.StreamMetricStore } // getInstanceIdentifier returns an identifier for the current instance. @@ -141,11 +141,11 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent for msg := range msgBatch.Messages() { log.Debug("subscription update", zap.String("message_subject", msg.Subject()), zap.ByteString("data", msg.Data())) - p.messagingEventMetricStore.Consume(p.ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: natsReceive, - MessagingSystem: metric.ProviderTypeNats, - DestinationName: msg.Subject(), + p.streamMetricStore.Consume(p.ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: natsReceive, + ProviderType: metric.ProviderTypeNats, + DestinationName: msg.Subject(), }) updater.Update(msg.Data()) @@ -184,11 +184,11 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent select { case msg := <-msgChan: log.Debug("subscription update", zap.String("message_subject", msg.Subject), zap.ByteString("data", msg.Data)) - p.messagingEventMetricStore.Consume(p.ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: natsReceive, - MessagingSystem: metric.ProviderTypeNats, - DestinationName: msg.Subject, + p.streamMetricStore.Consume(p.ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: natsReceive, + ProviderType: metric.ProviderTypeNats, + DestinationName: msg.Subject, }) updater.Update(msg.Data) case <-p.ctx.Done(): @@ -234,20 +234,20 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishAndRequestEv err := p.client.Publish(event.Subject, event.Data) if err != nil { log.Error("publish error", zap.Error(err)) - p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: natsPublish, - MessagingSystem: 
metric.ProviderTypeNats, - ErrorType: "publish_error", - DestinationName: event.Subject, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: natsPublish, + ProviderType: metric.ProviderTypeNats, + ErrorType: "publish_error", + DestinationName: event.Subject, }) return datasource.NewError(fmt.Sprintf("error publishing to NATS subject %s", event.Subject), err) } else { - p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: natsPublish, - MessagingSystem: metric.ProviderTypeNats, - DestinationName: event.Subject, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: natsPublish, + ProviderType: metric.ProviderTypeNats, + DestinationName: event.Subject, }) } @@ -270,21 +270,21 @@ func (p *ProviderAdapter) Request(ctx context.Context, event PublishAndRequestEv msg, err := p.client.RequestWithContext(ctx, event.Subject, event.Data) if err != nil { log.Error("request error", zap.Error(err)) - p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: natsRequest, - MessagingSystem: metric.ProviderTypeNats, - ErrorType: "request_error", - DestinationName: event.Subject, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: natsRequest, + ProviderType: metric.ProviderTypeNats, + ErrorType: "request_error", + DestinationName: event.Subject, }) return datasource.NewError(fmt.Sprintf("error requesting from NATS subject %s", event.Subject), err) } - p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: natsRequest, - MessagingSystem: metric.ProviderTypeNats, - DestinationName: event.Subject, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: natsRequest, + ProviderType: 
metric.ProviderTypeNats, + DestinationName: event.Subject, }) // We don't collect metrics on err here as it's an error related to the writer @@ -358,22 +358,22 @@ func NewAdapter(ctx context.Context, logger *zap.Logger, url string, opts []nats logger = zap.NewNop() } - var store metric.MessagingEventMetricStore - if providerOpts.MessagingEventMetricStore != nil { - store = providerOpts.MessagingEventMetricStore + var store metric.StreamMetricStore + if providerOpts.StreamMetricStore != nil { + store = providerOpts.StreamMetricStore } else { - store = metric.NewNoopEventMetricStore() + store = metric.NewNoopStreamMetricStore() } return &ProviderAdapter{ - ctx: ctx, - logger: logger.With(zap.String("pubsub", "nats")), - closeWg: sync.WaitGroup{}, - hostName: hostName, - routerListenAddr: routerListenAddr, - url: url, - opts: opts, - flushTimeout: 10 * time.Second, - messagingEventMetricStore: store, + ctx: ctx, + logger: logger.With(zap.String("pubsub", "nats")), + closeWg: sync.WaitGroup{}, + hostName: hostName, + routerListenAddr: routerListenAddr, + url: url, + opts: opts, + flushTimeout: 10 * time.Second, + streamMetricStore: store, }, nil } diff --git a/router/pkg/pubsub/pubsub.go b/router/pkg/pubsub/pubsub.go index a2e3b16a99..b92aaad6f7 100644 --- a/router/pkg/pubsub/pubsub.go +++ b/router/pkg/pubsub/pubsub.go @@ -3,10 +3,11 @@ package pubsub import ( "context" "fmt" - "github.com/wundergraph/cosmo/router/pkg/metric" "slices" "strconv" + "github.com/wundergraph/cosmo/router/pkg/metric" + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" "github.com/wundergraph/cosmo/router/pkg/config" pubsub_datasource "github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" @@ -52,9 +53,9 @@ func (e *ProviderNotDefinedError) Error() string { // BuildProvidersAndDataSources is a generic function that builds providers and data sources for the given // EventsConfiguration and DataSourceConfigurationWithMetadata -func BuildProvidersAndDataSources(ctx 
context.Context, config config.EventsConfiguration, store metric.MessagingEventMetricStore, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { +func BuildProvidersAndDataSources(ctx context.Context, config config.EventsConfiguration, store metric.StreamMetricStore, logger *zap.Logger, dsConfs []DataSourceConfigurationWithMetadata, hostName string, routerListenAddr string) ([]pubsub_datasource.Provider, []plan.DataSource, error) { if store == nil { - store = metric.NewNoopEventMetricStore() + store = metric.NewNoopStreamMetricStore() } var pubSubProviders []pubsub_datasource.Provider @@ -116,7 +117,7 @@ func build[P GetID, E GetEngineEventConfiguration]( builder pubsub_datasource.ProviderBuilder[P, E], providersData []P, dsConfs []dsConfAndEvents[E], - store metric.MessagingEventMetricStore, + store metric.StreamMetricStore, ) ([]pubsub_datasource.Provider, []plan.DataSource, error) { var pubSubProviders []pubsub_datasource.Provider var outs []plan.DataSource @@ -138,7 +139,7 @@ func build[P GetID, E GetEngineEventConfiguration]( continue } provider, err := builder.BuildProvider(providerData, pubsub_datasource.ProviderOpts{ - MessagingEventMetricStore: store, + StreamMetricStore: store, }) if err != nil { return nil, nil, err diff --git a/router/pkg/pubsub/pubsub_test.go b/router/pkg/pubsub/pubsub_test.go index b232084e19..2173e46c3d 100644 --- a/router/pkg/pubsub/pubsub_test.go +++ b/router/pkg/pubsub/pubsub_test.go @@ -67,7 +67,7 @@ func TestBuild_OK(t *testing.T) { // ctx, kafkaBuilder, config.Providers.Kafka, kafkaDsConfsWithEvents // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopStreamMetricStore()) // Assertions assert.NoError(t, err) @@ -123,7 +123,7 @@ 
func TestBuild_ProviderError(t *testing.T) { mockBuilder.On("BuildProvider", natsEventSources[0], mock.Anything).Return(nil, errors.New("provider error")) // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopStreamMetricStore()) // Assertions assert.Error(t, err) @@ -178,7 +178,7 @@ func TestBuild_ShouldGetAnErrorIfProviderIsNotDefined(t *testing.T) { mockBuilder.On("TypeID").Return("nats") // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopStreamMetricStore()) // Assertions assert.Error(t, err) @@ -242,7 +242,7 @@ func TestBuild_ShouldNotInitializeProviderIfNotUsed(t *testing.T) { Return(mockPubSubUsedProvider, nil) // Execute the function - providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopEventMetricStore()) + providers, dataSources, err := build(ctx, mockBuilder, natsEventSources, dsConfs, rmetric.NewNoopStreamMetricStore()) // Assertions assert.NoError(t, err) diff --git a/router/pkg/pubsub/redis/adapter.go b/router/pkg/pubsub/redis/adapter.go index e2fffd0661..13f0cbb0e2 100644 --- a/router/pkg/pubsub/redis/adapter.go +++ b/router/pkg/pubsub/redis/adapter.go @@ -36,32 +36,32 @@ func NewProviderAdapter(ctx context.Context, logger *zap.Logger, urls []string, logger = zap.NewNop() } - var store metric.MessagingEventMetricStore - if opts.MessagingEventMetricStore != nil { - store = opts.MessagingEventMetricStore + var store metric.StreamMetricStore + if opts.StreamMetricStore != nil { + store = opts.StreamMetricStore } else { - store = metric.NewNoopEventMetricStore() + store = metric.NewNoopStreamMetricStore() } return &ProviderAdapter{ - ctx: 
ctx, - cancel: cancel, - logger: logger, - urls: urls, - clusterEnabled: clusterEnabled, - messagingEventMetricStore: store, + ctx: ctx, + cancel: cancel, + logger: logger, + urls: urls, + clusterEnabled: clusterEnabled, + streamMetricStore: store, } } type ProviderAdapter struct { - ctx context.Context - cancel context.CancelFunc - conn rd.RDCloser - logger *zap.Logger - closeWg sync.WaitGroup - urls []string - clusterEnabled bool - messagingEventMetricStore metric.MessagingEventMetricStore + ctx context.Context + cancel context.CancelFunc + conn rd.RDCloser + logger *zap.Logger + closeWg sync.WaitGroup + urls []string + clusterEnabled bool + streamMetricStore metric.StreamMetricStore } func (p *ProviderAdapter) Startup(ctx context.Context) error { @@ -127,11 +127,11 @@ func (p *ProviderAdapter) Subscribe(ctx context.Context, event SubscriptionEvent return } log.Debug("subscription update", zap.String("message_channel", msg.Channel), zap.String("data", msg.Payload)) - p.messagingEventMetricStore.Consume(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: redisReceive, - MessagingSystem: metric.ProviderTypeRedis, - DestinationName: msg.Channel, + p.streamMetricStore.Consume(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: redisReceive, + ProviderType: metric.ProviderTypeRedis, + DestinationName: msg.Channel, }) updater.Update([]byte(msg.Payload)) case <-p.ctx.Done(): @@ -171,21 +171,21 @@ func (p *ProviderAdapter) Publish(ctx context.Context, event PublishEventConfigu intCmd := p.conn.Publish(ctx, event.Channel, data) if intCmd.Err() != nil { log.Error("publish error", zap.Error(intCmd.Err())) - p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: redisPublish, - MessagingSystem: metric.ProviderTypeRedis, - ErrorType: "publish_error", - DestinationName: event.Channel, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: 
event.ProviderID, + StreamOperationName: redisPublish, + ProviderType: metric.ProviderTypeRedis, + ErrorType: "publish_error", + DestinationName: event.Channel, }) return datasource.NewError(fmt.Sprintf("error publishing to Redis PubSub channel %s", event.Channel), intCmd.Err()) } - p.messagingEventMetricStore.Produce(ctx, metric.MessagingEvent{ - ProviderId: event.ProviderID, - OperationName: redisPublish, - MessagingSystem: metric.ProviderTypeRedis, - DestinationName: event.Channel, + p.streamMetricStore.Produce(ctx, metric.StreamsEvent{ + ProviderId: event.ProviderID, + StreamOperationName: redisPublish, + ProviderType: metric.ProviderTypeRedis, + DestinationName: event.Channel, }) return nil } From 16675957a0d7e90d985afc3265d17580e72751a3 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 18:34:38 +0530 Subject: [PATCH 37/40] fix: renaming --- router/core/graph_server.go | 2 +- router/core/router.go | 2 +- router/pkg/config/config.schema.json | 4 ++-- router/pkg/config/testdata/config_defaults.json | 4 ++-- router/pkg/config/testdata/config_full.json | 2 +- router/pkg/metric/config.go | 2 +- router/pkg/metric/stream_metric_store.go | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 020ed4fd7a..f79b9f0f92 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -880,7 +880,7 @@ func (s *graphServer) buildGraphMux( } } - if s.metricConfig.OpenTelemetry.Stream || s.metricConfig.Prometheus.Streams { + if s.metricConfig.OpenTelemetry.Streams || s.metricConfig.Prometheus.Streams { store, err := rmetric.NewStreamMetricStore( s.logger, baseMetricAttributes, diff --git a/router/core/router.go b/router/core/router.go index 4bb749fd64..99ce3944c9 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -2240,7 +2240,7 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config { }, Exporters: openTelemetryExporters, CircuitBreaker: 
cfg.Metrics.OTLP.CircuitBreaker, - Stream: cfg.Metrics.OTLP.Streams, + Streams: cfg.Metrics.OTLP.Streams, ExcludeMetrics: cfg.Metrics.OTLP.ExcludeMetrics, ExcludeMetricLabels: cfg.Metrics.OTLP.ExcludeMetricLabels, }, diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 43cb1cb65c..b2fd9e4f26 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -1066,7 +1066,7 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." }, - "stream": { + "streams": { "type": "boolean", "default": false, "description": "Enable the collection of stream metrics. The default value is false." @@ -1176,7 +1176,7 @@ "default": false, "description": "Enable the collection of connection stats. The default value is false." }, - "stream": { + "streams": { "type": "boolean", "default": false, "description": "Enable the collection of stream metrics. The default value is false." diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 3d941ac45e..57ca8d5856 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -40,7 +40,7 @@ "Subscriptions": false }, "CircuitBreaker": false, - "StreamsMetrics": false, + "Streams": false, "ExcludeMetrics": null, "ExcludeMetricLabels": null, "Exporters": null @@ -51,7 +51,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": false, "ConnectionStats": false, - "StreamsMetrics": false, + "Streams": false, "EngineStats": { "Subscriptions": false }, diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index cf32cd50e5..d5190f302e 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -81,7 +81,7 @@ "ListenAddr": "127.0.0.1:8088", "GraphqlCache": true, "ConnectionStats": true, - "StreamsMetrics": false, + 
"Streams": false, "EngineStats": { "Subscriptions": true }, diff --git a/router/pkg/metric/config.go b/router/pkg/metric/config.go index 8b76b609da..7a2c15e620 100644 --- a/router/pkg/metric/config.go +++ b/router/pkg/metric/config.go @@ -80,7 +80,7 @@ type OpenTelemetry struct { ExcludeMetricLabels []*regexp.Regexp // TestReader is used for testing purposes. If set, the reader will be used instead of the configured exporters. TestReader sdkmetric.Reader - Stream bool + Streams bool } func GetDefaultExporter(cfg *Config) *OpenTelemetryExporter { diff --git a/router/pkg/metric/stream_metric_store.go b/router/pkg/metric/stream_metric_store.go index f157816455..2034d2bc70 100644 --- a/router/pkg/metric/stream_metric_store.go +++ b/router/pkg/metric/stream_metric_store.go @@ -56,7 +56,7 @@ type StreamMetrics struct { func NewStreamMetricStore(logger *zap.Logger, baseAttributes []attribute.KeyValue, otelProvider, promProvider *metric.MeterProvider, metricsConfig *Config) (*StreamMetrics, error) { providers := make([]StreamMetricProvider, 0) - if metricsConfig.OpenTelemetry.Stream { + if metricsConfig.OpenTelemetry.Streams { otlpMetrics, err := newOtlpStreamEventMetrics(logger, otelProvider) if err != nil { return nil, fmt.Errorf("failed to create otlp stream event metrics: %w", err) From f4393c0f5aa0b637c3f1d44c570e63fdb26e8fea Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 19:42:47 +0530 Subject: [PATCH 38/40] fix: add router prefix name value --- router-tests/prometheus_stream_metrics_test.go | 14 +++++++------- router-tests/telemetry/stream_metrics_test.go | 14 +++++++------- router/pkg/metric/stream_measurements.go | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/router-tests/prometheus_stream_metrics_test.go b/router-tests/prometheus_stream_metrics_test.go index 3d8db02830..4914fb0c39 100644 --- a/router-tests/prometheus_stream_metrics_test.go +++ b/router-tests/prometheus_stream_metrics_test.go @@ -51,7 +51,7 @@ func 
TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "streams_sent_messages_total") + family := findMetricFamilyByName(mf, "router_streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -124,7 +124,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "streams_received_messages_total") + family := findMetricFamilyByName(mf, "router_streams_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -180,7 +180,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "streams_sent_messages_total") + family := findMetricFamilyByName(mf, "router_streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -229,7 +229,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "streams_sent_messages_total") + family := findMetricFamilyByName(mf, "router_streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -309,7 +309,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "streams_received_messages_total") + family := findMetricFamilyByName(mf, "router_streams_received_messages_total") metrics := family.GetMetric() errLabel := findMetricLabelByName(metrics, "wg_error_type") @@ -366,7 +366,7 @@ func TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "streams_sent_messages_total") + family := findMetricFamilyByName(mf, "router_streams_sent_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) @@ -438,7 +438,7 @@ func 
TestFlakyEventMetrics(t *testing.T) { mf, err := promRegistry.Gather() require.NoError(t, err) - family := findMetricFamilyByName(mf, "streams_received_messages_total") + family := findMetricFamilyByName(mf, "router_streams_received_messages_total") metrics := family.GetMetric() require.Len(t, metrics, 1) diff --git a/router-tests/telemetry/stream_metrics_test.go b/router-tests/telemetry/stream_metrics_test.go index 580131bdc3..136d4a44bd 100644 --- a/router-tests/telemetry/stream_metrics_test.go +++ b/router-tests/telemetry/stream_metrics_test.go @@ -54,7 +54,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "streams.sent.messages") + metricEntry := integration.GetMetricByName(scope, "router.streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -131,7 +131,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "streams.received.messages") + metricEntry := integration.GetMetricByName(scope, "router.streams.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -191,7 +191,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "streams.sent.messages") + metricEntry := integration.GetMetricByName(scope, "router.streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -243,7 +243,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := 
integration.GetMetricByName(scope, "streams.sent.messages") + metricEntry := integration.GetMetricByName(scope, "router.streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -327,7 +327,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "streams.received.messages") + metricEntry := integration.GetMetricByName(scope, "router.streams.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -389,7 +389,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "streams.sent.messages") + metricEntry := integration.GetMetricByName(scope, "router.streams.sent.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) @@ -465,7 +465,7 @@ func TestFlakyEventMetrics(t *testing.T) { scope := integration.GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router.streams") require.NotNil(t, scope) - metricEntry := integration.GetMetricByName(scope, "streams.received.messages") + metricEntry := integration.GetMetricByName(scope, "router.streams.received.messages") require.NotNil(t, metricEntry) sum, _ := metricEntry.Data.(metricdata.Sum[int64]) diff --git a/router/pkg/metric/stream_measurements.go b/router/pkg/metric/stream_measurements.go index db76ae93c2..a5e1dadfb1 100644 --- a/router/pkg/metric/stream_measurements.go +++ b/router/pkg/metric/stream_measurements.go @@ -7,8 +7,8 @@ import ( ) const ( - messagingSentMessages = "streams.sent.messages" - messagingConsumedMessages = "streams.received.messages" + messagingSentMessages = "router.streams.sent.messages" + messagingConsumedMessages = "router.streams.received.messages" ) var ( From 
415a81602f6eca6903ccf110dae2e03e5010a2cc Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Tue, 19 Aug 2025 23:56:22 +0530 Subject: [PATCH 39/40] fix: update description --- router/pkg/config/config.schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index b2fd9e4f26..7f68126968 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -1069,7 +1069,7 @@ "streams": { "type": "boolean", "default": false, - "description": "Enable the collection of stream metrics. The default value is false." + "description": "Enable the collection of stream metrics. This contains metrics related to EDFS. The default value is false." }, "circuit_breaker": { "type": "boolean", @@ -1179,7 +1179,7 @@ "streams": { "type": "boolean", "default": false, - "description": "Enable the collection of stream metrics. The default value is false." + "description": "Enable the collection of stream metrics. This contains metrics related to EDFS. The default value is false." 
}, "circuit_breaker": { "type": "boolean", From 0da26e7d80c1eb0611cc540a65a508f523565a6c Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 20 Aug 2025 00:09:58 +0530 Subject: [PATCH 40/40] fix: renaming --- router-tests/prometheus_stream_metrics_test.go | 14 +++++++------- router-tests/telemetry/stream_metrics_test.go | 14 +++++++------- router-tests/testenv/testenv.go | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/router-tests/prometheus_stream_metrics_test.go b/router-tests/prometheus_stream_metrics_test.go index 4914fb0c39..30fa87fe16 100644 --- a/router-tests/prometheus_stream_metrics_test.go +++ b/router-tests/prometheus_stream_metrics_test.go @@ -41,7 +41,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, MetricOptions: testenv.MetricOptions{ - EnablePrometheusEventMetrics: true, + EnablePrometheusStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { events.EnsureTopicExists(t, xEnv, "employeeUpdated") @@ -88,7 +88,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, MetricOptions: testenv.MetricOptions{ - EnablePrometheusEventMetrics: true, + EnablePrometheusStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { events.EnsureTopicExists(t, xEnv, topic) @@ -167,7 +167,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, MetricOptions: testenv.MetricOptions{ - EnablePrometheusEventMetrics: true, + EnablePrometheusStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) 
{ @@ -215,7 +215,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, MetricOptions: testenv.MetricOptions{ - EnablePrometheusEventMetrics: true, + EnablePrometheusStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { sub, err := xEnv.NatsConnectionMyNats.Subscribe(xEnv.GetPubSubName("getEmployeeMyNats.12"), func(msg *nats.Msg) { _ = msg.Respond([]byte(`{"id": 12, "__typename": "Employee"}`)) }) @@ -264,7 +264,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, ModifyEngineExecutionConfiguration: func(ec *config.EngineExecutionConfiguration) { ec.WebSocketClientReadTimeout = time.Second }, - MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + MetricOptions: testenv.MetricOptions{EnablePrometheusStreamMetrics: true}, }, func(t *testing.T, xEnv *testenv.Environment) { var subscriptionOne struct { employeeUpdated struct { @@ -357,7 +357,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, MetricOptions: testenv.MetricOptions{ - EnablePrometheusEventMetrics: true, + EnablePrometheusStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r1"}) { success } }`}) @@ -401,7 +401,7 @@ func TestFlakyEventMetrics(t *testing.T) { PrometheusRegistry: promRegistry, RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, - MetricOptions: testenv.MetricOptions{EnablePrometheusEventMetrics: true}, + MetricOptions: testenv.MetricOptions{EnablePrometheusStreamMetrics: true}, }, func(t *testing.T, xEnv *testenv.Environment) { topic := "employeeUpdatedMyRedis" diff --git a/router-tests/telemetry/stream_metrics_test.go 
b/router-tests/telemetry/stream_metrics_test.go index 136d4a44bd..72ac7c654f 100644 --- a/router-tests/telemetry/stream_metrics_test.go +++ b/router-tests/telemetry/stream_metrics_test.go @@ -42,7 +42,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, MetricOptions: testenv.MetricOptions{ - EnableOTLPEventMetrics: true, + EnableOTLPStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { events.EnsureTopicExists(t, xEnv, "employeeUpdated") @@ -93,7 +93,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsKafkaJSONTemplate, EnableKafka: true, MetricOptions: testenv.MetricOptions{ - EnableOTLPEventMetrics: true, + EnableOTLPStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { events.EnsureTopicExists(t, xEnv, topic) @@ -176,7 +176,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, MetricOptions: testenv.MetricOptions{ - EnableOTLPEventMetrics: true, + EnableOTLPStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation UpdateEmployeeNats($update: UpdateEmployeeInput!) 
{ @@ -227,7 +227,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, MetricOptions: testenv.MetricOptions{ - EnableOTLPEventMetrics: true, + EnableOTLPStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { sub, err := xEnv.NatsConnectionMyNats.Subscribe(xEnv.GetPubSubName("getEmployeeMyNats.12"), func(msg *nats.Msg) { _ = msg.Respond([]byte(`{"id": 12, "__typename": "Employee"}`)) }) @@ -279,7 +279,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsNatsJSONTemplate, EnableNats: true, ModifyEngineExecutionConfiguration: func(ec *config.EngineExecutionConfiguration) { ec.WebSocketClientReadTimeout = time.Second }, - MetricOptions: testenv.MetricOptions{EnableOTLPEventMetrics: true}, + MetricOptions: testenv.MetricOptions{EnableOTLPStreamMetrics: true}, }, func(t *testing.T, xEnv *testenv.Environment) { var subscriptionOne struct { employeeUpdated struct { @@ -378,7 +378,7 @@ func TestFlakyEventMetrics(t *testing.T) { RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, MetricOptions: testenv.MetricOptions{ - EnableOTLPEventMetrics: true, + EnableOTLPStreamMetrics: true, }, }, func(t *testing.T, xEnv *testenv.Environment) { xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{Query: `mutation { updateEmployeeMyRedis(id: 3, update: {name: "r1"}) { success } }`}) @@ -426,7 +426,7 @@ func TestFlakyEventMetrics(t *testing.T) { MetricReader: metricReader, RouterConfigJSONTemplate: testenv.ConfigWithEdfsRedisJSONTemplate, EnableRedis: true, - MetricOptions: testenv.MetricOptions{EnableOTLPEventMetrics: true}, + MetricOptions: testenv.MetricOptions{EnableOTLPStreamMetrics: true}, }, func(t *testing.T, xEnv *testenv.Environment) { topic := "employeeUpdatedMyRedis" diff --git a/router-tests/testenv/testenv.go b/router-tests/testenv/testenv.go index 0f5e4beb3f..267483d7bc 100644 --- 
a/router-tests/testenv/testenv.go +++ b/router-tests/testenv/testenv.go @@ -270,10 +270,10 @@ type MetricOptions struct { PrometheusSchemaFieldUsage PrometheusSchemaFieldUsage EnableOTLPConnectionMetrics bool EnableOTLPCircuitBreakerMetrics bool - EnableOTLPEventMetrics bool + EnableOTLPStreamMetrics bool EnablePrometheusConnectionMetrics bool EnablePrometheusCircuitBreakerMetrics bool - EnablePrometheusEventMetrics bool + EnablePrometheusStreamMetrics bool } type PrometheusSchemaFieldUsage struct { @@ -1507,7 +1507,7 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node CircuitBreaker: testConfig.MetricOptions.EnablePrometheusCircuitBreakerMetrics, ExcludeMetrics: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetrics, ExcludeMetricLabels: testConfig.MetricOptions.MetricExclusions.ExcludedPrometheusMetricLabels, - Streams: testConfig.MetricOptions.EnablePrometheusEventMetrics, + Streams: testConfig.MetricOptions.EnablePrometheusStreamMetrics, ExcludeScopeInfo: testConfig.MetricOptions.MetricExclusions.ExcludeScopeInfo, PromSchemaFieldUsage: rmetric.PrometheusSchemaFieldUsage{ Enabled: testConfig.MetricOptions.PrometheusSchemaFieldUsage.Enabled, @@ -1530,7 +1530,7 @@ func configureRouter(listenerAddr string, testConfig *Config, routerConfig *node Enabled: true, RouterRuntime: testConfig.MetricOptions.EnableRuntimeMetrics, GraphqlCache: testConfig.MetricOptions.EnableOTLPRouterCache, - Streams: testConfig.MetricOptions.EnableOTLPEventMetrics, + Streams: testConfig.MetricOptions.EnableOTLPStreamMetrics, ConnectionStats: testConfig.MetricOptions.EnableOTLPConnectionMetrics, EngineStats: config.EngineStats{ Subscriptions: testConfig.MetricOptions.OTLPEngineStatsOptions.EnableSubscription,