From e4f076186f9a256c3ac86360a79e33714b0dad1b Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 11 Apr 2023 01:39:41 -0400 Subject: [PATCH 01/50] Client configured with TLS using HCP config and retry/throttle --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 2d08b9447df..abf35472619 100644 --- a/go.sum +++ b/go.sum @@ -510,6 +510,7 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 8bbc289690ec56799a9aa7aa100a44224a17ad5a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:26:56 -0400 Subject: [PATCH 02/50] run go mod tidy --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index abf35472619..2d08b9447df 100644 --- a/go.sum +++ b/go.sum @@ -510,7 +510,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From f6182b4cc3cd25372ca19e07ae0b0a379114b32a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 01:17:14 -0400 Subject: [PATCH 03/50] Remove one abstraction to use the config from deps --- agent/hcp/client/metrics_client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index 15bd71097f7..ba50f28626e 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,7 +129,6 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } - defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From 009f08e75fb7ab30f19be048be8b26956134c128 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 13:47:35 -0400 Subject: [PATCH 04/50] Address PR feedback --- agent/hcp/client/metrics_client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index ba50f28626e..15bd71097f7 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,6 +129,7 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } + defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From 41ba7ee52c636bfc4ea18e0e1c1791f756d92e1a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 11 Apr 2023 01:39:41 -0400 Subject: [PATCH 05/50] Client configured with TLS using HCP config and retry/throttle --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 2d08b9447df..abf35472619 100644 --- a/go.sum +++ b/go.sum @@ -510,6 +510,7 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 8fd34e463c83c064f63997d2713d767b1fa5f9fd Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:26:56 -0400 Subject: [PATCH 06/50] run go mod tidy --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index abf35472619..2d08b9447df 100644 --- a/go.sum +++ b/go.sum @@ -510,7 +510,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 894cef471487797fe2e6948e0b2bd5cfec992702 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 24 Apr 2023 10:57:59 -0400 Subject: [PATCH 07/50] Create new OTELExporter which uses the MetricsClient Add transform because the conversion is in an /internal package --- agent/hcp/telemetry/otel_exporter.go | 53 +++++ agent/hcp/telemetry/otel_exporter_test.go | 133 +++++++++++ agent/hcp/telemetry/otlp_transform.go | 168 ++++++++++++++ agent/hcp/telemetry/otlp_transform_test.go | 258 +++++++++++++++++++++ go.mod | 11 +- go.sum | 22 +- 6 files changed, 640 insertions(+), 5 deletions(-) create mode 100644 agent/hcp/telemetry/otel_exporter.go create mode 100644 agent/hcp/telemetry/otel_exporter_test.go create mode 100644 agent/hcp/telemetry/otlp_transform.go create mode 100644 agent/hcp/telemetry/otlp_transform_test.go diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go new file mode 100644 index 00000000000..365c316717b --- /dev/null +++ b/agent/hcp/telemetry/otel_exporter.go @@ -0,0 +1,53 @@ +package telemetry + +import ( + "context" + + hcpclient "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/go-multierror" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/aggregation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +// OTELExporter is a custom implementation of a OTEL Metrics SDK metrics.Exporter. +// The exporter is used by a OTEL Metrics SDK PeriodicReader to export aggregated metrics. +// This allows us to use a custom client - HCP authenticated MetricsClient. +type OTELExporter struct { + client hcpclient.MetricsClient + endpoint string +} + +// Temporality returns the Cumulative temporality for metrics aggregation. +// Telemetry Gateway stores metrics in Prometheus format, so use Cummulative aggregation as default. +func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporality { + return metricdata.CumulativeTemporality +} + +// Aggregation returns the Aggregation to use for an instrument kind. +func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggregation { + switch kind { + case metric.InstrumentKindObservableGauge: + return aggregation.LastValue{} + case metric.InstrumentKindHistogram: + return aggregation.ExplicitBucketHistogram{ + Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, + NoMinMax: false, + } + } + // for metric.InstrumentKindCounter and others, default to sum. + return aggregation.Sum{} +} + +// Export serializes and transmits metric data to a receiver. +func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { + otlpMetrics, merr := transformOTLP(metrics) + err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) + return multierror.Append(merr, err) +} + +// ForceFlush does nothing, as the MetricsClient client holds no state. +func (e *OTELExporter) ForceFlush(ctx context.Context) error { return ctx.Err() } + +// Shutdown does nothing, as the MetricsClient is a HTTP client that requires no graceful shutdown. +func (e *OTELExporter) Shutdown(ctx context.Context) error { return ctx.Err() } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go new file mode 100644 index 00000000000..07e959593a4 --- /dev/null +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -0,0 +1,133 @@ +package telemetry + +import ( + "context" + "fmt" + "testing" + + "github.com/hashicorp/consul/agent/hcp/client" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/aggregation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" + metricpb "go.opentelemetry.io/proto/otlp/metrics/v1" +) + +func TestTemporality(t *testing.T) { + exp := &OTELExporter{} + require.Equal(t, metricdata.CumulativeTemporality, exp.Temporality(metric.InstrumentKindCounter)) +} + +func TestAggregation(t *testing.T) { + for name, test := range map[string]struct { + kind metric.InstrumentKind + expAgg aggregation.Aggregation + }{ + "gauge": { + kind: metric.InstrumentKindObservableGauge, + expAgg: aggregation.LastValue{}, + }, + "counter": { + kind: metric.InstrumentKindCounter, + expAgg: aggregation.Sum{}, + }, + "histogram": { + kind: metric.InstrumentKindHistogram, + expAgg: aggregation.ExplicitBucketHistogram{Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, NoMinMax: false}, + }, + } { + t.Run(name, func(t *testing.T) { + exp := &OTELExporter{} + require.Equal(t, test.expAgg, exp.Aggregation(test.kind)) + }) + } +} + +type mockErrMetricsClient struct{} + +func (m *mockErrMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { + return fmt.Errorf("failed to export metrics") +} + +type mockMetricsClient struct{} + +func (m *mockMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { + return nil +} + +func TestExport(t *testing.T) { + for name, test := range map[string]struct { + wantErr string + metrics *metricdata.ResourceMetrics + client client.MetricsClient + }{ + "errorWithExportFailure": { + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + }, + wantErr: "failed to export metrics", + }, + "errorWithTransformFailure": { + wantErr: "unknown aggregation: metricdata.Gauge[int64]", + client: &mockMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + // unsupported, only float64 supported + Data: metricdata.Gauge[int64]{}, + }, + }, + }, + }, + }, + }, + "multierrorTransformExportFailure": { + wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + // unsupported, only float64 supported + Data: metricdata.Gauge[int64]{}, + }, + }, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + exp := &OTELExporter{ + client: test.client, + } + + err := exp.Export(context.Background(), test.metrics) + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + }) + } +} + +func TestForceFlush(t *testing.T) { + exp := &OTELExporter{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + require.Error(t, exp.ForceFlush(ctx)) +} + +func TestShutdown(t *testing.T) { + exp := &OTELExporter{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + require.Error(t, exp.Shutdown(ctx)) +} diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go new file mode 100644 index 00000000000..4a81bf96706 --- /dev/null +++ b/agent/hcp/telemetry/otlp_transform.go @@ -0,0 +1,168 @@ +package telemetry + +import ( + "fmt" + + "github.com/hashicorp/go-multierror" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + cpb "go.opentelemetry.io/proto/otlp/common/v1" + mpb "go.opentelemetry.io/proto/otlp/metrics/v1" + rpb "go.opentelemetry.io/proto/otlp/resource/v1" +) + +// TransformOTLP returns an OTLP ResourceMetrics generated from OTEL metrics. If rm +// contains invalid ScopeMetrics, an error will be returned along with an OTLP +// ResourceMetrics that contains partial OTLP ScopeMetrics. +func transformOTLP(rm *metricdata.ResourceMetrics) (*mpb.ResourceMetrics, error) { + sms, err := scopeMetrics(rm.ScopeMetrics) + return &mpb.ResourceMetrics{ + Resource: &rpb.Resource{ + Attributes: attributes(rm.Resource.Iter()), + }, + ScopeMetrics: sms, + }, err +} + +// scopeMetrics returns a slice of OTLP ScopeMetrics. +func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) { + var merr *multierror.Error + out := make([]*mpb.ScopeMetrics, 0, len(scopeMetrics)) + for _, sm := range scopeMetrics { + ms, err := metrics(sm.Metrics) + if err != nil { + merr = multierror.Append(merr, err) + } + + out = append(out, &mpb.ScopeMetrics{ + Scope: &cpb.InstrumentationScope{ + Name: sm.Scope.Name, + Version: sm.Scope.Version, + }, + Metrics: ms, + }) + } + return out, merr +} + +// metrics returns a slice of OTLP Metric generated from OTEL metrics sdk ones. +func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { + var merr *multierror.Error + out := make([]*mpb.Metric, 0, len(metrics)) + for _, m := range metrics { + o, err := metricType(m) + if err != nil { + merr = multierror.Append(merr, err) + continue + } + out = append(out, o) + } + return out, merr +} + +// metricType identifies the instrument type and converts it to OTLP format. +// only float64 values are accepted since the go metrics sink only receives float64 values. +func metricType(m metricdata.Metrics) (*mpb.Metric, error) { + var err error + out := &mpb.Metric{ + Name: m.Name, + Description: m.Description, + Unit: string(m.Unit), + } + switch a := m.Data.(type) { + case metricdata.Gauge[float64]: + out.Data = &mpb.Metric_Gauge{ + Gauge: &mpb.Gauge{ + DataPoints: dataPoints(a.DataPoints), + }, + } + case metricdata.Sum[float64]: + if a.Temporality != metricdata.CumulativeTemporality { + return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + } + out.Data = &mpb.Metric_Sum{ + Sum: &mpb.Sum{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + IsMonotonic: a.IsMonotonic, + DataPoints: dataPoints(a.DataPoints), + }, + } + case metricdata.Histogram[float64]: + if a.Temporality != metricdata.CumulativeTemporality { + return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + } + out.Data = &mpb.Metric_Histogram{ + Histogram: &mpb.Histogram{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + DataPoints: histogramDataPoints(a.DataPoints), + }, + } + default: + return out, fmt.Errorf("%s: %T", "unknown aggregation", a) + } + return out, err +} + +// DataPoints returns a slice of OTLP NumberDataPoint generated from OTEL metrics sdk ones. +func dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoint { + out := make([]*mpb.NumberDataPoint, 0, len(dataPoints)) + for _, dp := range dataPoints { + ndp := &mpb.NumberDataPoint{ + Attributes: attributes(dp.Attributes.Iter()), + StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), + TimeUnixNano: uint64(dp.Time.UnixNano()), + } + + ndp.Value = &mpb.NumberDataPoint_AsDouble{ + AsDouble: dp.Value, + } + out = append(out, ndp) + } + return out +} + +// HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. +func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint[float64]) []*mpb.HistogramDataPoint { + out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) + for _, dp := range dataPoints { + sum := dp.Sum + hdp := &mpb.HistogramDataPoint{ + Attributes: attributes(dp.Attributes.Iter()), + StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), + TimeUnixNano: uint64(dp.Time.UnixNano()), + Count: dp.Count, + Sum: &sum, + BucketCounts: dp.BucketCounts, + ExplicitBounds: dp.Bounds, + } + if v, ok := dp.Min.Value(); ok { + hdp.Min = &v + } + if v, ok := dp.Max.Value(); ok { + hdp.Max = &v + } + out = append(out, hdp) + } + return out +} + +// attributes transforms items of an attribute iterator into OTLP key-values. +// Currently, labels are only key-value pairs. +func attributes(iter attribute.Iterator) []*cpb.KeyValue { + l := iter.Len() + if iter.Len() == 0 { + return nil + } + + out := make([]*cpb.KeyValue, 0, l) + for iter.Next() { + kv := iter.Attribute() + av := &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{ + StringValue: kv.Value.AsString(), + }, + } + out = append(out, &cpb.KeyValue{Key: string(kv.Key), Value: av}) + } + return out +} diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go new file mode 100644 index 00000000000..6afc030f984 --- /dev/null +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -0,0 +1,258 @@ +package telemetry + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/instrumentation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.17.0" + cpb "go.opentelemetry.io/proto/otlp/common/v1" + mpb "go.opentelemetry.io/proto/otlp/metrics/v1" + rpb "go.opentelemetry.io/proto/otlp/resource/v1" +) + +var ( + + // Common attributes for test cases. + start = time.Date(2000, time.January, 01, 0, 0, 0, 0, time.FixedZone("GMT", 0)) + end = start.Add(30 * time.Second) + + alice = attribute.NewSet(attribute.String("user", "alice")) + bob = attribute.NewSet(attribute.String("user", "bob")) + + pbAlice = &cpb.KeyValue{Key: "user", Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "alice"}, + }} + pbBob = &cpb.KeyValue{Key: "user", Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "bob"}, + }} + + // DataPoint test case : Histogram Datapoints (Histogram) + minA, maxA, sumA = 2.0, 4.0, 90.0 + minB, maxB, sumB = 4.0, 150.0, 234.0 + otelHDP = []metricdata.HistogramDataPoint[float64]{{ + Attributes: alice, + StartTime: start, + Time: end, + Count: 30, + Bounds: []float64{1, 5}, + BucketCounts: []uint64{0, 30, 0}, + Min: metricdata.NewExtrema(minA), + Max: metricdata.NewExtrema(maxA), + Sum: sumA, + }, { + Attributes: bob, + StartTime: start, + Time: end, + Count: 3, + Bounds: []float64{1, 5}, + BucketCounts: []uint64{0, 1, 2}, + Min: metricdata.NewExtrema(minB), + Max: metricdata.NewExtrema(maxB), + Sum: sumB, + }} + + otlpHDP = []*mpb.HistogramDataPoint{{ + Attributes: []*cpb.KeyValue{pbAlice}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Count: 30, + Sum: &sumA, + ExplicitBounds: []float64{1, 5}, + BucketCounts: []uint64{0, 30, 0}, + Min: &minA, + Max: &maxA, + }, { + Attributes: []*cpb.KeyValue{pbBob}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Count: 3, + Sum: &sumB, + ExplicitBounds: []float64{1, 5}, + BucketCounts: []uint64{0, 1, 2}, + Min: &minB, + Max: &maxB, + }} + // DataPoint test case : Number Datapoints (Gauge / Counter) + otelDP = []metricdata.DataPoint[float64]{ + {Attributes: alice, StartTime: start, Time: end, Value: 1.0}, + {Attributes: bob, StartTime: start, Time: end, Value: 2.0}, + } + + otlpDP = []*mpb.NumberDataPoint{ + { + Attributes: []*cpb.KeyValue{pbAlice}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Value: &mpb.NumberDataPoint_AsDouble{AsDouble: 1.0}, + }, + { + Attributes: []*cpb.KeyValue{pbBob}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Value: &mpb.NumberDataPoint_AsDouble{AsDouble: 2.0}, + }, + } + + // Metrics Test Case + // - 3 invalid metrics and 3 Valid to test filtering + // - 1 invalid metric type + // - 2 invalid cummulative temporalities (only cummulative supported) + // - 3 types (Gauge, Counter, and Histogram) supported + otelMetrics = []metricdata.Metrics{ + { + Name: "float64-gauge", + Description: "Gauge with float64 values", + Unit: "1", + Data: metricdata.Gauge[float64]{DataPoints: otelDP}, + }, + { + Name: "float64-sum", + Description: "Sum with float64 values", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: false, + DataPoints: otelDP, + }, + }, + { + Name: "float64-histogram", + Description: "Histogram", + Unit: "1", + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: otelHDP, + }, + }, + { + Name: "invalid-sum", + Description: "Sum with invalid temporality", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.DeltaTemporality, + IsMonotonic: false, + DataPoints: otelDP, + }, + }, + { + Name: "invalid-histogram", + Description: "Invalid histogram", + Unit: "1", + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.DeltaTemporality, + DataPoints: otelHDP, + }, + }, + { + Name: "unknown", + Description: "Unknown aggregation", + Unit: "1", + Data: metricdata.Histogram[int64]{}, + }, + } + + otlpMetrics = []*mpb.Metric{ + { + Name: "float64-gauge", + Description: "Gauge with float64 values", + Unit: "1", + Data: &mpb.Metric_Gauge{Gauge: &mpb.Gauge{DataPoints: otlpDP}}, + }, + { + Name: "float64-sum", + Description: "Sum with float64 values", + Unit: "1", + Data: &mpb.Metric_Sum{Sum: &mpb.Sum{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + IsMonotonic: false, + DataPoints: otlpDP, + }}, + }, + { + Name: "float64-histogram", + Description: "Histogram", + Unit: "1", + Data: &mpb.Metric_Histogram{Histogram: &mpb.Histogram{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + DataPoints: otlpHDP, + }}, + }, + } + + // ScopeMetrics Test Cases + otelScopeMetrics = []metricdata.ScopeMetrics{{ + Scope: instrumentation.Scope{ + Name: "test/code/path", + Version: "v0.1.0", + }, + Metrics: otelMetrics, + }} + + otlpScopeMetrics = []*mpb.ScopeMetrics{{ + Scope: &cpb.InstrumentationScope{ + Name: "test/code/path", + Version: "v0.1.0", + }, + Metrics: otlpMetrics, + }} + + // ResourceMetrics Test Cases + otelResourceMetrics = &metricdata.ResourceMetrics{ + Resource: resource.NewSchemaless( + semconv.ServiceName("test server"), + semconv.ServiceVersion("v0.1.0"), + ), + ScopeMetrics: otelScopeMetrics, + } + + otlpResourceMetrics = &mpb.ResourceMetrics{ + Resource: &rpb.Resource{ + Attributes: []*cpb.KeyValue{ + { + Key: "service.name", + Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "test server"}, + }, + }, + { + Key: "service.version", + Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "v0.1.0"}, + }, + }, + }, + }, + ScopeMetrics: otlpScopeMetrics, + } +) + +// TestTransformOTLP runs tests from the "bottom-up" of the metricdata data types. +func TestTransformOTLP(t *testing.T) { + // Histogram DataPoint Test Case (Histograms) + assert.Equal(t, otlpHDP, histogramDataPoints(otelHDP)) + + // Number DataPoint Test Case (Counters / Gauges) + require.Equal(t, otlpDP, dataPoints(otelDP)) + + // Metrics Test Case + m, err := metrics(otelMetrics) + require.Equal(t, otlpMetrics, m) + require.Equal(t, len(otlpMetrics), 3) + require.Contains(t, err.Error(), "3 errors occurred") + + // Scope Metrics Test Case + sm, err := scopeMetrics(otelScopeMetrics) + require.Equal(t, otlpScopeMetrics, sm) + require.Contains(t, err.Error(), "3 errors occurred") + + // Resource Metrics Test Case + rm, err := transformOTLP(otelResourceMetrics) + require.Equal(t, otlpResourceMetrics, rm) + require.Contains(t, err.Error(), "3 errors occurred") +} diff --git a/go.mod b/go.mod index b3f1a2faa35..81b82cbb527 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/go-openapi/runtime v0.24.1 github.com/go-openapi/strfmt v0.21.3 github.com/golang/protobuf v1.5.2 - github.com/google/go-cmp v0.5.8 + github.com/google/go-cmp v0.5.9 github.com/google/gofuzz v1.2.0 github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 github.com/google/tcpproxy v0.0.0-20180808230851-dfa16c61dad2 @@ -96,13 +96,16 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 + go.opentelemetry.io/otel v1.15.0-rc.2 + go.opentelemetry.io/otel/sdk v1.15.0-rc.2 + go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.5.0 + golang.org/x/sys v0.6.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -151,6 +154,8 @@ require ( github.com/dimchansky/utfbom v1.1.0 // indirect github.com/envoyproxy/protoc-gen-validate v0.1.0 // indirect github.com/form3tech-oss/jwt-go v3.2.2+incompatible // indirect + github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/analysis v0.21.2 // indirect github.com/go-openapi/errors v0.20.2 // indirect @@ -227,6 +232,8 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect + go.opentelemetry.io/otel/metric v1.15.0-rc.2 // indirect + go.opentelemetry.io/otel/trace v1.15.0-rc.2 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 2d08b9447df..78d33e0f5a7 100644 --- a/go.sum +++ b/go.sum @@ -313,6 +313,11 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/analysis v0.21.2 h1:hXFrOYFHUAMQdu6zwAiKKJHJQ8kqZs1ux/ru1P1wLJU= @@ -447,8 +452,9 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-querystring v0.0.0-20170111101155-53e6ce116135/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= @@ -1074,6 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/otel v1.15.0-rc.2 h1:ujPMbp/CAdjMTF9E93fr6X2W7Ub2MulkQAmXcTaNkJI= +go.opentelemetry.io/otel v1.15.0-rc.2/go.mod h1:AMBt1VNvSP4KmoaUOQcTWUVj6Te4ZRmD/v3tu0vfytI= +go.opentelemetry.io/otel/metric v1.15.0-rc.2 h1:MA7asjRHRgVdbvqYLQozHkxViPFZJ21qzi9IOKZVTnQ= +go.opentelemetry.io/otel/metric v1.15.0-rc.2/go.mod h1:/e8iKMaajFDmOXPoQsFrtr+B3XADzJS+HHePsQh2gGs= +go.opentelemetry.io/otel/sdk v1.15.0-rc.2 h1:4t4aauPUWLswNEUXAi5s3X94/kg7+NQY+6CeR9F6nEk= +go.opentelemetry.io/otel/sdk v1.15.0-rc.2/go.mod h1:/dv7lLx8SqyUgUxQeFeUUGvHbl45K4dssbaYjj/ObTQ= +go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2 h1:0hZK6hT73+T5BNuFW6AsBwfARsgeThwAQ7JagJ1hbYE= +go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2/go.mod h1:GUyzM862531kqKgJkhS/LIeekJ652QyjN94dzN1zmgQ= +go.opentelemetry.io/otel/trace v1.15.0-rc.2 h1:LizBnU5zjqWkHUlZu0tC2k7qICXqPCJqoDlWdc07uw8= +go.opentelemetry.io/otel/trace v1.15.0-rc.2/go.mod h1:JBu+wEp/Ra+YOzZlvPjE6raGRlrUJE/R8aW7Wucsn/o= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1338,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From b50057cd7d53f2cc45788df8a802c5f949994cf9 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:32:40 -0400 Subject: [PATCH 08/50] Fix lint error --- agent/hcp/telemetry/otlp_transform.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 4a81bf96706..777911bdda9 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -26,7 +26,7 @@ func transformOTLP(rm *metricdata.ResourceMetrics) (*mpb.ResourceMetrics, error) // scopeMetrics returns a slice of OTLP ScopeMetrics. func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) { - var merr *multierror.Error + var merr error out := make([]*mpb.ScopeMetrics, 0, len(scopeMetrics)) for _, sm := range scopeMetrics { ms, err := metrics(sm.Metrics) @@ -47,7 +47,7 @@ func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, // metrics returns a slice of OTLP Metric generated from OTEL metrics sdk ones. func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { - var merr *multierror.Error + var merr error out := make([]*mpb.Metric, 0, len(metrics)) for _, m := range metrics { o, err := metricType(m) @@ -63,11 +63,10 @@ func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { // metricType identifies the instrument type and converts it to OTLP format. // only float64 values are accepted since the go metrics sink only receives float64 values. func metricType(m metricdata.Metrics) (*mpb.Metric, error) { - var err error out := &mpb.Metric{ Name: m.Name, Description: m.Description, - Unit: string(m.Unit), + Unit: m.Unit, } switch a := m.Data.(type) { case metricdata.Gauge[float64]: @@ -100,7 +99,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { default: return out, fmt.Errorf("%s: %T", "unknown aggregation", a) } - return out, err + return out, nil } // DataPoints returns a slice of OTLP NumberDataPoint generated from OTEL metrics sdk ones. From 0c01542a6429de978f8b129872a52f0d17ef0e4e Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 17:55:06 -0400 Subject: [PATCH 09/50] early return when there are no metrics --- agent/hcp/telemetry/otel_exporter.go | 4 +++ agent/hcp/telemetry/otel_exporter_test.go | 34 +++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 365c316717b..2845a1ec3eb 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -41,6 +41,10 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre // Export serializes and transmits metric data to a receiver. func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { + if len(metrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 { + return nil + } + otlpMetrics, merr := transformOTLP(metrics) err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) return multierror.Append(merr, err) diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 07e959593a4..38cf35c0a62 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -62,10 +62,35 @@ func TestExport(t *testing.T) { metrics *metricdata.ResourceMetrics client client.MetricsClient }{ + "earlyReturnWithoutScopeMetrics": { + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{ + {Metrics: []metricdata.Metrics{}}, + }, + }, + }, + "earlyReturnWithoutMetrics": { + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{}, + }, + }, "errorWithExportFailure": { client: &mockErrMetricsClient{}, metrics: &metricdata.ResourceMetrics{ Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "consul.raft.commitTime", + }, + }, + }, + }, }, wantErr: "failed to export metrics", }, @@ -110,8 +135,13 @@ func TestExport(t *testing.T) { } err := exp.Export(context.Background(), test.metrics) - require.Error(t, err) - require.Contains(t, err.Error(), test.wantErr) + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.NoError(t, err) }) } } From da20fe319a80560ff5c234d4b92eb316530177db Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 14:25:32 -0400 Subject: [PATCH 10/50] Add NewOTELExporter() function --- agent/hcp/telemetry/otel_exporter.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 2845a1ec3eb..34f828c72bf 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -18,6 +18,14 @@ type OTELExporter struct { endpoint string } +// NewOTELExporter returns a configured OTELExporter +func NewOTELExporter(client hcpclient.MetricsClient, endpoint string) *OTELExporter { + return &OTELExporter{ + client: client, + endpoint: endpoint, + } +} + // Temporality returns the Cumulative temporality for metrics aggregation. // Telemetry Gateway stores metrics in Prometheus format, so use Cummulative aggregation as default. func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporality { From 749b1c83a390c83cd129eeef76cb9fa330f4bd1a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:22:27 -0400 Subject: [PATCH 11/50] Downgrade to metrics SDK version: v1.15.0-rc.1 --- agent/hcp/telemetry/otel_exporter.go | 4 ++-- agent/hcp/telemetry/otel_exporter_test.go | 12 ++++++------ agent/hcp/telemetry/otlp_transform.go | 4 ++-- agent/hcp/telemetry/otlp_transform_test.go | 8 ++++---- go.mod | 10 +++++----- go.sum | 20 ++++++++++---------- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 34f828c72bf..b857cacfccf 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -48,12 +48,12 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre } // Export serializes and transmits metric data to a receiver. -func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { +func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { if len(metrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 { return nil } - otlpMetrics, merr := transformOTLP(metrics) + otlpMetrics, merr := transformOTLP(&metrics) err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) return multierror.Append(merr, err) } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 38cf35c0a62..89943f09c6b 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -59,12 +59,12 @@ func (m *mockMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *met func TestExport(t *testing.T) { for name, test := range map[string]struct { wantErr string - metrics *metricdata.ResourceMetrics + metrics metricdata.ResourceMetrics client client.MetricsClient }{ "earlyReturnWithoutScopeMetrics": { client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ {Metrics: []metricdata.Metrics{}}, @@ -73,14 +73,14 @@ func TestExport(t *testing.T) { }, "earlyReturnWithoutMetrics": { client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{}, }, }, "errorWithExportFailure": { client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ { @@ -97,7 +97,7 @@ func TestExport(t *testing.T) { "errorWithTransformFailure": { wantErr: "unknown aggregation: metricdata.Gauge[int64]", client: &mockMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ { @@ -114,7 +114,7 @@ func TestExport(t *testing.T) { "multierrorTransformExportFailure": { wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ { diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 777911bdda9..4ea3ab52907 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -86,7 +86,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { DataPoints: dataPoints(a.DataPoints), }, } - case metricdata.Histogram[float64]: + case metricdata.Histogram: if a.Temporality != metricdata.CumulativeTemporality { return out, fmt.Errorf("%s: %T", "unsupported temporality", a) } @@ -121,7 +121,7 @@ func dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoi } // HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. -func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint[float64]) []*mpb.HistogramDataPoint { +func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { sum := dp.Sum diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 6afc030f984..0b26cc0e6a6 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -35,7 +35,7 @@ var ( // DataPoint test case : Histogram Datapoints (Histogram) minA, maxA, sumA = 2.0, 4.0, 90.0 minB, maxB, sumB = 4.0, 150.0, 234.0 - otelHDP = []metricdata.HistogramDataPoint[float64]{{ + otelHDP = []metricdata.HistogramDataPoint{{ Attributes: alice, StartTime: start, Time: end, @@ -125,7 +125,7 @@ var ( Name: "float64-histogram", Description: "Histogram", Unit: "1", - Data: metricdata.Histogram[float64]{ + Data: metricdata.Histogram{ Temporality: metricdata.CumulativeTemporality, DataPoints: otelHDP, }, @@ -144,7 +144,7 @@ var ( Name: "invalid-histogram", Description: "Invalid histogram", Unit: "1", - Data: metricdata.Histogram[float64]{ + Data: metricdata.Histogram{ Temporality: metricdata.DeltaTemporality, DataPoints: otelHDP, }, @@ -153,7 +153,7 @@ var ( Name: "unknown", Description: "Unknown aggregation", Unit: "1", - Data: metricdata.Histogram[int64]{}, + Data: metricdata.Sum[int64]{}, }, } diff --git a/go.mod b/go.mod index 81b82cbb527..02af412dff9 100644 --- a/go.mod +++ b/go.mod @@ -96,9 +96,9 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 - go.opentelemetry.io/otel v1.15.0-rc.2 - go.opentelemetry.io/otel/sdk v1.15.0-rc.2 - go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2 + go.opentelemetry.io/otel v1.15.0-rc.1 + go.opentelemetry.io/otel/sdk v1.15.0-rc.1 + go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d @@ -232,8 +232,8 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/metric v1.15.0-rc.2 // indirect - go.opentelemetry.io/otel/trace v1.15.0-rc.2 // indirect + go.opentelemetry.io/otel/metric v1.15.0-rc.1 // indirect + go.opentelemetry.io/otel/trace v1.15.0-rc.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 78d33e0f5a7..485bcb486b9 100644 --- a/go.sum +++ b/go.sum @@ -1080,16 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.15.0-rc.2 h1:ujPMbp/CAdjMTF9E93fr6X2W7Ub2MulkQAmXcTaNkJI= -go.opentelemetry.io/otel v1.15.0-rc.2/go.mod h1:AMBt1VNvSP4KmoaUOQcTWUVj6Te4ZRmD/v3tu0vfytI= -go.opentelemetry.io/otel/metric v1.15.0-rc.2 h1:MA7asjRHRgVdbvqYLQozHkxViPFZJ21qzi9IOKZVTnQ= -go.opentelemetry.io/otel/metric v1.15.0-rc.2/go.mod h1:/e8iKMaajFDmOXPoQsFrtr+B3XADzJS+HHePsQh2gGs= -go.opentelemetry.io/otel/sdk v1.15.0-rc.2 h1:4t4aauPUWLswNEUXAi5s3X94/kg7+NQY+6CeR9F6nEk= -go.opentelemetry.io/otel/sdk v1.15.0-rc.2/go.mod h1:/dv7lLx8SqyUgUxQeFeUUGvHbl45K4dssbaYjj/ObTQ= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2 h1:0hZK6hT73+T5BNuFW6AsBwfARsgeThwAQ7JagJ1hbYE= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2/go.mod h1:GUyzM862531kqKgJkhS/LIeekJ652QyjN94dzN1zmgQ= -go.opentelemetry.io/otel/trace v1.15.0-rc.2 h1:LizBnU5zjqWkHUlZu0tC2k7qICXqPCJqoDlWdc07uw8= -go.opentelemetry.io/otel/trace v1.15.0-rc.2/go.mod h1:JBu+wEp/Ra+YOzZlvPjE6raGRlrUJE/R8aW7Wucsn/o= +go.opentelemetry.io/otel v1.15.0-rc.1 h1:KgZyVIfe3rPjWZHAZE0A9sH5U4tjyh1VeP+BFIgq944= +go.opentelemetry.io/otel v1.15.0-rc.1/go.mod h1:IZXh/uN07z/0si8lWvFW2FkwzAmSGE4DhF4quJIsLnY= +go.opentelemetry.io/otel/metric v1.15.0-rc.1 h1:ueivGgoyP2c58JZvmJriF35k238mVyRtlODD6BRgowU= +go.opentelemetry.io/otel/metric v1.15.0-rc.1/go.mod h1:bpPBxLwoWWmiK+Hmb6ZaG0zDLIi59lK7M+GjgZ5PN+4= +go.opentelemetry.io/otel/sdk v1.15.0-rc.1 h1:WtWiH5l19vwpdGIx9/Wou9l7a/butFoAOuJktWnlOro= +go.opentelemetry.io/otel/sdk v1.15.0-rc.1/go.mod h1:93NwQ8NqCb/QSUF7URdJur5Fvfm9rztE+2eJLpsKdWc= +go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 h1:PYmYfBlAZeg4y4VNjrKY24yrD2Jzb47x7Dz6rAI9lXg= +go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1/go.mod h1:LH4ApPwmFOe8mvPS7a56gW4WT9IhtwuEA/mN8FsVDX0= +go.opentelemetry.io/otel/trace v1.15.0-rc.1 h1:xK6jLm8h2KFhdItNvzAuNvnoWjRPU9u7whXNNBMxjtc= +go.opentelemetry.io/otel/trace v1.15.0-rc.1/go.mod h1:2cLx8hBNS4rUWB+JA9PuCGggQl+KJioCaoV2CKewY4s= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= From 383b366050b13b8dce4175da3d75044f2ca92e48 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:38:40 -0400 Subject: [PATCH 12/50] Fix imports --- agent/hcp/telemetry/otel_exporter.go | 1 + agent/hcp/telemetry/otel_exporter_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index b857cacfccf..35be58ed791 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -4,6 +4,7 @@ import ( "context" hcpclient "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 89943f09c6b..cff7802a3a5 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/hashicorp/consul/agent/hcp/client" + "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" From 1d222b144e167c1d232d9294cb415d48914ccef1 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 9 May 2023 17:20:32 -0400 Subject: [PATCH 13/50] fix small nits with comments and url.URL --- agent/hcp/telemetry/doc.go | 12 ++++++++++++ agent/hcp/telemetry/otel_exporter.go | 23 +++++++++++++---------- agent/hcp/telemetry/otel_exporter_test.go | 4 ++-- 3 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 agent/hcp/telemetry/doc.go diff --git a/agent/hcp/telemetry/doc.go b/agent/hcp/telemetry/doc.go new file mode 100644 index 00000000000..4ef18f39bd3 --- /dev/null +++ b/agent/hcp/telemetry/doc.go @@ -0,0 +1,12 @@ +// Package telemetry implements functionality to collect, aggregate, convert and export +// telemetry data in OpenTelemetry Protocol (OTLP) format. +// +// The entrypoint is the OpenTelemetry (OTEL) go-metrics sink which: +// - Receives metric data. +// - Aggregates metric data using the OTEL Go Metrics SDK. +// - Exports metric data using a configurable OTEL exporter. +// +// The package also provides an OTEL exporter implementation to be used within the sink, which: +// - Transforms metric data from the Metrics SDK OTEL representation to OTLP format. +// - Exports OTLP metric data to an external endpoint using a configurable client. +package telemetry diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 35be58ed791..b0bd81e585a 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -2,28 +2,29 @@ package telemetry import ( "context" - - hcpclient "github.com/hashicorp/consul/agent/hcp/client" + "net/url" "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" "go.opentelemetry.io/otel/sdk/metric/metricdata" + + hcpclient "github.com/hashicorp/consul/agent/hcp/client" ) // OTELExporter is a custom implementation of a OTEL Metrics SDK metrics.Exporter. // The exporter is used by a OTEL Metrics SDK PeriodicReader to export aggregated metrics. // This allows us to use a custom client - HCP authenticated MetricsClient. type OTELExporter struct { - client hcpclient.MetricsClient - endpoint string + client hcpclient.MetricsClient + url url.URL } // NewOTELExporter returns a configured OTELExporter -func NewOTELExporter(client hcpclient.MetricsClient, endpoint string) *OTELExporter { +func NewOTELExporter(client hcpclient.MetricsClient, url url.URL) *OTELExporter { return &OTELExporter{ - client: client, - endpoint: endpoint, + client: client, + url: url, } } @@ -34,6 +35,8 @@ func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporali } // Aggregation returns the Aggregation to use for an instrument kind. +// The default implementation provided by the OTEL Metrics SDK library DefaultAggregationSelector panics. +// This custom version replicates that logic, but removes the panic. func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggregation { switch kind { case metric.InstrumentKindObservableGauge: @@ -55,12 +58,12 @@ func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMe } otlpMetrics, merr := transformOTLP(&metrics) - err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) + err := e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) return multierror.Append(merr, err) } -// ForceFlush does nothing, as the MetricsClient client holds no state. +// ForceFlush is a no-op, as the MetricsClient client holds no state. func (e *OTELExporter) ForceFlush(ctx context.Context) error { return ctx.Err() } -// Shutdown does nothing, as the MetricsClient is a HTTP client that requires no graceful shutdown. +// Shutdown is a no-op, as the MetricsClient is a HTTP client that requires no graceful shutdown. func (e *OTELExporter) Shutdown(ctx context.Context) error { return ctx.Err() } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index cff7802a3a5..6434e54f023 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -5,14 +5,14 @@ import ( "fmt" "testing" - "github.com/hashicorp/consul/agent/hcp/client" - "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" "go.opentelemetry.io/otel/sdk/metric/metricdata" "go.opentelemetry.io/otel/sdk/resource" metricpb "go.opentelemetry.io/proto/otlp/metrics/v1" + + "github.com/hashicorp/consul/agent/hcp/client" ) func TestTemporality(t *testing.T) { From 5564bcebcd8cc59971c344aec47aff458bda9857 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 13:14:01 -0400 Subject: [PATCH 14/50] Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile --- agent/hcp/telemetry/otel_exporter_test.go | 41 +++++++++++++++------- agent/hcp/telemetry/otlp_transform_test.go | 1 + 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 6434e54f023..756f20af792 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -16,11 +16,13 @@ import ( ) func TestTemporality(t *testing.T) { + t.Parallel() exp := &OTELExporter{} require.Equal(t, metricdata.CumulativeTemporality, exp.Temporality(metric.InstrumentKindCounter)) } func TestAggregation(t *testing.T) { + t.Parallel() for name, test := range map[string]struct { kind metric.InstrumentKind expAgg aggregation.Aggregation @@ -38,33 +40,34 @@ func TestAggregation(t *testing.T) { expAgg: aggregation.ExplicitBucketHistogram{Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, NoMinMax: false}, }, } { + test := test t.Run(name, func(t *testing.T) { + t.Parallel() exp := &OTELExporter{} require.Equal(t, test.expAgg, exp.Aggregation(test.kind)) }) } } -type mockErrMetricsClient struct{} - -func (m *mockErrMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { - return fmt.Errorf("failed to export metrics") +type mockMetricsClient struct { + exportErr error } -type mockMetricsClient struct{} - func (m *mockMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { - return nil + return m.exportErr } func TestExport(t *testing.T) { + t.Parallel() for name, test := range map[string]struct { wantErr string metrics metricdata.ResourceMetrics client client.MetricsClient }{ "earlyReturnWithoutScopeMetrics": { - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: nil, + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ @@ -73,14 +76,18 @@ func TestExport(t *testing.T) { }, }, "earlyReturnWithoutMetrics": { - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: nil, + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{}, }, }, "errorWithExportFailure": { - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: fmt.Errorf("failed to export metrics."), + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ @@ -114,7 +121,9 @@ func TestExport(t *testing.T) { }, "multierrorTransformExportFailure": { wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: fmt.Errorf("failed to export metrics"), + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ @@ -130,7 +139,9 @@ func TestExport(t *testing.T) { }, }, } { + test := test t.Run(name, func(t *testing.T) { + t.Parallel() exp := &OTELExporter{ client: test.client, } @@ -148,17 +159,21 @@ func TestExport(t *testing.T) { } func TestForceFlush(t *testing.T) { + t.Parallel() exp := &OTELExporter{} ctx, cancel := context.WithCancel(context.Background()) cancel() - require.Error(t, exp.ForceFlush(ctx)) + err := exp.ForceFlush(ctx) + require.ErrorIs(t, err, context.Canceled) } func TestShutdown(t *testing.T) { + t.Parallel() exp := &OTELExporter{} ctx, cancel := context.WithCancel(context.Background()) cancel() - require.Error(t, exp.Shutdown(ctx)) + err := exp.Shutdown(ctx) + require.ErrorIs(t, err, context.Canceled) } diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 0b26cc0e6a6..5d763929249 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -234,6 +234,7 @@ var ( // TestTransformOTLP runs tests from the "bottom-up" of the metricdata data types. func TestTransformOTLP(t *testing.T) { + t.Parallel() // Histogram DataPoint Test Case (Histograms) assert.Equal(t, otlpHDP, histogramDataPoints(otelHDP)) From 424a065b7e6e037de0b53b2727edd732e0455e19 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 13:53:30 -0400 Subject: [PATCH 15/50] Cleanup error handling and clarify empty metrics case --- agent/hcp/telemetry/otel_exporter.go | 10 ++--- agent/hcp/telemetry/otel_exporter_test.go | 43 ++------------------ agent/hcp/telemetry/otlp_transform.go | 47 +++++++++------------- agent/hcp/telemetry/otlp_transform_test.go | 15 +++---- 4 files changed, 33 insertions(+), 82 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index b0bd81e585a..da5e774e59d 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -4,7 +4,6 @@ import ( "context" "net/url" - "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" "go.opentelemetry.io/otel/sdk/metric/metricdata" @@ -53,13 +52,12 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre // Export serializes and transmits metric data to a receiver. func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { - if len(metrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 { + otlpMetrics := transformOTLP(&metrics) + emptyMetrics := len(otlpMetrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 + if emptyMetrics { return nil } - - otlpMetrics, merr := transformOTLP(&metrics) - err := e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) - return multierror.Append(merr, err) + return e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) } // ForceFlush is a no-op, as the MetricsClient client holds no state. diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 756f20af792..bf8ffb5aea0 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -69,10 +69,8 @@ func TestExport(t *testing.T) { exportErr: nil, }, metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - {Metrics: []metricdata.Metrics{}}, - }, + Resource: resource.Empty(), + ScopeMetrics: nil, }, }, "earlyReturnWithoutMetrics": { @@ -95,6 +93,7 @@ func TestExport(t *testing.T) { Metrics: []metricdata.Metrics{ { Name: "consul.raft.commitTime", + Data: metricdata.Gauge[float64]{}, }, }, }, @@ -102,42 +101,6 @@ func TestExport(t *testing.T) { }, wantErr: "failed to export metrics", }, - "errorWithTransformFailure": { - wantErr: "unknown aggregation: metricdata.Gauge[int64]", - client: &mockMetricsClient{}, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - { - Metrics: []metricdata.Metrics{ - { - // unsupported, only float64 supported - Data: metricdata.Gauge[int64]{}, - }, - }, - }, - }, - }, - }, - "multierrorTransformExportFailure": { - wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", - client: &mockMetricsClient{ - exportErr: fmt.Errorf("failed to export metrics"), - }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - { - Metrics: []metricdata.Metrics{ - { - // unsupported, only float64 supported - Data: metricdata.Gauge[int64]{}, - }, - }, - }, - }, - }, - }, } { test := test t.Run(name, func(t *testing.T) { diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 4ea3ab52907..f4547c56813 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -3,7 +3,6 @@ package telemetry import ( "fmt" - "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/metric/metricdata" cpb "go.opentelemetry.io/proto/otlp/common/v1" @@ -14,26 +13,21 @@ import ( // TransformOTLP returns an OTLP ResourceMetrics generated from OTEL metrics. If rm // contains invalid ScopeMetrics, an error will be returned along with an OTLP // ResourceMetrics that contains partial OTLP ScopeMetrics. -func transformOTLP(rm *metricdata.ResourceMetrics) (*mpb.ResourceMetrics, error) { - sms, err := scopeMetrics(rm.ScopeMetrics) +func transformOTLP(rm *metricdata.ResourceMetrics) *mpb.ResourceMetrics { + sms := scopeMetricsToPB(rm.ScopeMetrics) return &mpb.ResourceMetrics{ Resource: &rpb.Resource{ - Attributes: attributes(rm.Resource.Iter()), + Attributes: attributesToPB(rm.Resource.Iter()), }, ScopeMetrics: sms, - }, err + } } // scopeMetrics returns a slice of OTLP ScopeMetrics. -func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) { - var merr error +func scopeMetricsToPB(scopeMetrics []metricdata.ScopeMetrics) []*mpb.ScopeMetrics { out := make([]*mpb.ScopeMetrics, 0, len(scopeMetrics)) for _, sm := range scopeMetrics { - ms, err := metrics(sm.Metrics) - if err != nil { - merr = multierror.Append(merr, err) - } - + ms := metricsToPB(sm.Metrics) out = append(out, &mpb.ScopeMetrics{ Scope: &cpb.InstrumentationScope{ Name: sm.Scope.Name, @@ -42,27 +36,26 @@ func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, Metrics: ms, }) } - return out, merr + return out } // metrics returns a slice of OTLP Metric generated from OTEL metrics sdk ones. -func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { - var merr error +func metricsToPB(metrics []metricdata.Metrics) []*mpb.Metric { out := make([]*mpb.Metric, 0, len(metrics)) for _, m := range metrics { - o, err := metricType(m) + o, err := metricTypeToPB(m) if err != nil { - merr = multierror.Append(merr, err) + // TODO: Emit metric when a transformation occurs. continue } out = append(out, o) } - return out, merr + return out } // metricType identifies the instrument type and converts it to OTLP format. // only float64 values are accepted since the go metrics sink only receives float64 values. -func metricType(m metricdata.Metrics) (*mpb.Metric, error) { +func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { out := &mpb.Metric{ Name: m.Name, Description: m.Description, @@ -72,7 +65,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { case metricdata.Gauge[float64]: out.Data = &mpb.Metric_Gauge{ Gauge: &mpb.Gauge{ - DataPoints: dataPoints(a.DataPoints), + DataPoints: dataPointsToPB(a.DataPoints), }, } case metricdata.Sum[float64]: @@ -83,7 +76,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { Sum: &mpb.Sum{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, IsMonotonic: a.IsMonotonic, - DataPoints: dataPoints(a.DataPoints), + DataPoints: dataPointsToPB(a.DataPoints), }, } case metricdata.Histogram: @@ -93,7 +86,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { out.Data = &mpb.Metric_Histogram{ Histogram: &mpb.Histogram{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, - DataPoints: histogramDataPoints(a.DataPoints), + DataPoints: histogramDataPointsToPB(a.DataPoints), }, } default: @@ -103,11 +96,11 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { } // DataPoints returns a slice of OTLP NumberDataPoint generated from OTEL metrics sdk ones. -func dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoint { +func dataPointsToPB(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoint { out := make([]*mpb.NumberDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { ndp := &mpb.NumberDataPoint{ - Attributes: attributes(dp.Attributes.Iter()), + Attributes: attributesToPB(dp.Attributes.Iter()), StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), TimeUnixNano: uint64(dp.Time.UnixNano()), } @@ -121,12 +114,12 @@ func dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoi } // HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. -func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { +func histogramDataPointsToPB(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { sum := dp.Sum hdp := &mpb.HistogramDataPoint{ - Attributes: attributes(dp.Attributes.Iter()), + Attributes: attributesToPB(dp.Attributes.Iter()), StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), TimeUnixNano: uint64(dp.Time.UnixNano()), Count: dp.Count, @@ -147,7 +140,7 @@ func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint) []*mpb.Hist // attributes transforms items of an attribute iterator into OTLP key-values. // Currently, labels are only key-value pairs. -func attributes(iter attribute.Iterator) []*cpb.KeyValue { +func attributesToPB(iter attribute.Iterator) []*cpb.KeyValue { l := iter.Len() if iter.Len() == 0 { return nil diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 5d763929249..1f2aae4a5bc 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -236,24 +236,21 @@ var ( func TestTransformOTLP(t *testing.T) { t.Parallel() // Histogram DataPoint Test Case (Histograms) - assert.Equal(t, otlpHDP, histogramDataPoints(otelHDP)) + assert.Equal(t, otlpHDP, histogramDataPointsToPB(otelHDP)) // Number DataPoint Test Case (Counters / Gauges) - require.Equal(t, otlpDP, dataPoints(otelDP)) + require.Equal(t, otlpDP, dataPointsToPB(otelDP)) // Metrics Test Case - m, err := metrics(otelMetrics) + m := metricsToPB(otelMetrics) require.Equal(t, otlpMetrics, m) require.Equal(t, len(otlpMetrics), 3) - require.Contains(t, err.Error(), "3 errors occurred") // Scope Metrics Test Case - sm, err := scopeMetrics(otelScopeMetrics) + sm := scopeMetricsToPB(otelScopeMetrics) require.Equal(t, otlpScopeMetrics, sm) - require.Contains(t, err.Error(), "3 errors occurred") - // Resource Metrics Test Case - rm, err := transformOTLP(otelResourceMetrics) + // // Resource Metrics Test Case + rm := transformOTLP(otelResourceMetrics) require.Equal(t, otlpResourceMetrics, rm) - require.Contains(t, err.Error(), "3 errors occurred") } From 470a11df09346212346e37ed9ad28abef794dcfe Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 14:00:48 -0400 Subject: [PATCH 16/50] Fix input/expected naming in otel_transform_test.go --- agent/hcp/telemetry/otel_exporter_test.go | 42 +++++++-------- agent/hcp/telemetry/otlp_transform.go | 2 +- agent/hcp/telemetry/otlp_transform_test.go | 62 +++++++++++----------- 3 files changed, 52 insertions(+), 54 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index bf8ffb5aea0..6008121998d 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -65,40 +65,31 @@ func TestExport(t *testing.T) { client client.MetricsClient }{ "earlyReturnWithoutScopeMetrics": { - client: &mockMetricsClient{ - exportErr: nil, - }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: nil, - }, + client: &mockMetricsClient{}, + metrics: mutateMetrics(nil), }, "earlyReturnWithoutMetrics": { - client: &mockMetricsClient{ - exportErr: nil, - }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{}, + client: &mockMetricsClient{}, + metrics: mutateMetrics([]metricdata.ScopeMetrics{ + {Metrics: []metricdata.Metrics{}}, }, + ), }, "errorWithExportFailure": { client: &mockMetricsClient{ exportErr: fmt.Errorf("failed to export metrics."), }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - { - Metrics: []metricdata.Metrics{ - { - Name: "consul.raft.commitTime", - Data: metricdata.Gauge[float64]{}, - }, + metrics: mutateMetrics([]metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "consul.raft.commitTime", + Data: metricdata.Gauge[float64]{}, }, }, }, }, + ), wantErr: "failed to export metrics", }, } { @@ -140,3 +131,10 @@ func TestShutdown(t *testing.T) { err := exp.Shutdown(ctx) require.ErrorIs(t, err, context.Canceled) } + +func mutateMetrics(m []metricdata.ScopeMetrics) metricdata.ResourceMetrics { + return metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: m, + } +} diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index f4547c56813..231646a7300 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -45,7 +45,7 @@ func metricsToPB(metrics []metricdata.Metrics) []*mpb.Metric { for _, m := range metrics { o, err := metricTypeToPB(m) if err != nil { - // TODO: Emit metric when a transformation occurs. + // TODO: Emit metric when a transformation failure occurs. continue } out = append(out, o) diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 1f2aae4a5bc..c331cb00239 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -35,7 +35,7 @@ var ( // DataPoint test case : Histogram Datapoints (Histogram) minA, maxA, sumA = 2.0, 4.0, 90.0 minB, maxB, sumB = 4.0, 150.0, 234.0 - otelHDP = []metricdata.HistogramDataPoint{{ + inputHDP = []metricdata.HistogramDataPoint{{ Attributes: alice, StartTime: start, Time: end, @@ -57,7 +57,7 @@ var ( Sum: sumB, }} - otlpHDP = []*mpb.HistogramDataPoint{{ + expectedHDP = []*mpb.HistogramDataPoint{{ Attributes: []*cpb.KeyValue{pbAlice}, StartTimeUnixNano: uint64(start.UnixNano()), TimeUnixNano: uint64(end.UnixNano()), @@ -79,12 +79,12 @@ var ( Max: &maxB, }} // DataPoint test case : Number Datapoints (Gauge / Counter) - otelDP = []metricdata.DataPoint[float64]{ + inputDP = []metricdata.DataPoint[float64]{ {Attributes: alice, StartTime: start, Time: end, Value: 1.0}, {Attributes: bob, StartTime: start, Time: end, Value: 2.0}, } - otlpDP = []*mpb.NumberDataPoint{ + expectedDP = []*mpb.NumberDataPoint{ { Attributes: []*cpb.KeyValue{pbAlice}, StartTimeUnixNano: uint64(start.UnixNano()), @@ -104,12 +104,12 @@ var ( // - 1 invalid metric type // - 2 invalid cummulative temporalities (only cummulative supported) // - 3 types (Gauge, Counter, and Histogram) supported - otelMetrics = []metricdata.Metrics{ + inputMetrics = []metricdata.Metrics{ { Name: "float64-gauge", Description: "Gauge with float64 values", Unit: "1", - Data: metricdata.Gauge[float64]{DataPoints: otelDP}, + Data: metricdata.Gauge[float64]{DataPoints: inputDP}, }, { Name: "float64-sum", @@ -118,7 +118,7 @@ var ( Data: metricdata.Sum[float64]{ Temporality: metricdata.CumulativeTemporality, IsMonotonic: false, - DataPoints: otelDP, + DataPoints: inputDP, }, }, { @@ -127,7 +127,7 @@ var ( Unit: "1", Data: metricdata.Histogram{ Temporality: metricdata.CumulativeTemporality, - DataPoints: otelHDP, + DataPoints: inputHDP, }, }, { @@ -137,7 +137,7 @@ var ( Data: metricdata.Sum[float64]{ Temporality: metricdata.DeltaTemporality, IsMonotonic: false, - DataPoints: otelDP, + DataPoints: inputDP, }, }, { @@ -146,7 +146,7 @@ var ( Unit: "1", Data: metricdata.Histogram{ Temporality: metricdata.DeltaTemporality, - DataPoints: otelHDP, + DataPoints: inputHDP, }, }, { @@ -157,12 +157,12 @@ var ( }, } - otlpMetrics = []*mpb.Metric{ + expectedMetrics = []*mpb.Metric{ { Name: "float64-gauge", Description: "Gauge with float64 values", Unit: "1", - Data: &mpb.Metric_Gauge{Gauge: &mpb.Gauge{DataPoints: otlpDP}}, + Data: &mpb.Metric_Gauge{Gauge: &mpb.Gauge{DataPoints: expectedDP}}, }, { Name: "float64-sum", @@ -171,7 +171,7 @@ var ( Data: &mpb.Metric_Sum{Sum: &mpb.Sum{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, IsMonotonic: false, - DataPoints: otlpDP, + DataPoints: expectedDP, }}, }, { @@ -180,38 +180,38 @@ var ( Unit: "1", Data: &mpb.Metric_Histogram{Histogram: &mpb.Histogram{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, - DataPoints: otlpHDP, + DataPoints: expectedHDP, }}, }, } // ScopeMetrics Test Cases - otelScopeMetrics = []metricdata.ScopeMetrics{{ + inputScopeMetrics = []metricdata.ScopeMetrics{{ Scope: instrumentation.Scope{ Name: "test/code/path", Version: "v0.1.0", }, - Metrics: otelMetrics, + Metrics: inputMetrics, }} - otlpScopeMetrics = []*mpb.ScopeMetrics{{ + expectedScopeMetrics = []*mpb.ScopeMetrics{{ Scope: &cpb.InstrumentationScope{ Name: "test/code/path", Version: "v0.1.0", }, - Metrics: otlpMetrics, + Metrics: expectedMetrics, }} // ResourceMetrics Test Cases - otelResourceMetrics = &metricdata.ResourceMetrics{ + inputResourceMetrics = &metricdata.ResourceMetrics{ Resource: resource.NewSchemaless( semconv.ServiceName("test server"), semconv.ServiceVersion("v0.1.0"), ), - ScopeMetrics: otelScopeMetrics, + ScopeMetrics: inputScopeMetrics, } - otlpResourceMetrics = &mpb.ResourceMetrics{ + expectedResourceMetrics = &mpb.ResourceMetrics{ Resource: &rpb.Resource{ Attributes: []*cpb.KeyValue{ { @@ -228,7 +228,7 @@ var ( }, }, }, - ScopeMetrics: otlpScopeMetrics, + ScopeMetrics: expectedScopeMetrics, } ) @@ -236,21 +236,21 @@ var ( func TestTransformOTLP(t *testing.T) { t.Parallel() // Histogram DataPoint Test Case (Histograms) - assert.Equal(t, otlpHDP, histogramDataPointsToPB(otelHDP)) + assert.Equal(t, expectedHDP, histogramDataPointsToPB(inputHDP)) // Number DataPoint Test Case (Counters / Gauges) - require.Equal(t, otlpDP, dataPointsToPB(otelDP)) + require.Equal(t, expectedDP, dataPointsToPB(inputDP)) // Metrics Test Case - m := metricsToPB(otelMetrics) - require.Equal(t, otlpMetrics, m) - require.Equal(t, len(otlpMetrics), 3) + m := metricsToPB(inputMetrics) + require.Equal(t, expectedMetrics, m) + require.Equal(t, len(expectedMetrics), 3) // Scope Metrics Test Case - sm := scopeMetricsToPB(otelScopeMetrics) - require.Equal(t, otlpScopeMetrics, sm) + sm := scopeMetricsToPB(inputScopeMetrics) + require.Equal(t, expectedScopeMetrics, sm) // // Resource Metrics Test Case - rm := transformOTLP(otelResourceMetrics) - require.Equal(t, otlpResourceMetrics, rm) + rm := transformOTLP(inputResourceMetrics) + require.Equal(t, expectedResourceMetrics, rm) } From be0b01b79631cb209fa36af9bd37264fe3a31265 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 14:15:32 -0400 Subject: [PATCH 17/50] add comment for metric tracking --- agent/hcp/telemetry/otel_exporter.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index da5e774e59d..39c5118411d 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -61,7 +61,13 @@ func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMe } // ForceFlush is a no-op, as the MetricsClient client holds no state. -func (e *OTELExporter) ForceFlush(ctx context.Context) error { return ctx.Err() } +func (e *OTELExporter) ForceFlush(ctx context.Context) error { + // TODO: Emit metric when this operation occurs. + return ctx.Err() +} // Shutdown is a no-op, as the MetricsClient is a HTTP client that requires no graceful shutdown. -func (e *OTELExporter) Shutdown(ctx context.Context) error { return ctx.Err() } +func (e *OTELExporter) Shutdown(ctx context.Context) error { + // TODO: Emit metric when this operation occurs. + return ctx.Err() +} From 325bb4d3a778455364f6f9ccbe5de0e674fcafa4 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 14:37:27 -0400 Subject: [PATCH 18/50] Add a general isEmpty method --- agent/hcp/telemetry/otel_exporter.go | 3 +-- agent/hcp/telemetry/otlp_transform.go | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 39c5118411d..4c774f5acb0 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -53,8 +53,7 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre // Export serializes and transmits metric data to a receiver. func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { otlpMetrics := transformOTLP(&metrics) - emptyMetrics := len(otlpMetrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 - if emptyMetrics { + if isEmpty(otlpMetrics) { return nil } return e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 231646a7300..d91af179c0c 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -10,6 +10,25 @@ import ( rpb "go.opentelemetry.io/proto/otlp/resource/v1" ) +// isEmpty verifies if the given OTLP protobuf metrics contains metric data. +// isEmpty returns true if no ScopeMetrics exist or all metrics within ScopeMetrics are empty. +func isEmpty(rm *mpb.ResourceMetrics) bool { + // No ScopeMetrics + if len(rm.ScopeMetrics) == 0 { + return true + } + + // If any inner metrics contain data, return false. + for _, v := range rm.ScopeMetrics { + if len(v.Metrics) != 0 { + return false + } + } + + // All inner metrics are empty. + return true +} + // TransformOTLP returns an OTLP ResourceMetrics generated from OTEL metrics. If rm // contains invalid ScopeMetrics, an error will be returned along with an OTLP // ResourceMetrics that contains partial OTLP ScopeMetrics. From a0352ac883e1e4d0153b0c8c0d67c1dd2118a679 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 16:34:52 -0400 Subject: [PATCH 19/50] Add clear error types --- agent/hcp/telemetry/otlp_transform.go | 12 +++- agent/hcp/telemetry/otlp_transform_test.go | 69 ++++++++++++++-------- 2 files changed, 53 insertions(+), 28 deletions(-) diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index d91af179c0c..37b071e2d56 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -1,6 +1,7 @@ package telemetry import ( + "errors" "fmt" "go.opentelemetry.io/otel/attribute" @@ -10,6 +11,11 @@ import ( rpb "go.opentelemetry.io/proto/otlp/resource/v1" ) +var ( + aggregationErr = errors.New("unsupported aggregation") + temporalityErr = errors.New("unsupported temporality") +) + // isEmpty verifies if the given OTLP protobuf metrics contains metric data. // isEmpty returns true if no ScopeMetrics exist or all metrics within ScopeMetrics are empty. func isEmpty(rm *mpb.ResourceMetrics) bool { @@ -89,7 +95,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { } case metricdata.Sum[float64]: if a.Temporality != metricdata.CumulativeTemporality { - return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + return out, fmt.Errorf("error: %w: %T", temporalityErr, a) } out.Data = &mpb.Metric_Sum{ Sum: &mpb.Sum{ @@ -100,7 +106,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { } case metricdata.Histogram: if a.Temporality != metricdata.CumulativeTemporality { - return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + return out, fmt.Errorf("error: %w: %T", temporalityErr, a) } out.Data = &mpb.Metric_Histogram{ Histogram: &mpb.Histogram{ @@ -109,7 +115,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { }, } default: - return out, fmt.Errorf("%s: %T", "unknown aggregation", a) + return out, fmt.Errorf("error: %w: %T", aggregationErr, a) } return out, nil } diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index c331cb00239..7b622967e9a 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -99,6 +99,34 @@ var ( }, } + invalidSumTemporality = metricdata.Metrics{ + Name: "invalid-sum", + Description: "Sum with invalid temporality", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.DeltaTemporality, + IsMonotonic: false, + DataPoints: inputDP, + }, + } + + invalidSumAgg = metricdata.Metrics{ + Name: "unknown", + Description: "Unknown aggregation", + Unit: "1", + Data: metricdata.Sum[int64]{}, + } + + invalidHistTemporality = metricdata.Metrics{ + Name: "invalid-histogram", + Description: "Invalid histogram", + Unit: "1", + Data: metricdata.Histogram{ + Temporality: metricdata.DeltaTemporality, + DataPoints: inputHDP, + }, + } + // Metrics Test Case // - 3 invalid metrics and 3 Valid to test filtering // - 1 invalid metric type @@ -130,31 +158,9 @@ var ( DataPoints: inputHDP, }, }, - { - Name: "invalid-sum", - Description: "Sum with invalid temporality", - Unit: "1", - Data: metricdata.Sum[float64]{ - Temporality: metricdata.DeltaTemporality, - IsMonotonic: false, - DataPoints: inputDP, - }, - }, - { - Name: "invalid-histogram", - Description: "Invalid histogram", - Unit: "1", - Data: metricdata.Histogram{ - Temporality: metricdata.DeltaTemporality, - DataPoints: inputHDP, - }, - }, - { - Name: "unknown", - Description: "Unknown aggregation", - Unit: "1", - Data: metricdata.Sum[int64]{}, - }, + invalidSumTemporality, + invalidHistTemporality, + invalidSumAgg, } expectedMetrics = []*mpb.Metric{ @@ -241,6 +247,19 @@ func TestTransformOTLP(t *testing.T) { // Number DataPoint Test Case (Counters / Gauges) require.Equal(t, expectedDP, dataPointsToPB(inputDP)) + // MetricType Error Test Cases + _, err := metricTypeToPB(invalidHistTemporality) + require.Error(t, err) + require.ErrorIs(t, err, temporalityErr) + + _, err = metricTypeToPB(invalidSumTemporality) + require.Error(t, err) + require.ErrorIs(t, err, temporalityErr) + + _, err = metricTypeToPB(invalidSumAgg) + require.Error(t, err) + require.ErrorIs(t, err, aggregationErr) + // Metrics Test Case m := metricsToPB(inputMetrics) require.Equal(t, expectedMetrics, m) From 2d356f677aed84eea40c342d7f629561a15c5b70 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 00:00:55 -0400 Subject: [PATCH 20/50] update to latest version 1.15.0 of OTEL --- agent/hcp/telemetry/otel_exporter.go | 4 ++-- agent/hcp/telemetry/otel_exporter_test.go | 6 ++--- agent/hcp/telemetry/otlp_transform.go | 4 ++-- agent/hcp/telemetry/otlp_transform_test.go | 6 ++--- go.mod | 14 +++++------ go.sum | 28 +++++++++++----------- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 4c774f5acb0..02f6a07f837 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -51,8 +51,8 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre } // Export serializes and transmits metric data to a receiver. -func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { - otlpMetrics := transformOTLP(&metrics) +func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { + otlpMetrics := transformOTLP(metrics) if isEmpty(otlpMetrics) { return nil } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 6008121998d..0e3d3fcc1e4 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -61,7 +61,7 @@ func TestExport(t *testing.T) { t.Parallel() for name, test := range map[string]struct { wantErr string - metrics metricdata.ResourceMetrics + metrics *metricdata.ResourceMetrics client client.MetricsClient }{ "earlyReturnWithoutScopeMetrics": { @@ -132,8 +132,8 @@ func TestShutdown(t *testing.T) { require.ErrorIs(t, err, context.Canceled) } -func mutateMetrics(m []metricdata.ScopeMetrics) metricdata.ResourceMetrics { - return metricdata.ResourceMetrics{ +func mutateMetrics(m []metricdata.ScopeMetrics) *metricdata.ResourceMetrics { + return &metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: m, } diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 37b071e2d56..7ba1650ffd0 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -104,7 +104,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { DataPoints: dataPointsToPB(a.DataPoints), }, } - case metricdata.Histogram: + case metricdata.Histogram[float64]: if a.Temporality != metricdata.CumulativeTemporality { return out, fmt.Errorf("error: %w: %T", temporalityErr, a) } @@ -139,7 +139,7 @@ func dataPointsToPB(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDat } // HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. -func histogramDataPointsToPB(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { +func histogramDataPointsToPB(dataPoints []metricdata.HistogramDataPoint[float64]) []*mpb.HistogramDataPoint { out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { sum := dp.Sum diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 7b622967e9a..1c22e9a5cd7 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -35,7 +35,7 @@ var ( // DataPoint test case : Histogram Datapoints (Histogram) minA, maxA, sumA = 2.0, 4.0, 90.0 minB, maxB, sumB = 4.0, 150.0, 234.0 - inputHDP = []metricdata.HistogramDataPoint{{ + inputHDP = []metricdata.HistogramDataPoint[float64]{{ Attributes: alice, StartTime: start, Time: end, @@ -121,7 +121,7 @@ var ( Name: "invalid-histogram", Description: "Invalid histogram", Unit: "1", - Data: metricdata.Histogram{ + Data: metricdata.Histogram[float64]{ Temporality: metricdata.DeltaTemporality, DataPoints: inputHDP, }, @@ -153,7 +153,7 @@ var ( Name: "float64-histogram", Description: "Histogram", Unit: "1", - Data: metricdata.Histogram{ + Data: metricdata.Histogram[float64]{ Temporality: metricdata.CumulativeTemporality, DataPoints: inputHDP, }, diff --git a/go.mod b/go.mod index 02af412dff9..ffafa515729 100644 --- a/go.mod +++ b/go.mod @@ -96,16 +96,16 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 - go.opentelemetry.io/otel v1.15.0-rc.1 - go.opentelemetry.io/otel/sdk v1.15.0-rc.1 - go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 + go.opentelemetry.io/otel v1.15.1 + go.opentelemetry.io/otel/sdk v1.15.1 + go.opentelemetry.io/otel/sdk/metric v0.38.1 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.6.0 + golang.org/x/sys v0.7.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -154,7 +154,7 @@ require ( github.com/dimchansky/utfbom v1.1.0 // indirect github.com/envoyproxy/protoc-gen-validate v0.1.0 // indirect github.com/form3tech-oss/jwt-go v3.2.2+incompatible // indirect - github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/logr v1.2.4 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/analysis v0.21.2 // indirect @@ -232,8 +232,8 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/metric v1.15.0-rc.1 // indirect - go.opentelemetry.io/otel/trace v1.15.0-rc.1 // indirect + go.opentelemetry.io/otel/metric v0.38.1 // indirect + go.opentelemetry.io/otel/trace v1.15.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 485bcb486b9..4412cb4947e 100644 --- a/go.sum +++ b/go.sum @@ -314,8 +314,8 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= -github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= @@ -1080,16 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.15.0-rc.1 h1:KgZyVIfe3rPjWZHAZE0A9sH5U4tjyh1VeP+BFIgq944= -go.opentelemetry.io/otel v1.15.0-rc.1/go.mod h1:IZXh/uN07z/0si8lWvFW2FkwzAmSGE4DhF4quJIsLnY= -go.opentelemetry.io/otel/metric v1.15.0-rc.1 h1:ueivGgoyP2c58JZvmJriF35k238mVyRtlODD6BRgowU= -go.opentelemetry.io/otel/metric v1.15.0-rc.1/go.mod h1:bpPBxLwoWWmiK+Hmb6ZaG0zDLIi59lK7M+GjgZ5PN+4= -go.opentelemetry.io/otel/sdk v1.15.0-rc.1 h1:WtWiH5l19vwpdGIx9/Wou9l7a/butFoAOuJktWnlOro= -go.opentelemetry.io/otel/sdk v1.15.0-rc.1/go.mod h1:93NwQ8NqCb/QSUF7URdJur5Fvfm9rztE+2eJLpsKdWc= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 h1:PYmYfBlAZeg4y4VNjrKY24yrD2Jzb47x7Dz6rAI9lXg= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1/go.mod h1:LH4ApPwmFOe8mvPS7a56gW4WT9IhtwuEA/mN8FsVDX0= -go.opentelemetry.io/otel/trace v1.15.0-rc.1 h1:xK6jLm8h2KFhdItNvzAuNvnoWjRPU9u7whXNNBMxjtc= -go.opentelemetry.io/otel/trace v1.15.0-rc.1/go.mod h1:2cLx8hBNS4rUWB+JA9PuCGggQl+KJioCaoV2CKewY4s= +go.opentelemetry.io/otel v1.15.1 h1:3Iwq3lfRByPaws0f6bU3naAqOR1n5IeDWd9390kWHa8= +go.opentelemetry.io/otel v1.15.1/go.mod h1:mHHGEHVDLal6YrKMmk9LqC4a3sF5g+fHfrttQIB1NTc= +go.opentelemetry.io/otel/metric v0.38.1 h1:2MM7m6wPw9B8Qv8iHygoAgkbejed59uUR6ezR5T3X2s= +go.opentelemetry.io/otel/metric v0.38.1/go.mod h1:FwqNHD3I/5iX9pfrRGZIlYICrJv0rHEUl2Ln5vdIVnQ= +go.opentelemetry.io/otel/sdk v1.15.1 h1:5FKR+skgpzvhPQHIEfcwMYjCBr14LWzs3uSqKiQzETI= +go.opentelemetry.io/otel/sdk v1.15.1/go.mod h1:8rVtxQfrbmbHKfqzpQkT5EzZMcbMBwTzNAggbEAM0KA= +go.opentelemetry.io/otel/sdk/metric v0.38.1 h1:EkO5wI4NT/fUaoPMGc0fKV28JaWe7q4vfVpEVasGb+8= +go.opentelemetry.io/otel/sdk/metric v0.38.1/go.mod h1:Rn4kSXFF9ZQZ5lL1pxQjCbK4seiO+U7s0ncmIFJaj34= +go.opentelemetry.io/otel/trace v1.15.1 h1:uXLo6iHJEzDfrNC0L0mNjItIp06SyaBQxu5t3xMlngY= +go.opentelemetry.io/otel/trace v1.15.1/go.mod h1:IWdQG/5N1x7f6YUlmdLeJvH9yxtuJAfc4VW5Agv9r/8= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1354,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From 22bb2ee90d9609ec1365caab1870fc45f6af41e0 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 11 Apr 2023 01:39:41 -0400 Subject: [PATCH 21/50] Client configured with TLS using HCP config and retry/throttle --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 4412cb4947e..945a4855911 100644 --- a/go.sum +++ b/go.sum @@ -516,6 +516,7 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 22be78f224f62a9d7730f6c7f8882b56cf682966 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:26:56 -0400 Subject: [PATCH 22/50] run go mod tidy --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index 945a4855911..4412cb4947e 100644 --- a/go.sum +++ b/go.sum @@ -516,7 +516,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 478181a983c133a885b6382e04aefd8b43551b61 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 01:17:14 -0400 Subject: [PATCH 23/50] Remove one abstraction to use the config from deps --- agent/hcp/client/metrics_client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index 15bd71097f7..ba50f28626e 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,7 +129,6 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } - defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From c2ffaab30d7df9f2e567753274043055de83f262 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 13:47:35 -0400 Subject: [PATCH 24/50] Address PR feedback --- agent/hcp/client/metrics_client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index ba50f28626e..15bd71097f7 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,6 +129,7 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } + defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From 864e6d7db283ced90e1c0fc65c5b8a5d6477aace Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 24 Apr 2023 10:57:15 -0400 Subject: [PATCH 25/50] Initialize OTELSink with sync.Map for all the instrument stores. --- agent/hcp/telemetry/otel_sink.go | 197 +++++++++++++++++++ agent/hcp/telemetry/otel_sink_test.go | 262 ++++++++++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 agent/hcp/telemetry/otel_sink.go create mode 100644 agent/hcp/telemetry/otel_sink_test.go diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go new file mode 100644 index 00000000000..deeccec0158 --- /dev/null +++ b/agent/hcp/telemetry/otel_sink.go @@ -0,0 +1,197 @@ +package telemetry + +import ( + "bytes" + "context" + "fmt" + "strings" + "sync" + "time" + + gometrics "github.com/armon/go-metrics" + "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/go-hclog" + + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/instrument" + otelsdk "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" +) + +const defaultExportInterval = 10 * time.Second + +// Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. +var gauges sync.Map = sync.Map{} + +type GaugeValue struct { + Value float64 + Labels []attribute.KeyValue +} + +type OTELSinkOpts struct { + Endpoint string + Reader otelsdk.Reader + Logger hclog.Logger + ExportInterval time.Duration + Ctx context.Context +} + +type OTELSink struct { + spaceReplacer *strings.Replacer + logger hclog.Logger + ctx context.Context + + meterProvider *otelsdk.MeterProvider + meter *otelmetric.Meter + exportInterval time.Duration + + gaugeInstruments sync.Map + counterInstruments sync.Map + histogramInstruments sync.Map +} + +func NewOTELReader(client client.MetricsClient) otelsdk.Reader { + exp := &OTELExporter{ + client: client, + } + return otelsdk.NewPeriodicReader(exp, otelsdk.WithInterval(defaultExportInterval)) +} + +func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { + if opts.Logger == nil || opts.Reader == nil || opts.Endpoint == "" || opts.Ctx == nil { + return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts") + } + + // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. + res := resource.NewSchemaless() + meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) + meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") + + return &OTELSink{ + meterProvider: meterProvider, + meter: &meter, + spaceReplacer: strings.NewReplacer(" ", "_"), + ctx: opts.Ctx, + }, nil +} + +// SetGauge emits a Consul gauge metric. +func (o *OTELSink) SetGauge(key []string, val float32) { + o.SetGaugeWithLabels(key, val, nil) +} + +// AddSample emits a Consul histogram metric. +func (o *OTELSink) AddSample(key []string, val float32) { + o.AddSampleWithLabels(key, val, nil) +} + +// IncrCounter emits a Consul counter metric. +func (o *OTELSink) IncrCounter(key []string, val float32) { + o.IncrCounterWithLabels(key, val, nil) +} + +// AddSampleWithLabels emits a Consul gauge metric that gets +// registed by an OpenTelemetry Histogram instrument. +func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { + k := o.flattenKey(key, labels) + + // Set value in global Gauge store. + g := &GaugeValue{ + Value: float64(val), + Labels: toAttributes(labels), + } + gauges.Store(k, g) + + // If instrument does not exist, create it and register callback to get last value in global Gauge store. + if _, ok := o.gaugeInstruments.Load(k); !ok { + inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(gaugeCallback(k))) + if err != nil { + o.logger.Error("Failed to emit gauge: %w", err) + return + } + o.gaugeInstruments.Store(k, &inst) + } +} + +// AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. +func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { + k := o.flattenKey(key, labels) + var inst *instrument.Float64Histogram + v, ok := o.histogramInstruments.Load(k) + if !ok { + v, err := (*o.meter).Float64Histogram(k) + if err != nil { + o.logger.Error("Failed to emit gauge: %w", err) + return + } + inst = &v + o.histogramInstruments.Store(k, v) + } else { + inst = v.(*instrument.Float64Histogram) + } + + attrs := toAttributes(labels) + (*inst).Record(o.ctx, float64(val), attrs...) +} + +// IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. +func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { + k := o.flattenKey(key, labels) + var inst *instrument.Float64Counter + v, ok := o.histogramInstruments.Load(k) + if !ok { + v, err := (*o.meter).Float64Counter(k) + if err != nil { + o.logger.Error("Failed to emit gauge: %w", err) + return + } + inst = &v + o.histogramInstruments.Store(k, v) + } else { + inst = v.(*instrument.Float64Counter) + } + + attrs := toAttributes(labels) + (*inst).Add(o.ctx, float64(val), attrs...) +} + +// EmitKey unsupported. +func (o *OTELSink) EmitKey(key []string, val float32) {} + +// flattenKey key along with its labels. +func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { + buf := &bytes.Buffer{} + joined := strings.Join(parts, ".") + + o.spaceReplacer.WriteString(buf, joined) + + return buf.String() +} + +func toAttributes(labels []gometrics.Label) []attribute.KeyValue { + if len(labels) == 0 { + return nil + } + attrs := make([]attribute.KeyValue, len(labels)) + for i, label := range labels { + attrs[i] = attribute.KeyValue{ + Key: attribute.Key(label.Name), + Value: attribute.StringValue(label.Value), + } + } + + return attrs +} + +func gaugeCallback(key string) instrument.Float64Callback { + // Closures keep a reference to the key string, so we don't have to worry about it. + // These get garbage collected as the closure completes. + return func(_ context.Context, obs instrument.Float64Observer) error { + if val, ok := gauges.LoadAndDelete(key); ok { + v := val.(*GaugeValue) + obs.Observe(v.Value, v.Labels...) + } + return nil + } +} diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go new file mode 100644 index 00000000000..440e5d3b1cf --- /dev/null +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -0,0 +1,262 @@ +package telemetry + +import ( + "context" + "io" + "testing" + + gometrics "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" +) + +var ( + attrs = attribute.NewSet(attribute.KeyValue{ + Key: attribute.Key("server.id"), + Value: attribute.StringValue("test"), + }) + + expectedMetrics = map[string]metricdata.Metrics{ + "consul.raft.leader": { + Name: "consul.raft.leader", + Description: "", + Unit: "", + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(0)), + }, + }, + }, + }, + "consul.autopilot.healthy": { + Name: "consul.autopilot.healthy", + Description: "", + Unit: "", + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: attrs, + Value: float64(float32(1.23)), + }, + }, + }, + }, + "consul.raft.state.leader": { + Name: "consul.raft.state.leader", + Description: "", + Unit: "", + Data: metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(23.23)), + }, + }, + }, + }, + "consul.raft.apply": { + Name: "consul.raft.apply", + Description: "", + Unit: "", + Data: metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: attrs, + Value: float64(float32(1.44)), + }, + }, + }, + }, + "consul.raft.leader.lastContact": { + Name: "consul.raft.leader.lastContact", + Description: "", + Unit: "", + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Count: 1, + Sum: float64(float32(45.32)), + Min: metricdata.NewExtrema(float64(float32(45.32))), + Max: metricdata.NewExtrema(float64(float32(45.32))), + }, + }, + }, + }, + "consul.raft.commitTime": { + Name: "consul.raft.commitTime", + Description: "", + Unit: "", + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ + { + Attributes: attrs, + Count: 1, + Sum: float64(float32(26.34)), + Min: metricdata.NewExtrema(float64(float32(26.34))), + Max: metricdata.NewExtrema(float64(float32(26.34))), + }, + }, + }, + }, + } +) + +func TestNewOTELSink(t *testing.T) { + for name, test := range map[string]struct { + wantErr string + opts *OTELSinkOpts + }{ + "failsWithEmptyLogger": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + opts: &OTELSinkOpts{ + Logger: nil, + Reader: metric.NewManualReader(), + Endpoint: "test.com", + }, + }, + "failsWithEmptyExporter": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + opts: &OTELSinkOpts{ + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: nil, + Endpoint: "test.com", + }, + }, + "failsWithInvalidEndpoint": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + opts: &OTELSinkOpts{ + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: metric.NewManualReader(), + Endpoint: "", + }, + }, + } { + t.Run(name, func(t *testing.T) { + sink, err := NewOTELSink(test.opts) + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.NotNil(t, sink) + }) + } +} + +func TestOTELSink(t *testing.T) { + // Manual reader outputs the aggregated metrics when reader.Collect is called. + reader := metric.NewManualReader() + + ctx := context.Background() + opts := &OTELSinkOpts{ + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: reader, + Endpoint: "test.com", + Ctx: ctx, + } + + sink, err := NewOTELSink(opts) + require.NoError(t, err) + + labels := []gometrics.Label{ + { + Name: "server.id", + Value: "test", + }, + } + + sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + + sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) + + sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) + + var collected metricdata.ResourceMetrics + err = reader.Collect(ctx, &collected) + require.NoError(t, err) + + // Validate resource + require.Equal(t, resource.NewSchemaless(), collected.Resource) + + // Validate metrics + for _, actual := range collected.ScopeMetrics[0].Metrics { + name := actual.Name + expected, ok := expectedMetrics[name] + require.True(t, ok, "metric key %s should be in expectedMetrics map", name) + isSameMetrics(t, expected, actual) + } +} + +// compareMetrics verifies if two metricdata.Metric objects are equal by ignoring the time component. +func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata.Metrics) { + require.Equal(t, expected.Name, actual.Name, "different .Name field") + require.Equal(t, expected.Description, actual.Description, "different .Description field") + require.Equal(t, expected.Unit, actual.Unit, "different .Unit field") + + switch expectedData := expected.Data.(type) { + case metricdata.Gauge[float64]: + actualData, ok := actual.Data.(metricdata.Gauge[float64]) + require.True(t, ok, "different metric types: expected metricdata.Gauge[float64]") + + isSameData(t, expectedData.DataPoints, actualData.DataPoints) + case metricdata.Sum[float64]: + actualData, ok := actual.Data.(metricdata.Sum[float64]) + require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") + + isSameData(t, expectedData.DataPoints, actualData.DataPoints) + case metricdata.Histogram[float64]: + actualData, ok := actual.Data.(metricdata.Histogram[float64]) + require.True(t, ok, "different metric types: expected metricdata.Histogram[float64]") + + isSameHistogramData(t, expectedData.DataPoints, actualData.DataPoints) + } +} + +func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { + require.Equal(t, len(expected), len(actual), "different datapoints length") + + // Only verify the value and the attributes. + for i, dp := range expected { + currActual := actual[i] + require.Equal(t, dp.Value, currActual.Value, "different datapoint value") + require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") + + iter := dp.Attributes.Iter() + for iter.Next() { + attr := iter.Attribute() + require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") + } + } +} + +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { + require.Equal(t, len(expected), len(actual), "different histogram datapoint length") + + // Only verify the value and the attributes. + for i, dp := range expected { + currActual := actual[i] + require.Equal(t, dp.Sum, currActual.Sum, "different histogram datapoint .Sum value") + require.Equal(t, dp.Max, currActual.Max, "different histogram datapoint .Max value") + require.Equal(t, dp.Min, currActual.Min, "different histogram datapoint .Min value") + require.Equal(t, dp.Count, currActual.Count, "different histogram datapoint .Count value") + + require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") + + iter := dp.Attributes.Iter() + for iter.Next() { + attr := iter.Attribute() + require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") + } + } +} From 05c418bef5f8025dd75bdc77337595cf0396a591 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 09:35:10 -0400 Subject: [PATCH 26/50] Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests. --- agent/hcp/telemetry/otel_sink.go | 21 +++++++++------------ agent/hcp/telemetry/otel_sink_test.go | 25 +++++++------------------ 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index deeccec0158..ef926dc32d4 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -19,8 +19,6 @@ import ( "go.opentelemetry.io/otel/sdk/resource" ) -const defaultExportInterval = 10 * time.Second - // Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. var gauges sync.Map = sync.Map{} @@ -30,11 +28,9 @@ type GaugeValue struct { } type OTELSinkOpts struct { - Endpoint string - Reader otelsdk.Reader - Logger hclog.Logger - ExportInterval time.Duration - Ctx context.Context + Reader otelsdk.Reader + Logger hclog.Logger + Ctx context.Context } type OTELSink struct { @@ -51,15 +47,16 @@ type OTELSink struct { histogramInstruments sync.Map } -func NewOTELReader(client client.MetricsClient) otelsdk.Reader { - exp := &OTELExporter{ - client: client, +func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { + exporter := &OTELExporter{ + client: client, + endpoint: endpoint, } - return otelsdk.NewPeriodicReader(exp, otelsdk.WithInterval(defaultExportInterval)) + return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { - if opts.Logger == nil || opts.Reader == nil || opts.Endpoint == "" || opts.Ctx == nil { + if opts.Logger == nil || opts.Reader == nil || opts.Ctx == nil { return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts") } diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 440e5d3b1cf..5e1cfbc53b4 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -116,25 +116,15 @@ func TestNewOTELSink(t *testing.T) { "failsWithEmptyLogger": { wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", opts: &OTELSinkOpts{ - Logger: nil, - Reader: metric.NewManualReader(), - Endpoint: "test.com", + Logger: nil, + Reader: metric.NewManualReader(), }, }, "failsWithEmptyExporter": { wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", opts: &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: nil, - Endpoint: "test.com", - }, - }, - "failsWithInvalidEndpoint": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", - opts: &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: metric.NewManualReader(), - Endpoint: "", + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: nil, }, }, } { @@ -157,10 +147,9 @@ func TestOTELSink(t *testing.T) { ctx := context.Background() opts := &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: reader, - Endpoint: "test.com", - Ctx: ctx, + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: reader, + Ctx: ctx, } sink, err := NewOTELSink(opts) From 72ae2057384e3d25b90bf15ba6e39d20d4357bc0 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 14:37:46 -0400 Subject: [PATCH 27/50] Switch to mutex instead of sync.Map to avoid type assertion --- agent/hcp/telemetry/otel_sink.go | 109 +++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index ef926dc32d4..2fe208660bd 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -8,10 +8,10 @@ import ( "sync" "time" - gometrics "github.com/armon/go-metrics" "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/go-hclog" + gometrics "github.com/armon/go-metrics" "go.opentelemetry.io/otel/attribute" otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/instrument" @@ -20,11 +20,37 @@ import ( ) // Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. -var gauges sync.Map = sync.Map{} +var gauges *GlobalGaugeStore type GaugeValue struct { - Value float64 - Labels []attribute.KeyValue + Value float64 + Attributes []attribute.KeyValue +} + +type GlobalGaugeStore struct { + store map[string]*GaugeValue + mutex sync.Mutex +} + +// LoadAndDelete will read a Gauge value and delete it. +// Within the Gauge callbacks we delete the value once we have registed it with OTEL to ensure +// we only emit a Gauge value once. +func (g *GlobalGaugeStore) LoadAndDelete(key string) (*GaugeValue, bool) { + g.mutex.Lock() + defer g.mutex.Unlock() + + gauge, ok := g.store[key] + + delete(g.store, key) + + return gauge, ok +} + +func (g *GlobalGaugeStore) Store(key string, gauge *GaugeValue) { + g.mutex.Lock() + defer g.mutex.Unlock() + + g.store[key] = gauge } type OTELSinkOpts struct { @@ -42,9 +68,11 @@ type OTELSink struct { meter *otelmetric.Meter exportInterval time.Duration - gaugeInstruments sync.Map - counterInstruments sync.Map - histogramInstruments sync.Map + gaugeInstruments map[string]*instrument.Float64ObservableGauge + counterInstruments map[string]*instrument.Float64Counter + histogramInstruments map[string]*instrument.Float64Histogram + + mutex sync.Mutex } func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { @@ -65,11 +93,21 @@ func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") + // Init global gauge store. + gauges = &GlobalGaugeStore{ + store: make(map[string]*GaugeValue, 0), + mutex: sync.Mutex{}, + } + return &OTELSink{ - meterProvider: meterProvider, - meter: &meter, - spaceReplacer: strings.NewReplacer(" ", "_"), - ctx: opts.Ctx, + meterProvider: meterProvider, + meter: &meter, + spaceReplacer: strings.NewReplacer(" ", "_"), + ctx: opts.Ctx, + mutex: sync.Mutex{}, + gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), + counterInstruments: make(map[string]*instrument.Float64Counter, 0), + histogramInstruments: make(map[string]*instrument.Float64Histogram, 0), }, nil } @@ -95,37 +133,41 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // Set value in global Gauge store. g := &GaugeValue{ - Value: float64(val), - Labels: toAttributes(labels), + Value: float64(val), + Attributes: toAttributes(labels), } gauges.Store(k, g) - // If instrument does not exist, create it and register callback to get last value in global Gauge store. - if _, ok := o.gaugeInstruments.Load(k); !ok { + o.mutex.Lock() + defer o.mutex.Unlock() + + // If instrument does not exist, create it and register callback to emit last value in global Gauge store. + if _, ok := o.gaugeInstruments[k]; !ok { inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - o.gaugeInstruments.Store(k, &inst) + o.gaugeInstruments[k] = &inst } } // AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key, labels) - var inst *instrument.Float64Histogram - v, ok := o.histogramInstruments.Load(k) + + o.mutex.Lock() + defer o.mutex.Unlock() + + inst, ok := o.histogramInstruments[k] if !ok { - v, err := (*o.meter).Float64Histogram(k) + histogram, err := (*o.meter).Float64Histogram(k) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - inst = &v - o.histogramInstruments.Store(k, v) - } else { - inst = v.(*instrument.Float64Histogram) + inst = &histogram + o.histogramInstruments[k] = inst } attrs := toAttributes(labels) @@ -135,18 +177,20 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key, labels) - var inst *instrument.Float64Counter - v, ok := o.histogramInstruments.Load(k) + + o.mutex.Lock() + defer o.mutex.Unlock() + + inst, ok := o.counterInstruments[k] if !ok { - v, err := (*o.meter).Float64Counter(k) + counter, err := (*o.meter).Float64Counter(k) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - inst = &v - o.histogramInstruments.Store(k, v) - } else { - inst = v.(*instrument.Float64Counter) + + inst = &counter + o.counterInstruments[k] = inst } attrs := toAttributes(labels) @@ -185,9 +229,8 @@ func gaugeCallback(key string) instrument.Float64Callback { // Closures keep a reference to the key string, so we don't have to worry about it. // These get garbage collected as the closure completes. return func(_ context.Context, obs instrument.Float64Observer) error { - if val, ok := gauges.LoadAndDelete(key); ok { - v := val.(*GaugeValue) - obs.Observe(v.Value, v.Labels...) + if gauge, ok := gauges.LoadAndDelete(key); ok { + obs.Observe(gauge.Value, gauge.Attributes...) } return nil } From 83fba0a9bc530e6976ecb2f5f492af5a7c74f6fb Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 15:24:04 -0400 Subject: [PATCH 28/50] Add gauge store --- agent/hcp/telemetry/gauge_store.go | 75 +++++++++++++++++++++++++ agent/hcp/telemetry/gauge_store_test.go | 61 ++++++++++++++++++++ agent/hcp/telemetry/otel_sink.go | 57 +++---------------- 3 files changed, 144 insertions(+), 49 deletions(-) create mode 100644 agent/hcp/telemetry/gauge_store.go create mode 100644 agent/hcp/telemetry/gauge_store_test.go diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go new file mode 100644 index 00000000000..082a7edd78b --- /dev/null +++ b/agent/hcp/telemetry/gauge_store.go @@ -0,0 +1,75 @@ +package telemetry + +import ( + "sync" + + "go.opentelemetry.io/otel/attribute" +) + +// Global store for Gauge values as workaround for async OpenTelemetry Gauge instrument. +var once sync.Once +var globalGauges *gaugeStore + +type gaugeStore struct { + store map[string]*gaugeValue + mutex sync.Mutex +} + +// gaugeValues hold both the float64 value and the labels. +type gaugeValue struct { + Value float64 + Attributes []attribute.KeyValue +} + +// initGaugeStore initializes the global gauge store. +// initGaugeStore not thread-safe so it must only be init once. +func initGaugeStore() { + // Avoid double initialization with sync.Once + once.Do(func() { + if globalGauges != nil { + return + } + + globalGauges = &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + mutex: sync.Mutex{}, + } + }) +} + +// LoadAndDelete will read a Gauge value and delete it. +// Within the OTEL Gauge callbacks we must delete the value once we have read it +// to ensure we only emit a Gauge value once, as the callbacks continue to execute every collection cycle. +// The store must be initialized before using this method. +func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { + if g == nil { + return nil, false + } + + g.mutex.Lock() + defer g.mutex.Unlock() + + gauge, ok := g.store[key] + + delete(g.store, key) + + return gauge, ok +} + +// Store adds a gaugeValue to the global gauge store. +// The store must be initialized before using this method. +func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { + if g == nil { + return + } + + g.mutex.Lock() + defer g.mutex.Unlock() + + gv := &gaugeValue{ + Value: float64(value), + Attributes: labels, + } + + g.store[key] = gv +} diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go new file mode 100644 index 00000000000..8fd7aa9dfcc --- /dev/null +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -0,0 +1,61 @@ +package telemetry + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" +) + +func TestGaugeStore(t *testing.T) { + initGaugeStore() + + attributes := []attribute.KeyValue{ + { + Key: attribute.Key("test_key"), + Value: attribute.StringValue("test_value"), + }, + } + + globalGauges.Store("test", float64(1.23), attributes) + + // Should store a new gauge. + val, ok := globalGauges.LoadAndDelete("test") + require.True(t, ok) + require.Equal(t, val.Value, float64(1.23)) + require.Equal(t, val.Attributes, attributes) + + // Gauge with key "test" have been deleted. + val, ok = globalGauges.LoadAndDelete("test") + require.False(t, ok) + + globalGauges.Store("duplicate", float64(1.5), nil) + globalGauges.Store("duplicate", float64(6.7), nil) + + // Gauge with key "duplicate" should hold the latest (last seen) value. + val, ok = globalGauges.LoadAndDelete("duplicate") + require.True(t, ok) + require.Equal(t, val.Value, float64(6.7)) + + // Reset store + globalGauges = nil +} + +func TestGaugeStore_WithoutInit(t *testing.T) { + attributes := []attribute.KeyValue{ + { + Key: attribute.Key("test_key"), + Value: attribute.StringValue("test_value"), + }, + } + + // Should not store since store not init. + globalGauges.Store("test", float64(1.23), attributes) + val, ok := globalGauges.LoadAndDelete("test") + + require.False(t, ok) + require.Nil(t, val) + + // Reset store + globalGauges = nil +} diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 2fe208660bd..eaae26704ae 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -19,40 +19,6 @@ import ( "go.opentelemetry.io/otel/sdk/resource" ) -// Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. -var gauges *GlobalGaugeStore - -type GaugeValue struct { - Value float64 - Attributes []attribute.KeyValue -} - -type GlobalGaugeStore struct { - store map[string]*GaugeValue - mutex sync.Mutex -} - -// LoadAndDelete will read a Gauge value and delete it. -// Within the Gauge callbacks we delete the value once we have registed it with OTEL to ensure -// we only emit a Gauge value once. -func (g *GlobalGaugeStore) LoadAndDelete(key string) (*GaugeValue, bool) { - g.mutex.Lock() - defer g.mutex.Unlock() - - gauge, ok := g.store[key] - - delete(g.store, key) - - return gauge, ok -} - -func (g *GlobalGaugeStore) Store(key string, gauge *GaugeValue) { - g.mutex.Lock() - defer g.mutex.Unlock() - - g.store[key] = gauge -} - type OTELSinkOpts struct { Reader otelsdk.Reader Logger hclog.Logger @@ -64,9 +30,8 @@ type OTELSink struct { logger hclog.Logger ctx context.Context - meterProvider *otelsdk.MeterProvider - meter *otelmetric.Meter - exportInterval time.Duration + meterProvider *otelsdk.MeterProvider + meter *otelmetric.Meter gaugeInstruments map[string]*instrument.Float64ObservableGauge counterInstruments map[string]*instrument.Float64Counter @@ -94,16 +59,14 @@ func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") // Init global gauge store. - gauges = &GlobalGaugeStore{ - store: make(map[string]*GaugeValue, 0), - mutex: sync.Mutex{}, - } + initGaugeStore() return &OTELSink{ - meterProvider: meterProvider, - meter: &meter, spaceReplacer: strings.NewReplacer(" ", "_"), + logger: opts.Logger.Named("otel_sink"), ctx: opts.Ctx, + meterProvider: meterProvider, + meter: &meter, mutex: sync.Mutex{}, gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), counterInstruments: make(map[string]*instrument.Float64Counter, 0), @@ -132,11 +95,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr k := o.flattenKey(key, labels) // Set value in global Gauge store. - g := &GaugeValue{ - Value: float64(val), - Attributes: toAttributes(labels), - } - gauges.Store(k, g) + globalGauges.Store(k, float64(val), toAttributes(labels)) o.mutex.Lock() defer o.mutex.Unlock() @@ -229,7 +188,7 @@ func gaugeCallback(key string) instrument.Float64Callback { // Closures keep a reference to the key string, so we don't have to worry about it. // These get garbage collected as the closure completes. return func(_ context.Context, obs instrument.Float64Observer) error { - if gauge, ok := gauges.LoadAndDelete(key); ok { + if gauge, ok := globalGauges.LoadAndDelete(key); ok { obs.Observe(gauge.Value, gauge.Attributes...) } return nil From 520ba9f52a1a102286ec635a79d1c2e3922fe917 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 15:30:47 -0400 Subject: [PATCH 29/50] Clarify comments --- agent/hcp/telemetry/otel_sink.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index eaae26704ae..72e019d160e 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -169,6 +169,7 @@ func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { return buf.String() } +// toAttributes converts go metrics Labels into OTEL format []attributes.KeyValue func toAttributes(labels []gometrics.Label) []attribute.KeyValue { if len(labels) == 0 { return nil @@ -184,9 +185,10 @@ func toAttributes(labels []gometrics.Label) []attribute.KeyValue { return attrs } +// gaugeCallback returns a callback which gets called when metrics are collected for export. +// the callback obtains the gauge value from the global gauges. func gaugeCallback(key string) instrument.Float64Callback { - // Closures keep a reference to the key string, so we don't have to worry about it. - // These get garbage collected as the closure completes. + // Closures keep a reference to the key string, that get garbage collected when code completes. return func(_ context.Context, obs instrument.Float64Observer) error { if gauge, ok := globalGauges.LoadAndDelete(key); ok { obs.Observe(gauge.Value, gauge.Attributes...) From 190ef2a5f97b80be4dbc5e276cdb6d4ed8afe940 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 18:00:30 -0400 Subject: [PATCH 30/50] return concrete sink type --- agent/hcp/telemetry/otel_sink.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 72e019d160e..05711d14a3e 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -48,7 +48,7 @@ func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } -func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { +func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { if opts.Logger == nil || opts.Reader == nil || opts.Ctx == nil { return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts") } From 659a7ddcdb4e4a43f8c000b6f6e4a292d35c8074 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 23:25:27 -0400 Subject: [PATCH 31/50] Fix lint errors --- agent/hcp/telemetry/gauge_store.go | 2 +- agent/hcp/telemetry/gauge_store_test.go | 1 + agent/hcp/telemetry/otel_sink.go | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 082a7edd78b..b070534c389 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -67,7 +67,7 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu defer g.mutex.Unlock() gv := &gaugeValue{ - Value: float64(value), + Value: value, Attributes: labels, } diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 8fd7aa9dfcc..62adf3c7af3 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -28,6 +28,7 @@ func TestGaugeStore(t *testing.T) { // Gauge with key "test" have been deleted. val, ok = globalGauges.LoadAndDelete("test") require.False(t, ok) + require.Nil(t, val) globalGauges.Store("duplicate", float64(1.5), nil) globalGauges.Store("duplicate", float64(6.7), nil) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 05711d14a3e..d7b8071ab0e 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -92,7 +92,7 @@ func (o *OTELSink) IncrCounter(key []string, val float32) { // AddSampleWithLabels emits a Consul gauge metric that gets // registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) // Set value in global Gauge store. globalGauges.Store(k, float64(val), toAttributes(labels)) @@ -113,7 +113,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) o.mutex.Lock() defer o.mutex.Unlock() @@ -135,7 +135,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) o.mutex.Lock() defer o.mutex.Unlock() @@ -160,7 +160,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom func (o *OTELSink) EmitKey(key []string, val float32) {} // flattenKey key along with its labels. -func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { +func (o *OTELSink) flattenKey(parts []string) string { buf := &bytes.Buffer{} joined := strings.Join(parts, ".") From 9659a87634ea7375a941f5a29a60f0a2a905678a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 27 Apr 2023 15:58:56 -0400 Subject: [PATCH 32/50] Move gauge store to be within sink --- agent/hcp/telemetry/gauge_store.go | 42 +++++++++---------------- agent/hcp/telemetry/gauge_store_test.go | 28 +++-------------- agent/hcp/telemetry/otel_sink.go | 25 ++++++--------- 3 files changed, 28 insertions(+), 67 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index b070534c389..7aa7edb2344 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -1,15 +1,13 @@ package telemetry import ( + "context" "sync" "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric/instrument" ) -// Global store for Gauge values as workaround for async OpenTelemetry Gauge instrument. -var once sync.Once -var globalGauges *gaugeStore - type gaugeStore struct { store map[string]*gaugeValue mutex sync.Mutex @@ -21,31 +19,11 @@ type gaugeValue struct { Attributes []attribute.KeyValue } -// initGaugeStore initializes the global gauge store. -// initGaugeStore not thread-safe so it must only be init once. -func initGaugeStore() { - // Avoid double initialization with sync.Once - once.Do(func() { - if globalGauges != nil { - return - } - - globalGauges = &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - mutex: sync.Mutex{}, - } - }) -} - // LoadAndDelete will read a Gauge value and delete it. // Within the OTEL Gauge callbacks we must delete the value once we have read it // to ensure we only emit a Gauge value once, as the callbacks continue to execute every collection cycle. // The store must be initialized before using this method. func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { - if g == nil { - return nil, false - } - g.mutex.Lock() defer g.mutex.Unlock() @@ -59,10 +37,6 @@ func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { // Store adds a gaugeValue to the global gauge store. // The store must be initialized before using this method. func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { - if g == nil { - return - } - g.mutex.Lock() defer g.mutex.Unlock() @@ -73,3 +47,15 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu g.store[key] = gv } + +// gaugeCallback returns a callback which gets called when metrics are collected for export. +// the callback obtains the gauge value from the global gauges. +func (g *gaugeStore) gaugeCallback(key string) instrument.Float64Callback { + // Closures keep a reference to the key string, that get garbage collected when code completes. + return func(_ context.Context, obs instrument.Float64Observer) error { + if gauge, ok := g.LoadAndDelete(key); ok { + obs.Observe(gauge.Value, gauge.Attributes...) + } + return nil + } +} diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 62adf3c7af3..0703faedef8 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -1,6 +1,7 @@ package telemetry import ( + "sync" "testing" "github.com/stretchr/testify/require" @@ -8,7 +9,10 @@ import ( ) func TestGaugeStore(t *testing.T) { - initGaugeStore() + globalGauges := &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + mutex: sync.Mutex{}, + } attributes := []attribute.KeyValue{ { @@ -37,26 +41,4 @@ func TestGaugeStore(t *testing.T) { val, ok = globalGauges.LoadAndDelete("duplicate") require.True(t, ok) require.Equal(t, val.Value, float64(6.7)) - - // Reset store - globalGauges = nil -} - -func TestGaugeStore_WithoutInit(t *testing.T) { - attributes := []attribute.KeyValue{ - { - Key: attribute.Key("test_key"), - Value: attribute.StringValue("test_value"), - }, - } - - // Should not store since store not init. - globalGauges.Store("test", float64(1.23), attributes) - val, ok := globalGauges.LoadAndDelete("test") - - require.False(t, ok) - require.Nil(t, val) - - // Reset store - globalGauges = nil } diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index d7b8071ab0e..fbda518a6e0 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -33,6 +33,8 @@ type OTELSink struct { meterProvider *otelsdk.MeterProvider meter *otelmetric.Meter + gaugeStore *gaugeStore + gaugeInstruments map[string]*instrument.Float64ObservableGauge counterInstruments map[string]*instrument.Float64Counter histogramInstruments map[string]*instrument.Float64Histogram @@ -58,8 +60,10 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") - // Init global gauge store. - initGaugeStore() + gs := &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + mutex: sync.Mutex{}, + } return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), @@ -68,6 +72,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider: meterProvider, meter: &meter, mutex: sync.Mutex{}, + gaugeStore: gs, gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), counterInstruments: make(map[string]*instrument.Float64Counter, 0), histogramInstruments: make(map[string]*instrument.Float64Histogram, 0), @@ -95,14 +100,14 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr k := o.flattenKey(key) // Set value in global Gauge store. - globalGauges.Store(k, float64(val), toAttributes(labels)) + o.gaugeStore.Store(k, float64(val), toAttributes(labels)) o.mutex.Lock() defer o.mutex.Unlock() // If instrument does not exist, create it and register callback to emit last value in global Gauge store. if _, ok := o.gaugeInstruments[k]; !ok { - inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(gaugeCallback(k))) + inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return @@ -184,15 +189,3 @@ func toAttributes(labels []gometrics.Label) []attribute.KeyValue { return attrs } - -// gaugeCallback returns a callback which gets called when metrics are collected for export. -// the callback obtains the gauge value from the global gauges. -func gaugeCallback(key string) instrument.Float64Callback { - // Closures keep a reference to the key string, that get garbage collected when code completes. - return func(_ context.Context, obs instrument.Float64Observer) error { - if gauge, ok := globalGauges.LoadAndDelete(key); ok { - obs.Observe(gauge.Value, gauge.Attributes...) - } - return nil - } -} From 80e01c7200998b828eb8f36605f6cd64f7902061 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 14:30:44 -0400 Subject: [PATCH 33/50] Use context.TODO,rebase and clenaup opts handling --- agent/hcp/telemetry/otel_sink.go | 20 +++++++++----------- agent/hcp/telemetry/otel_sink_test.go | 7 +++---- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index fbda518a6e0..b0fd58010d2 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -22,13 +22,11 @@ import ( type OTELSinkOpts struct { Reader otelsdk.Reader Logger hclog.Logger - Ctx context.Context } type OTELSink struct { spaceReplacer *strings.Replacer logger hclog.Logger - ctx context.Context meterProvider *otelsdk.MeterProvider meter *otelmetric.Meter @@ -43,16 +41,17 @@ type OTELSink struct { } func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { - exporter := &OTELExporter{ - client: client, - endpoint: endpoint, - } + exporter := NewOTELExporter(client, endpoint) return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { - if opts.Logger == nil || opts.Reader == nil || opts.Ctx == nil { - return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts") + if opts.Logger == nil { + return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Logger") + } + + if opts.Reader == nil { + return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Reader") } // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. @@ -68,7 +67,6 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), logger: opts.Logger.Named("otel_sink"), - ctx: opts.Ctx, meterProvider: meterProvider, meter: &meter, mutex: sync.Mutex{}, @@ -135,7 +133,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet } attrs := toAttributes(labels) - (*inst).Record(o.ctx, float64(val), attrs...) + (*inst).Record(context.TODO(), float64(val), attrs...) } // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. @@ -158,7 +156,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom } attrs := toAttributes(labels) - (*inst).Add(o.ctx, float64(val), attrs...) + (*inst).Add(context.TODO(), float64(val), attrs...) } // EmitKey unsupported. diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 5e1cfbc53b4..4f631e678ca 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -114,14 +114,14 @@ func TestNewOTELSink(t *testing.T) { opts *OTELSinkOpts }{ "failsWithEmptyLogger": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Logger", opts: &OTELSinkOpts{ Logger: nil, Reader: metric.NewManualReader(), }, }, - "failsWithEmptyExporter": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + "failsWithEmptyReader": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Reader", opts: &OTELSinkOpts{ Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: nil, @@ -149,7 +149,6 @@ func TestOTELSink(t *testing.T) { opts := &OTELSinkOpts{ Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: reader, - Ctx: ctx, } sink, err := NewOTELSink(opts) From 7cbed580ff98d74527c95429301cd62762aec8dd Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:28:24 -0400 Subject: [PATCH 34/50] Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1 --- agent/hcp/telemetry/otel_sink_test.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 4f631e678ca..92a1bbc6316 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -77,8 +77,8 @@ var ( Name: "consul.raft.leader.lastContact", Description: "", Unit: "", - Data: metricdata.Histogram[float64]{ - DataPoints: []metricdata.HistogramDataPoint[float64]{ + Data: metricdata.Histogram{ + DataPoints: []metricdata.HistogramDataPoint{ { Attributes: *attribute.EmptySet(), Count: 1, @@ -93,8 +93,8 @@ var ( Name: "consul.raft.commitTime", Description: "", Unit: "", - Data: metricdata.Histogram[float64]{ - DataPoints: []metricdata.HistogramDataPoint[float64]{ + Data: metricdata.Histogram{ + DataPoints: []metricdata.HistogramDataPoint{ { Attributes: attrs, Count: 1, @@ -203,9 +203,9 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") isSameData(t, expectedData.DataPoints, actualData.DataPoints) - case metricdata.Histogram[float64]: - actualData, ok := actual.Data.(metricdata.Histogram[float64]) - require.True(t, ok, "different metric types: expected metricdata.Histogram[float64]") + case metricdata.Histogram: + actualData, ok := actual.Data.(metricdata.Histogram) + require.True(t, ok, "different metric types: expected metricdata.Histogram") isSameHistogramData(t, expectedData.DataPoints, actualData.DataPoints) } @@ -228,7 +228,7 @@ func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual [ } } -func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint, actual []metricdata.HistogramDataPoint) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Only verify the value and the attributes. From 91fcfc7639d3329b8957472d4eaacf43121a4c7f Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:39:53 -0400 Subject: [PATCH 35/50] Fix imports --- agent/hcp/telemetry/otel_sink.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index b0fd58010d2..124a5a1f5af 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -9,9 +9,9 @@ import ( "time" "github.com/hashicorp/consul/agent/hcp/client" - "github.com/hashicorp/go-hclog" gometrics "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" "go.opentelemetry.io/otel/attribute" otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/instrument" From 48d69e3477882db49d500211a0c912149e8b8809 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 09:58:29 -0400 Subject: [PATCH 36/50] Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx --- agent/hcp/telemetry/gauge_store.go | 8 ++-- agent/hcp/telemetry/gauge_store_test.go | 2 - agent/hcp/telemetry/otel_sink.go | 49 ++++++++++++------------- agent/hcp/telemetry/otel_sink_test.go | 29 +++++++-------- go.mod | 2 +- 5 files changed, 42 insertions(+), 48 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 7aa7edb2344..972c2683263 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -5,7 +5,7 @@ import ( "sync" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/metric/instrument" + "go.opentelemetry.io/otel/metric" ) type gaugeStore struct { @@ -50,11 +50,11 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu // gaugeCallback returns a callback which gets called when metrics are collected for export. // the callback obtains the gauge value from the global gauges. -func (g *gaugeStore) gaugeCallback(key string) instrument.Float64Callback { +func (g *gaugeStore) gaugeCallback(key string) metric.Float64Callback { // Closures keep a reference to the key string, that get garbage collected when code completes. - return func(_ context.Context, obs instrument.Float64Observer) error { + return func(_ context.Context, obs metric.Float64Observer) error { if gauge, ok := g.LoadAndDelete(key); ok { - obs.Observe(gauge.Value, gauge.Attributes...) + obs.Observe(gauge.Value, metric.WithAttributes(gauge.Attributes...)) } return nil } diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 0703faedef8..d4c4eb38d00 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -1,7 +1,6 @@ package telemetry import ( - "sync" "testing" "github.com/stretchr/testify/require" @@ -11,7 +10,6 @@ import ( func TestGaugeStore(t *testing.T) { globalGauges := &gaugeStore{ store: make(map[string]*gaugeValue, 0), - mutex: sync.Mutex{}, } attributes := []attribute.KeyValue{ diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 124a5a1f5af..ac184f45e86 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -4,24 +4,25 @@ import ( "bytes" "context" "fmt" + "net/url" "strings" "sync" "time" - "github.com/hashicorp/consul/agent/hcp/client" - gometrics "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" otelmetric "go.opentelemetry.io/otel/metric" - "go.opentelemetry.io/otel/metric/instrument" otelsdk "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" + + "github.com/hashicorp/consul/agent/hcp/client" ) type OTELSinkOpts struct { Reader otelsdk.Reader - Logger hclog.Logger + Ctx context.Context } type OTELSink struct { @@ -33,25 +34,25 @@ type OTELSink struct { gaugeStore *gaugeStore - gaugeInstruments map[string]*instrument.Float64ObservableGauge - counterInstruments map[string]*instrument.Float64Counter - histogramInstruments map[string]*instrument.Float64Histogram + gaugeInstruments map[string]metric.Float64ObservableGauge + counterInstruments map[string]metric.Float64Counter + histogramInstruments map[string]metric.Float64Histogram mutex sync.Mutex } -func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { - exporter := NewOTELExporter(client, endpoint) +func NewOTELReader(client client.MetricsClient, url url.URL, exportInterval time.Duration) otelsdk.Reader { + exporter := NewOTELExporter(client, url) return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { - if opts.Logger == nil { - return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Logger") + if opts.Reader == nil { + return nil, fmt.Errorf("ferror: provide valid reader") } - if opts.Reader == nil { - return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Reader") + if opts.Ctx == nil { + return nil, fmt.Errorf("ferror: provide valid context") } // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. @@ -61,19 +62,17 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { gs := &gaugeStore{ store: make(map[string]*gaugeValue, 0), - mutex: sync.Mutex{}, } return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), - logger: opts.Logger.Named("otel_sink"), + logger: hclog.FromContext(opts.Ctx).Named("otel_sink"), meterProvider: meterProvider, meter: &meter, - mutex: sync.Mutex{}, gaugeStore: gs, - gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), - counterInstruments: make(map[string]*instrument.Float64Counter, 0), - histogramInstruments: make(map[string]*instrument.Float64Histogram, 0), + gaugeInstruments: make(map[string]metric.Float64ObservableGauge, 0), + counterInstruments: make(map[string]metric.Float64Counter, 0), + histogramInstruments: make(map[string]metric.Float64Histogram, 0), }, nil } @@ -105,12 +104,12 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // If instrument does not exist, create it and register callback to emit last value in global Gauge store. if _, ok := o.gaugeInstruments[k]; !ok { - inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) + inst, err := (*o.meter).Float64ObservableGauge(k, metric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - o.gaugeInstruments[k] = &inst + o.gaugeInstruments[k] = inst } } @@ -128,12 +127,12 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet o.logger.Error("Failed to emit gauge: %w", err) return } - inst = &histogram + inst = histogram o.histogramInstruments[k] = inst } attrs := toAttributes(labels) - (*inst).Record(context.TODO(), float64(val), attrs...) + inst.Record(context.TODO(), float64(val), metric.WithAttributes(attrs...)) } // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. @@ -151,12 +150,12 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom return } - inst = &counter + inst = counter o.counterInstruments[k] = inst } attrs := toAttributes(labels) - (*inst).Add(context.TODO(), float64(val), attrs...) + inst.Add(context.TODO(), float64(val), metric.WithAttributes(attrs...)) } // EmitKey unsupported. diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 92a1bbc6316..252a53d033a 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -2,11 +2,9 @@ package telemetry import ( "context" - "io" "testing" gometrics "github.com/armon/go-metrics" - "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/metric" @@ -20,7 +18,7 @@ var ( Value: attribute.StringValue("test"), }) - expectedMetrics = map[string]metricdata.Metrics{ + expectedSinkMetrics = map[string]metricdata.Metrics{ "consul.raft.leader": { Name: "consul.raft.leader", Description: "", @@ -77,8 +75,8 @@ var ( Name: "consul.raft.leader.lastContact", Description: "", Unit: "", - Data: metricdata.Histogram{ - DataPoints: []metricdata.HistogramDataPoint{ + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ { Attributes: *attribute.EmptySet(), Count: 1, @@ -93,8 +91,8 @@ var ( Name: "consul.raft.commitTime", Description: "", Unit: "", - Data: metricdata.Histogram{ - DataPoints: []metricdata.HistogramDataPoint{ + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ { Attributes: attrs, Count: 1, @@ -114,17 +112,16 @@ func TestNewOTELSink(t *testing.T) { opts *OTELSinkOpts }{ "failsWithEmptyLogger": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Logger", + wantErr: "ferror: provide valid context", opts: &OTELSinkOpts{ - Logger: nil, Reader: metric.NewManualReader(), }, }, "failsWithEmptyReader": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Reader", + wantErr: "ferror: provide valid reader", opts: &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: nil, + Ctx: context.Background(), }, }, } { @@ -147,8 +144,8 @@ func TestOTELSink(t *testing.T) { ctx := context.Background() opts := &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: reader, + Ctx: context.Background(), } sink, err := NewOTELSink(opts) @@ -180,7 +177,7 @@ func TestOTELSink(t *testing.T) { // Validate metrics for _, actual := range collected.ScopeMetrics[0].Metrics { name := actual.Name - expected, ok := expectedMetrics[name] + expected, ok := expectedSinkMetrics[name] require.True(t, ok, "metric key %s should be in expectedMetrics map", name) isSameMetrics(t, expected, actual) } @@ -203,8 +200,8 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") isSameData(t, expectedData.DataPoints, actualData.DataPoints) - case metricdata.Histogram: - actualData, ok := actual.Data.(metricdata.Histogram) + case metricdata.Histogram[float64]: + actualData, ok := actual.Data.(metricdata.Histogram[float64]) require.True(t, ok, "different metric types: expected metricdata.Histogram") isSameHistogramData(t, expectedData.DataPoints, actualData.DataPoints) @@ -228,7 +225,7 @@ func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual [ } } -func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint, actual []metricdata.HistogramDataPoint) { +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Only verify the value and the attributes. diff --git a/go.mod b/go.mod index ffafa515729..a079e511b5d 100644 --- a/go.mod +++ b/go.mod @@ -97,6 +97,7 @@ require ( github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 go.opentelemetry.io/otel v1.15.1 + go.opentelemetry.io/otel/metric v0.38.1 go.opentelemetry.io/otel/sdk v1.15.1 go.opentelemetry.io/otel/sdk/metric v0.38.1 go.opentelemetry.io/proto/otlp v0.19.0 @@ -232,7 +233,6 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/metric v0.38.1 // indirect go.opentelemetry.io/otel/trace v1.15.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect From 563330e35558b5fefc1435d113b802cb5897e7c5 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 10:36:37 -0400 Subject: [PATCH 37/50] Add lots of documentation to the OTELSink --- agent/hcp/telemetry/otel_sink.go | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index ac184f45e86..5bdeb1b96e3 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -25,27 +25,50 @@ type OTELSinkOpts struct { Ctx context.Context } +// OTELSink captures and aggregates telemetry data as per the OpenTelemetry (OTEL) specification. +// Metric data is exported in OpenTelemetry Protocol (OTLP) wire format. +// This should be used as a Go Metrics backend, as it implements the MetricsSink interface. type OTELSink struct { + // spaceReplacer cleans the flattened key by removing any spaces. spaceReplacer *strings.Replacer logger hclog.Logger + // meterProvider is an OTEL MeterProvider, the entrypoint to the OTEL Metrics SDK. + // It handles reading/export of aggregated metric data. + // It enables creation and usage of an OTEL Meter. meterProvider *otelsdk.MeterProvider - meter *otelmetric.Meter - gaugeStore *gaugeStore + // meter is an OTEL Meter, which enables the creation of OTEL instruments. + meter *otelmetric.Meter + // Instrument stores contain an OTEL Instrument per metric name () + // for each gauge, counter and histogram types. + // An instrument allows us to record a measurement for a particular metric, and continuously aggregates metrics. + // We lazy load the creation of these intruments until a metric is seen, and use them repeatedly to record measurements. gaugeInstruments map[string]metric.Float64ObservableGauge counterInstruments map[string]metric.Float64Counter histogramInstruments map[string]metric.Float64Histogram + // gaugeStore is required to hold last-seen values of gauges + // This is a workaround, as OTEL currently does not have synchronous gauge instruments. + // It only allows the registration of "callbacks", which obtain values when the callback is called. + // We must hold gauge values until the callback is called, when the measurement is exported, and can be removed. + gaugeStore *gaugeStore + mutex sync.Mutex } +// NewOTELReader returns a configured OTEL PeriodicReader to export metrics every X seconds. +// It configures the reader with a custom OTELExporter with a MetricsClient to transform and export +// metrics in OTLP format to an external url. func NewOTELReader(client client.MetricsClient, url url.URL, exportInterval time.Duration) otelsdk.Reader { exporter := NewOTELExporter(client, url) return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } +// NewOTELSink returns a sink which fits the Go Metrics MetricsSink interface. +// It sets up a MeterProvider and Meter, key pieces of the OTEL Metrics SDK which +// enable us to create OTEL Instruments to record measurements. func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { if opts.Reader == nil { return nil, fmt.Errorf("ferror: provide valid reader") @@ -55,7 +78,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { return nil, fmt.Errorf("ferror: provide valid context") } - // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. + // Setup OTEL Metrics SDK to aggregate, convert and export metrics. res := resource.NewSchemaless() meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") @@ -104,6 +127,9 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // If instrument does not exist, create it and register callback to emit last value in global Gauge store. if _, ok := o.gaugeInstruments[k]; !ok { + // The registration of a callback only needs to happen once, when the instrument is created. + // The callback will be triggered every export cycle for that metric. + // It must be explicitly de-registered to be removed (which we do not do), to ensure new gauge values are exported every cycle. inst, err := (*o.meter).Float64ObservableGauge(k, metric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) From b98481d860fd6ecb20befc75b8c61e3f7774a1f3 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 10:51:34 -0400 Subject: [PATCH 38/50] Fix gauge store comment and check ok --- agent/hcp/telemetry/gauge_store.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 972c2683263..07faf9fb1c8 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -8,26 +8,32 @@ import ( "go.opentelemetry.io/otel/metric" ) +// gaugeStore holds last seen Gauge values for a particular metric () in the store. +// OTEL does not currently have a synchronous Gauge instrument. Instead, it allows the registration of callbacks. +// The callbacks are called during export, where the Gauge value must be returned. +// This store is a workaround, which holds last seen Gauge values until the callback is called. type gaugeStore struct { store map[string]*gaugeValue mutex sync.Mutex } -// gaugeValues hold both the float64 value and the labels. +// gaugeValues are the last seen measurement for a Gauge metric, which contains a float64 value and labels. type gaugeValue struct { Value float64 Attributes []attribute.KeyValue } // LoadAndDelete will read a Gauge value and delete it. -// Within the OTEL Gauge callbacks we must delete the value once we have read it -// to ensure we only emit a Gauge value once, as the callbacks continue to execute every collection cycle. -// The store must be initialized before using this method. +// Once registered for a metric name, a Gauge callback will continue to execute every collection cycel. +// We must delete the value once we have read it, to avoid repeat values being sent. func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { g.mutex.Lock() defer g.mutex.Unlock() gauge, ok := g.store[key] + if !ok { + return nil, ok + } delete(g.store, key) @@ -35,7 +41,6 @@ func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { } // Store adds a gaugeValue to the global gauge store. -// The store must be initialized before using this method. func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { g.mutex.Lock() defer g.mutex.Unlock() @@ -49,7 +54,6 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu } // gaugeCallback returns a callback which gets called when metrics are collected for export. -// the callback obtains the gauge value from the global gauges. func (g *gaugeStore) gaugeCallback(key string) metric.Float64Callback { // Closures keep a reference to the key string, that get garbage collected when code completes. return func(_ context.Context, obs metric.Float64Observer) error { From 0162bb6139e5f5f5ed076f8bfb55d290cc121879 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 12:06:35 -0400 Subject: [PATCH 39/50] Add select and ctx.Done() check to gauge callback --- agent/hcp/telemetry/gauge_store.go | 13 +++++++++---- agent/hcp/telemetry/gauge_store_test.go | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 07faf9fb1c8..61feedc3c4e 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -56,10 +56,15 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu // gaugeCallback returns a callback which gets called when metrics are collected for export. func (g *gaugeStore) gaugeCallback(key string) metric.Float64Callback { // Closures keep a reference to the key string, that get garbage collected when code completes. - return func(_ context.Context, obs metric.Float64Observer) error { - if gauge, ok := g.LoadAndDelete(key); ok { - obs.Observe(gauge.Value, metric.WithAttributes(gauge.Attributes...)) + return func(ctx context.Context, obs metric.Float64Observer) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + if gauge, ok := g.LoadAndDelete(key); ok { + obs.Observe(gauge.Value, metric.WithAttributes(gauge.Attributes...)) + } + return nil } - return nil } } diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index d4c4eb38d00..b8c332c56c5 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -1,6 +1,7 @@ package telemetry import ( + "context" "testing" "github.com/stretchr/testify/require" @@ -40,3 +41,18 @@ func TestGaugeStore(t *testing.T) { require.True(t, ok) require.Equal(t, val.Value, float64(6.7)) } + +func TestGaugeCallback_Failure(t *testing.T) { + k := "consul.raft.apply" + globalGauges := &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + } + globalGauges.Store(k, 1.23, nil) + + cb := globalGauges.gaugeCallback(k) + ctx, cancel := context.WithCancel(context.Background()) + + cancel() + err := cb(ctx, nil) + require.ErrorIs(t, err, context.Canceled) +} From 899dbbaf72744fccd9fefd85565bf32bc63b8e33 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 12:13:22 -0400 Subject: [PATCH 40/50] use require.Equal for attributes --- agent/hcp/telemetry/otel_sink_test.go | 50 +++++++++++++++++---------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 252a53d033a..b9bf819251d 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -2,6 +2,7 @@ package telemetry import ( "context" + "sort" "testing" gometrics "github.com/armon/go-metrics" @@ -174,16 +175,22 @@ func TestOTELSink(t *testing.T) { // Validate resource require.Equal(t, resource.NewSchemaless(), collected.Resource) - // Validate metrics - for _, actual := range collected.ScopeMetrics[0].Metrics { + // Validate Metrics + require.NotEmpty(t, collected.ScopeMetrics) + actualMetrics := collected.ScopeMetrics[0].Metrics + require.Equal(t, len(actualMetrics), len(expectedSinkMetrics)) + + for _, actual := range actualMetrics { name := actual.Name - expected, ok := expectedSinkMetrics[name] + expected, ok := expectedSinkMetrics[actual.Name] require.True(t, ok, "metric key %s should be in expectedMetrics map", name) isSameMetrics(t, expected, actual) } } // compareMetrics verifies if two metricdata.Metric objects are equal by ignoring the time component. +// test metrics should not contain duplicate sums for histograms nor duplicate values for counters/gauges +// to ensure predictable order of data. func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata.Metrics) { require.Equal(t, expected.Name, actual.Name, "different .Name field") require.Equal(t, expected.Description, actual.Description, "different .Description field") @@ -211,23 +218,35 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different datapoints length") - // Only verify the value and the attributes. + // Sort for predictable data in order of lowest value. + // Test cases should not contain duplicate values. + sort.Slice(expected, func(i, j int) bool { + return expected[i].Value < expected[j].Value + }) + sort.Slice(actual, func(i, j int) bool { + return expected[i].Value < expected[j].Value + }) + + // Only verify the value and attributes. for i, dp := range expected { currActual := actual[i] require.Equal(t, dp.Value, currActual.Value, "different datapoint value") - require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") - - iter := dp.Attributes.Iter() - for iter.Next() { - attr := iter.Attribute() - require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") - } + require.Equal(t, dp.Attributes, currActual.Attributes, "different attributes") } } func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") + // Sort for predictable data in order of lowest sum. + // Test cases should not contain duplicate sums. + sort.Slice(expected, func(i, j int) bool { + return expected[i].Sum < expected[j].Sum + }) + sort.Slice(actual, func(i, j int) bool { + return expected[i].Sum < expected[j].Sum + }) + // Only verify the value and the attributes. for i, dp := range expected { currActual := actual[i] @@ -235,13 +254,6 @@ func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[ require.Equal(t, dp.Max, currActual.Max, "different histogram datapoint .Max value") require.Equal(t, dp.Min, currActual.Min, "different histogram datapoint .Min value") require.Equal(t, dp.Count, currActual.Count, "different histogram datapoint .Count value") - - require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") - - iter := dp.Attributes.Iter() - for iter.Next() { - attr := iter.Attribute() - require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") - } + require.Equal(t, dp.Attributes, currActual.Attributes, "different attributes") } } From 542d23a76e4ec6cac4a5deaed093ed661805fab9 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 13:16:39 -0400 Subject: [PATCH 41/50] Fixed import naming --- agent/hcp/telemetry/otel_sink.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 5bdeb1b96e3..4a9f40c3e4b 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -12,7 +12,6 @@ import ( gometrics "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/metric" otelmetric "go.opentelemetry.io/otel/metric" otelsdk "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" @@ -45,9 +44,9 @@ type OTELSink struct { // for each gauge, counter and histogram types. // An instrument allows us to record a measurement for a particular metric, and continuously aggregates metrics. // We lazy load the creation of these intruments until a metric is seen, and use them repeatedly to record measurements. - gaugeInstruments map[string]metric.Float64ObservableGauge - counterInstruments map[string]metric.Float64Counter - histogramInstruments map[string]metric.Float64Histogram + gaugeInstruments map[string]otelmetric.Float64ObservableGauge + counterInstruments map[string]otelmetric.Float64Counter + histogramInstruments map[string]otelmetric.Float64Histogram // gaugeStore is required to hold last-seen values of gauges // This is a workaround, as OTEL currently does not have synchronous gauge instruments. @@ -93,9 +92,9 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider: meterProvider, meter: &meter, gaugeStore: gs, - gaugeInstruments: make(map[string]metric.Float64ObservableGauge, 0), - counterInstruments: make(map[string]metric.Float64Counter, 0), - histogramInstruments: make(map[string]metric.Float64Histogram, 0), + gaugeInstruments: make(map[string]otelmetric.Float64ObservableGauge, 0), + counterInstruments: make(map[string]otelmetric.Float64Counter, 0), + histogramInstruments: make(map[string]otelmetric.Float64Histogram, 0), }, nil } @@ -130,7 +129,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // The registration of a callback only needs to happen once, when the instrument is created. // The callback will be triggered every export cycle for that metric. // It must be explicitly de-registered to be removed (which we do not do), to ensure new gauge values are exported every cycle. - inst, err := (*o.meter).Float64ObservableGauge(k, metric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) + inst, err := (*o.meter).Float64ObservableGauge(k, otelmetric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return @@ -158,7 +157,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet } attrs := toAttributes(labels) - inst.Record(context.TODO(), float64(val), metric.WithAttributes(attrs...)) + inst.Record(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...)) } // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. @@ -181,7 +180,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom } attrs := toAttributes(labels) - inst.Add(context.TODO(), float64(val), metric.WithAttributes(attrs...)) + inst.Add(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...)) } // EmitKey unsupported. From 2d8a18a9a513e2d506e38dcd9e1575fc9873a79f Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 15:06:25 -0400 Subject: [PATCH 42/50] Remove float64 calls and add a NewGaugeStore method --- agent/hcp/telemetry/gauge_store.go | 7 +++++++ agent/hcp/telemetry/gauge_store_test.go | 22 +++++++++++----------- agent/hcp/telemetry/otel_sink.go | 6 +----- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 61feedc3c4e..ce905dc72ca 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -23,6 +23,13 @@ type gaugeValue struct { Attributes []attribute.KeyValue } +// NewGaugeStore returns an initialized empty gaugeStore. +func NewGaugeStore() *gaugeStore { + return &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + } +} + // LoadAndDelete will read a Gauge value and delete it. // Once registered for a metric name, a Gauge callback will continue to execute every collection cycel. // We must delete the value once we have read it, to avoid repeat values being sent. diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index b8c332c56c5..34449e98d4f 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -9,9 +9,9 @@ import ( ) func TestGaugeStore(t *testing.T) { - globalGauges := &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - } + t.Parallel() + + globalGauges := NewGaugeStore() attributes := []attribute.KeyValue{ { @@ -20,12 +20,12 @@ func TestGaugeStore(t *testing.T) { }, } - globalGauges.Store("test", float64(1.23), attributes) + globalGauges.Store("test", 1.23, attributes) // Should store a new gauge. val, ok := globalGauges.LoadAndDelete("test") require.True(t, ok) - require.Equal(t, val.Value, float64(1.23)) + require.Equal(t, val.Value, 1.23) require.Equal(t, val.Attributes, attributes) // Gauge with key "test" have been deleted. @@ -33,20 +33,20 @@ func TestGaugeStore(t *testing.T) { require.False(t, ok) require.Nil(t, val) - globalGauges.Store("duplicate", float64(1.5), nil) - globalGauges.Store("duplicate", float64(6.7), nil) + globalGauges.Store("duplicate", 1.5, nil) + globalGauges.Store("duplicate", 6.7, nil) // Gauge with key "duplicate" should hold the latest (last seen) value. val, ok = globalGauges.LoadAndDelete("duplicate") require.True(t, ok) - require.Equal(t, val.Value, float64(6.7)) + require.Equal(t, val.Value, 6.7) } func TestGaugeCallback_Failure(t *testing.T) { + t.Parallel() + k := "consul.raft.apply" - globalGauges := &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - } + globalGauges := NewGaugeStore() globalGauges.Store(k, 1.23, nil) cb := globalGauges.gaugeCallback(k) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 4a9f40c3e4b..6f2f94583ff 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -82,16 +82,12 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") - gs := &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - } - return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), logger: hclog.FromContext(opts.Ctx).Named("otel_sink"), meterProvider: meterProvider, meter: &meter, - gaugeStore: gs, + gaugeStore: NewGaugeStore(), gaugeInstruments: make(map[string]otelmetric.Float64ObservableGauge, 0), counterInstruments: make(map[string]otelmetric.Float64Counter, 0), histogramInstruments: make(map[string]otelmetric.Float64Histogram, 0), From 5defe6a9ff93a48f11c1ec8bf3dbf5fc580f5f92 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 15:40:57 -0400 Subject: [PATCH 43/50] Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store --- agent/hcp/telemetry/gauge_store.go | 4 +-- agent/hcp/telemetry/gauge_store_test.go | 43 +++++++++++++++++++------ agent/hcp/telemetry/otel_sink.go | 2 +- agent/hcp/telemetry/otel_sink_test.go | 37 +++++++++++++++++---- 4 files changed, 66 insertions(+), 20 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index ce905dc72ca..76dfb780666 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -47,8 +47,8 @@ func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { return gauge, ok } -// Store adds a gaugeValue to the global gauge store. -func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { +// Set adds a gaugeValue to the global gauge store. +func (g *gaugeStore) Set(key string, value float64, labels []attribute.KeyValue) { g.mutex.Lock() defer g.mutex.Unlock() diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 34449e98d4f..167b2e06e1b 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -2,6 +2,7 @@ package telemetry import ( "context" + "sync" "testing" "github.com/stretchr/testify/require" @@ -11,7 +12,7 @@ import ( func TestGaugeStore(t *testing.T) { t.Parallel() - globalGauges := NewGaugeStore() + gaugeStore := NewGaugeStore() attributes := []attribute.KeyValue{ { @@ -20,24 +21,24 @@ func TestGaugeStore(t *testing.T) { }, } - globalGauges.Store("test", 1.23, attributes) + gaugeStore.Set("test", 1.23, attributes) // Should store a new gauge. - val, ok := globalGauges.LoadAndDelete("test") + val, ok := gaugeStore.LoadAndDelete("test") require.True(t, ok) require.Equal(t, val.Value, 1.23) require.Equal(t, val.Attributes, attributes) // Gauge with key "test" have been deleted. - val, ok = globalGauges.LoadAndDelete("test") + val, ok = gaugeStore.LoadAndDelete("test") require.False(t, ok) require.Nil(t, val) - globalGauges.Store("duplicate", 1.5, nil) - globalGauges.Store("duplicate", 6.7, nil) + gaugeStore.Set("duplicate", 1.5, nil) + gaugeStore.Set("duplicate", 6.7, nil) // Gauge with key "duplicate" should hold the latest (last seen) value. - val, ok = globalGauges.LoadAndDelete("duplicate") + val, ok = gaugeStore.LoadAndDelete("duplicate") require.True(t, ok) require.Equal(t, val.Value, 6.7) } @@ -46,13 +47,35 @@ func TestGaugeCallback_Failure(t *testing.T) { t.Parallel() k := "consul.raft.apply" - globalGauges := NewGaugeStore() - globalGauges.Store(k, 1.23, nil) + gaugeStore := NewGaugeStore() + gaugeStore.Set(k, 1.23, nil) - cb := globalGauges.gaugeCallback(k) + cb := gaugeStore.gaugeCallback(k) ctx, cancel := context.WithCancel(context.Background()) cancel() err := cb(ctx, nil) require.ErrorIs(t, err, context.Canceled) } + +// TestGaugeStore_Race induces a race condition. When run with go test -race, +// this test should pass if implementation is concurrency safe. +func TestGaugeStore_Race(t *testing.T) { + t.Parallel() + + gaugeStore := NewGaugeStore() + wg := &sync.WaitGroup{} + for k, v := range map[string]float64{"consul.raft.apply": 23.23, "consul.raft.test": 14.3} { + wg.Add(1) + go storeAndRetrieve(t, k, v, gaugeStore, wg) + } + wg.Wait() +} + +func storeAndRetrieve(t *testing.T, k string, v float64, gaugeStore *gaugeStore, wg *sync.WaitGroup) { + gaugeStore.Set(k, v, nil) + gv, ok := gaugeStore.LoadAndDelete(k) + require.True(t, ok) + require.Equal(t, v, gv.Value) + wg.Done() +} diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 6f2f94583ff..ec5e2d476b1 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -115,7 +115,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr k := o.flattenKey(key) // Set value in global Gauge store. - o.gaugeStore.Store(k, float64(val), toAttributes(labels)) + o.gaugeStore.Set(k, float64(val), toAttributes(labels)) o.mutex.Lock() defer o.mutex.Unlock() diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index b9bf819251d..6fd05514632 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -3,6 +3,7 @@ package telemetry import ( "context" "sort" + "sync" "testing" gometrics "github.com/armon/go-metrics" @@ -108,6 +109,7 @@ var ( ) func TestNewOTELSink(t *testing.T) { + t.Parallel() for name, test := range map[string]struct { wantErr string opts *OTELSinkOpts @@ -126,7 +128,9 @@ func TestNewOTELSink(t *testing.T) { }, }, } { + test := test t.Run(name, func(t *testing.T) { + t.Parallel() sink, err := NewOTELSink(test.opts) if test.wantErr != "" { require.Error(t, err) @@ -139,14 +143,18 @@ func TestNewOTELSink(t *testing.T) { } } +// TestOTELSink performs concurrent metric instrument operations on the sink. +// When run with go test -race, this test should pass if implementation is concurrency safe. func TestOTELSink(t *testing.T) { + t.Parallel() + // Manual reader outputs the aggregated metrics when reader.Collect is called. reader := metric.NewManualReader() ctx := context.Background() opts := &OTELSinkOpts{ Reader: reader, - Ctx: context.Background(), + Ctx: ctx, } sink, err := NewOTELSink(opts) @@ -159,14 +167,29 @@ func TestOTELSink(t *testing.T) { }, } - sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) - sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + wg := &sync.WaitGroup{} + wg.Add(3) + + go func() { + sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + wg.Done() + + }() + + go func() { + sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) + wg.Done() + }() - sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) - sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) + go func() { + sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) + wg.Done() + }() - sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) - sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) + wg.Wait() var collected metricdata.ResourceMetrics err = reader.Collect(ctx, &collected) From a893c320dd4ecb50710017f7e963caf0f4e69c87 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 16:22:14 -0400 Subject: [PATCH 44/50] Generate 100 gauge operations --- agent/hcp/telemetry/gauge_store_test.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 167b2e06e1b..b450b6209f2 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -2,9 +2,12 @@ package telemetry import ( "context" + "fmt" + "math/rand" "sync" "testing" + "github.com/hashicorp/go-uuid" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute" ) @@ -65,9 +68,13 @@ func TestGaugeStore_Race(t *testing.T) { gaugeStore := NewGaugeStore() wg := &sync.WaitGroup{} - for k, v := range map[string]float64{"consul.raft.apply": 23.23, "consul.raft.test": 14.3} { + for i := 0; i < 100; i++ { wg.Add(1) - go storeAndRetrieve(t, k, v, gaugeStore, wg) + v := rand.Float64() + uuid, err := uuid.GenerateUUID() + require.NoError(t, err) + + go storeAndRetrieve(t, fmt.Sprintf("%s%f", uuid, v), v, gaugeStore, wg) } wg.Wait() } From 9d5f5efb5d08b5348476a2593186da02cf4a4e7e Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 16:47:14 -0400 Subject: [PATCH 45/50] Seperate the labels into goroutines in sink test --- agent/hcp/telemetry/otel_sink_test.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 6fd05514632..6b88bbb7aa3 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -168,23 +168,35 @@ func TestOTELSink(t *testing.T) { } wg := &sync.WaitGroup{} - wg.Add(3) + wg.Add(6) go func() { sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) - sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) wg.Done() }() + go func() { + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + wg.Done() + }() + go func() { sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + wg.Done() + }() + + go func() { sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) wg.Done() }() go func() { sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + wg.Done() + }() + + go func() { sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) wg.Done() }() From 80a534bfcc6cf3764e61ad717ca17bb40a68ffc2 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 16:55:01 -0400 Subject: [PATCH 46/50] Generate kv store for the test case keys to avoid using uuid --- agent/hcp/telemetry/gauge_store_test.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index b450b6209f2..5ed5eadb8fd 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -7,7 +7,6 @@ import ( "sync" "testing" - "github.com/hashicorp/go-uuid" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute" ) @@ -68,14 +67,14 @@ func TestGaugeStore_Race(t *testing.T) { gaugeStore := NewGaugeStore() wg := &sync.WaitGroup{} - for i := 0; i < 100; i++ { - wg.Add(1) + samples := 100 + for i := 0; i < samples; i++ { + k := fmt.Sprintf("consul.test.%d", i) v := rand.Float64() - uuid, err := uuid.GenerateUUID() - require.NoError(t, err) - - go storeAndRetrieve(t, fmt.Sprintf("%s%f", uuid, v), v, gaugeStore, wg) + wg.Add(1) + go storeAndRetrieve(t, k, v, gaugeStore, wg) } + wg.Wait() } From aa2a97136c9b67403c200dc026a48005c2c6d0e6 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 12 May 2023 10:55:17 -0400 Subject: [PATCH 47/50] Added a race test with 300 samples for OTELSink --- agent/hcp/telemetry/otel_sink_test.go | 166 +++++++++++++++++++------- 1 file changed, 126 insertions(+), 40 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 6b88bbb7aa3..1e60d4ad26e 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -2,7 +2,10 @@ package telemetry import ( "context" + "fmt" + "math/rand" "sort" + "strings" "sync" "testing" @@ -143,8 +146,6 @@ func TestNewOTELSink(t *testing.T) { } } -// TestOTELSink performs concurrent metric instrument operations on the sink. -// When run with go test -race, this test should pass if implementation is concurrency safe. func TestOTELSink(t *testing.T) { t.Parallel() @@ -167,65 +168,152 @@ func TestOTELSink(t *testing.T) { }, } - wg := &sync.WaitGroup{} - wg.Add(6) + sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) - go func() { - sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) - wg.Done() + sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) - }() + sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) - go func() { - sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) - wg.Done() - }() + var collected metricdata.ResourceMetrics + err = reader.Collect(ctx, &collected) + require.NoError(t, err) - go func() { - sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) - wg.Done() - }() + isSame(t, expectedSinkMetrics, collected) +} - go func() { - sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) - wg.Done() - }() +func TestOTELSink_Race(t *testing.T) { + reader := metric.NewManualReader() + ctx := context.Background() + opts := &OTELSinkOpts{ + Ctx: ctx, + Reader: reader, + } - go func() { - sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) - wg.Done() - }() + sink, err := NewOTELSink(opts) + require.NoError(t, err) - go func() { - sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) - wg.Done() - }() + expectedMetrics := generateSamples(100) + wg := &sync.WaitGroup{} + for k, v := range expectedMetrics { + wg.Add(1) + go performSinkOperation(t, sink, k, v, wg) + } wg.Wait() var collected metricdata.ResourceMetrics err = reader.Collect(ctx, &collected) require.NoError(t, err) + isSame(t, expectedMetrics, collected) +} + +// generateSamples generates n of each gauges, counter and histogram measurements to use for test purposes. +func generateSamples(n int) map[string]metricdata.Metrics { + generated := make(map[string]metricdata.Metrics, 3*n) + + for i := 0; i < n; i++ { + v := rand.Float64() + k := fmt.Sprintf("consul.test.gauges.%d", i) + generated[k] = metricdata.Metrics{ + Name: k, + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(v)), + }, + }, + }, + } + } + + for i := 0; i < n; i++ { + v := rand.Float64() + k := fmt.Sprintf("consul.test.sum.%d", i) + generated[k] = metricdata.Metrics{ + Name: k, + Data: metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(v)), + }, + }, + }, + } + + } + + for i := 0; i < n; i++ { + v := rand.Float64() + k := fmt.Sprintf("consul.test.hist.%d", i) + generated[k] = metricdata.Metrics{ + Name: k, + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Sum: float64(float32(v)), + Max: metricdata.NewExtrema(float64(float32(v))), + Min: metricdata.NewExtrema(float64(float32(v))), + Count: 1, + }, + }, + }, + } + } + + return generated +} + +// performSinkOperation emits a measurement using the OTELSink and calls wg.Done() when completed. +func performSinkOperation(t *testing.T, sink *OTELSink, k string, v metricdata.Metrics, wg *sync.WaitGroup) { + key := strings.Split(k, ".") + data := v.Data + switch data.(type) { + case metricdata.Gauge[float64]: + gauge, ok := data.(metricdata.Gauge[float64]) + require.True(t, ok) + + sink.SetGauge(key, float32(gauge.DataPoints[0].Value)) + case metricdata.Sum[float64]: + sum, ok := data.(metricdata.Sum[float64]) + require.True(t, ok) + + sink.IncrCounter(key, float32(sum.DataPoints[0].Value)) + case metricdata.Histogram[float64]: + hist, ok := data.(metricdata.Histogram[float64]) + require.True(t, ok) + + sink.AddSample(key, float32(hist.DataPoints[0].Sum)) + } + + wg.Done() +} + +func isSame(t *testing.T, expectedMap map[string]metricdata.Metrics, actual metricdata.ResourceMetrics) { // Validate resource - require.Equal(t, resource.NewSchemaless(), collected.Resource) + require.Equal(t, resource.NewSchemaless(), actual.Resource) // Validate Metrics - require.NotEmpty(t, collected.ScopeMetrics) - actualMetrics := collected.ScopeMetrics[0].Metrics - require.Equal(t, len(actualMetrics), len(expectedSinkMetrics)) + require.NotEmpty(t, actual.ScopeMetrics) + actualMetrics := actual.ScopeMetrics[0].Metrics + require.Equal(t, len(expectedMap), len(actualMetrics)) for _, actual := range actualMetrics { name := actual.Name - expected, ok := expectedSinkMetrics[actual.Name] + expected, ok := expectedMap[actual.Name] require.True(t, ok, "metric key %s should be in expectedMetrics map", name) isSameMetrics(t, expected, actual) } } // compareMetrics verifies if two metricdata.Metric objects are equal by ignoring the time component. -// test metrics should not contain duplicate sums for histograms nor duplicate values for counters/gauges -// to ensure predictable order of data. +// avoid duplicate datapoint values to ensure predictable order of sort. func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata.Metrics) { require.Equal(t, expected.Name, actual.Name, "different .Name field") require.Equal(t, expected.Description, actual.Description, "different .Description field") @@ -236,12 +324,12 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. actualData, ok := actual.Data.(metricdata.Gauge[float64]) require.True(t, ok, "different metric types: expected metricdata.Gauge[float64]") - isSameData(t, expectedData.DataPoints, actualData.DataPoints) + isSameDataPoint(t, expectedData.DataPoints, actualData.DataPoints) case metricdata.Sum[float64]: actualData, ok := actual.Data.(metricdata.Sum[float64]) require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") - isSameData(t, expectedData.DataPoints, actualData.DataPoints) + isSameDataPoint(t, expectedData.DataPoints, actualData.DataPoints) case metricdata.Histogram[float64]: actualData, ok := actual.Data.(metricdata.Histogram[float64]) require.True(t, ok, "different metric types: expected metricdata.Histogram") @@ -250,11 +338,10 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. } } -func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { +func isSameDataPoint(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different datapoints length") // Sort for predictable data in order of lowest value. - // Test cases should not contain duplicate values. sort.Slice(expected, func(i, j int) bool { return expected[i].Value < expected[j].Value }) @@ -274,7 +361,6 @@ func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[ require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Sort for predictable data in order of lowest sum. - // Test cases should not contain duplicate sums. sort.Slice(expected, func(i, j int) bool { return expected[i].Sum < expected[j].Sum }) From d13849b96bc453b45889a7c78e2da4538b89fbff Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 15 May 2023 15:37:57 -0400 Subject: [PATCH 48/50] Do not pass in waitgroup and use error channel instead. --- agent/hcp/telemetry/gauge_store_test.go | 24 +++++++++------- agent/hcp/telemetry/otel_sink_test.go | 38 ++++++++++++++----------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 5ed5eadb8fd..1171ee379c3 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -3,7 +3,6 @@ package telemetry import ( "context" "fmt" - "math/rand" "sync" "testing" @@ -66,22 +65,25 @@ func TestGaugeStore_Race(t *testing.T) { t.Parallel() gaugeStore := NewGaugeStore() + wg := &sync.WaitGroup{} samples := 100 + errCh := make(chan error, samples) for i := 0; i < samples; i++ { - k := fmt.Sprintf("consul.test.%d", i) - v := rand.Float64() wg.Add(1) - go storeAndRetrieve(t, k, v, gaugeStore, wg) + key := fmt.Sprintf("consul.test.%d", i) + value := 12.34 + go func() { + defer wg.Done() + gaugeStore.Set(key, value, nil) + gv, _ := gaugeStore.LoadAndDelete(key) + if gv.Value != value { + errCh <- fmt.Errorf("expected value: '%f', but got: '%f' for key: '%s'", value, gv.Value, key) + } + }() } wg.Wait() -} -func storeAndRetrieve(t *testing.T, k string, v float64, gaugeStore *gaugeStore, wg *sync.WaitGroup) { - gaugeStore.Set(k, v, nil) - gv, ok := gaugeStore.LoadAndDelete(k) - require.True(t, ok) - require.Equal(t, v, gv.Value) - wg.Done() + require.Empty(t, errCh) } diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 1e60d4ad26e..ebdfa432554 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -3,7 +3,6 @@ package telemetry import ( "context" "fmt" - "math/rand" "sort" "strings" "sync" @@ -195,15 +194,21 @@ func TestOTELSink_Race(t *testing.T) { sink, err := NewOTELSink(opts) require.NoError(t, err) - expectedMetrics := generateSamples(100) - + samples := 100 + expectedMetrics := generateSamples(samples) wg := &sync.WaitGroup{} + errCh := make(chan error, samples) for k, v := range expectedMetrics { wg.Add(1) - go performSinkOperation(t, sink, k, v, wg) + go func(k string, v metricdata.Metrics) { + performSinkOperation(t, sink, k, v, errCh) + wg.Done() + }(k, v) } wg.Wait() + require.Empty(t, errCh) + var collected metricdata.ResourceMetrics err = reader.Collect(ctx, &collected) require.NoError(t, err) @@ -216,7 +221,7 @@ func generateSamples(n int) map[string]metricdata.Metrics { generated := make(map[string]metricdata.Metrics, 3*n) for i := 0; i < n; i++ { - v := rand.Float64() + v := 12.3 k := fmt.Sprintf("consul.test.gauges.%d", i) generated[k] = metricdata.Metrics{ Name: k, @@ -232,7 +237,7 @@ func generateSamples(n int) map[string]metricdata.Metrics { } for i := 0; i < n; i++ { - v := rand.Float64() + v := 22.23 k := fmt.Sprintf("consul.test.sum.%d", i) generated[k] = metricdata.Metrics{ Name: k, @@ -249,7 +254,7 @@ func generateSamples(n int) map[string]metricdata.Metrics { } for i := 0; i < n; i++ { - v := rand.Float64() + v := 13.24 k := fmt.Sprintf("consul.test.hist.%d", i) generated[k] = metricdata.Metrics{ Name: k, @@ -271,28 +276,29 @@ func generateSamples(n int) map[string]metricdata.Metrics { } // performSinkOperation emits a measurement using the OTELSink and calls wg.Done() when completed. -func performSinkOperation(t *testing.T, sink *OTELSink, k string, v metricdata.Metrics, wg *sync.WaitGroup) { +func performSinkOperation(t *testing.T, sink *OTELSink, k string, v metricdata.Metrics, errCh chan error) { key := strings.Split(k, ".") data := v.Data switch data.(type) { case metricdata.Gauge[float64]: gauge, ok := data.(metricdata.Gauge[float64]) - require.True(t, ok) - + if !ok { + errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key) + } sink.SetGauge(key, float32(gauge.DataPoints[0].Value)) case metricdata.Sum[float64]: sum, ok := data.(metricdata.Sum[float64]) - require.True(t, ok) - + if !ok { + errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key) + } sink.IncrCounter(key, float32(sum.DataPoints[0].Value)) case metricdata.Histogram[float64]: hist, ok := data.(metricdata.Histogram[float64]) - require.True(t, ok) - + if !ok { + errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key) + } sink.AddSample(key, float32(hist.DataPoints[0].Sum)) } - - wg.Done() } func isSame(t *testing.T, expectedMap map[string]metricdata.Metrics, actual metricdata.ResourceMetrics) { From 713c5fa4b4a992afb2886c02eff2bba715e881c2 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 15 May 2023 15:51:38 -0400 Subject: [PATCH 49/50] Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel --- go.mod | 12 ++++++------ go.sum | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index a079e511b5d..005db2fa752 100644 --- a/go.mod +++ b/go.mod @@ -96,17 +96,17 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 - go.opentelemetry.io/otel v1.15.1 - go.opentelemetry.io/otel/metric v0.38.1 - go.opentelemetry.io/otel/sdk v1.15.1 - go.opentelemetry.io/otel/sdk/metric v0.38.1 + go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218 + go.opentelemetry.io/otel/metric v1.16.0-rc.1 + go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218 + go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.7.0 + golang.org/x/sys v0.8.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -233,7 +233,7 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/trace v1.15.1 // indirect + go.opentelemetry.io/otel/trace v1.16.0-rc.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 4412cb4947e..05bf3541736 100644 --- a/go.sum +++ b/go.sum @@ -1080,16 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.15.1 h1:3Iwq3lfRByPaws0f6bU3naAqOR1n5IeDWd9390kWHa8= -go.opentelemetry.io/otel v1.15.1/go.mod h1:mHHGEHVDLal6YrKMmk9LqC4a3sF5g+fHfrttQIB1NTc= -go.opentelemetry.io/otel/metric v0.38.1 h1:2MM7m6wPw9B8Qv8iHygoAgkbejed59uUR6ezR5T3X2s= -go.opentelemetry.io/otel/metric v0.38.1/go.mod h1:FwqNHD3I/5iX9pfrRGZIlYICrJv0rHEUl2Ln5vdIVnQ= -go.opentelemetry.io/otel/sdk v1.15.1 h1:5FKR+skgpzvhPQHIEfcwMYjCBr14LWzs3uSqKiQzETI= -go.opentelemetry.io/otel/sdk v1.15.1/go.mod h1:8rVtxQfrbmbHKfqzpQkT5EzZMcbMBwTzNAggbEAM0KA= -go.opentelemetry.io/otel/sdk/metric v0.38.1 h1:EkO5wI4NT/fUaoPMGc0fKV28JaWe7q4vfVpEVasGb+8= -go.opentelemetry.io/otel/sdk/metric v0.38.1/go.mod h1:Rn4kSXFF9ZQZ5lL1pxQjCbK4seiO+U7s0ncmIFJaj34= -go.opentelemetry.io/otel/trace v1.15.1 h1:uXLo6iHJEzDfrNC0L0mNjItIp06SyaBQxu5t3xMlngY= -go.opentelemetry.io/otel/trace v1.15.1/go.mod h1:IWdQG/5N1x7f6YUlmdLeJvH9yxtuJAfc4VW5Agv9r/8= +go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218 h1:aKv7ueCXRlBdHGBNfot8BYwcvp4jwJ/rK/T/KQ3uXoA= +go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:dGSTwGyzvw5Dzn8nE8HrfOXnWIDrL0GIzQdOpTnJ2CM= +go.opentelemetry.io/otel/metric v1.16.0-rc.1 h1:R9MPFw2jA+z91ejfOVU7QRYSdb37E5Ak6jJUwNMQbR8= +go.opentelemetry.io/otel/metric v1.16.0-rc.1/go.mod h1:0I+4bYjKHaoXGw7uXAABYA5wyptQdXeXOhi3SBgD6GM= +go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218 h1:YC5ikDtSM7s+sJprqR7edyP9EBKMHGaAnWfte7EsQCI= +go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:tY+q2LQ4iuvdwcN0zrt/2NdF3ntVodUPbiHPMRZnXyo= +go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218 h1:5Ehgy+TyY7Jh3orDVIn7uVJ7UkFm3yP5lXXQN8ia+00= +go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:VKkJz/K+pb4rkqXlBH5DMJi1ebQLYhV82fTSK3WvOOQ= +go.opentelemetry.io/otel/trace v1.16.0-rc.1 h1:/dPBlZrzSSXglIEKgy/A3kyiACcmgNMFWKTIHHxxd/o= +go.opentelemetry.io/otel/trace v1.16.0-rc.1/go.mod h1:xqretMbHfSU24I2KKbSEG+aVHsNtBCr5L4BGaNqTx68= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1354,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From 506e867811eba7cc6604f38465fc74e8571b7dde Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 16 May 2023 10:45:25 -0400 Subject: [PATCH 50/50] Fix nits --- agent/hcp/telemetry/otel_sink_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index ebdfa432554..2b4dc26abda 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -201,8 +201,8 @@ func TestOTELSink_Race(t *testing.T) { for k, v := range expectedMetrics { wg.Add(1) go func(k string, v metricdata.Metrics) { - performSinkOperation(t, sink, k, v, errCh) - wg.Done() + defer wg.Done() + performSinkOperation(sink, k, v, errCh) }(k, v) } wg.Wait() @@ -276,7 +276,7 @@ func generateSamples(n int) map[string]metricdata.Metrics { } // performSinkOperation emits a measurement using the OTELSink and calls wg.Done() when completed. -func performSinkOperation(t *testing.T, sink *OTELSink, k string, v metricdata.Metrics, errCh chan error) { +func performSinkOperation(sink *OTELSink, k string, v metricdata.Metrics, errCh chan error) { key := strings.Split(k, ".") data := v.Data switch data.(type) {