From 10136c945ec6feb1137807168e519a1fa85b9c2a Mon Sep 17 00:00:00 2001 From: Christos Kalkanis Date: Thu, 23 Jan 2025 17:43:12 -0500 Subject: [PATCH] Add trace event metrics --- metrics/ids.go | 11 ++++++++++- metrics/metrics.json | 21 +++++++++++++++++++++ tracer/events.go | 16 ++++++++++------ tracer/tracer.go | 3 ++- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/metrics/ids.go b/metrics/ids.go index ca8834212..5222ccebf 100644 --- a/metrics/ids.go +++ b/metrics/ids.go @@ -689,6 +689,15 @@ const ( // Number of times the stack delta provider succeeded to extract stack deltas IDStackDeltaProviderSuccess = 271 + // Number of lost trace events in the communication between kernel and user space (trace_events) + IDTraceEventLost = 272 + + // Number of times a trace event was received without data (trace_events) + IDTraceEventNoData = 273 + + // Number of times a trace event read failed (trace_events) + IDTraceEventReadError = 274 + // max number of ID values, keep this as *last entry* - IDMax = 272 + IDMax = 275 ) diff --git a/metrics/metrics.json b/metrics/metrics.json index 76232ff8a..85e2cabe7 100644 --- a/metrics/metrics.json +++ b/metrics/metrics.json @@ -1937,5 +1937,26 @@ "name": "StackDeltaProviderSuccess", "field": "agent.stack_delta_extraction.success", "id": 271 + }, + { + "description": "Number of lost trace events in the communication between kernel and user space (trace_events)", + "type": "counter", + "name": "TraceEventLost", + "field": "agent.errors.trace_event_lost", + "id": 272 + }, + { + "description": "Number of times a trace event was received without data (trace_events)", + "type": "counter", + "name": "TraceEventNoData", + "field": "agent.errors.trace_event_no_data", + "id": 273 + }, + { + "description": "Number of times a trace event read failed (trace_events)", + "type": "counter", + "name": "TraceEventReadError", + "field": "agent.errors.trace_event_read_error", + "id": 274 } ] diff --git a/tracer/events.go b/tracer/events.go index 240e52df9..36238bbe9 100644 --- a/tracer/events.go +++ b/tracer/events.go @@ -134,7 +134,7 @@ func startPerfEventMonitor(ctx context.Context, perfEventMap *ebpf.Map, // Returns a function that can be called to retrieve perf event array // error counts. func (t *Tracer) startTraceEventMonitor(ctx context.Context, - traceOutChan chan<- *host.Trace) func() (lost, noData, readError uint64) { + traceOutChan chan<- *host.Trace) func() []metrics.Metric { eventsMap := t.ebpfMaps["trace_events"] eventReader, err := perf.NewReader(eventsMap, t.samplesPerSecond*int(unsafe.Sizeof(C.Trace{}))) @@ -224,11 +224,15 @@ func (t *Tracer) startTraceEventMonitor(ctx context.Context, } }() - return func() (lost, noData, readError uint64) { - lost = lostEventsCount.Swap(0) - noData = noDataCount.Swap(0) - readError = readErrorCount.Swap(0) - return + return func() []metrics.Metric { + lost := lostEventsCount.Swap(0) + noData := noDataCount.Swap(0) + readError := readErrorCount.Swap(0) + return []metrics.Metric{ + {ID: metrics.IDTraceEventLost, Value: metrics.MetricValue(lost)}, + {ID: metrics.IDTraceEventNoData, Value: metrics.MetricValue(noData)}, + {ID: metrics.IDTraceEventReadError, Value: metrics.MetricValue(readError)}, + } } } diff --git a/tracer/tracer.go b/tracer/tracer.go index 74898e5e9..c9f3ad415 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -1043,7 +1043,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *host.Trace { // maps for tracepoints, new traces, trace count updates and unknown PCs. func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *host.Trace) error { eventMetricCollector := t.startEventMonitor(ctx) - t.startTraceEventMonitor(ctx, traceOutChan) + traceEventMetricCollector := t.startTraceEventMonitor(ctx, traceOutChan) pidEvents := make([]uint32, 0) periodiccaller.StartWithManualTrigger(ctx, t.intervals.MonitorInterval(), @@ -1162,6 +1162,7 @@ func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *host periodiccaller.Start(ctx, t.intervals.MonitorInterval(), func() { metrics.AddSlice(eventMetricCollector()) + metrics.AddSlice(traceEventMetricCollector()) metrics.AddSlice(t.eBPFMetricsCollector(translateIDs, previousMetricValue)) metrics.AddSlice([]metrics.Metric{