diff --git a/tracer/events.go b/tracer/events.go index 87a8394f3..c117a038c 100644 --- a/tracer/events.go +++ b/tracer/events.go @@ -214,7 +214,22 @@ func (t *Tracer) startTraceEventMonitor(ctx context.Context, eventCount++ // Keep track of min KTime seen in this batch processing loop - trace := t.loadBpfTrace(data.RawSample, data.CPU) + trace, err := t.loadBpfTrace(data.RawSample, data.CPU) + switch { + case err == nil: + // Fast path for no error. + case errors.Is(err, errOriginUnexpected): + log.Warnf("skip trace handling: %v", err) + continue + case errors.Is(err, errRecordTooSmall), errors.Is(err, errRecordUnexpectedSize): + log.Errorf("stop receiving traces: %v", err) + // TODO: trigger a graceful shutdown + return + default: + log.Warnf("unexpected error handling trace: %v", err) + continue + } + if minKTime == 0 || trace.KTime < minKTime { minKTime = trace.KTime } diff --git a/tracer/tracer.go b/tracer/tracer.go index f77c9bee8..937f130fd 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -927,15 +927,22 @@ func (t *Tracer) eBPFMetricsCollector( return metricsUpdates } +// Various bpf trace handling related errors: +var ( + errRecordTooSmall = errors.New("trace record too small") + errRecordUnexpectedSize = errors.New("unexpected record size") + errOriginUnexpected = errors.New("unexpected origin") +) + // loadBpfTrace parses a raw BPF trace into a `host.Trace` instance. // // If the raw trace contains a kernel stack ID, the kernel stack is also // retrieved and inserted at the appropriate position. 
-func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *libpf.EbpfTrace { +func (t *Tracer) loadBpfTrace(raw []byte, cpu int) (*libpf.EbpfTrace, error) { frameListOffs := int(unsafe.Offsetof(support.Trace{}.Frame_data)) if len(raw) < frameListOffs { - panic("trace record too small") + return nil, fmt.Errorf("%d < %d: %w", len(raw), frameListOffs, errRecordTooSmall) } ptr := traceFromRaw(raw) @@ -943,7 +950,8 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *libpf.EbpfTrace { // NOTE: can't do exact check here: kernel adds a few padding bytes to messages. if len(raw) < frameListOffs+frameDataLen { - panic("unexpected record size") + return nil, fmt.Errorf("%d < %d: %w", len(raw), frameListOffs+frameDataLen, + errRecordUnexpectedSize) } pid := libpf.PID(ptr.Pid) @@ -970,8 +978,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *libpf.EbpfTrace { case support.TraceOriginOffCPU: case support.TraceOriginProbe: default: - log.Warnf("Skip handling trace from unexpected %d origin", trace.Origin) - return nil + return nil, fmt.Errorf("origin %d: %w", trace.Origin, errOriginUnexpected) } if ptr.Kernel_stack_id >= 0 { @@ -996,7 +1003,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *libpf.EbpfTrace { trace.FrameData = trace.FrameDataBuf[:ptr.Frame_data_len] copy(trace.FrameData, ptr.Frame_data[:ptr.Frame_data_len]) - return trace + return trace, nil } // StartMapMonitors starts goroutines for collecting metrics and monitoring eBPF