-
Notifications
You must be signed in to change notification settings - Fork 399
tracer: avoid panic for unknown origins #1046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
002beb1
52aa14d
a10309c
45f68f5
06e56ab
c204bf7
f22c87a
ee61aea
07210ff
707d66c
cf71033
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -927,23 +927,31 @@ func (t *Tracer) eBPFMetricsCollector( | |
| return metricsUpdates | ||
| } | ||
|
|
||
| // Various bpf trace handling related errors: | ||
| var ( | ||
| errRecordTooSmall = errors.New("trace record too small") | ||
| errRecordUnexpectedSize = errors.New("unexpected record size") | ||
| errOriginUnexpected = errors.New("unexepcted origin") | ||
| ) | ||
|
|
||
| // loadBpfTrace parses a raw BPF trace into a `host.Trace` instance. | ||
| // | ||
| // If the raw trace contains a kernel stack ID, the kernel stack is also | ||
| // retrieved and inserted at the appropriate position. | ||
| func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *libpf.EbpfTrace { | ||
| func (t *Tracer) loadBpfTrace(raw []byte, cpu int) (*libpf.EbpfTrace, error) { | ||
| frameListOffs := int(unsafe.Offsetof(support.Trace{}.Frame_data)) | ||
|
|
||
| if len(raw) < frameListOffs { | ||
| panic("trace record too small") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should keep these panics for now until we have a functional equivalent (cleanly shutting down the receiver). If we remove them here, we're changing the behavior as we're effectively ignoring these errors that were previously deemed serious enough to panic (we're also hiding this behavior from a future refactoring).
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mixing
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main issue is that we're hiding information with this change, the current code encodes certain semantics: that this error is serious enough to panic. So we at least need to keep the "serious enough" part of these semantics intact, otherwise we're forcing ourselves to remember that these conditions should trigger shutdown. See my other suggestions.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To mimic the current semantics I added dedicated error handling for these The next step, a graceful complete shutdown, then should only be the next step with the handling of |
||
| return nil, fmt.Errorf("%d < %d: %w", len(raw), frameListOffs, errRecordTooSmall) | ||
|
florianl marked this conversation as resolved.
|
||
| } | ||
|
|
||
| ptr := traceFromRaw(raw) | ||
| frameDataLen := int(ptr.Frame_data_len) * 8 | ||
|
|
||
| // NOTE: can't do exact check here: kernel adds a few padding bytes to messages. | ||
| if len(raw) < frameListOffs+frameDataLen { | ||
| panic("unexpected record size") | ||
| return nil, fmt.Errorf("%d < %d: %w", len(raw), frameListOffs+frameDataLen, | ||
|
florianl marked this conversation as resolved.
|
||
| errRecordUnexpectedSize) | ||
|
Comment on lines
+953
to
+954
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about log the error here, and return
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used the |
||
| } | ||
|
|
||
| pid := libpf.PID(ptr.Pid) | ||
|
|
@@ -970,8 +978,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *libpf.EbpfTrace { | |
| case support.TraceOriginOffCPU: | ||
| case support.TraceOriginProbe: | ||
| default: | ||
| log.Warnf("Skip handling trace from unexpected %d origin", trace.Origin) | ||
| return nil | ||
| return nil, fmt.Errorf("origin %d: %w", trace.Origin, errOriginUnexpected) | ||
| } | ||
|
|
||
| if ptr.Kernel_stack_id >= 0 { | ||
|
|
@@ -996,7 +1003,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *libpf.EbpfTrace { | |
| trace.FrameData = trace.FrameDataBuf[:ptr.Frame_data_len] | ||
| copy(trace.FrameData, ptr.Frame_data[:ptr.Frame_data_len]) | ||
|
|
||
| return trace | ||
| return trace, nil | ||
| } | ||
|
|
||
| // StartMapMonitors starts goroutines for collecting metrics and monitoring eBPF | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.