-
Notifications
You must be signed in to change notification settings - Fork 399
interpreter: Support multiple interpreters for single ELF object #702
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c4ed413
6a4895b
96574f2
bb30f14
87ec381
b02c892
5a63653
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,143 @@ | ||
| // Copyright The OpenTelemetry Authors | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| package interpreter // import "go.opentelemetry.io/ebpf-profiler/interpreter" | ||
|
|
||
| import ( | ||
| "errors" | ||
|
|
||
| log "github.com/sirupsen/logrus" | ||
| "go.opentelemetry.io/ebpf-profiler/host" | ||
| "go.opentelemetry.io/ebpf-profiler/libpf" | ||
| "go.opentelemetry.io/ebpf-profiler/metrics" | ||
| "go.opentelemetry.io/ebpf-profiler/process" | ||
| "go.opentelemetry.io/ebpf-profiler/remotememory" | ||
| "go.opentelemetry.io/ebpf-profiler/reporter" | ||
| "go.opentelemetry.io/ebpf-profiler/tpbase" | ||
| ) | ||
|
|
||
| // MultiData implements the Data interface for multiple interpreters. | ||
| type MultiData struct { | ||
| interpreters []Data | ||
| } | ||
|
|
||
| // NewMultiData creates a new MultiData instance from multiple Data instances. | ||
| func NewMultiData(interpreters []Data) *MultiData { | ||
| return &MultiData{ | ||
| interpreters: interpreters, | ||
| } | ||
| } | ||
|
|
||
| // Attach attaches all interpreters and returns a MultiInstance. | ||
| func (m *MultiData) Attach(ebpf EbpfHandler, pid libpf.PID, bias libpf.Address, | ||
| rm remotememory.RemoteMemory) (Instance, error) { | ||
| var instances []Instance | ||
| var errs []error | ||
|
|
||
| for _, data := range m.interpreters { | ||
| instance, err := data.Attach(ebpf, pid, bias, rm) | ||
| if err != nil { | ||
| errs = append(errs, err) | ||
| continue | ||
| } | ||
| if instance != nil { | ||
| instances = append(instances, instance) | ||
| } | ||
| } | ||
|
|
||
| err := errors.Join(errs...) | ||
| if len(instances) == 0 { | ||
| // Either all interpreters returned nil instances without error (e.g., not ready yet) | ||
| // in which case return nil, nil (valid state) otherwise return combined error. | ||
| return nil, err | ||
| } | ||
|
|
||
| // We got at least one valid instance, log any errors that occurred | ||
| if err != nil { | ||
| log.Errorf("Errors occurred while attaching interpreters: %v", err) | ||
| } | ||
|
|
||
| return NewMultiInstance(instances), nil | ||
| } | ||
|
|
||
| // Unload unloads all interpreters. | ||
| func (m *MultiData) Unload(ebpf EbpfHandler) { | ||
| for _, data := range m.interpreters { | ||
| data.Unload(ebpf) | ||
| } | ||
| } | ||
|
|
||
| // MultiInstance implements the Instance interface for multiple interpreters. | ||
| type MultiInstance struct { | ||
| instances []Instance | ||
| } | ||
|
|
||
| // NewMultiInstance creates a new MultiInstance from multiple Instance instances. | ||
| func NewMultiInstance(instances []Instance) *MultiInstance { | ||
| return &MultiInstance{ | ||
| instances: instances, | ||
| } | ||
| } | ||
|
|
||
| // Detach detaches all interpreter instances. | ||
| func (m *MultiInstance) Detach(ebpf EbpfHandler, pid libpf.PID) error { | ||
| var errs []error | ||
| for _, instance := range m.instances { | ||
| if err := instance.Detach(ebpf, pid); err != nil { | ||
| errs = append(errs, err) | ||
| } | ||
| } | ||
| return errors.Join(errs...) | ||
| } | ||
|
|
||
| // SynchronizeMappings synchronizes mappings for all interpreter instances. | ||
| func (m *MultiInstance) SynchronizeMappings(ebpf EbpfHandler, | ||
| symbolReporter reporter.SymbolReporter, pr process.Process, mappings []process.Mapping) error { | ||
| var errs []error | ||
| for _, instance := range m.instances { | ||
| if err := instance.SynchronizeMappings(ebpf, symbolReporter, pr, mappings); err != nil { | ||
| errs = append(errs, err) | ||
| } | ||
| } | ||
| return errors.Join(errs...) | ||
| } | ||
|
|
||
| // UpdateTSDInfo updates TSD info for all interpreter instances. | ||
| func (m *MultiInstance) UpdateTSDInfo(ebpf EbpfHandler, pid libpf.PID, info tpbase.TSDInfo) error { | ||
| var errs []error | ||
| for _, instance := range m.instances { | ||
| if err := instance.UpdateTSDInfo(ebpf, pid, info); err != nil { | ||
| errs = append(errs, err) | ||
| } | ||
| } | ||
| return errors.Join(errs...) | ||
| } | ||
|
|
||
| // Symbolize tries to symbolize the frame with each interpreter instance until one succeeds. | ||
| func (m *MultiInstance) Symbolize(ebpfFrame *host.Frame, frames *libpf.Frames) error { | ||
| // Try each interpreter in order | ||
| for _, instance := range m.instances { | ||
| err := instance.Symbolize(ebpfFrame, frames) | ||
| if err != ErrMismatchInterpreterType { | ||
| return err | ||
| } | ||
| } | ||
| return ErrMismatchInterpreterType | ||
| } | ||
|
|
||
| // GetAndResetMetrics collects metrics from all interpreter instances. | ||
| func (m *MultiInstance) GetAndResetMetrics() ([]metrics.Metric, error) { | ||
| var allMetrics []metrics.Metric | ||
| var errs []error | ||
|
|
||
| for _, instance := range m.instances { | ||
| metrics, err := instance.GetAndResetMetrics() | ||
| if err != nil { | ||
| errs = append(errs, err) | ||
| continue | ||
| } | ||
| allMetrics = append(allMetrics, metrics...) | ||
| } | ||
|
|
||
| return allMetrics, errors.Join(errs...) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This breaks the existing caller assumption that no useful metrics are returned if |
||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -118,20 +118,18 @@ func metricSummaryToSlice(summary metrics.Summary) []metrics.Metric { | |||||
| return result | ||||||
| } | ||||||
|
|
||||||
| // updateMetricSummary gets the metrics from the provided interpreter instance and updaates the | ||||||
| // updateMetricSummary gets the metrics from the provided interpreter instance and updates the | ||||||
| // provided summary by aggregating the new metrics into the summary. | ||||||
| // The caller is responsible to hold the lock on the interpreter.Instance to avoid race conditions. | ||||||
| func updateMetricSummary(ii interpreter.Instance, summary metrics.Summary) error { | ||||||
| instanceMetrics, err := ii.GetAndResetMetrics() | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| // Update metrics even if there was an error, because it's possible ii is a MultiInstance | ||||||
| // and some of the instances may have returned metrics. | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| for _, metric := range instanceMetrics { | ||||||
| summary[metric.ID] += metric.Value | ||||||
| } | ||||||
|
|
||||||
| return nil | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| // collectInterpreterMetrics starts a goroutine that periodically fetches and reports interpreter | ||||||
|
|
@@ -145,8 +143,8 @@ func collectInterpreterMetrics(ctx context.Context, pm *ProcessManager, | |||||
| summary := make(map[metrics.MetricID]metrics.MetricValue) | ||||||
|
|
||||||
| for pid := range pm.interpreters { | ||||||
| for addr := range pm.interpreters[pid] { | ||||||
| if err := updateMetricSummary(pm.interpreters[pid][addr], summary); err != nil { | ||||||
| for addr, ii := range pm.interpreters[pid] { | ||||||
| if err := updateMetricSummary(ii, summary); err != nil { | ||||||
| log.Errorf("Failed to get/reset metrics for PID %d at 0x%x: %v", | ||||||
| pid, addr, err) | ||||||
| } | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're potentially swallowing/hiding an error here (
if len(instances) > 0 && len(errs) > 0)), maybe we should log the latter case.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch