-
Notifications
You must be signed in to change notification settings - Fork 398
reporter: simplify TraceReporter interface #405
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7367ee5
f118edf
863b767
da54892
c3c7327
5425e42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ package tracehandler // import "go.opentelemetry.io/ebpf-profiler/tracehandler" | |
| import ( | ||
| "context" | ||
| "fmt" | ||
| "sync" | ||
| "time" | ||
|
|
||
| lru "github.com/elastic/go-freelru" | ||
|
|
@@ -29,6 +30,10 @@ type Times interface { | |
| MonitorInterval() time.Duration | ||
| } | ||
|
|
||
| // Default lifetime of elements in the cache to reduce recurring | ||
| // symbolization efforts. | ||
| var traceCacheLifetime = 5 * time.Minute | ||
|
|
||
| // TraceProcessor is an interface used by traceHandler to convert traces | ||
| // from a form received from eBPF to the form we wish to dispatch to the | ||
| // collection agent. | ||
|
|
@@ -54,21 +59,15 @@ type TraceProcessor interface { | |
| // from the eBPF components. | ||
| type traceHandler struct { | ||
| // Metrics | ||
| umTraceCacheHit uint64 | ||
| umTraceCacheMiss uint64 | ||
| bpfTraceCacheHit uint64 | ||
| bpfTraceCacheMiss uint64 | ||
| traceCacheHit uint64 | ||
| traceCacheMiss uint64 | ||
|
|
||
| traceProcessor TraceProcessor | ||
|
|
||
| // bpfTraceCache stores mappings from BPF to user-mode hashes. This allows | ||
| // traceCache stores mappings from BPF hashes to symbolized traces. This allows | ||
| // avoiding the overhead of re-doing user-mode symbolization of traces that | ||
| // we have recently seen already. | ||
| bpfTraceCache *lru.LRU[host.TraceHash, libpf.TraceHash] | ||
|
|
||
| // umTraceCache is a LRU set that suppresses unnecessary resends of traces | ||
| // that we have recently reported to the collector already. | ||
| umTraceCache *lru.LRU[libpf.TraceHash, libpf.Void] | ||
| traceCache *lru.SyncedLRU[host.TraceHash, libpf.Trace] | ||
|
|
||
| // reporter instance to use to send out traces. | ||
| reporter reporter.TraceReporter | ||
|
|
@@ -77,29 +76,43 @@ type traceHandler struct { | |
| } | ||
|
|
||
| // newTraceHandler creates a new traceHandler | ||
| func newTraceHandler(rep reporter.TraceReporter, traceProcessor TraceProcessor, | ||
| intervals Times, cacheSize uint32) (*traceHandler, error) { | ||
| bpfTraceCache, err := lru.New[host.TraceHash, libpf.TraceHash]( | ||
| func newTraceHandler(ctx context.Context, rep reporter.TraceReporter, | ||
| traceProcessor TraceProcessor, intervals Times, cacheSize uint32) (*traceHandler, error) { | ||
| traceCache, err := lru.NewSynced[host.TraceHash, libpf.Trace]( | ||
| cacheSize, func(k host.TraceHash) uint32 { return uint32(k) }) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| // Do not hold elements indefinitely in the cache. | ||
| traceCache.SetLifetime(traceCacheLifetime) | ||
|
|
||
| umTraceCache, err := lru.New[libpf.TraceHash, libpf.Void]( | ||
| cacheSize, libpf.TraceHash.Hash32) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| var wg sync.WaitGroup | ||
| wg.Add(1) | ||
|
|
||
| go func() { | ||
| wg.Done() | ||
| ticker := time.NewTicker(traceCacheLifetime) | ||
| defer ticker.Stop() | ||
|
|
||
| for { | ||
| select { | ||
| case <-ctx.Done(): | ||
| return | ||
| case <-ticker.C: | ||
| traceCache.PurgeExpired() | ||
| } | ||
| } | ||
| }() | ||
|
|
||
| t := &traceHandler{ | ||
| // Wait to make sure the purge routine did start. | ||
| wg.Wait() | ||
|
Comment on lines
+107
to
+108
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it make sense to wait here? It doesn't harm if the go routine starts after return.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The waitgroup pattern is just used to make sure the Go routine starts. This pattern is used also in other places in the code base. Waiting here should not increase the start up time in a noticeable way. But as we can not control the Go scheduler, we make sure this way, the clean up/purge routine is working properly and scheduled. |
||
|
|
||
| return &traceHandler{ | ||
| traceProcessor: traceProcessor, | ||
| bpfTraceCache: bpfTraceCache, | ||
| umTraceCache: umTraceCache, | ||
| traceCache: traceCache, | ||
| reporter: rep, | ||
| times: intervals, | ||
| } | ||
|
|
||
| return t, nil | ||
| }, nil | ||
| } | ||
|
|
||
| func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) { | ||
|
|
@@ -117,40 +130,26 @@ func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) { | |
| EnvVars: bpfTrace.EnvVars, | ||
| } | ||
|
|
||
| if !m.reporter.SupportsReportTraceEvent() { | ||
| // Fast path: if the trace is already known remotely, we just send a counter update. | ||
| postConvHash, traceKnown := m.bpfTraceCache.Get(bpfTrace.Hash) | ||
| if traceKnown { | ||
| m.bpfTraceCacheHit++ | ||
| meta.APMServiceName = m.traceProcessor.MaybeNotifyAPMAgent(bpfTrace, postConvHash, 1) | ||
| m.reporter.ReportCountForTrace(postConvHash, 1, meta) | ||
| return | ||
| if trace, exists := m.traceCache.GetAndRefresh(bpfTrace.Hash, | ||
| traceCacheLifetime); exists { | ||
| m.traceCacheHit++ | ||
| // Fast path | ||
| meta.APMServiceName = m.traceProcessor.MaybeNotifyAPMAgent(bpfTrace, trace.Hash, 1) | ||
| if err := m.reporter.ReportTraceEvent(&trace, meta); err != nil { | ||
| log.Errorf("Failed to report trace event: %v", err) | ||
| } | ||
| m.bpfTraceCacheMiss++ | ||
| return | ||
| } | ||
| m.traceCacheMiss++ | ||
|
|
||
| // Slow path: convert trace. | ||
| umTrace := m.traceProcessor.ConvertTrace(bpfTrace) | ||
| log.Debugf("Trace hash remap 0x%x -> 0x%x", bpfTrace.Hash, umTrace.Hash) | ||
| m.bpfTraceCache.Add(bpfTrace.Hash, umTrace.Hash) | ||
| m.traceCache.Add(bpfTrace.Hash, *umTrace) | ||
|
|
||
| meta.APMServiceName = m.traceProcessor.MaybeNotifyAPMAgent(bpfTrace, umTrace.Hash, 1) | ||
| if m.reporter.SupportsReportTraceEvent() { | ||
| m.reporter.ReportTraceEvent(umTrace, meta) | ||
| return | ||
| } | ||
| m.reporter.ReportCountForTrace(umTrace.Hash, 1, meta) | ||
|
|
||
| // Trace already known to collector by UM hash? | ||
| if _, known := m.umTraceCache.Get(umTrace.Hash); known { | ||
| m.umTraceCacheHit++ | ||
| return | ||
| if err := m.reporter.ReportTraceEvent(umTrace, meta); err != nil { | ||
| log.Errorf("Failed to report trace event: %v", err) | ||
| } | ||
| m.umTraceCacheMiss++ | ||
|
|
||
| // Nope. Send it now. | ||
| m.reporter.ReportFramesForTrace(umTrace) | ||
| m.umTraceCache.Add(umTrace.Hash, libpf.Void{}) | ||
| } | ||
|
|
||
| // Start starts a goroutine that receives and processes trace updates over | ||
|
|
@@ -161,7 +160,7 @@ func Start(ctx context.Context, rep reporter.TraceReporter, traceProcessor Trace | |
| traceInChan <-chan *host.Trace, intervals Times, cacheSize uint32, | ||
| ) (workerExited <-chan libpf.Void, err error) { | ||
| handler, err := | ||
| newTraceHandler(rep, traceProcessor, intervals, cacheSize) | ||
| newTraceHandler(ctx, rep, traceProcessor, intervals, cacheSize) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to create traceHandler: %v", err) | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.