diff --git a/interpreter/golabels/integrationtests/golabels_integration_test.go b/interpreter/golabels/integrationtests/golabels_integration_test.go index dedead1af..4452e5c29 100644 --- a/interpreter/golabels/integrationtests/golabels_integration_test.go +++ b/interpreter/golabels/integrationtests/golabels_integration_test.go @@ -44,9 +44,10 @@ var ( type mockIntervals struct{} -func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second } -func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond } -func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second } +func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second } +func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond } +func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second } +func (mockIntervals) ExecutableUnloadDelay() time.Duration { return 1 * time.Second } func isRoot() bool { return os.Geteuid() == 0 diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index 0b957bff2..358d425cb 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -142,6 +142,7 @@ func NewExecutableInfoManager( state: xsync.NewRWMutex(executableInfoManagerState{ interpreterLoaders: interpreterLoaders, executables: map[host.FileID]*entry{}, + unusedExecutables: map[host.FileID]time.Time{}, unwindInfoIndex: map[sdtypes.UnwindInfo]uint16{}, ebpf: ebpf, }), @@ -173,6 +174,9 @@ func (mgr *ExecutableInfoManager) AddOrIncRef(fileID host.FileID, if ok { defer mgr.state.WUnlock(&state) info.rc++ + if info.rc == 1 { + delete(state.unusedExecutables, fileID) + } return info.ExecutableInfo, nil } @@ -207,6 +211,9 @@ func (mgr *ExecutableInfoManager) AddOrIncRef(fileID host.FileID, defer mgr.state.WUnlock(&state) if info, ok = state.executables[fileID]; ok { info.rc++ + if info.rc == 1 { + delete(state.unusedExecutables, fileID) + } return info.ExecutableInfo, nil } @@ -234,10 +241,8 @@ func (mgr *ExecutableInfoManager) AddOrIncRef(fileID host.FileID, return info.ExecutableInfo, nil } -// RemoveOrDecRef decrements the reference counter of the executable being tracked. Once the RC -// reaches zero, information about the file is removed from the manager and the corresponding -// BPF maps. -func (mgr *ExecutableInfoManager) RemoveOrDecRef(fileID host.FileID) error { +// DecRef decrements the reference counter of the executable being tracked. +func (mgr *ExecutableInfoManager) DecRef(fileID host.FileID) error { state := mgr.state.WLock() defer mgr.state.WUnlock(&state) @@ -246,21 +251,53 @@ func (mgr *ExecutableInfoManager) RemoveOrDecRef(fileID host.FileID) error { return fmt.Errorf("FileID %v is not known to ExecutableInfoManager", fileID) } - switch info.rc { - case 1: - // This was the last reference: clean up all associated resources. + if info.rc == 0 { + // This should be unreachable. + return errors.New("state corruption in ExecutableInfoManager: encountered 0 RC") + } + + info.rc-- + + if info.rc == 0 { + state.unusedExecutables[fileID] = time.Now() + } + + return nil +} + +// CleanupUnused removes tracked executables for which reference counter has reached zero +// more than `age` ago. During cleanup information about the file is removed from the manager +// and the corresponding BPF maps. +func (mgr *ExecutableInfoManager) CleanupUnused(age time.Duration) error { + state := mgr.state.WLock() + defer mgr.state.WUnlock(&state) + + cutoff := time.Now().Add(-age) + + for fileID, unusedSince := range state.unusedExecutables { + if unusedSince.After(cutoff) { + continue + } + + info, ok := state.executables[fileID] + if !ok { + return fmt.Errorf("FileID %v is in state.unusedExecutables, but not in state.executables", fileID) + } + + if info.rc != 0 { + return fmt.Errorf("FileID %v has rc=%d when zero is expected", fileID, info.rc) + } + if err := state.unloadDeltas(fileID, &info.mapRef); err != nil { return fmt.Errorf("failed remove fileID 0x%x from BPF maps: %w", fileID, err) } + if info.Data != nil { info.Data.Unload(state.ebpf) } + delete(state.executables, fileID) - case 0: - // This should be unreachable. - return errors.New("state corruption in ExecutableInfoManager: encountered 0 RC") - default: - info.rc-- + delete(state.unusedExecutables, fileID) } return nil @@ -301,6 +338,10 @@ type executableInfoManagerState struct { // - exe_id_to_%d_stack_deltas executables map[host.FileID]*entry + // unusedExecutables is an additional mapping from file ID to the time when their reference + // counter reached zero. + unusedExecutables map[host.FileID]time.Time + // unwindInfoIndex maps each unique UnwindInfo to its array index within the corresponding // BPF map. This serves for de-duplication purposes. Elements are never removed. Entries are // synchronized with the unwind_info_array eBPF map. diff --git a/processmanager/manager.go b/processmanager/manager.go index 513de4305..0148d99b6 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -60,7 +60,7 @@ var ( // New creates a new ProcessManager which is responsible for keeping track of loading // and unloading of symbols for processes. func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInterval time.Duration, - ebpf pmebpf.EbpfHandler, traceReporter reporter.TraceReporter, + executableUnloadDelay time.Duration, ebpf pmebpf.EbpfHandler, traceReporter reporter.TraceReporter, exeReporter reporter.ExecutableReporter, sdp nativeunwind.StackDeltaProvider, filterErrorFrames bool, includeEnvVars libpf.Set[string]) (*ProcessManager, error) { if exeReporter == nil { @@ -85,6 +85,13 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter return nil, fmt.Errorf("unable to create ExecutableInfoManager: %v", err) } + periodiccaller.Start(ctx, executableUnloadDelay, func() { + err := em.CleanupUnused(executableUnloadDelay) + if err != nil { + log.Errorf("Failed to cleanup unused executables: %v", err) + } + }) + interpreters := make(map[libpf.PID]map[util.OnDiskFileIdentifier]interpreter.Instance) pm := &ProcessManager{ diff --git a/processmanager/processinfo.go b/processmanager/processinfo.go index e1a764aa2..f793c57fa 100644 --- a/processmanager/processinfo.go +++ b/processmanager/processinfo.go @@ -290,7 +290,7 @@ func (pm *ProcessManager) processRemovedMapping(pid libpf.PID, m *Mapping) uint6 } fileID := host.FileIDFromLibpf(mf.File.Value().FileID) - pm.eim.RemoveOrDecRef(fileID) + pm.eim.DecRef(fileID) return uint64(deleted) } diff --git a/times/times.go b/times/times.go index 96434d95d..5b44fe5ca 100644 --- a/times/times.go +++ b/times/times.go @@ -48,6 +48,7 @@ type Times struct { grpcAuthErrorDelay time.Duration pidCleanupInterval time.Duration probabilisticInterval time.Duration + executableUnloadDelay time.Duration } // IntervalsAndTimers is a meta-interface that exists purely to document its functionality. @@ -74,6 +75,9 @@ type IntervalsAndTimers interface { // ProbabilisticInterval defines the interval for which probabilistic profiling will // be enabled or disabled. ProbabilisticInterval() time.Duration + // ExecutableUnloadDelay defines how long to wait after an executable is no longer used + // before unloading it from memory. + ExecutableUnloadDelay() time.Duration } func (t *Times) MonitorInterval() time.Duration { return t.monitorInterval } @@ -94,6 +98,8 @@ func (t *Times) PIDCleanupInterval() time.Duration { return t.pidCleanupInterval func (t *Times) ProbabilisticInterval() time.Duration { return t.probabilisticInterval } +func (t *Times) ExecutableUnloadDelay() time.Duration { return t.executableUnloadDelay } + // StartRealtimeSync calculates a delta between the monotonic clock // (CLOCK_MONOTONIC, rebased to unixtime) and the realtime clock. If syncInterval is // greater than zero, it also starts a goroutine to perform that calculation periodically. @@ -119,6 +125,7 @@ func New(reportInterval, monitorInterval, probabilisticInterval time.Duration) * reportInterval: reportInterval, monitorInterval: monitorInterval, probabilisticInterval: probabilisticInterval, + executableUnloadDelay: 5 * time.Minute, } } diff --git a/tools/coredump/coredump.go b/tools/coredump/coredump.go index 3d4c78e28..be0606cd2 100644 --- a/tools/coredump/coredump.go +++ b/tools/coredump/coredump.go @@ -135,6 +135,8 @@ func ExtractTraces(ctx context.Context, pr process.Process, debug bool, // function are never used. monitorInterval := time.Hour * 24 + executableUnloadDelay := time.Minute * 5 + // Check compatibility. pid := pr.PID() machineData := pr.GetMachineData() @@ -169,9 +171,9 @@ func ExtractTraces(ctx context.Context, pr process.Process, debug bool, // Instantiate managers and enable all tracers by default includeTracers, _ := tracertypes.Parse("all") - manager, err := pm.New(todo, includeTracers, monitorInterval, &coredumpEbpfMaps, - &traceReporter, nil, elfunwindinfo.NewStackDeltaProvider(), false, - libpf.Set[string]{}) + manager, err := pm.New(todo, includeTracers, monitorInterval, executableUnloadDelay, + &coredumpEbpfMaps, &traceReporter, nil, elfunwindinfo.NewStackDeltaProvider(), + false, libpf.Set[string]{}) if err != nil { return nil, fmt.Errorf("failed to get Interpreter manager: %v", err) } diff --git a/tracer/ebpf_integration_test.go b/tracer/ebpf_integration_test.go index 98d136d52..cf12ec3e4 100644 --- a/tracer/ebpf_integration_test.go +++ b/tracer/ebpf_integration_test.go @@ -30,9 +30,10 @@ import ( type mockIntervals struct{} -func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second } -func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond } -func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second } +func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second } +func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond } +func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second } +func (mockIntervals) ExecutableUnloadDelay() time.Duration { return 1 * time.Second } // forceContextSwitch makes sure two Go threads are running concurrently // and that there will be a context switch between those two. diff --git a/tracer/tracer.go b/tracer/tracer.go index 1320a6de7..3421dbdf0 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -67,6 +67,7 @@ type Intervals interface { MonitorInterval() time.Duration TracePollInterval() time.Duration PIDCleanupInterval() time.Duration + ExecutableUnloadDelay() time.Duration } // Tracer provides an interface for loading and initializing the eBPF components as @@ -239,7 +240,7 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) { hasBatchLookupAndDelete := ebpfHandler.SupportsGenericBatchLookupAndDelete() processManager, err := pm.New(ctx, cfg.IncludeTracers, cfg.Intervals.MonitorInterval(), - ebpfHandler, cfg.TraceReporter, cfg.ExecutableReporter, + cfg.Intervals.ExecutableUnloadDelay(), ebpfHandler, cfg.TraceReporter, cfg.ExecutableReporter, elfunwindinfo.NewStackDeltaProvider(), cfg.FilterErrorFrames, cfg.IncludeEnvVars) if err != nil {