Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ var (

type mockIntervals struct{}

func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond }
func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond }
func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) ExecutableUnloadDelay() time.Duration { return 1 * time.Second }

func isRoot() bool {
return os.Geteuid() == 0
Expand Down
65 changes: 53 additions & 12 deletions processmanager/execinfomanager/manager.go
Comment thread
bobrik marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ func NewExecutableInfoManager(
state: xsync.NewRWMutex(executableInfoManagerState{
interpreterLoaders: interpreterLoaders,
executables: map[host.FileID]*entry{},
unusedExecutables: map[host.FileID]time.Time{},
unwindInfoIndex: map[sdtypes.UnwindInfo]uint16{},
ebpf: ebpf,
}),
Expand Down Expand Up @@ -173,6 +174,9 @@ func (mgr *ExecutableInfoManager) AddOrIncRef(fileID host.FileID,
if ok {
defer mgr.state.WUnlock(&state)
info.rc++
if info.rc == 1 {
delete(state.unusedExecutables, fileID)
}
return info.ExecutableInfo, nil
}

Expand Down Expand Up @@ -207,6 +211,9 @@ func (mgr *ExecutableInfoManager) AddOrIncRef(fileID host.FileID,
defer mgr.state.WUnlock(&state)
if info, ok = state.executables[fileID]; ok {
info.rc++
if info.rc == 1 {
delete(state.unusedExecutables, fileID)
}
return info.ExecutableInfo, nil
}

Expand Down Expand Up @@ -234,10 +241,8 @@ func (mgr *ExecutableInfoManager) AddOrIncRef(fileID host.FileID,
return info.ExecutableInfo, nil
}

// RemoveOrDecRef decrements the reference counter of the executable being tracked. Once the RC
// reaches zero, information about the file is removed from the manager and the corresponding
// BPF maps.
func (mgr *ExecutableInfoManager) RemoveOrDecRef(fileID host.FileID) error {
// DecRef decrements the reference counter of the executable being tracked.
func (mgr *ExecutableInfoManager) DecRef(fileID host.FileID) error {
state := mgr.state.WLock()
defer mgr.state.WUnlock(&state)

Expand All @@ -246,21 +251,53 @@ func (mgr *ExecutableInfoManager) RemoveOrDecRef(fileID host.FileID) error {
return fmt.Errorf("FileID %v is not known to ExecutableInfoManager", fileID)
}

switch info.rc {
case 1:
// This was the last reference: clean up all associated resources.
if info.rc == 0 {
// This should be unreachable.
return errors.New("state corruption in ExecutableInfoManager: encountered 0 RC")
}

info.rc--

if info.rc == 0 {
state.unusedExecutables[fileID] = time.Now()
}
Comment thread
bobrik marked this conversation as resolved.

return nil
}

// CleanupUnused removes tracked executables for which reference counter has reached zero
// more than `age` ago. During cleanup information about the file is removed from the manager
// and the corresponding BPF maps.
func (mgr *ExecutableInfoManager) CleanupUnused(age time.Duration) error {
state := mgr.state.WLock()
defer mgr.state.WUnlock(&state)

cutoff := time.Now().Add(-age)

for fileID, unusedSince := range state.unusedExecutables {
if unusedSince.After(cutoff) {
continue
}

info, ok := state.executables[fileID]
if !ok {
return fmt.Errorf("FileID %v is in state.unusedExecutables, but not in state.executables", fileID)
}

if info.rc != 0 {
return fmt.Errorf("FileID %v has rc=%d when zero is expected", fileID, info.rc)
}

if err := state.unloadDeltas(fileID, &info.mapRef); err != nil {
return fmt.Errorf("failed remove fileID 0x%x from BPF maps: %w", fileID, err)
}

if info.Data != nil {
info.Data.Unload(state.ebpf)
}

delete(state.executables, fileID)
case 0:
// This should be unreachable.
return errors.New("state corruption in ExecutableInfoManager: encountered 0 RC")
default:
info.rc--
delete(state.unusedExecutables, fileID)
}

return nil
Expand Down Expand Up @@ -301,6 +338,10 @@ type executableInfoManagerState struct {
// - exe_id_to_%d_stack_deltas
executables map[host.FileID]*entry

// unusedExecutables is an additional mapping from file ID to the time when their reference
// counter reached zero.
unusedExecutables map[host.FileID]time.Time

// unwindInfoIndex maps each unique UnwindInfo to its array index within the corresponding
// BPF map. This serves for de-duplication purposes. Elements are never removed. Entries are
// synchronized with the unwind_info_array eBPF map.
Expand Down
9 changes: 8 additions & 1 deletion processmanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ var (
// New creates a new ProcessManager which is responsible for keeping track of loading
// and unloading of symbols for processes.
func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInterval time.Duration,
ebpf pmebpf.EbpfHandler, traceReporter reporter.TraceReporter,
executableUnloadDelay time.Duration, ebpf pmebpf.EbpfHandler, traceReporter reporter.TraceReporter,
exeReporter reporter.ExecutableReporter, sdp nativeunwind.StackDeltaProvider,
filterErrorFrames bool, includeEnvVars libpf.Set[string]) (*ProcessManager, error) {
if exeReporter == nil {
Expand All @@ -85,6 +85,13 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter
return nil, fmt.Errorf("unable to create ExecutableInfoManager: %v", err)
}

periodiccaller.Start(ctx, executableUnloadDelay, func() {
err := em.CleanupUnused(executableUnloadDelay)
if err != nil {
log.Errorf("Failed to cleanup unused executables: %v", err)
}
})

interpreters := make(map[libpf.PID]map[util.OnDiskFileIdentifier]interpreter.Instance)

pm := &ProcessManager{
Expand Down
2 changes: 1 addition & 1 deletion processmanager/processinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ func (pm *ProcessManager) processRemovedMapping(pid libpf.PID, m *Mapping) uint6
}

fileID := host.FileIDFromLibpf(mf.File.Value().FileID)
pm.eim.RemoveOrDecRef(fileID)
pm.eim.DecRef(fileID)
return uint64(deleted)
}

Expand Down
7 changes: 7 additions & 0 deletions times/times.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type Times struct {
grpcAuthErrorDelay time.Duration
pidCleanupInterval time.Duration
probabilisticInterval time.Duration
executableUnloadDelay time.Duration
}

// IntervalsAndTimers is a meta-interface that exists purely to document its functionality.
Expand All @@ -74,6 +75,9 @@ type IntervalsAndTimers interface {
// ProbabilisticInterval defines the interval for which probabilistic profiling will
// be enabled or disabled.
ProbabilisticInterval() time.Duration
// ExecutableUnloadDelay defines how long to wait after an executable is no longer used
// before unloading it from memory.
ExecutableUnloadDelay() time.Duration
}

func (t *Times) MonitorInterval() time.Duration { return t.monitorInterval }
Expand All @@ -94,6 +98,8 @@ func (t *Times) PIDCleanupInterval() time.Duration { return t.pidCleanupInterval

func (t *Times) ProbabilisticInterval() time.Duration { return t.probabilisticInterval }

func (t *Times) ExecutableUnloadDelay() time.Duration { return t.executableUnloadDelay }

// StartRealtimeSync calculates a delta between the monotonic clock
// (CLOCK_MONOTONIC, rebased to unixtime) and the realtime clock. If syncInterval is
// greater than zero, it also starts a goroutine to perform that calculation periodically.
Expand All @@ -119,6 +125,7 @@ func New(reportInterval, monitorInterval, probabilisticInterval time.Duration) *
reportInterval: reportInterval,
monitorInterval: monitorInterval,
probabilisticInterval: probabilisticInterval,
executableUnloadDelay: 5 * time.Minute,
}
}

Expand Down
8 changes: 5 additions & 3 deletions tools/coredump/coredump.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ func ExtractTraces(ctx context.Context, pr process.Process, debug bool,
// function are never used.
monitorInterval := time.Hour * 24

executableUnloadDelay := time.Minute * 5

// Check compatibility.
pid := pr.PID()
machineData := pr.GetMachineData()
Expand Down Expand Up @@ -169,9 +171,9 @@ func ExtractTraces(ctx context.Context, pr process.Process, debug bool,
// Instantiate managers and enable all tracers by default
includeTracers, _ := tracertypes.Parse("all")

manager, err := pm.New(todo, includeTracers, monitorInterval, &coredumpEbpfMaps,
&traceReporter, nil, elfunwindinfo.NewStackDeltaProvider(), false,
libpf.Set[string]{})
manager, err := pm.New(todo, includeTracers, monitorInterval, executableUnloadDelay,
&coredumpEbpfMaps, &traceReporter, nil, elfunwindinfo.NewStackDeltaProvider(),
false, libpf.Set[string]{})
if err != nil {
return nil, fmt.Errorf("failed to get Interpreter manager: %v", err)
}
Expand Down
7 changes: 4 additions & 3 deletions tracer/ebpf_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ import (

type mockIntervals struct{}

func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond }
func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond }
func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second }
func (mockIntervals) ExecutableUnloadDelay() time.Duration { return 1 * time.Second }

// forceContextSwitch makes sure two Go threads are running concurrently
// and that there will be a context switch between those two.
Expand Down
3 changes: 2 additions & 1 deletion tracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type Intervals interface {
MonitorInterval() time.Duration
TracePollInterval() time.Duration
PIDCleanupInterval() time.Duration
ExecutableUnloadDelay() time.Duration
}

// Tracer provides an interface for loading and initializing the eBPF components as
Expand Down Expand Up @@ -239,7 +240,7 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) {
hasBatchLookupAndDelete := ebpfHandler.SupportsGenericBatchLookupAndDelete()

processManager, err := pm.New(ctx, cfg.IncludeTracers, cfg.Intervals.MonitorInterval(),
ebpfHandler, cfg.TraceReporter, cfg.ExecutableReporter,
cfg.Intervals.ExecutableUnloadDelay(), ebpfHandler, cfg.TraceReporter, cfg.ExecutableReporter,
elfunwindinfo.NewStackDeltaProvider(),
cfg.FilterErrorFrames, cfg.IncludeEnvVars)
if err != nil {
Expand Down