Skip to content

Commit

Permalink
Remove pidMap
Browse files Browse the repository at this point in the history
pidMap holds a mapping from each PID to the most recent exec ID for that PID.
This is used to find the parent of exec events that come after another
exec event (i.e. not a clone). This can be racy, as events can arrive out of
order (OOO) in user space.

This information is also available in the execve_map inside the kernel
where all events happen in-order.

Commit 45745a0
introduced a new field in msg_execve_event (cleanup_key) that keeps the
parent of the execve event. In that commit, the field was needed for cleanup.

We can use the same information to determine the parent and remove
the pidMap entirely. If there is a case where we cannot find the entry in
the execve_map, we fall back to Linux's view of the parent (i.e. the process
that cloned the current process, not the previous execve).

Signed-off-by: Anastasios Papagiannis <[email protected]>
  • Loading branch information
tpapagian authored and kkourt committed Oct 31, 2022
1 parent 113a336 commit ef0a426
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 85 deletions.
6 changes: 0 additions & 6 deletions pkg/metrics/errormetrics/errormetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ var (
EventCacheProcessInfoFailed ErrorType = "event_cache_process_info_failed"
// Event cache failed to set parent information for an event.
EventCacheParentInfoFailed ErrorType = "event_cache_parent_info_failed"
// There was an invalid entry in the pid map.
PidMapInvalidEntry ErrorType = "pid_map_invalid_entry"
// An entry was evicted from the pid map because the map was full.
PidMapEvicted ErrorType = "pid_map_evicted"
// PID not found in the pid map on remove() call.
PidMapMissOnRemove ErrorType = "pid_map_miss_on_remove"
// An exec event without parent info.
ExecMissingParent ErrorType = "exec_missing_parent"
// An event is missing process info.
Expand Down
44 changes: 1 addition & 43 deletions pkg/process/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,12 @@ import (
"github.com/cilium/tetragon/pkg/metrics/errormetrics"
"github.com/cilium/tetragon/pkg/metrics/mapmetrics"
lru "github.com/hashicorp/golang-lru"
"github.com/sirupsen/logrus"
)

type Cache struct {
cache *lru.Cache
deleteChan chan *ProcessInternal
stopChan chan bool

// pidMap is a map from PID to the most recent exec ID for the PID. This is used to find the parent
// of exec events without clone flag.
pidMap *lru.Cache
}

// garbage collection states
Expand Down Expand Up @@ -52,7 +47,6 @@ func (pc *Cache) cacheGarbageCollector() {
case <-pc.stopChan:
ticker.Stop()
pc.cache.Purge()
pc.pidMap.Purge()
case <-ticker.C:
newQueue = newQueue[:0]
for _, p := range deleteQueue {
Expand Down Expand Up @@ -137,17 +131,11 @@ func NewCache(
if err != nil {
return nil, err
}
pidMap, err := lru.New(processCacheSize)
if err != nil {
return nil, err
}
pm := &Cache{
cache: lruCache,
pidMap: pidMap,
cache: lruCache,
}
update := func() {
mapmetrics.MapSizeSet("processLru", processCacheSize, float64(pm.cache.Len()))
mapmetrics.MapSizeSet("pidMap", processCacheSize, float64(pm.pidMap.Len()))
}
ticker := time.NewTicker(60 * time.Second)
go func() {
Expand Down Expand Up @@ -191,39 +179,9 @@ func (pc *Cache) remove(process *tetragon.Process) bool {
if !present {
errormetrics.ErrorTotalInc(errormetrics.ProcessCacheMissOnRemove)
}
if process.Pid != nil {
pidFound := pc.pidMap.Remove(process.Pid.Value)
if !pidFound {
errormetrics.ErrorTotalInc(errormetrics.PidMapMissOnRemove)
}
}
return present
}

func (pc *Cache) len() int {
return pc.cache.Len()
}

// Get the exec ID for a given PID. If PID is not found, it returns an empty string.
func (pc *Cache) getFromPidMap(pid uint32) string {
entry, ok := pc.pidMap.Get(pid)
if !ok {
return ""
}
execID, ok := entry.(string)
if !ok {
logger.GetLogger().WithFields(logrus.Fields{"pid": pid, "execID": execID}).Warn("Invalid entry in pidMap")
errormetrics.ErrorTotalInc(errormetrics.PidMapInvalidEntry)
return ""
}
return execID
}

func (pc *Cache) AddToPidMap(pid uint32, execID string) bool {
evicted := pc.pidMap.Add(pid, execID)
if evicted {
logger.GetLogger().Warn("Entry evicted from pidMap")
errormetrics.ErrorTotalInc(errormetrics.PidMapEvicted)
}
return evicted
}
4 changes: 0 additions & 4 deletions pkg/process/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,15 @@ func TestProcessCache(t *testing.T) {
}
cache.Add(&proc)
assert.Equal(t, cache.len(), 1)
cache.AddToPidMap(pid.Value, execID)

result, err := cache.get(proc.process.ExecId)
assert.NoError(t, err)
assert.Equal(t, proc.process.ExecId, result.process.ExecId)
assert.Equal(t, proc.capabilities, result.capabilities)
assert.Equal(t, cache.getFromPidMap(pid.Value), execID)

// remove the entry from cache.
assert.True(t, cache.remove(proc.process))
assert.Equal(t, cache.len(), 0)
assert.Equal(t, cache.pidMap.Len(), 0)
_, err = cache.get(proc.process.ExecId)
assert.Error(t, err)
assert.Equal(t, cache.getFromPidMap(pid.Value), "")
}
40 changes: 8 additions & 32 deletions pkg/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,12 @@ import (
"github.com/cilium/tetragon/pkg/cilium"
"github.com/cilium/tetragon/pkg/ktime"
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/errormetrics"
"github.com/cilium/tetragon/pkg/metrics/processexecmetrics"
"github.com/cilium/tetragon/pkg/reader/caps"
"github.com/cilium/tetragon/pkg/reader/exec"
"github.com/cilium/tetragon/pkg/reader/namespace"
"github.com/cilium/tetragon/pkg/reader/node"
"github.com/cilium/tetragon/pkg/reader/path"
"github.com/cilium/tetragon/pkg/watcher"
"github.com/sirupsen/logrus"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/wrapperspb"

Expand Down Expand Up @@ -236,36 +233,15 @@ func GetParentProcessInternal(pid uint32, ktime uint64) (*ProcessInternal, *Proc

// AddExecEvent constructs a new ProcessInternal structure from an Execve event, adds it to the cache, and also returns it
func AddExecEvent(event *tetragonAPI.MsgExecveEventUnix) *ProcessInternal {
proc, _ := GetProcess(event.Process, event.Kube.Docker, event.Parent, event.Capabilities, event.Namespaces)
procCache.Add(proc)

var parentExecID string
if proc.process.Pid != nil {
parentExecID = procCache.getFromPidMap(proc.process.Pid.Value)
procCache.AddToPidMap(proc.process.Pid.Value, proc.process.ExecId)
}
if strings.Contains(proc.process.Flags, "clone") || strings.Contains(proc.process.Flags, "procFS") {
return proc
}
// This means the exec didn't clone. Look up the most recent exec ID for this PID
// and use that as the parent.
parent, err := procCache.get(parentExecID)
if err != nil {
errormetrics.ErrorTotalInc(errormetrics.NoParentNoClone)
logger.GetLogger().WithFields(logrus.Fields{
"parent exec id": parentExecID,
"process": proc,
}).Debug("parent not found in cache")
return proc
}
if parent.process.ExecId == proc.process.ExecId {
logger.GetLogger().WithFields(logrus.Fields{
"exec_id": parent.process.ExecId,
}).Debug("Parent and current process have the same exec ID")
processexecmetrics.SameExecIdInc(parent.process.ExecId)
return proc
var proc *ProcessInternal
if event.CleanupProcess.Ktime == 0 {
// there is a case where we cannot find this entry in execve_map
// in that case we use as parent what Linux knows
proc, _ = GetProcess(event.Process, event.Kube.Docker, event.Parent, event.Capabilities, event.Namespaces)
} else {
proc, _ = GetProcess(event.Process, event.Kube.Docker, event.CleanupProcess, event.Capabilities, event.Namespaces)
}
proc.process.ParentExecId = parent.process.ExecId
procCache.Add(proc)
return proc
}

Expand Down

0 comments on commit ef0a426

Please sign in to comment.