Skip to content

Commit

Permalink
ProcessCache: Make GC interval configurable
Browse files Browse the repository at this point in the history
The process cache delays removal of exited processes. The first time the
garbage collector sees a process (marked deletePending), it changes it
to deleteReady. The second time it sees the process, it deletes it from
the cache. The garbage collector runs every 30s.

This commit makes the GC interval configurable with the
--process-cache-gc-interval switch.

Signed-off-by: Kevin Sheldrake <[email protected]>
  • Loading branch information
kevsecurity committed Nov 18, 2024
1 parent 3938aac commit 3391ef3
Show file tree
Hide file tree
Showing 12 changed files with 69 additions and 35 deletions.
7 changes: 6 additions & 1 deletion cmd/tetragon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,12 @@ func tetragonExecuteCtx(ctx context.Context, cancel context.CancelFunc, ready fu
}
k8sWatcher.Start()

if err := process.InitCache(k8sWatcher, option.Config.ProcessCacheSize); err != nil {
pcGcInterval := option.Config.ProcessCacheGcInterval
if pcGcInterval <= 0 {
pcGcInterval = defaults.DefaultProcessCacheGcInterval
}

if err := process.InitCache(k8sWatcher, option.Config.ProcessCacheSize, pcGcInterval); err != nil {
return err
}

Expand Down
3 changes: 3 additions & 0 deletions docs/data/tetragon_flags.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pkg/bench/bench.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ func startBenchmarkExporter(ctx context.Context, obs *observer.Observer, summary
dataCacheSize := 1024

watcher := watcher.NewFakeK8sWatcher(nil)
if err := process.InitCache(watcher, processCacheSize); err != nil {
if err := process.InitCache(watcher, processCacheSize, defaults.DefaultProcessCacheGcInterval); err != nil {
return err
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/defaults/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

package defaults

import "time"

const (
// DefaultMapRoot is the default path where BPFFS should be mounted
DefaultMapRoot = "/sys/fs/bpf"
Expand Down Expand Up @@ -49,6 +51,9 @@ const (
// defaults for the event cache
DefaultEventCacheNumRetries = 15
DefaultEventCacheRetryDelay = 2

// defaults for the process cache
DefaultProcessCacheGcInterval = 30 * time.Second
)

var (
Expand Down
3 changes: 2 additions & 1 deletion pkg/grpc/exec/exec_test_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/cilium/tetragon/api/v1/tetragon"
tetragonAPI "github.com/cilium/tetragon/pkg/api/processapi"
"github.com/cilium/tetragon/pkg/defaults"
"github.com/cilium/tetragon/pkg/eventcache"
"github.com/cilium/tetragon/pkg/option"
"github.com/cilium/tetragon/pkg/process"
Expand Down Expand Up @@ -311,7 +312,7 @@ func CreateCloneEvents[CLONE notify.Message, EXIT notify.Message](Pid uint32, Kt
}

func InitEnv[EXEC notify.Message, EXIT notify.Message](t *testing.T, watcher watcher.K8sResourceWatcher) DummyNotifier[EXEC, EXIT] {
if err := process.InitCache(watcher, 65536); err != nil {
if err := process.InitCache(watcher, 65536, defaults.DefaultProcessCacheGcInterval); err != nil {
t.Fatalf("failed to call process.InitCache %s", err)
}

Expand Down
9 changes: 5 additions & 4 deletions pkg/grpc/process_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/cilium/tetragon/api/v1/tetragon"
"github.com/cilium/tetragon/pkg/api/processapi"
"github.com/cilium/tetragon/pkg/defaults"
"github.com/cilium/tetragon/pkg/grpc/exec"
"github.com/cilium/tetragon/pkg/option"
"github.com/cilium/tetragon/pkg/process"
Expand Down Expand Up @@ -60,7 +61,7 @@ func TestProcessManager_getPodInfo(t *testing.T) {
}

pods := []interface{}{&podA}
err := process.InitCache(watcher.NewFakeK8sWatcher(pods), 10)
err := process.InitCache(watcher.NewFakeK8sWatcher(pods), 10, defaults.DefaultProcessCacheGcInterval)
assert.NoError(t, err)
defer process.FreeCache()
pod := process.GetPodInfo(0, "container-id-not-found", "", "", 0)
Expand Down Expand Up @@ -125,7 +126,7 @@ func TestProcessManager_getPodInfoMaybeExecProbe(t *testing.T) {
},
}
pods := []interface{}{&podA}
err := process.InitCache(watcher.NewFakeK8sWatcher(pods), 10)
err := process.InitCache(watcher.NewFakeK8sWatcher(pods), 10, defaults.DefaultProcessCacheGcInterval)
assert.NoError(t, err)
defer process.FreeCache()
pod := process.GetPodInfo(0, "aaaaaaa", "/bin/command", "arg-a arg-b", 1234)
Expand All @@ -145,7 +146,7 @@ func TestProcessManager_getPodInfoMaybeExecProbe(t *testing.T) {
}

func TestProcessManager_GetProcessExec(t *testing.T) {
err := process.InitCache(watcher.NewFakeK8sWatcher(nil), 10)
err := process.InitCache(watcher.NewFakeK8sWatcher(nil), 10, defaults.DefaultProcessCacheGcInterval)
assert.NoError(t, err)
defer process.FreeCache()
var wg sync.WaitGroup
Expand Down Expand Up @@ -215,7 +216,7 @@ func TestProcessManager_GetProcessID(t *testing.T) {
assert.NoError(t, os.Setenv("NODE_NAME", "my-node"))
node.SetNodeName()

err := process.InitCache(watcher.NewFakeK8sWatcher([]interface{}{}), 10)
err := process.InitCache(watcher.NewFakeK8sWatcher([]interface{}{}), 10, defaults.DefaultProcessCacheGcInterval)
assert.NoError(t, err)
defer process.FreeCache()
id := process.GetProcessID(1, 2)
Expand Down
21 changes: 17 additions & 4 deletions pkg/observer/observertesthelper/observer_test_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"time"

"github.com/cilium/tetragon/pkg/cgrouprate"
"github.com/cilium/tetragon/pkg/defaults"
"github.com/cilium/tetragon/pkg/encoder"
"github.com/cilium/tetragon/pkg/metricsconfig"
"github.com/cilium/tetragon/pkg/observer"
Expand Down Expand Up @@ -57,9 +58,10 @@ var (
)

type testObserverOptions struct {
crd bool
config string
lib string
crd bool
config string
lib string
procCacheGcInterval time.Duration
}

type testExporterOptions struct {
Expand Down Expand Up @@ -102,6 +104,12 @@ func WithConfig(config string) TestOption {
}
}

// WithProcCacheGcInterval returns a TestOption that stores gcInterval as the
// process cache garbage-collection interval in the observer test options.
func WithProcCacheGcInterval(gcInterval time.Duration) TestOption {
	var setInterval TestOption = func(opts *TestOptions) {
		// Only records the value; it is consumed later when the cache is set up.
		opts.observer.procCacheGcInterval = gcInterval
	}
	return setInterval
}

func withK8sWatcher(w watcher.K8sResourceWatcher) TestOption {
return func(o *TestOptions) {
o.exporter.watcher = w
Expand Down Expand Up @@ -358,6 +366,7 @@ func loadExporter(tb testing.TB, ctx context.Context, obs *observer.Observer, op
watcher := opts.watcher
processCacheSize := 32768
dataCacheSize := 1024
procCacheGcInterval := defaults.DefaultProcessCacheGcInterval

if err := obs.InitSensorManager(); err != nil {
return err
Expand All @@ -378,7 +387,11 @@ func loadExporter(tb testing.TB, ctx context.Context, obs *observer.Observer, op
return err
}

if err := process.InitCache(watcher, processCacheSize); err != nil {
if oo.procCacheGcInterval > 0 {
procCacheGcInterval = oo.procCacheGcInterval
}

if err := process.InitCache(watcher, processCacheSize, procCacheGcInterval); err != nil {
return err
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ type config struct {
RBSizeTotal int
RBQueueSize int

ProcessCacheSize int
DataCacheSize int
ProcessCacheSize int
DataCacheSize int
ProcessCacheGcInterval time.Duration

MetricsServer string
MetricsLabelFilter metrics.LabelFilter
Expand Down
31 changes: 19 additions & 12 deletions pkg/option/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,19 @@ import (
)

const (
KeyConfigDir = "config-dir"
KeyDebug = "debug"
KeyHubbleLib = "bpf-lib"
KeyBTF = "btf"
KeyProcFS = "procfs"
KeyKernelVersion = "kernel"
KeyVerbosity = "verbose"
KeyProcessCacheSize = "process-cache-size"
KeyDataCacheSize = "data-cache-size"
KeyForceSmallProgs = "force-small-progs"
KeyForceLargeProgs = "force-large-progs"
KeyClusterName = "cluster-name"
KeyConfigDir = "config-dir"
KeyDebug = "debug"
KeyHubbleLib = "bpf-lib"
KeyBTF = "btf"
KeyProcFS = "procfs"
KeyKernelVersion = "kernel"
KeyVerbosity = "verbose"
KeyProcessCacheSize = "process-cache-size"
KeyDataCacheSize = "data-cache-size"
KeyProcessCacheGcInterval = "process-cache-gc-interval"
KeyForceSmallProgs = "force-small-progs"
KeyForceLargeProgs = "force-large-progs"
KeyClusterName = "cluster-name"

KeyLogLevel = "log-level"
KeyLogFormat = "log-format"
Expand Down Expand Up @@ -170,6 +171,11 @@ func ReadAndSetFlags() error {

Config.ProcessCacheSize = viper.GetInt(KeyProcessCacheSize)
Config.DataCacheSize = viper.GetInt(KeyDataCacheSize)
Config.ProcessCacheGcInterval = viper.GetDuration(KeyProcessCacheGcInterval)

// The GC interval is used to drive a time.Ticker, which requires a strictly
// positive duration, so zero is rejected along with negative values.
if Config.ProcessCacheGcInterval <= 0 {
	return fmt.Errorf("invalid %s value %v: must be > 0", KeyProcessCacheGcInterval, Config.ProcessCacheGcInterval)
}

Config.MetricsServer = viper.GetString(KeyMetricsServer)
Config.MetricsLabelFilter = DefaultLabelFilter().WithEnabledLabels(ParseMetricsLabelFilter(viper.GetString(KeyMetricsLabelFilter)))
Expand Down Expand Up @@ -296,6 +302,7 @@ func AddFlags(flags *pflag.FlagSet) {
flags.Int(KeyVerbosity, 0, "set verbosity level for eBPF verifier dumps. Pass 0 for silent, 1 for truncated logs, 2 for a full dump")
flags.Int(KeyProcessCacheSize, 65536, "Size of the process cache")
flags.Int(KeyDataCacheSize, 1024, "Size of the data events cache")
flags.Duration(KeyProcessCacheGcInterval, defaults.DefaultProcessCacheGcInterval, "Time between checking the process cache for old entries")
flags.Bool(KeyForceSmallProgs, false, "Force loading small programs, even in kernels with >= 5.3 versions")
flags.Bool(KeyForceLargeProgs, false, "Force loading large programs, even in kernels with < 5.3 versions")
flags.String(KeyExportFilename, "", "Filename for JSON export. Disabled by default")
Expand Down
10 changes: 3 additions & 7 deletions pkg/process/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@ var colorStr = map[int]string{
deleted: "deleted",
}

// garbage collection run interval
const (
intervalGC = time.Second * 30
)

func (pc *Cache) cacheGarbageCollector() {
func (pc *Cache) cacheGarbageCollector(intervalGC time.Duration) {
ticker := time.NewTicker(intervalGC)
pc.deleteChan = make(chan *ProcessInternal)
pc.stopChan = make(chan bool)
Expand Down Expand Up @@ -147,6 +142,7 @@ func (pc *Cache) purge() {

func NewCache(
processCacheSize int,
gcInterval time.Duration,
) (*Cache, error) {
lruCache, err := lru.NewWithEvict(
processCacheSize,
Expand All @@ -161,7 +157,7 @@ func NewCache(
cache: lruCache,
size: processCacheSize,
}
pm.cacheGarbageCollector()
pm.cacheGarbageCollector(gcInterval)
return pm, nil
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/process/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ import (
"testing"

"github.com/cilium/tetragon/api/v1/tetragon"
"github.com/cilium/tetragon/pkg/defaults"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/types/known/wrapperspb"
)

func TestProcessCache(t *testing.T) {
// add a process to the cache.
cache, err := NewCache(10)
cache, err := NewCache(10, defaults.DefaultProcessCacheGcInterval)
require.NoError(t, err)
pid := wrapperspb.UInt32Value{Value: 1234}
execID := "process1"
Expand Down
5 changes: 3 additions & 2 deletions pkg/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strings"
"sync"
"sync/atomic"
"time"

"github.com/cilium/tetragon/pkg/cgidmap"
"github.com/cilium/tetragon/pkg/fieldfilters"
Expand Down Expand Up @@ -73,15 +74,15 @@ var (
ErrProcessInfoMissing = errors.New("failed process info missing")
)

func InitCache(w watcher.K8sResourceWatcher, size int) error {
func InitCache(w watcher.K8sResourceWatcher, size int, gcInterval time.Duration) error {
var err error

if procCache != nil {
FreeCache()
}

k8s = w
procCache, err = NewCache(size)
procCache, err = NewCache(size, gcInterval)
if err != nil {
k8s = nil
}
Expand Down

0 comments on commit 3391ef3

Please sign in to comment.