Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
949f369
feat: foundation for ContainerProfileCache unification (steps 1, 2, 5…
matthyx Apr 22, 2026
3d872aa
feat: ContainerProfileCacheImpl + projection + shared-pointer fast-pa…
matthyx Apr 22, 2026
adae8dc
feat: ContainerProfileCache reconciler with evict + refresh (step 5)
matthyx Apr 22, 2026
077378a
feat: profilehelper CP->legacy-shape shims + ContainerProfileCache ag…
matthyx Apr 22, 2026
9f5facb
refactor: migrate 20 CEL call sites to GetContainerProfile (step 6b)
matthyx Apr 22, 2026
c474c95
refactor: delete profilehelper shims + migrate rule_manager + creator…
matthyx Apr 22, 2026
46c424e
refactor: ObjectCache aggregator CP-only + collapse 2 callbacks to 1 …
matthyx Apr 22, 2026
71167cf
refactor: delete legacy AP/NN cache packages + move callstackcache (s…
matthyx Apr 22, 2026
bd6411f
test: add T2 init-eviction, T5 packages-deleted, T7 lock-stress (step…
matthyx Apr 22, 2026
c2966c0
fix: address Phase 4 review P1 findings
matthyx Apr 22, 2026
9f2d831
fix: retry pending ContainerProfile GETs when CP appears after contai…
matthyx Apr 22, 2026
314d93c
fix: cache correctness — right CP slug, partial-on-restart, overlay r…
matthyx Apr 22, 2026
d27be01
fix: read workload-level AP/NN as primary data source
matthyx Apr 22, 2026
ce32919
debug: add tick-loop start log + change-detection log in reconciler
matthyx Apr 22, 2026
c45803f
fix: remove overly-aggressive pending GC that dropped entries before …
matthyx Apr 22, 2026
32a76c0
fix: merge user-managed AP/NN and refresh workload-level sources
matthyx Apr 22, 2026
d9ae0ac
fix: reconcileOnce no longer evicts on pod-cache lag, only on Terminated
matthyx Apr 22, 2026
dfb88ae
refactor: drop workload-level AP/NN fetch; CP-direct reading is autho…
matthyx Apr 22, 2026
8cd9b16
fix: synthetic entry CPName override, PodUID backfill, phase-labeled …
matthyx Apr 22, 2026
74d4652
fix: address all CodeRabbit review comments on PR #788
matthyx Apr 22, 2026
ee81dc3
feat: thread context.Context through ProfileClient and add per-call R…
matthyx Apr 22, 2026
16ecd38
test: shared-pointer race-fuzz test + WarmContainerLocksForTest helper
matthyx Apr 22, 2026
42284f5
docs: document SetApplicationProfile / SetNetworkNeighborhood field p…
matthyx Apr 22, 2026
2f88029
refactor: T8 integration mirror, mock setter contract doc, SeedEntryW…
matthyx Apr 22, 2026
db5c58c
fix: address Phase 4 code-review findings
matthyx Apr 22, 2026
3909a3b
fix: preserve cached entry when overlay AP/NN fetch fails transiently
matthyx Apr 22, 2026
ff0d1ff
fix: address CodeRabbit review issues on PR #788
matthyx Apr 23, 2026
067baa4
fix: distinct RNG seed per stress-test worker
matthyx Apr 23, 2026
45499f9
refactor: move test helpers out of production source into testing.go
matthyx Apr 23, 2026
faa83de
refactor: move integration tests into package dir; use export_test.go
matthyx Apr 23, 2026
936bce8
fix: nil out overlay pointers when k8s client returns zero-value on 404
matthyx Apr 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ TAG?=test
binary:
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o $(BINARY_NAME) ./cmd/main.go

# check-legacy-packages runs only the TestLegacyPackagesDeleted test from
# ./tests/containerprofilecache.
# NOTE(review): confirm this package path still holds that test.
.PHONY: check-legacy-packages
check-legacy-packages:
go test ./tests/containerprofilecache -run TestLegacyPackagesDeleted

docker-build-only:
docker buildx build --platform linux/amd64 -t $(IMAGE):$(TAG) -f $(DOCKERFILE_PATH) --load .

Expand Down
18 changes: 7 additions & 11 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,9 @@ import (
"github.com/kubescape/node-agent/pkg/nodeprofilemanager"
nodeprofilemanagerv1 "github.com/kubescape/node-agent/pkg/nodeprofilemanager/v1"
"github.com/kubescape/node-agent/pkg/objectcache"
"github.com/kubescape/node-agent/pkg/objectcache/applicationprofilecache"
"github.com/kubescape/node-agent/pkg/objectcache/containerprofilecache"
"github.com/kubescape/node-agent/pkg/objectcache/dnscache"
"github.com/kubescape/node-agent/pkg/objectcache/k8scache"
"github.com/kubescape/node-agent/pkg/objectcache/networkneighborhoodcache"
objectcachev1 "github.com/kubescape/node-agent/pkg/objectcache/v1"
"github.com/kubescape/node-agent/pkg/processtree"
containerprocesstree "github.com/kubescape/node-agent/pkg/processtree/container"
Expand Down Expand Up @@ -297,16 +296,14 @@ func main() {
ruleBindingNotify = make(chan rulebinding.RuleBindingNotify, 100)
ruleBindingCache.AddNotifier(&ruleBindingNotify)

apc := applicationprofilecache.NewApplicationProfileCache(cfg, storageClient, k8sObjectCache)
apc.Start(ctx)

nnc := networkneighborhoodcache.NewNetworkNeighborhoodCache(cfg, storageClient, k8sObjectCache)
nnc.Start(ctx)
cpc := containerprofilecache.NewContainerProfileCache(cfg, storageClient, k8sObjectCache, prometheusExporter)
cpc.Start(ctx)
logger.L().Info("ContainerProfileCache active; legacy AP/NN caches removed")

dc := dnscache.NewDnsCache(dnsResolver)

// create object cache
objCache = objectcachev1.NewObjectCache(k8sObjectCache, apc, nnc, dc)
objCache = objectcachev1.NewObjectCache(k8sObjectCache, cpc, dc)

ruleCooldown := rulecooldown.NewRuleCooldown(cfg.RuleCoolDown)

Expand All @@ -328,10 +325,9 @@ func main() {

} else {
ruleManager = rulemanager.CreateRuleManagerMock()
apc := &objectcache.ApplicationProfileCacheMock{}
nnc := &objectcache.NetworkNeighborhoodCacheMock{}
cpc := &objectcache.ContainerProfileCacheMock{}
dc := &objectcache.DnsCacheMock{}
objCache = objectcachev1.NewObjectCache(k8sObjectCache, apc, nnc, dc)
objCache = objectcachev1.NewObjectCache(k8sObjectCache, cpc, dc)
ruleBindingNotify = make(chan rulebinding.RuleBindingNotify, 1)
}

Expand Down
1 change: 1 addition & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ type Config struct {
ProcfsPidScanInterval time.Duration `mapstructure:"procfsPidScanInterval"`
ProcfsScanInterval time.Duration `mapstructure:"procfsScanInterval"`
ProfilesCacheRefreshRate time.Duration `mapstructure:"profilesCacheRefreshRate"`
StorageRPCBudget time.Duration `mapstructure:"storageRPCBudget"`
RuleCoolDown rulecooldown.RuleCooldownConfig `mapstructure:"ruleCooldown"`
TestMode bool `mapstructure:"testMode"`
UpdateDataPeriod time.Duration `mapstructure:"updateDataPeriod"`
Expand Down
3 changes: 1 addition & 2 deletions pkg/containerwatcher/v2/container_watcher_collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ func (cw *ContainerWatcher) StartContainerCollection(ctx context.Context) error
cw.containerCallbackAsync,
cw.containerProcessTree.ContainerCallback,
cw.containerProfileManager.ContainerCallback,
cw.objectCache.ApplicationProfileCache().ContainerCallback,
cw.objectCache.NetworkNeighborhoodCache().ContainerCallback,
cw.objectCache.ContainerProfileCache().ContainerCallback,
cw.malwareManager.ContainerCallback,
cw.ruleManager.ContainerCallback,
cw.sbomManager.ContainerCallback,
Expand Down
5 changes: 5 additions & 0 deletions pkg/metricsmanager/metrics_manager_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ type MetricsManager interface {
ReportContainerStart()
ReportContainerStop()
ReportDedupEvent(eventType utils.EventType, duplicate bool)
ReportContainerProfileLegacyLoad(kind, completeness string)
SetContainerProfileCacheEntries(kind string, count float64)
ReportContainerProfileCacheHit(hit bool)
ReportContainerProfileReconcilerDuration(phase string, duration time.Duration)
ReportContainerProfileReconcilerEviction(reason string)
}
7 changes: 6 additions & 1 deletion pkg/metricsmanager/metrics_manager_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,9 @@ func (m *MetricsMock) ReportContainerStart() {}

// ReportContainerStop is a no-op in the mock.
func (m *MetricsMock) ReportContainerStop() {}

func (m *MetricsMock) ReportDedupEvent(eventType utils.EventType, duplicate bool) {}
func (m *MetricsMock) ReportDedupEvent(eventType utils.EventType, duplicate bool) {}
// ReportContainerProfileLegacyLoad is a no-op in the mock; both arguments are ignored.
func (m *MetricsMock) ReportContainerProfileLegacyLoad(_, _ string) {}
// SetContainerProfileCacheEntries is a no-op in the mock; both arguments are ignored.
func (m *MetricsMock) SetContainerProfileCacheEntries(_ string, _ float64) {}
// ReportContainerProfileCacheHit is a no-op in the mock; the argument is ignored.
func (m *MetricsMock) ReportContainerProfileCacheHit(_ bool) {}
// ReportContainerProfileReconcilerDuration is a no-op in the mock; both arguments are ignored.
func (m *MetricsMock) ReportContainerProfileReconcilerDuration(_ string, _ time.Duration) {}
// ReportContainerProfileReconcilerEviction is a no-op in the mock; the argument is ignored.
func (m *MetricsMock) ReportContainerProfileReconcilerEviction(_ string) {}
5 changes: 5 additions & 0 deletions pkg/metricsmanager/metrics_manager_noop.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ func (m *MetricsNoop) ReportRuleEvaluationTime(_ string, _ utils.EventType, _ ti
// ReportContainerStart does nothing.
func (m *MetricsNoop) ReportContainerStart() {}
// ReportContainerStop does nothing.
func (m *MetricsNoop) ReportContainerStop() {}
// ReportDedupEvent does nothing; both arguments are ignored.
func (m *MetricsNoop) ReportDedupEvent(_ utils.EventType, _ bool) {}
// ReportContainerProfileLegacyLoad does nothing; both arguments are ignored.
func (m *MetricsNoop) ReportContainerProfileLegacyLoad(_, _ string) {}
// SetContainerProfileCacheEntries does nothing; both arguments are ignored.
func (m *MetricsNoop) SetContainerProfileCacheEntries(_ string, _ float64) {}
// ReportContainerProfileCacheHit does nothing; the argument is ignored.
func (m *MetricsNoop) ReportContainerProfileCacheHit(_ bool) {}
// ReportContainerProfileReconcilerDuration does nothing; both arguments are ignored.
func (m *MetricsNoop) ReportContainerProfileReconcilerDuration(_ string, _ time.Duration) {}
// ReportContainerProfileReconcilerEviction does nothing; the argument is ignored.
func (m *MetricsNoop) ReportContainerProfileReconcilerEviction(_ string) {}
59 changes: 59 additions & 0 deletions pkg/metricsmanager/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ type PrometheusMetric struct {
// Dedup metrics
dedupEventCounter *prometheus.CounterVec

// ContainerProfile cache metrics
cpCacheLegacyLoadsCounter *prometheus.CounterVec
cpCacheEntriesGauge *prometheus.GaugeVec
cpCacheHitCounter *prometheus.CounterVec
cpReconcilerDurationHistogram *prometheus.HistogramVec
cpReconcilerEvictionsCounter *prometheus.CounterVec

// Cache to avoid allocating Labels maps on every call
ruleCounterCache map[string]prometheus.Counter
rulePrefilteredCounterCache map[string]prometheus.Counter
Expand Down Expand Up @@ -215,6 +222,29 @@ func NewPrometheusMetric() *PrometheusMetric {
Help: "Total number of events processed by the dedup layer",
}, []string{eventTypeLabel, "result"}),

// ContainerProfile cache metrics
cpCacheLegacyLoadsCounter: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "node_agent_user_profile_legacy_loads_total",
Help: "Number of times a user-authored legacy ApplicationProfile or NetworkNeighborhood was loaded into the ContainerProfileCache; will be removed in a future release.",
}, []string{"kind", "completeness"}),
cpCacheEntriesGauge: promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "node_agent_containerprofile_cache_entries",
Help: "Current number of cached ContainerProfile entries per kind.",
}, []string{"kind"}),
cpCacheHitCounter: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "node_agent_containerprofile_cache_hit_total",
Help: "Total number of ContainerProfile cache lookups by result.",
}, []string{"result"}),
cpReconcilerDurationHistogram: promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "node_agent_containerprofile_reconciler_duration_seconds",
Help: "Duration of ContainerProfile reconciler phases in seconds.",
Buckets: prometheus.DefBuckets,
}, []string{"phase"}),
cpReconcilerEvictionsCounter: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "node_agent_containerprofile_reconciler_evictions_total",
Help: "Total number of ContainerProfile cache evictions by reason.",
}, []string{"reason"}),
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// Initialize counter caches
ruleCounterCache: make(map[string]prometheus.Counter),
rulePrefilteredCounterCache: make(map[string]prometheus.Counter),
Expand Down Expand Up @@ -256,6 +286,11 @@ func (p *PrometheusMetric) Destroy() {
prometheus.Unregister(p.containerStartCounter)
prometheus.Unregister(p.containerStopCounter)
prometheus.Unregister(p.dedupEventCounter)
prometheus.Unregister(p.cpCacheLegacyLoadsCounter)
prometheus.Unregister(p.cpCacheEntriesGauge)
prometheus.Unregister(p.cpCacheHitCounter)
prometheus.Unregister(p.cpReconcilerDurationHistogram)
prometheus.Unregister(p.cpReconcilerEvictionsCounter)
// Unregister program ID metrics
prometheus.Unregister(p.programRuntimeGauge)
prometheus.Unregister(p.programRunCountGauge)
Expand Down Expand Up @@ -432,3 +467,27 @@ func (p *PrometheusMetric) ReportDedupEvent(eventType utils.EventType, duplicate
}
p.dedupEventCounter.WithLabelValues(string(eventType), result).Inc()
}

// ReportContainerProfileLegacyLoad adds one to the legacy-load counter series
// selected by the kind and completeness label values.
func (p *PrometheusMetric) ReportContainerProfileLegacyLoad(kind, completeness string) {
	legacyLoads := p.cpCacheLegacyLoadsCounter.WithLabelValues(kind, completeness)
	legacyLoads.Inc()
}

// SetContainerProfileCacheEntries sets the cache-entries gauge series for the
// given kind label value to count.
func (p *PrometheusMetric) SetContainerProfileCacheEntries(kind string, count float64) {
	entries := p.cpCacheEntriesGauge.WithLabelValues(kind)
	entries.Set(count)
}

// ReportContainerProfileCacheHit adds one to the cache-lookup counter, using
// the label value "hit" when hit is true and "miss" otherwise.
func (p *PrometheusMetric) ReportContainerProfileCacheHit(hit bool) {
	outcome := "miss"
	if hit {
		outcome = "hit"
	}
	lookups := p.cpCacheHitCounter.WithLabelValues(outcome)
	lookups.Inc()
}

// ReportContainerProfileReconcilerDuration records duration, converted to
// seconds, as one observation in the reconciler histogram series selected by
// the phase label value.
func (p *PrometheusMetric) ReportContainerProfileReconcilerDuration(phase string, duration time.Duration) {
	elapsedSeconds := duration.Seconds()
	p.cpReconcilerDurationHistogram.WithLabelValues(phase).Observe(elapsedSeconds)
}

// ReportContainerProfileReconcilerEviction adds one to the eviction counter
// series selected by the reason label value.
func (p *PrometheusMetric) ReportContainerProfileReconcilerEviction(reason string) {
	evictions := p.cpReconcilerEvictionsCounter.WithLabelValues(reason)
	evictions.Inc()
}
Loading
Loading