From 09bbd7dd83c53797410e22ed9089e0eee0bb8c00 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 27 Feb 2026 16:17:52 -0500 Subject: [PATCH 1/7] Dynamic add/remove of PID selection --- docs/config-schema.json | 2 +- pkg/appolly/discover/finder.go | 14 ++- pkg/appolly/discover/matcher.go | 26 +++++- pkg/appolly/discover/matcher_test.go | 73 +++++++++++++--- pkg/appolly/discover/typer_test.go | 2 + pkg/appolly/discover/watcher_kube_test.go | 2 +- pkg/appolly/discover/watcher_proc.go | 6 +- pkg/appolly/discover/watcher_proc_test.go | 4 +- pkg/appolly/instrumenter.go | 8 +- pkg/appolly/services/attr_glob.go | 46 +++++++++- pkg/appolly/services/attr_glob_test.go | 64 ++++++++++++++ pkg/appolly/services/attr_regex.go | 2 + pkg/appolly/services/criteria.go | 4 + pkg/instrumenter/instrumenter.go | 4 + pkg/instrumenter/instrumenter_test.go | 102 ++++++++++++++++++++++ pkg/instrumenter/opts.go | 20 +++++ pkg/internal/appolly/appolly.go | 56 ++++++++++-- pkg/internal/appolly/appolly_test.go | 34 ++++++++ pkg/pipe/global/context.go | 4 + 19 files changed, 441 insertions(+), 32 deletions(-) create mode 100644 pkg/appolly/services/attr_glob_test.go diff --git a/docs/config-schema.json b/docs/config-schema.json index ee47e3d76e..cee35d41f9 100644 --- a/docs/config-schema.json +++ b/docs/config-schema.json @@ -671,7 +671,7 @@ "type": "integer" }, "type": "array", - "description": "PIDs allows selecting processes by PID. When non-empty, the process PID must be in this list (in addition to any path/port criteria).", + "description": "PIDs allows selecting processes by PID. When non-empty, the process PID must be in this list (in addition to any path/port criteria). Updated at runtime via AddPIDs/RemovePIDs; pidsMu protects concurrent access.", "x-env-var": "OTEL_EBPF_TARGET_PID" } }, diff --git a/pkg/appolly/discover/finder.go b/pkg/appolly/discover/finder.go index 9d37deac2c..8415b83cc7 100644 --- a/pkg/appolly/discover/finder.go +++ b/pkg/appolly/discover/finder.go @@ -8,6 +8,7 @@ import ( "fmt" "go.opentelemetry.io/obi/pkg/appolly/app/request" + "go.opentelemetry.io/obi/pkg/appolly/services" "go.opentelemetry.io/obi/pkg/ebpf" ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" "go.opentelemetry.io/obi/pkg/export/imetrics" @@ -42,6 +43,7 @@ func NewProcessFinder( type processFinderStartConfig struct { enrichedProcessEvents *msg.Queue[[]Event[ProcessAttrs]] + pidSelector services.Selector } // ProcessFinderStartOpt allows overriding some internal behavior of ProcessFinder.Start method. @@ -57,6 +59,14 @@ func WithEnrichedProcessEvents(enrichedProcessEvents *msg.Queue[[]Event[ProcessA } } +// WithPIDSelector supplies an additional selector for PID-based discovery. When non-nil, it is prepended +// to the criteria from config (e.g. Instrumenter passes a GlobAttributes; use AddPIDs/RemovePIDs for runtime updates). +func WithPIDSelector(selector services.Selector) ProcessFinderStartOpt { + return func(cfg *processFinderStartConfig) { + cfg.pidSelector = selector + } +} + // Start the ProcessFinder pipeline in background. It returns a channel where each new discovered // ebpf.ProcessTracer will be notified. func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOpt) (<-chan Event[*ebpf.Instrumentable], error) { @@ -70,7 +80,7 @@ func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOp swi := swarm.Instancer{} processEvents := msgh.QueueFromConfig[[]Event[ProcessAttrs]](pf.cfg, "processEvents") - swi.Add(swarm.DirectInstance(ProcessWatcherFunc(pf.cfg, pf.ebpfEventContext, processEvents)), + swi.Add(swarm.DirectInstance(ProcessWatcherFunc(pf.cfg, pf.ebpfEventContext, processEvents, startConfig.pidSelector)), swarm.WithID("ProcessWatcher")) kubeEnrichedEvents := msgh.QueueFromConfig[[]Event[ProcessAttrs]](pf.cfg, "kubeEnrichedEvents") @@ -96,7 +106,7 @@ func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOp ), swarm.WithID("LanguageDecoratorProvider")) criteriaFilteredEvents := msgh.QueueFromConfig[[]Event[ProcessMatch]](pf.cfg, "criteriaFilteredEvents") - swi.Add(criteriaMatcherProvider(pf.cfg, langEnrichedEvents, criteriaFilteredEvents), + swi.Add(criteriaMatcherProvider(pf.cfg, langEnrichedEvents, criteriaFilteredEvents, startConfig.pidSelector), swarm.WithID("CriteriaMatcher")) executableTypes := msgh.QueueFromConfig[[]Event[ebpf.Instrumentable]](pf.cfg, "executableTypes") diff --git a/pkg/appolly/discover/matcher.go b/pkg/appolly/discover/matcher.go index fc44318427..87badd0679 100644 --- a/pkg/appolly/discover/matcher.go +++ b/pkg/appolly/discover/matcher.go @@ -34,11 +34,12 @@ func criteriaMatcherProvider( cfg *obi.Config, input *msg.Queue[[]Event[ProcessAttrs]], output *msg.Queue[[]Event[ProcessMatch]], + pidSelector services.Selector, ) swarm.InstanceFunc { instrumenterNamespace, _ := namespaceFetcherFunc(app.PID(osPidFunc())) m := &Matcher{ Log: slog.With("component", "discover.CriteriaMatcher"), - Criteria: FindingCriteria(cfg), + Criteria: FindingCriteria(cfg, pidSelector), ExcludeCriteria: ExcludingCriteria(cfg), LogEnricherCriteria: LogEnricherFindingCriteria(cfg), ProcessHistory: map[app.PID]ProcessMatch{}, @@ -372,9 +373,30 @@ func LogEnricherFindingCriteria(cfg *obi.Config) []services.Selector { return selectors } -func FindingCriteria(cfg *obi.Config) []services.Selector { +// FindingCriteria returns discovery criteria from config. When pidSelector is non-nil +// and has PIDs (e.g. Instrumenter's GlobAttributes with target_pids or runtime AddPIDs), +// it is the only criterion: config target PIDs are prefilled into it and it is returned alone. +// When pidSelector is nil or has no PIDs (standalone config with discovery.instrument, no target_pids), +// config-based criteria (discovery.instrument, etc.) are used so processes are still discovered. +func FindingCriteria(cfg *obi.Config, pidSelector services.Selector) []services.Selector { logDeprecationAndConflicts(cfg) + if pidSelector != nil { + if cfg.TargetPIDs.Len() > 0 { + vals := cfg.TargetPIDs.AllValues() + pids := make([]uint32, 0, len(vals)) + for _, v := range vals { + pids = append(pids, uint32(v)) + } + pidSelector.AddPIDs(pids...) + } + // Use pidSelector as the only criterion only when it has effective PID criteria. + // Otherwise (e.g. Instrumenter started with no target_pids) use config-based discovery. + if pids, ok := pidSelector.GetPIDs(); ok && len(pids) > 0 { + return []services.Selector{pidSelector} + } + } + if cfg.TargetPIDs.Len() > 0 { vals := cfg.TargetPIDs.AllValues() pids := make([]uint32, 0, len(vals)) diff --git a/pkg/appolly/discover/matcher_test.go b/pkg/appolly/discover/matcher_test.go index 9027166080..f6a3d7de00 100644 --- a/pkg/appolly/discover/matcher_test.go +++ b/pkg/appolly/discover/matcher_test.go @@ -5,6 +5,7 @@ package discover import ( "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -51,7 +52,7 @@ func TestCriteriaMatcher(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -102,7 +103,7 @@ func TestCriteriaMatcherLanguage(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -152,7 +153,7 @@ func TestCriteriaMatcher_Exclude(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -193,7 +194,7 @@ func TestCriteriaMatcher_Exclude_Metadata(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -241,7 +242,7 @@ func TestCriteriaMatcher_MustMatchAllAttributes(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -296,7 +297,7 @@ func TestCriteriaMatcherMissingPort(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -355,7 +356,7 @@ func TestCriteriaMatcherContainersOnly(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -494,7 +495,7 @@ func TestInstrumentation_CoexistingWithDeprecatedServices(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&tc.cfg, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&tc.cfg, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -529,7 +530,7 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -560,7 +561,7 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -586,6 +587,56 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { }) } +func TestCriteriaMatcher_DynamicTargetPIDs(t *testing.T) { + // Use a GlobAttributes as pidSelector and update its PIDs at runtime; matcher should see updated list. + pipeConfig := obi.Config{ServiceName: "dyn-svc", ServiceNamespace: "ns"} + pidSelector := &services.GlobAttributes{ + Name: "dyn-svc", + Namespace: "ns", + PIDs: []uint32{42}, + } + + discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) + filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) + filteredProcesses := filteredProcessesQu.Subscribe() + // Set processInfo before starting the matcher so it uses the fake; otherwise the matcher + // may call the default (real) processInfo for PID 42 and fail when that process does not exist. + processInfo = func(pp ProcessAttrs) (*services.ProcessInfo, error) { + return &services.ProcessInfo{Pid: pp.pid, ExePath: "/any/exe", OpenPorts: pp.openPorts}, nil + } + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, pidSelector)(t.Context()) + require.NoError(t, err) + go matcherFunc(t.Context()) + defer filteredProcessesQu.Close() + + // First batch: only PID 42 is in selector, so only 42 matches + discoveredProcesses.Send([]Event[ProcessAttrs]{ + {Type: EventCreated, Obj: ProcessAttrs{pid: 42, openPorts: []uint32{}}}, + {Type: EventCreated, Obj: ProcessAttrs{pid: 100, openPorts: []uint32{}}}, + }) + matches := testutil.ReadChannel(t, filteredProcesses, testTimeout) + require.Len(t, matches, 1) + assert.Equal(t, app.PID(42), matches[0].Obj.Process.Pid) + + // Add PID 100 at runtime (e.g. AddTargetPIDs(100)) + pidSelector.AddPIDs(100) + + // Second batch: 100 is now in selector, so 100 matches + discoveredProcesses.Send([]Event[ProcessAttrs]{ + {Type: EventCreated, Obj: ProcessAttrs{pid: 100, openPorts: []uint32{}}}, + }) + matches = testutil.ReadChannel(t, filteredProcesses, testTimeout) + require.Len(t, matches, 1) + assert.Equal(t, app.PID(100), matches[0].Obj.Process.Pid) + + // Remove 100; only 42 remains in selector. Sending 100 again should not match (no event). + pidSelector.RemovePIDs(100) + discoveredProcesses.Send([]Event[ProcessAttrs]{ + {Type: EventCreated, Obj: ProcessAttrs{pid: 100, openPorts: []uint32{}}}, + }) + testutil.ChannelEmpty(t, filteredProcesses, 100*time.Millisecond) +} + func TestCriteriaMatcher_Granular(t *testing.T) { pipeConfig := obi.Config{} @@ -610,7 +661,7 @@ func TestCriteriaMatcher_Granular(t *testing.T) { filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) require.NoError(t, err) diff --git a/pkg/appolly/discover/typer_test.go b/pkg/appolly/discover/typer_test.go index 6241a87b76..e7d23bb6bd 100644 --- a/pkg/appolly/discover/typer_test.go +++ b/pkg/appolly/discover/typer_test.go @@ -37,6 +37,8 @@ func (d dummyCriterion) IsContainersOnly() bool func (d dummyCriterion) GetPathRegexp() services.StringMatcher { return nil } func (d dummyCriterion) GetCmdArgs() services.StringMatcher { return nil } func (d dummyCriterion) GetPIDs() ([]app.PID, bool) { return nil, false } +func (d dummyCriterion) AddPIDs(_ ...uint32) {} +func (d dummyCriterion) RemovePIDs(_ ...uint32) {} func (d dummyCriterion) GetNamespace() string { return d.namespace } func (d dummyCriterion) GetExportModes() services.ExportModes { return d.export } func (d dummyCriterion) GetSamplerConfig() *services.SamplerConfig { return d.sampler } diff --git a/pkg/appolly/discover/watcher_kube_test.go b/pkg/appolly/discover/watcher_kube_test.go index ac1752d72a..d607882b60 100644 --- a/pkg/appolly/discover/watcher_kube_test.go +++ b/pkg/appolly/discover/watcher_kube_test.go @@ -164,7 +164,7 @@ func TestWatcherKubeEnricherWithMatcher(t *testing.T) { version: "v[0-9]*" `), &pipeConfig)) - swi.Add(criteriaMatcherProvider(&pipeConfig, connectQueue, outputQueue)) + swi.Add(criteriaMatcherProvider(&pipeConfig, connectQueue, outputQueue, nil)) nodesRunner, err := swi.Instance(t.Context()) require.NoError(t, err) diff --git a/pkg/appolly/discover/watcher_proc.go b/pkg/appolly/discover/watcher_proc.go index 34d2ee869e..271c152842 100644 --- a/pkg/appolly/discover/watcher_proc.go +++ b/pkg/appolly/discover/watcher_proc.go @@ -67,8 +67,8 @@ func wplog() *slog.Logger { } // ProcessWatcherFunc polls every PollInterval for new processes and forwards either new or deleted process PIDs -// as well as PIDs from processes that setup a new connection -func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContext, output *msg.Queue[[]Event[ProcessAttrs]]) swarm.RunFunc { +// as well as PIDs from processes that setup a new connection. +func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContext, output *msg.Queue[[]Event[ProcessAttrs]], pidSelector services.Selector) swarm.RunFunc { acc := pollAccounter{ cfg: cfg, output: output, @@ -82,7 +82,7 @@ func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContex fetchPorts: true, // must be true until we've activated the bpf watcher component bpfWatcherEnabled: false, // async set by listening on the bpfWatchEvents channel stateMux: sync.Mutex{}, - findingCriteria: FindingCriteria(cfg), + findingCriteria: FindingCriteria(cfg, pidSelector), ebpfContext: ebpfContext, } if acc.interval == 0 { diff --git a/pkg/appolly/discover/watcher_proc_test.go b/pkg/appolly/discover/watcher_proc_test.go index 9577e10cbc..eb92584953 100644 --- a/pkg/appolly/discover/watcher_proc_test.go +++ b/pkg/appolly/discover/watcher_proc_test.go @@ -202,7 +202,7 @@ func TestPortsFetchRequired(t *testing.T) { stateMux: sync.Mutex{}, bpfWatcherEnabled: false, fetchPorts: true, - findingCriteria: FindingCriteria(cfg), + findingCriteria: FindingCriteria(cfg, nil), output: msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(1)), } @@ -300,7 +300,7 @@ func TestMinProcessAge(t *testing.T) { stateMux: sync.Mutex{}, bpfWatcherEnabled: false, fetchPorts: true, - findingCriteria: FindingCriteria(cfg), + findingCriteria: FindingCriteria(cfg, nil), output: msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(1)), } diff --git a/pkg/appolly/instrumenter.go b/pkg/appolly/instrumenter.go index 1c4427a7d7..0543f2b3e5 100644 --- a/pkg/appolly/instrumenter.go +++ b/pkg/appolly/instrumenter.go @@ -251,11 +251,11 @@ func spanPtrPromGetters(cfg *obi.Config) attributes.NamedGetters[request.Span, s // Then they would be used or not for each service, based on the per-service features-. func joinMetricsConfig(cfg *obi.Config) *perapp.MetricsConfig { mc := cfg.Metrics - for _, d := range cfg.Discovery.Instrument { - mc.Features |= d.Metrics.Features + for i := range cfg.Discovery.Instrument { + mc.Features |= cfg.Discovery.Instrument[i].Metrics.Features } - for _, d := range cfg.Discovery.Services { - mc.Features |= d.Metrics.Features + for i := range cfg.Discovery.Services { + mc.Features |= cfg.Discovery.Services[i].Metrics.Features } return &mc } diff --git a/pkg/appolly/services/attr_glob.go b/pkg/appolly/services/attr_glob.go index 5f2242a88c..346c0a0c92 100644 --- a/pkg/appolly/services/attr_glob.go +++ b/pkg/appolly/services/attr_glob.go @@ -6,6 +6,7 @@ package services // import "go.opentelemetry.io/obi/pkg/appolly/services" import ( "fmt" "iter" + "sync" "github.com/gobwas/glob" "github.com/invopop/jsonschema" @@ -92,7 +93,9 @@ type GlobAttributes struct { Languages GlobAttr `yaml:"languages"` // PIDs allows selecting processes by PID. When non-empty, the process PID must be in this list (in addition to any path/port criteria). - PIDs []uint32 `yaml:"target_pids"` + // Updated at runtime via AddPIDs/RemovePIDs; pidsMu protects concurrent access. + PIDs []uint32 `yaml:"target_pids"` + pidsMu sync.RWMutex // Path allows defining the regular expression matching the full executable path. Path GlobAttr `yaml:"exe_path"` @@ -240,6 +243,8 @@ func (ga *GlobAttributes) GetSamplerConfig() *SamplerConfig { return ga.SamplerC func (ga *GlobAttributes) GetRoutesConfig() *CustomRoutesConfig { return ga.Routes } func (ga *GlobAttributes) pids() ([]app.PID, bool) { + ga.pidsMu.RLock() + defer ga.pidsMu.RUnlock() if len(ga.PIDs) == 0 { return nil, false } @@ -250,6 +255,45 @@ func (ga *GlobAttributes) pids() ([]app.PID, bool) { return out, true } +// AddPIDs adds PIDs to the selector's list (for runtime add; thread-safe with GetPIDs). +func (ga *GlobAttributes) AddPIDs(pids ...uint32) { + if len(pids) == 0 { + return + } + ga.pidsMu.Lock() + defer ga.pidsMu.Unlock() + existing := make(map[uint32]struct{}, len(ga.PIDs)) + for _, p := range ga.PIDs { + existing[p] = struct{}{} + } + for _, u := range pids { + if _, ok := existing[u]; !ok { + existing[u] = struct{}{} + ga.PIDs = append(ga.PIDs, u) + } + } +} + +// RemovePIDs removes PIDs from the selector's list (for runtime remove; thread-safe with GetPIDs). +func (ga *GlobAttributes) RemovePIDs(pids ...uint32) { + if len(pids) == 0 { + return + } + toRemove := make(map[uint32]struct{}) + for _, u := range pids { + toRemove[u] = struct{}{} + } + ga.pidsMu.Lock() + defer ga.pidsMu.Unlock() + newPids := ga.PIDs[:0] + for _, p := range ga.PIDs { + if _, remove := toRemove[p]; !remove { + newPids = append(newPids, p) + } + } + ga.PIDs = newPids +} + type nilMatcher struct{} func (n nilMatcher) IsSet() bool { return false } diff --git a/pkg/appolly/services/attr_glob_test.go b/pkg/appolly/services/attr_glob_test.go new file mode 100644 index 0000000000..d4585c1d1c --- /dev/null +++ b/pkg/appolly/services/attr_glob_test.go @@ -0,0 +1,64 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package services + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/obi/pkg/appolly/app" +) + +func TestGlobAttributes_AddPIDs_RemovePIDs_GetPIDs(t *testing.T) { + ga := &GlobAttributes{Name: "svc", Namespace: "ns"} + + // Initially empty + pids, ok := ga.GetPIDs() + assert.False(t, ok) + assert.Nil(t, pids) + + // Add PIDs + ga.AddPIDs(1, 2, 3) + pids, ok = ga.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 2, 3}, pids) + + // Add duplicates and new: no duplicates added + ga.AddPIDs(2, 3, 4) + pids, ok = ga.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 2, 3, 4}, pids) + + // Remove PIDs + ga.RemovePIDs(2, 4) + pids, ok = ga.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 3}, pids) + + // Remove all + ga.RemovePIDs(1, 3) + pids, ok = ga.GetPIDs() + assert.False(t, ok) + assert.Nil(t, pids) + + // Add after empty + ga.AddPIDs(42) + pids, ok = ga.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{42}, pids) +} + +func TestGlobAttributes_AddPIDs_RemovePIDs_emptyNoOp(t *testing.T) { + ga := &GlobAttributes{PIDs: []uint32{1, 2, 3}} + + // No-op when called with no args + ga.AddPIDs() + ga.RemovePIDs() + + pids, ok := ga.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 2, 3}, pids) +} diff --git a/pkg/appolly/services/attr_regex.go b/pkg/appolly/services/attr_regex.go index 1136280d51..df6c5cdc74 100644 --- a/pkg/appolly/services/attr_regex.go +++ b/pkg/appolly/services/attr_regex.go @@ -202,6 +202,8 @@ func (a *RegexSelector) GetPathRegexp() StringMatcher { return &a.Path func (a *RegexSelector) GetOpenPorts() *IntEnum { return &a.OpenPorts } func (a *RegexSelector) GetPIDs() ([]app.PID, bool) { return a.pids() } func (a *RegexSelector) GetCmdArgs() StringMatcher { return &a.CmdArgs } +func (a *RegexSelector) AddPIDs(_ ...uint32) {} +func (a *RegexSelector) RemovePIDs(_ ...uint32) {} func (a *RegexSelector) IsContainersOnly() bool { return a.ContainersOnly } func (a *RegexSelector) MetricsConfig() perapp.SvcMetricsConfig { return a.Metrics } func (a *RegexSelector) RangeMetadata() iter.Seq2[string, StringMatcher] { diff --git a/pkg/appolly/services/criteria.go b/pkg/appolly/services/criteria.go index 6f036a3e01..85ed09fd9e 100644 --- a/pkg/appolly/services/criteria.go +++ b/pkg/appolly/services/criteria.go @@ -171,6 +171,10 @@ type Selector interface { GetLanguages() StringMatcher // GetPIDs returns the list of target PIDs and true when this selector has PID criteria (analogous to OpenPorts). GetPIDs() ([]app.PID, bool) + // AddPIDs adds PIDs to the selector's list; no-op for selectors that do not support dynamic PIDs. + AddPIDs(pids ...uint32) + // RemovePIDs removes PIDs from the selector's list; no-op for selectors that do not support dynamic PIDs. + RemovePIDs(pids ...uint32) GetCmdArgs() StringMatcher IsContainersOnly() bool RangeMetadata() iter.Seq2[string, StringMatcher] diff --git a/pkg/instrumenter/instrumenter.go b/pkg/instrumenter/instrumenter.go index 7a96d389f5..3548872e9b 100644 --- a/pkg/instrumenter/instrumenter.go +++ b/pkg/instrumenter/instrumenter.go @@ -87,6 +87,10 @@ func setupAppO11y(ctx context.Context, ctxInfo *global.ContextInfo, config *obi. slog.Debug("can't create new instrumenter", "error", err) return fmt.Errorf("can't create new instrumenter: %w", err) } + // Notify callers that requested it (e.g. WithTargetPIDsUpdater); they receive the App O11y instrumenter. + if ctxInfo.AppO11y.OnTargetPIDsUpdaterReady != nil { + ctxInfo.AppO11y.OnTargetPIDsUpdaterReady(instr) + } if err := instr.FindAndInstrument(ctx); err != nil { slog.Debug("can't find target process", "error", err) diff --git a/pkg/instrumenter/instrumenter_test.go b/pkg/instrumenter/instrumenter_test.go index 5ee45820ef..64537933ce 100644 --- a/pkg/instrumenter/instrumenter_test.go +++ b/pkg/instrumenter/instrumenter_test.go @@ -4,11 +4,16 @@ package instrumenter import ( + "context" + "sync" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.opentelemetry.io/obi/pkg/appolly/services" + "go.opentelemetry.io/obi/pkg/export/otel/otelcfg" "go.opentelemetry.io/obi/pkg/obi" "go.opentelemetry.io/obi/pkg/transform" ) @@ -39,3 +44,100 @@ func TestServiceNameTemplate(t *testing.T) { require.NoError(t, err) assert.Nil(t, temp) } + +// TestRun_WithTargetPIDsUpdater_OnlyDynamicPIDSelector verifies that when the instrumenter +// is initialized with a dynamic PID selector (via WithTargetPIDsUpdater), that selector is +// the only discovery criterion—config-based criteria (e.g. discovery.instrument) are not +// used. The selector may be empty by default; callers add/remove PIDs via the updater. +func TestRun_WithTargetPIDsUpdater_OnlyDynamicPIDSelector(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Config has discovery.instrument set, but when using WithTargetPIDsUpdater the + // dynamic PID selector is the sole criterion (this config's Instrument is ignored). + cfg := &obi.Config{ + ChannelBufferLen: 1, + Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost:0"}, + Discovery: services.DiscoveryConfig{ + Instrument: services.GlobDefinitionCriteria{ + services.GlobAttributes{Name: "ignored", OpenPorts: services.IntEnum{Ranges: []services.IntRange{{Start: 8080}}}}, + }, + }, + } + require.True(t, cfg.Enabled(obi.FeatureAppO11y), "test config must enable App O11y") + + var callbackRan bool + opts := []Option{ + WithTargetPIDsUpdater(func(u TargetPIDsUpdater) { + require.NotNil(t, u, "caller should receive non-nil TargetPIDsUpdater when using dynamic PID selector") + // Selector is the only criterion; it may be empty. Caller controls targets via Add/Remove. + callbackRan = true + }), + } + + done := make(chan error, 1) + go func() { done <- Run(ctx, cfg, opts...) }() + + time.Sleep(2 * time.Second) + cancel() + <-done + + require.True(t, callbackRan, "WithTargetPIDsUpdater callback must run; discovery then uses only the dynamic PID selector (even if empty)") +} + +// TestRun_WithTargetPIDsUpdater_DynamicUpdate shows that a caller using the public API +// (Run + WithTargetPIDsUpdater) receives the instrumenter and can dynamically add/remove +// target PIDs at runtime. Run is started with a cancelable context and stopped shortly +// after the callback runs so we don't run the full pipeline. +func TestRun_WithTargetPIDsUpdater_DynamicUpdate(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Minimal config: enable App O11y (discovery.instrument) and at least one exporter (traces endpoint). + cfg := &obi.Config{ + ChannelBufferLen: 1, + Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost:0"}, + Discovery: services.DiscoveryConfig{ + Instrument: services.GlobDefinitionCriteria{ + services.GlobAttributes{Name: "test-svc", OpenPorts: services.IntEnum{Ranges: []services.IntRange{{Start: 8080}}}}, + }, + }, + } + // Ensure Enabled(FeatureAppO11y) is true (Discovery.Instrument is set) + require.True(t, cfg.Enabled(obi.FeatureAppO11y), "test config must enable App O11y") + + var ( + receivedUpdater bool + updaterMu sync.Mutex + ) + opts := []Option{ + WithTargetPIDsUpdater(func(u TargetPIDsUpdater) { + updaterMu.Lock() + defer updaterMu.Unlock() + require.NotNil(t, u, "caller should receive non-nil TargetPIDsUpdater") + // Demonstrate dynamic update: add and remove PIDs at runtime. + u.AddTargetPIDs(42, 100) + u.AddTargetPIDs(42, 200) // duplicate 42, new 200 + u.RemoveTargetPIDs(42) + u.RemoveTargetPIDs(999) // not present, no-op + receivedUpdater = true + }), + } + + done := make(chan error, 1) + go func() { done <- Run(ctx, cfg, opts...) }() + + // Give setupAppO11y time to create the instrumenter and invoke OnTargetPIDsUpdaterReady. + // We don't need to wait for the full pipeline; just for the callback. + time.Sleep(2 * time.Second) + cancel() + + err := <-done + // Run typically returns context.Canceled or similar after cancel. + require.Error(t, err) + + updaterMu.Lock() + got := receivedUpdater + updaterMu.Unlock() + require.True(t, got, "WithTargetPIDsUpdater callback should be invoked with the instrumenter; caller can then add/remove PIDs at runtime") +} diff --git a/pkg/instrumenter/opts.go b/pkg/instrumenter/opts.go index c3ff2b8876..492f1055d9 100644 --- a/pkg/instrumenter/opts.go +++ b/pkg/instrumenter/opts.go @@ -12,6 +12,26 @@ import ( // Option that override the instantiation of the instrumenter type Option func(info *global.ContextInfo) +// TargetPIDsUpdater is implemented by the default App O11y instrumenter. It allows adding or +// removing target PIDs at runtime. Works with any config; no initial target_pids required. +type TargetPIDsUpdater interface { + AddTargetPIDs(pids ...int) + RemoveTargetPIDs(pids ...int) +} + +// WithTargetPIDsUpdater calls the given function with a TargetPIDsUpdater after the App O11y +// instrumenter is created. Callers receive the App O11y instrumenter (as this interface); store it +// and call AddTargetPIDs/RemoveTargetPIDs at runtime to change which PIDs are instrumented. +func WithTargetPIDsUpdater(onReady func(TargetPIDsUpdater)) Option { + return func(info *global.ContextInfo) { + info.AppO11y.OnTargetPIDsUpdaterReady = func(v any) { + if u, ok := v.(TargetPIDsUpdater); ok { + onReady(u) + } + } + } +} + // OverrideAppExportQueue allows to override the queue used to export the spans. // This is useful to run the instrumenter in vendored mode, and you want to provide your // own spans exporter. diff --git a/pkg/internal/appolly/appolly.go b/pkg/internal/appolly/appolly.go index be34a06f06..a4d11444c4 100644 --- a/pkg/internal/appolly/appolly.go +++ b/pkg/internal/appolly/appolly.go @@ -16,6 +16,7 @@ import ( "go.opentelemetry.io/obi/pkg/appolly/app/request" "go.opentelemetry.io/obi/pkg/appolly/discover" "go.opentelemetry.io/obi/pkg/appolly/discover/exec" + "go.opentelemetry.io/obi/pkg/appolly/services" "go.opentelemetry.io/obi/pkg/appolly/traces" "go.opentelemetry.io/obi/pkg/ebpf" ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" @@ -51,7 +52,10 @@ type Instrumenter struct { // global data structures for all eBPF tracers ebpfEventContext *ebpfcommon.EBPFEventContext - finishers []finisher + // pidSelector is the PID criteria passed to discovery; this Instrumenter implements TargetPIDsUpdater + // (AddTargetPIDs/RemoveTargetPIDs) so callers who use WithTargetPIDsUpdater receive this instance. + pidSelector services.Selector // *services.GlobAttributes always set; PIDs from config or empty, updated at runtime via AddPIDs/RemovePIDs + finishers []finisher } type finisher struct { @@ -96,7 +100,15 @@ func New(ctx context.Context, ctxInfo *global.ContextInfo, config *obi.Config) ( return nil, fmt.Errorf("can't instantiate instrumentation pipeline: %w", err) } - return &Instrumenter{ + var initialPIDs []uint32 + if config.TargetPIDs.Len() > 0 { + initial := config.TargetPIDs.AllValues() + initialPIDs = make([]uint32, len(initial)) + for i, p := range initial { + initialPIDs[i] = uint32(p) + } + } + instr := &Instrumenter{ config: config, ctxInfo: ctxInfo, tracersWg: &sync.WaitGroup{}, @@ -105,7 +117,13 @@ func New(ctx context.Context, ctxInfo *global.ContextInfo, config *obi.Config) ( bp: bp, peGraphBuilder: swi, ebpfEventContext: ebpfcommon.NewEBPFEventContext(), - }, nil + pidSelector: &services.GlobAttributes{ + Name: config.ServiceName, + Namespace: config.ServiceNamespace, + PIDs: initialPIDs, + }, + } + return instr, nil } // FindAndInstrument searches in background for any new executable matching the @@ -114,7 +132,8 @@ func New(ctx context.Context, ctxInfo *global.ContextInfo, config *obi.Config) ( // This is: when the context is cancelled, it has unloaded all the eBPF probes. func (i *Instrumenter) FindAndInstrument(ctx context.Context) error { finder := discover.NewProcessFinder(i.config, i.ctxInfo, i.tracesInput, i.ebpfEventContext) - processEvents, err := finder.Start(ctx) + opts := []discover.ProcessFinderStartOpt{discover.WithPIDSelector(i.pidSelector)} + processEvents, err := finder.Start(ctx, opts...) if err != nil { return fmt.Errorf("couldn't start Process Finder: %w", err) } @@ -251,5 +270,32 @@ func refreshK8sInformerCache(ctx context.Context, ctxInfo *global.ContextInfo) e } func (i *Instrumenter) processEventsPipeline(ctx context.Context, graph *swarm.Runner) { - graph.Start(ctx, swarm.WithCancelTimeout(i.config.ShutdownTimeout)) // zurulao + graph.Start(ctx, swarm.WithCancelTimeout(i.config.ShutdownTimeout)) +} + +// AddTargetPIDs adds PIDs to the set of target PIDs instrumented at runtime. +// Part of TargetPIDsUpdater; callers receive this Instrumenter via WithTargetPIDsUpdater. +// Works with any config; the PID selector is always present and picks up new PIDs on the next discovery poll. +func (i *Instrumenter) AddTargetPIDs(pids ...int) { + if len(pids) == 0 { + return + } + u32 := make([]uint32, len(pids)) + for j, p := range pids { + u32[j] = uint32(p) + } + i.pidSelector.AddPIDs(u32...) +} + +// RemoveTargetPIDs removes PIDs from the set of target PIDs. Removed PIDs stop being +// matched on the next discovery poll. Part of TargetPIDsUpdater; callers receive this Instrumenter via WithTargetPIDsUpdater. +func (i *Instrumenter) RemoveTargetPIDs(pids ...int) { + if len(pids) == 0 { + return + } + u32 := make([]uint32, len(pids)) + for j, p := range pids { + u32[j] = uint32(p) + } + i.pidSelector.RemovePIDs(u32...) } diff --git a/pkg/internal/appolly/appolly_test.go b/pkg/internal/appolly/appolly_test.go index 406b2053d9..af3416df55 100644 --- a/pkg/internal/appolly/appolly_test.go +++ b/pkg/internal/appolly/appolly_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/discover" @@ -45,3 +46,36 @@ func TestProcessEventsLoopDoesntBlock(t *testing.T) { assert.NoError(t, err) } + +// targetPIDsUpdater is the same as instrumenter.TargetPIDsUpdater; used to avoid import cycle. +type targetPIDsUpdater interface { + AddTargetPIDs(pids ...int) + RemoveTargetPIDs(pids ...int) +} + +func TestInstrumenter_ImplementsTargetPIDsUpdater(t *testing.T) { + instr, err := New( + t.Context(), + &global.ContextInfo{Prometheus: &connector.PrometheusManager{}}, + &obi.Config{ChannelBufferLen: 1, Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost"}}, + ) + require.NoError(t, err) + var _ targetPIDsUpdater = instr +} + +func TestInstrumenter_AddTargetPIDs_RemoveTargetPIDs(t *testing.T) { + instr, err := New( + t.Context(), + &global.ContextInfo{Prometheus: &connector.PrometheusManager{}}, + &obi.Config{ChannelBufferLen: 1, Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost"}}, + ) + require.NoError(t, err) + + // AddTargetPIDs and RemoveTargetPIDs should not panic; instrumenter always has pidSelector + instr.AddTargetPIDs(1, 2, 3) + instr.AddTargetPIDs(2, 4) // 2 duplicate, 4 new + instr.RemoveTargetPIDs(2) + instr.RemoveTargetPIDs(99) // not present, no-op + instr.AddTargetPIDs() // no-op + instr.RemoveTargetPIDs() // no-op +} diff --git a/pkg/pipe/global/context.go b/pkg/pipe/global/context.go index 3e07eaa4ee..777af6ad73 100644 --- a/pkg/pipe/global/context.go +++ b/pkg/pipe/global/context.go @@ -64,4 +64,8 @@ type ContextInfo struct { type AppO11y struct { // ReportRoutes sets whether the metrics should set the http.route attribute ReportRoutes bool + // OnTargetPIDsUpdaterReady, when set, is called with the App O11y instrumenter after it is created. + // Callers receive that instrumenter (it implements TargetPIDsUpdater by default) and may store it + // to add or remove target PIDs at runtime. + OnTargetPIDsUpdaterReady func(any) } From 8c532364f90ef616549954d95fedbdac9f028c02 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 6 Mar 2026 14:53:14 -0500 Subject: [PATCH 2/7] Notify attacher of delete events --- pkg/appolly/discover/attacher.go | 5 +- pkg/appolly/discover/attacher_test.go | 201 ++++++++++++++++++++++ pkg/appolly/discover/finder.go | 17 +- pkg/appolly/discover/matcher.go | 62 ++++++- pkg/appolly/discover/matcher_test.go | 65 +++++-- pkg/appolly/discover/watcher_kube_test.go | 2 +- pkg/appolly/services/attr_glob.go | 28 ++- pkg/appolly/services/attr_glob_test.go | 29 ++++ pkg/instrumenter/instrumenter_test.go | 2 + pkg/instrumenter/opts.go | 10 +- pkg/internal/appolly/appolly.go | 28 ++- pkg/internal/appolly/appolly_test.go | 19 ++ 12 files changed, 442 insertions(+), 26 deletions(-) create mode 100644 pkg/appolly/discover/attacher_test.go diff --git a/pkg/appolly/discover/attacher.go b/pkg/appolly/discover/attacher.go index 07a394db54..0a8e6df663 100644 --- a/pkg/appolly/discover/attacher.go +++ b/pkg/appolly/discover/attacher.go @@ -28,6 +28,9 @@ import ( "go.opentelemetry.io/obi/pkg/pipe/swarm/swarms" ) +// Swappable in tests so attacher tests don't depend on memlock permissions. +var removeMemlock = rlimit.RemoveMemlock + // traceAttacher creates the available trace.Tracer implementations (Go HTTP tracer, GRPC tracer, Generic tracer...) // for each received Instrumentable process and forwards an ebpf.ProcessTracer instance ready to run and start // instrumenting the executable @@ -411,7 +414,7 @@ func (ta *traceAttacher) notifyProcessDeletion(ie *ebpf.Instrumentable) { } func (ta *traceAttacher) init() error { - if err := rlimit.RemoveMemlock(); err != nil { + if err := removeMemlock(); err != nil { return fmt.Errorf("removing memory lock: %w", err) } return nil diff --git a/pkg/appolly/discover/attacher_test.go b/pkg/appolly/discover/attacher_test.go new file mode 100644 index 0000000000..0b38c31e84 --- /dev/null +++ b/pkg/appolly/discover/attacher_test.go @@ -0,0 +1,201 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "context" + "io" + "testing" + + cebpf "github.com/cilium/ebpf" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/obi/pkg/appolly/app" + "go.opentelemetry.io/obi/pkg/appolly/app/request" + "go.opentelemetry.io/obi/pkg/appolly/app/svc" + execpkg "go.opentelemetry.io/obi/pkg/appolly/discover/exec" + "go.opentelemetry.io/obi/pkg/appolly/services" + "go.opentelemetry.io/obi/pkg/ebpf" + ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" + "go.opentelemetry.io/obi/pkg/export/imetrics" + "go.opentelemetry.io/obi/pkg/internal/goexec" + "go.opentelemetry.io/obi/pkg/internal/testutil" + "go.opentelemetry.io/obi/pkg/obi" + "go.opentelemetry.io/obi/pkg/pipe/msg" +) + +type blockedPID struct { + pid app.PID + ns uint32 +} + +type recordingTracer struct { + blocked []blockedPID +} + +func (r *recordingTracer) AllowPID(app.PID, uint32, *svc.Attrs) {} +func (r *recordingTracer) BlockPID(pid app.PID, ns uint32) { + r.blocked = append(r.blocked, blockedPID{pid: pid, ns: ns}) +} +func (r *recordingTracer) LoadSpecs() ([]*ebpfcommon.SpecBundle, error) { return nil, nil } +func (r *recordingTracer) AddCloser(...io.Closer) {} +func (r *recordingTracer) SetupTailCalls() {} +func (r *recordingTracer) KProbes() map[string]ebpfcommon.ProbeDesc { return nil } +func (r *recordingTracer) Tracepoints() map[string]ebpfcommon.ProbeDesc { return nil } +func (r *recordingTracer) GoProbes() map[string][]*ebpfcommon.ProbeDesc { return nil } +func (r *recordingTracer) UProbes() map[string]map[string][]*ebpfcommon.ProbeDesc { return nil } +func (r *recordingTracer) SocketFilters() []*cebpf.Program { return nil } +func (r *recordingTracer) SockMsgs() []ebpfcommon.SockMsg { return nil } +func (r *recordingTracer) SockOps() []ebpfcommon.SockOps { return nil } +func (r *recordingTracer) Iters() []*ebpfcommon.Iter { return nil } +func (r *recordingTracer) Tracing() []*ebpfcommon.Tracing { return nil } +func (r *recordingTracer) RecordInstrumentedLib(uint64, []io.Closer) {} +func (r *recordingTracer) AddInstrumentedLibRef(uint64) {} +func (r *recordingTracer) AlreadyInstrumentedLib(uint64) bool { return false } +func (r *recordingTracer) UnlinkInstrumentedLib(uint64) {} +func (r *recordingTracer) RegisterOffsets(*execpkg.FileInfo, *goexec.Offsets) {} +func (r *recordingTracer) ProcessBinary(*execpkg.FileInfo) {} +func (r *recordingTracer) Required() bool { return false } +func (r *recordingTracer) Run(context.Context, *ebpfcommon.EBPFEventContext, *msg.Queue[[]request.Span]) { +} + +func TestSyntheticDeletePath_TraceAttacherDeletesTracer(t *testing.T) { + ctx, cancel := context.WithCancel(t.Context()) + defer cancel() + origRemoveMemlock := removeMemlock + removeMemlock = func() error { return nil } + defer func() { removeMemlock = origRemoveMemlock }() + + processMatches := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) + instrumentables := msg.NewQueue[[]Event[ebpf.Instrumentable]](msg.ChannelBufferLen(10)) + tracerEventsQu := msg.NewQueue[Event[*ebpf.Instrumentable]](msg.ChannelBufferLen(10)) + tracerEvents := tracerEventsQu.Subscribe() + + fileInfo := &execpkg.FileInfo{ + Service: svc.Attrs{UID: svc.UID{Name: "dyn-svc", Namespace: "ns"}}, + CmdExePath: "/bin/test", + Pid: 42, + Ino: 1234, + Ns: 17, + } + startDeletedTyperPipeline(ctx, &typer{ + currentPids: map[app.PID]*execpkg.FileInfo{42: fileInfo}, + }, processMatches, instrumentables) + + ta := &traceAttacher{ + Cfg: &obi.Config{}, + Metrics: imetrics.NoopReporter{}, + InputInstrumentables: instrumentables, + OutputTracerEvents: tracerEventsQu, + EbpfEventContext: &ebpfcommon.EBPFEventContext{}, + } + run, err := ta.attacherLoop(ctx) + require.NoError(t, err) + + prog := &recordingTracer{} + tracer := &ebpf.ProcessTracer{Type: ebpf.Generic, Programs: []ebpf.Tracer{prog}} + ta.existingTracers[fileInfo.Ino] = tracer + ta.processInstances.Inc(fileInfo.Ino) + + go run(ctx) + + processMatches.Send([]Event[ProcessMatch]{{ + Type: EventDeleted, + Obj: ProcessMatch{ + Process: &services.ProcessInfo{Pid: 42}, + }, + }}) + + ev := testutil.ReadChannel(t, tracerEvents, testTimeout) + require.Equal(t, EventDeleted, ev.Type) + require.NotNil(t, ev.Obj) + assert.Equal(t, app.PID(42), ev.Obj.FileInfo.Pid) + assert.Same(t, tracer, ev.Obj.Tracer) + assert.Equal(t, []blockedPID{{pid: 42, ns: 17}}, prog.blocked) + _, exists := ta.existingTracers[fileInfo.Ino] + assert.False(t, exists) +} + +func TestSyntheticDeletePath_TraceAttacherDeletesInstance(t *testing.T) { + ctx, cancel := context.WithCancel(t.Context()) + defer cancel() + origRemoveMemlock := removeMemlock + removeMemlock = func() error { return nil } + defer func() { removeMemlock = origRemoveMemlock }() + + processMatches := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) + instrumentables := msg.NewQueue[[]Event[ebpf.Instrumentable]](msg.ChannelBufferLen(10)) + tracerEventsQu := msg.NewQueue[Event[*ebpf.Instrumentable]](msg.ChannelBufferLen(10)) + tracerEvents := tracerEventsQu.Subscribe() + + fileInfo := &execpkg.FileInfo{ + Service: svc.Attrs{UID: svc.UID{Name: "dyn-svc", Namespace: "ns"}}, + CmdExePath: "/bin/test", + Pid: 42, + Ino: 1234, + Ns: 17, + } + startDeletedTyperPipeline(ctx, &typer{ + currentPids: map[app.PID]*execpkg.FileInfo{42: fileInfo}, + }, processMatches, instrumentables) + + ta := &traceAttacher{ + Cfg: &obi.Config{}, + Metrics: imetrics.NoopReporter{}, + InputInstrumentables: instrumentables, + OutputTracerEvents: tracerEventsQu, + EbpfEventContext: &ebpfcommon.EBPFEventContext{}, + } + run, err := ta.attacherLoop(ctx) + require.NoError(t, err) + + prog := &recordingTracer{} + tracer := &ebpf.ProcessTracer{Type: ebpf.Generic, Programs: []ebpf.Tracer{prog}} + ta.existingTracers[fileInfo.Ino] = tracer + ta.processInstances.Inc(fileInfo.Ino) + ta.processInstances.Inc(fileInfo.Ino) + + go run(ctx) + + processMatches.Send([]Event[ProcessMatch]{{ + Type: EventDeleted, + Obj: ProcessMatch{ + Process: &services.ProcessInfo{Pid: 42}, + }, + }}) + + ev := testutil.ReadChannel(t, tracerEvents, testTimeout) + require.Equal(t, EventInstanceDeleted, ev.Type) + require.NotNil(t, ev.Obj) + assert.Equal(t, app.PID(42), ev.Obj.FileInfo.Pid) + assert.Nil(t, ev.Obj.Tracer) + assert.Equal(t, []blockedPID{{pid: 42, ns: 17}}, prog.blocked) + assert.Same(t, tracer, ta.existingTracers[fileInfo.Ino]) +} + +func startDeletedTyperPipeline( + ctx context.Context, + tp *typer, + input *msg.Queue[[]Event[ProcessMatch]], + output *msg.Queue[[]Event[ebpf.Instrumentable]], +) { + in := input.Subscribe(msg.SubscriberName("testExecTyper")) + go func() { + defer output.Close() + for { + select { + case <-ctx.Done(): + return + case evs, ok := <-in: + if !ok { + return + } + if out := tp.FilterClassify(evs); len(out) > 0 { + output.Send(out) + } + } + } + }() +} diff --git a/pkg/appolly/discover/finder.go b/pkg/appolly/discover/finder.go index 8415b83cc7..d2366cf7a7 100644 --- a/pkg/appolly/discover/finder.go +++ b/pkg/appolly/discover/finder.go @@ -7,6 +7,7 @@ import ( "context" "fmt" + "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/app/request" "go.opentelemetry.io/obi/pkg/appolly/services" "go.opentelemetry.io/obi/pkg/ebpf" @@ -42,8 +43,9 @@ func NewProcessFinder( } type processFinderStartConfig struct { - enrichedProcessEvents *msg.Queue[[]Event[ProcessAttrs]] - pidSelector services.Selector + enrichedProcessEvents *msg.Queue[[]Event[ProcessAttrs]] + pidSelector services.Selector + pidSelectorChangeNotify <-chan []app.PID } // ProcessFinderStartOpt allows overriding some internal behavior of ProcessFinder.Start method. @@ -67,6 +69,15 @@ func WithPIDSelector(selector services.Selector) ProcessFinderStartOpt { } } +// WithPIDSelectorChangeNotifier provides a channel the matcher listens to; when the caller sends the +// removed PIDs on it, the matcher emits targeted synthetic deletes so the attacher uninstruments them +// immediately instead of rescanning all tracked PIDs. +func WithPIDSelectorChangeNotifier(ch <-chan []app.PID) ProcessFinderStartOpt { + return func(cfg *processFinderStartConfig) { + cfg.pidSelectorChangeNotify = ch + } +} + // Start the ProcessFinder pipeline in background. It returns a channel where each new discovered // ebpf.ProcessTracer will be notified. func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOpt) (<-chan Event[*ebpf.Instrumentable], error) { @@ -106,7 +117,7 @@ func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOp ), swarm.WithID("LanguageDecoratorProvider")) criteriaFilteredEvents := msgh.QueueFromConfig[[]Event[ProcessMatch]](pf.cfg, "criteriaFilteredEvents") - swi.Add(criteriaMatcherProvider(pf.cfg, langEnrichedEvents, criteriaFilteredEvents, startConfig.pidSelector), + swi.Add(criteriaMatcherProvider(pf.cfg, langEnrichedEvents, criteriaFilteredEvents, startConfig.pidSelector, startConfig.pidSelectorChangeNotify), swarm.WithID("CriteriaMatcher")) executableTypes := msgh.QueueFromConfig[[]Event[ebpf.Instrumentable]](pf.cfg, "executableTypes") diff --git a/pkg/appolly/discover/matcher.go b/pkg/appolly/discover/matcher.go index 87badd0679..9e2cbb84be 100644 --- a/pkg/appolly/discover/matcher.go +++ b/pkg/appolly/discover/matcher.go @@ -30,16 +30,20 @@ var ( ) // criteriaMatcherProvider filters the processes that match the discovery criteria. +// If pidSelectorChangeNotify is non-nil, the matcher listens to removed-PID notifications +// and runs synthetic deletes only when notified, not on every batch. func criteriaMatcherProvider( cfg *obi.Config, input *msg.Queue[[]Event[ProcessAttrs]], output *msg.Queue[[]Event[ProcessMatch]], pidSelector services.Selector, + pidSelectorChangeNotify <-chan []app.PID, ) swarm.InstanceFunc { instrumenterNamespace, _ := namespaceFetcherFunc(app.PID(osPidFunc())) + criteria := FindingCriteria(cfg, pidSelector) m := &Matcher{ Log: slog.With("component", "discover.CriteriaMatcher"), - Criteria: FindingCriteria(cfg, pidSelector), + Criteria: criteria, ExcludeCriteria: ExcludingCriteria(cfg), LogEnricherCriteria: LogEnricherFindingCriteria(cfg), ProcessHistory: map[app.PID]ProcessMatch{}, @@ -47,6 +51,7 @@ func criteriaMatcherProvider( Output: output, Namespace: instrumenterNamespace, HasHostPidAccess: hasHostPidAccess(), + RemovedPIDsNotify: pidSelectorChangeNotify, } return swarm.DirectInstance(m.Run) } @@ -66,6 +71,9 @@ type Matcher struct { Output *msg.Queue[[]Event[ProcessMatch]] Namespace string HasHostPidAccess bool + // RemovedPIDsNotify, when set, carries the PIDs removed from the dynamic selector so the + // matcher can emit targeted synthetic deletes without rescanning ProcessHistory. + RemovedPIDsNotify <-chan []app.PID } // ProcessMatch matches a found process with the first selection criteria it fulfilled. @@ -82,6 +90,10 @@ func (pm ProcessMatch) LogEnricherEnabled() bool { func (m *Matcher) Run(ctx context.Context) { defer m.Output.Close() m.Log.Debug("starting criteria matcher node") + if m.RemovedPIDsNotify != nil { + m.runWithNotify(ctx) + return + } swarms.ForEachInput(ctx, m.Input, m.Log.Debug, func(i []Event[ProcessAttrs]) { m.Log.Debug("filtering processes", "len", len(i)) o := m.filter(i) @@ -92,6 +104,33 @@ func (m *Matcher) Run(ctx context.Context) { }) } +func (m *Matcher) runWithNotify(ctx context.Context) { + for { + select { + case <-ctx.Done(): + m.Log.Debug("context done, stopping node") + return + case i, ok := <-m.Input: + if !ok { + m.Log.Debug("input channel closed, stopping node") + return + } + m.Log.Debug("filtering processes", "len", len(i)) + o := m.filter(i) + m.Log.Debug("processes matching selection criteria", "len", len(o)) + if len(o) > 0 { + m.Output.Send(o) + } + case removedPIDs := <-m.RemovedPIDsNotify: + o := m.syntheticDeletesForRemovedPIDs(removedPIDs) + if len(o) > 0 { + m.Log.Debug("synthetic deletes for removed PIDs", "len", len(o)) + m.Output.Send(o) + } + } + } +} + func (m *Matcher) filter(events []Event[ProcessAttrs]) []Event[ProcessMatch] { var matches []Event[ProcessMatch] for _, ev := range events { @@ -108,6 +147,27 @@ func (m *Matcher) filter(events []Event[ProcessAttrs]) []Event[ProcessMatch] { return matches } +// syntheticDeletesForRemovedPIDs returns EventDeleted for the specific PIDs removed from the +// dynamic selector. This is the matcher side of the edge-based removal path: the selector sends +// the exact removed PIDs, so we can look them up directly instead of doing a level-based scan of +// ProcessHistory against the selector's current PID set. +func (m *Matcher) syntheticDeletesForRemovedPIDs(removedPIDs []app.PID) []Event[ProcessMatch] { + if len(removedPIDs) == 0 { + return nil + } + var out []Event[ProcessMatch] + for _, pid := range removedPIDs { + procMatch, instrumented := m.ProcessHistory[pid] + if !instrumented { + continue + } + delete(m.ProcessHistory, pid) + m.Log.Debug("pid removed from dynamic selector, uninstrumenting", "pid", pid, "comm", procMatch.Process.ExePath) + out = append(out, Event[ProcessMatch]{Type: EventDeleted, Obj: procMatch}) + } + return out +} + func (m *Matcher) alreadyMatched(pid app.PID) bool { _, ok := m.ProcessHistory[pid] return ok diff --git a/pkg/appolly/discover/matcher_test.go b/pkg/appolly/discover/matcher_test.go index f6a3d7de00..2c625b0d9e 100644 --- a/pkg/appolly/discover/matcher_test.go +++ b/pkg/appolly/discover/matcher_test.go @@ -52,7 +52,7 @@ func TestCriteriaMatcher(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -103,7 +103,7 @@ func TestCriteriaMatcherLanguage(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -153,7 +153,7 @@ func TestCriteriaMatcher_Exclude(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -194,7 +194,7 @@ func TestCriteriaMatcher_Exclude_Metadata(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -242,7 +242,7 @@ func TestCriteriaMatcher_MustMatchAllAttributes(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -297,7 +297,7 @@ func TestCriteriaMatcherMissingPort(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -356,7 +356,7 @@ func TestCriteriaMatcherContainersOnly(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -495,7 +495,7 @@ func TestInstrumentation_CoexistingWithDeprecatedServices(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&tc.cfg, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&tc.cfg, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -530,7 +530,7 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -561,7 +561,7 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -604,7 +604,7 @@ func TestCriteriaMatcher_DynamicTargetPIDs(t *testing.T) { processInfo = func(pp ProcessAttrs) (*services.ProcessInfo, error) { return &services.ProcessInfo{Pid: pp.pid, ExePath: "/any/exe", OpenPorts: pp.openPorts}, nil } - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, pidSelector)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, pidSelector, nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -637,6 +637,47 @@ func TestCriteriaMatcher_DynamicTargetPIDs(t *testing.T) { testutil.ChannelEmpty(t, filteredProcesses, 100*time.Millisecond) } +func TestCriteriaMatcher_DynamicTargetPIDs_RemoveNotification(t *testing.T) { + pipeConfig := obi.Config{ServiceName: "dyn-svc", ServiceNamespace: "ns"} + pidSelector := &services.GlobAttributes{ + Name: "dyn-svc", + Namespace: "ns", + PIDs: []uint32{42, 100}, + } + removedPIDs := make(chan []app.PID, 2) + pidSelector.SetPIDsChangeNotify(removedPIDs) + + discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) + filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) + filteredProcesses := filteredProcessesQu.Subscribe() + processInfo = func(pp ProcessAttrs) (*services.ProcessInfo, error) { + return &services.ProcessInfo{Pid: pp.pid, ExePath: "/any/exe", OpenPorts: pp.openPorts}, nil + } + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, pidSelector, removedPIDs)(t.Context()) + require.NoError(t, err) + go matcherFunc(t.Context()) + defer filteredProcessesQu.Close() + + discoveredProcesses.Send([]Event[ProcessAttrs]{ + {Type: EventCreated, Obj: ProcessAttrs{pid: 42}}, + {Type: EventCreated, Obj: ProcessAttrs{pid: 100}}, + }) + matches := testutil.ReadChannel(t, filteredProcesses, testTimeout) + require.Len(t, matches, 2) + + pidSelector.RemovePIDs(100) + matches = testutil.ReadChannel(t, filteredProcesses, testTimeout) + require.Len(t, matches, 1) + assert.Equal(t, EventDeleted, matches[0].Type) + assert.Equal(t, app.PID(100), matches[0].Obj.Process.Pid) + + pidSelector.RemovePIDs(42) + matches = testutil.ReadChannel(t, filteredProcesses, testTimeout) + require.Len(t, matches, 1) + assert.Equal(t, EventDeleted, matches[0].Type) + assert.Equal(t, app.PID(42), matches[0].Obj.Process.Pid) +} + func TestCriteriaMatcher_Granular(t *testing.T) { pipeConfig := obi.Config{} @@ -661,7 +702,7 @@ func TestCriteriaMatcher_Granular(t *testing.T) { filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) require.NoError(t, err) diff --git a/pkg/appolly/discover/watcher_kube_test.go b/pkg/appolly/discover/watcher_kube_test.go index d607882b60..cf6a8dfb47 100644 --- a/pkg/appolly/discover/watcher_kube_test.go +++ b/pkg/appolly/discover/watcher_kube_test.go @@ -164,7 +164,7 @@ func TestWatcherKubeEnricherWithMatcher(t *testing.T) { version: "v[0-9]*" `), &pipeConfig)) - swi.Add(criteriaMatcherProvider(&pipeConfig, connectQueue, outputQueue, nil)) + swi.Add(criteriaMatcherProvider(&pipeConfig, connectQueue, outputQueue, nil, nil)) nodesRunner, err := swi.Instance(t.Context()) require.NoError(t, err) diff --git a/pkg/appolly/services/attr_glob.go b/pkg/appolly/services/attr_glob.go index 346c0a0c92..e209f969db 100644 --- a/pkg/appolly/services/attr_glob.go +++ b/pkg/appolly/services/attr_glob.go @@ -96,6 +96,9 @@ type GlobAttributes struct { // Updated at runtime via AddPIDs/RemovePIDs; pidsMu protects concurrent access. PIDs []uint32 `yaml:"target_pids"` pidsMu sync.RWMutex + // pidsChangeNotify, when set, receives the PIDs actually removed from the selector so the matcher can + // uninstrument them without rescanning all tracked PIDs. + pidsChangeNotify chan<- []app.PID // Path allows defining the regular expression matching the full executable path. Path GlobAttr `yaml:"exe_path"` @@ -274,6 +277,25 @@ func (ga *GlobAttributes) AddPIDs(pids ...uint32) { } } +// SetPIDsChangeNotify sets the channel to notify with the PIDs removed from the selector. +// Used by the discover matcher to emit targeted synthetic deletes when the PID list changes. +func (ga *GlobAttributes) SetPIDsChangeNotify(ch chan<- []app.PID) { + ga.pidsChangeNotify = ch +} + +func (ga *GlobAttributes) notifyPIDsChanged(removedPIDs []app.PID) { + if ga.pidsChangeNotify == nil || len(removedPIDs) == 0 { + return + } + // This channel is edge-based: it carries the exact removal that just happened. + // That avoids the matcher doing a level-based recompute by scanning all tracked + // PIDs against the selector's current state after every change. + // + // Because the payload is the removal event itself, dropping sends would lose + // information. We therefore use a blocking send instead of a best-effort wakeup. + ga.pidsChangeNotify <- removedPIDs +} + // RemovePIDs removes PIDs from the selector's list (for runtime remove; thread-safe with GetPIDs). func (ga *GlobAttributes) RemovePIDs(pids ...uint32) { if len(pids) == 0 { @@ -284,14 +306,18 @@ func (ga *GlobAttributes) RemovePIDs(pids ...uint32) { toRemove[u] = struct{}{} } ga.pidsMu.Lock() - defer ga.pidsMu.Unlock() newPids := ga.PIDs[:0] + removedPIDs := make([]app.PID, 0, len(pids)) for _, p := range ga.PIDs { if _, remove := toRemove[p]; !remove { newPids = append(newPids, p) + continue } + removedPIDs = append(removedPIDs, app.PID(p)) } ga.PIDs = newPids + ga.pidsMu.Unlock() + ga.notifyPIDsChanged(removedPIDs) } type nilMatcher struct{} diff --git a/pkg/appolly/services/attr_glob_test.go b/pkg/appolly/services/attr_glob_test.go index d4585c1d1c..708c0a6098 100644 --- a/pkg/appolly/services/attr_glob_test.go +++ b/pkg/appolly/services/attr_glob_test.go @@ -62,3 +62,32 @@ func TestGlobAttributes_AddPIDs_RemovePIDs_emptyNoOp(t *testing.T) { require.True(t, ok) assert.Equal(t, []app.PID{1, 2, 3}, pids) } + +func TestGlobAttributes_RemovePIDs_NotifyRemovedPIDs(t *testing.T) { + ga := &GlobAttributes{PIDs: []uint32{1, 2, 3}} + removedPIDs := make(chan []app.PID, 1) + ga.SetPIDsChangeNotify(removedPIDs) + + ga.AddPIDs(4) + select { + case got := <-removedPIDs: + t.Fatalf("unexpected add notification: %v", got) + default: + } + + ga.RemovePIDs(2, 4, 99) + + select { + case got := <-removedPIDs: + assert.Equal(t, []app.PID{2, 4}, got) + default: + t.Fatal("expected removed PID notification") + } + + ga.RemovePIDs(99) + select { + case got := <-removedPIDs: + t.Fatalf("unexpected remove notification for missing pid: %v", got) + default: + } +} diff --git a/pkg/instrumenter/instrumenter_test.go b/pkg/instrumenter/instrumenter_test.go index 64537933ce..1093f41e13 100644 --- a/pkg/instrumenter/instrumenter_test.go +++ b/pkg/instrumenter/instrumenter_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/services" "go.opentelemetry.io/obi/pkg/export/otel/otelcfg" "go.opentelemetry.io/obi/pkg/obi" @@ -120,6 +121,7 @@ func TestRun_WithTargetPIDsUpdater_DynamicUpdate(t *testing.T) { u.AddTargetPIDs(42, 200) // duplicate 42, new 200 u.RemoveTargetPIDs(42) u.RemoveTargetPIDs(999) // not present, no-op + assert.Equal(t, []app.PID{100, 200}, u.TargetPIDs()) receivedUpdater = true }), } diff --git a/pkg/instrumenter/opts.go b/pkg/instrumenter/opts.go index 492f1055d9..2dc99d89e8 100644 --- a/pkg/instrumenter/opts.go +++ b/pkg/instrumenter/opts.go @@ -4,6 +4,7 @@ package instrumenter // import "go.opentelemetry.io/obi/pkg/instrumenter" import ( + "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/app/request" "go.opentelemetry.io/obi/pkg/pipe/global" "go.opentelemetry.io/obi/pkg/pipe/msg" @@ -12,16 +13,19 @@ import ( // Option that override the instantiation of the instrumenter type Option func(info *global.ContextInfo) -// TargetPIDsUpdater is implemented by the default App O11y instrumenter. It allows adding or -// removing target PIDs at runtime. Works with any config; no initial target_pids required. +// TargetPIDsUpdater is implemented by the default App O11y instrumenter. It allows adding, +// removing, and inspecting target PIDs at runtime. Works with any config; no initial +// target_pids required. type TargetPIDsUpdater interface { AddTargetPIDs(pids ...int) RemoveTargetPIDs(pids ...int) + TargetPIDs() []app.PID } // WithTargetPIDsUpdater calls the given function with a TargetPIDsUpdater after the App O11y // instrumenter is created. Callers receive the App O11y instrumenter (as this interface); store it -// and call AddTargetPIDs/RemoveTargetPIDs at runtime to change which PIDs are instrumented. +// and call AddTargetPIDs/RemoveTargetPIDs at runtime to change which PIDs are instrumented, or +// TargetPIDs to inspect the currently tracked set. func WithTargetPIDsUpdater(onReady func(TargetPIDsUpdater)) Option { return func(info *global.ContextInfo) { info.AppO11y.OnTargetPIDsUpdaterReady = func(v any) { diff --git a/pkg/internal/appolly/appolly.go b/pkg/internal/appolly/appolly.go index a4d11444c4..48a3fcea4a 100644 --- a/pkg/internal/appolly/appolly.go +++ b/pkg/internal/appolly/appolly.go @@ -13,6 +13,7 @@ import ( "time" "go.opentelemetry.io/obi/pkg/appolly" + "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/app/request" "go.opentelemetry.io/obi/pkg/appolly/discover" "go.opentelemetry.io/obi/pkg/appolly/discover/exec" @@ -132,7 +133,14 @@ func New(ctx context.Context, ctxInfo *global.ContextInfo, config *obi.Config) ( // This is: when the context is cancelled, it has unloaded all the eBPF probes. func (i *Instrumenter) FindAndInstrument(ctx context.Context) error { finder := discover.NewProcessFinder(i.config, i.ctxInfo, i.tracesInput, i.ebpfEventContext) - opts := []discover.ProcessFinderStartOpt{discover.WithPIDSelector(i.pidSelector)} + pidSelectorChangeNotify := make(chan []app.PID, 1) + opts := []discover.ProcessFinderStartOpt{ + discover.WithPIDSelector(i.pidSelector), + discover.WithPIDSelectorChangeNotifier(pidSelectorChangeNotify), + } + if ga, ok := i.pidSelector.(*services.GlobAttributes); ok { + ga.SetPIDsChangeNotify(pidSelectorChangeNotify) + } processEvents, err := finder.Start(ctx, opts...) if err != nil { return fmt.Errorf("couldn't start Process Finder: %w", err) @@ -275,7 +283,7 @@ func (i *Instrumenter) processEventsPipeline(ctx context.Context, graph *swarm.R // AddTargetPIDs adds PIDs to the set of target PIDs instrumented at runtime. // Part of TargetPIDsUpdater; callers receive this Instrumenter via WithTargetPIDsUpdater. -// Works with any config; the PID selector is always present and picks up new PIDs on the next discovery poll. +// Works with any config; the PID selector picks up new PIDs on the next discovery poll. func (i *Instrumenter) AddTargetPIDs(pids ...int) { if len(pids) == 0 { return @@ -287,8 +295,20 @@ func (i *Instrumenter) AddTargetPIDs(pids ...int) { i.pidSelector.AddPIDs(u32...) } -// RemoveTargetPIDs removes PIDs from the set of target PIDs. Removed PIDs stop being -// matched on the next discovery poll. Part of TargetPIDsUpdater; callers receive this Instrumenter via WithTargetPIDsUpdater. +// TargetPIDs returns the currently tracked target PIDs as a copy. +// Part of TargetPIDsUpdater. +func (i *Instrumenter) TargetPIDs() []app.PID { + pids, ok := i.pidSelector.GetPIDs() + if !ok || len(pids) == 0 { + return nil + } + out := make([]app.PID, len(pids)) + copy(out, pids) + return out +} + +// RemoveTargetPIDs removes PIDs from the set of target PIDs. The selector (GlobAttributes) notifies +// the matcher so the attacher uninstruments them immediately. Part of TargetPIDsUpdater. func (i *Instrumenter) RemoveTargetPIDs(pids ...int) { if len(pids) == 0 { return diff --git a/pkg/internal/appolly/appolly_test.go b/pkg/internal/appolly/appolly_test.go index af3416df55..5a8f0972a2 100644 --- a/pkg/internal/appolly/appolly_test.go +++ b/pkg/internal/appolly/appolly_test.go @@ -51,6 +51,7 @@ func TestProcessEventsLoopDoesntBlock(t *testing.T) { type targetPIDsUpdater interface { AddTargetPIDs(pids ...int) RemoveTargetPIDs(pids ...int) + TargetPIDs() []app.PID } func TestInstrumenter_ImplementsTargetPIDsUpdater(t *testing.T) { @@ -78,4 +79,22 @@ func TestInstrumenter_AddTargetPIDs_RemoveTargetPIDs(t *testing.T) { instr.RemoveTargetPIDs(99) // not present, no-op instr.AddTargetPIDs() // no-op instr.RemoveTargetPIDs() // no-op + + assert.Equal(t, []app.PID{1, 3, 4}, instr.TargetPIDs()) +} + +func TestInstrumenter_TargetPIDs_ReturnsCopy(t *testing.T) { + instr, err := New( + t.Context(), + &global.ContextInfo{Prometheus: &connector.PrometheusManager{}}, + &obi.Config{ChannelBufferLen: 1, Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost"}}, + ) + require.NoError(t, err) + + instr.AddTargetPIDs(7, 8) + got := instr.TargetPIDs() + require.Equal(t, []app.PID{7, 8}, got) + + got[0] = 999 + assert.Equal(t, []app.PID{7, 8}, instr.TargetPIDs()) } From ff1cf8a448af35bc6ccee56bb47f6855b7e0cf95 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Wed, 11 Mar 2026 15:30:23 -0400 Subject: [PATCH 3/7] Refactor to DynamicPidSelector --- docs/config-schema.json | 2 +- pkg/appolly/discover/dynamic_pid_selector.go | 155 ++++++++++++++++++ .../discover/dynamic_pid_selector_test.go | 54 ++++++ pkg/appolly/discover/finder.go | 31 ++-- pkg/appolly/discover/matcher.go | 61 +++---- pkg/appolly/discover/matcher_test.go | 74 ++++----- pkg/appolly/discover/typer_test.go | 2 - pkg/appolly/discover/watcher_kube_test.go | 2 +- pkg/appolly/discover/watcher_proc.go | 4 +- pkg/appolly/discover/watcher_proc_test.go | 4 +- pkg/appolly/instrumenter.go | 8 +- pkg/appolly/services/attr_glob.go | 74 +-------- pkg/appolly/services/attr_glob_test.go | 93 ----------- pkg/appolly/services/attr_regex.go | 2 - pkg/appolly/services/criteria.go | 4 - pkg/instrumenter/instrumenter.go | 4 - pkg/instrumenter/instrumenter_test.go | 93 ++--------- pkg/instrumenter/opts.go | 26 +-- pkg/internal/appolly/appolly.go | 89 +++------- pkg/internal/appolly/appolly_test.go | 62 ++----- pkg/internal/testutil/channels.go | 10 ++ pkg/pipe/global/context.go | 8 +- 22 files changed, 367 insertions(+), 495 deletions(-) create mode 100644 pkg/appolly/discover/dynamic_pid_selector.go create mode 100644 pkg/appolly/discover/dynamic_pid_selector_test.go delete mode 100644 pkg/appolly/services/attr_glob_test.go diff --git a/docs/config-schema.json b/docs/config-schema.json index cee35d41f9..caa3f86a6d 100644 --- a/docs/config-schema.json +++ b/docs/config-schema.json @@ -671,7 +671,7 @@ "type": "integer" }, "type": "array", - "description": "PIDs allows selecting processes by PID. When non-empty, the process PID must be in this list (in addition to any path/port criteria). Updated at runtime via AddPIDs/RemovePIDs; pidsMu protects concurrent access.", + "description": "PIDs allows selecting processes by PID (static from config). When non-empty, the process PID must be in this list.", "x-env-var": "OTEL_EBPF_TARGET_PID" } }, diff --git a/pkg/appolly/discover/dynamic_pid_selector.go b/pkg/appolly/discover/dynamic_pid_selector.go new file mode 100644 index 0000000000..c71c719f48 --- /dev/null +++ b/pkg/appolly/discover/dynamic_pid_selector.go @@ -0,0 +1,155 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package discover // import "go.opentelemetry.io/obi/pkg/appolly/discover" + +import ( + "iter" + "sync" + + "go.opentelemetry.io/obi/pkg/appolly/app" + "go.opentelemetry.io/obi/pkg/appolly/services" + "go.opentelemetry.io/obi/pkg/export/otel/perapp" +) + +// DynamicPIDSelector holds the runtime set of target PIDs for OBI. It is preloaded from +// config target_pids and updated at runtime via AddPIDs/RemovePIDs. Only the discover +// matcher uses it for matching; the instrumenter (or appolly) holds a reference and +// calls AddPIDs/RemovePIDs directly. +type DynamicPIDSelector struct { + mu sync.RWMutex + pids []uint32 + removedCh chan []app.PID // owned by selector; RemovedNotify() returns receive-only view +} + +// NewDynamicPIDSelector creates a new dynamic PID selector (initially empty). +func NewDynamicPIDSelector() *DynamicPIDSelector { + return &DynamicPIDSelector{ + removedCh: make(chan []app.PID, 1), + } +} + +// RemovedNotify returns the channel on which removed PIDs are sent when RemovePIDs is called. +// The matcher uses this to emit synthetic deletes. Safe to call from multiple goroutines. +func (d *DynamicPIDSelector) RemovedNotify() <-chan []app.PID { + return d.removedCh +} + +// GetPIDs returns a copy of the current PID list and true when non-empty. +func (d *DynamicPIDSelector) GetPIDs() ([]app.PID, bool) { + d.mu.RLock() + defer d.mu.RUnlock() + if len(d.pids) == 0 { + return nil, false + } + out := make([]app.PID, len(d.pids)) + for i, p := range d.pids { + out[i] = app.PID(p) + } + return out, true +} + +// AddPIDs adds PIDs to the set (deduplicated). +func (d *DynamicPIDSelector) AddPIDs(pids ...uint32) { + if len(pids) == 0 { + return + } + d.mu.Lock() + defer d.mu.Unlock() + existing := make(map[uint32]struct{}, len(d.pids)) + for _, p := range d.pids { + existing[p] = struct{}{} + } + for _, u := range pids { + if _, ok := existing[u]; !ok { + existing[u] = struct{}{} + d.pids = append(d.pids, u) + } + } +} + +// RemovePIDs removes PIDs from the set and sends them on RemovedNotify() for the matcher. +func (d *DynamicPIDSelector) RemovePIDs(pids ...uint32) { + if len(pids) == 0 { + return + } + toRemove := make(map[uint32]struct{}) + for _, u := range pids { + toRemove[u] = struct{}{} + } + d.mu.Lock() + newPids := d.pids[:0] + removedPIDs := make([]app.PID, 0, len(pids)) + for _, p := range d.pids { + if _, remove := toRemove[p]; !remove { + newPids = append(newPids, p) + continue + } + removedPIDs = append(removedPIDs, app.PID(p)) + } + d.pids = newPids + d.mu.Unlock() + d.notifyRemoved(removedPIDs) +} + +func (d *DynamicPIDSelector) notifyRemoved(removedPIDs []app.PID) { + if len(removedPIDs) == 0 { + return + } + select { + case d.removedCh <- removedPIDs: + default: + // no receiver (e.g. matcher not running); drop so RemovePIDs never blocks + } +} + +// AsSelector returns a services.Selector that matches when the process PID is in this dynamic set. +// The matcher uses it to treat runtime PIDs as a supplement to config criteria. +func (d *DynamicPIDSelector) AsSelector() services.Selector { + return &dynamicPIDCriteriaAdapter{d: d} +} + +// dynamicPIDCriteriaAdapter implements services.Selector by delegating only GetPIDs to the +// DynamicPIDSelector; all other methods return empty/zero so the matcher treats "PID in dynamic set" +// as a match. +type dynamicPIDCriteriaAdapter struct { + d *DynamicPIDSelector +} + +func (a *dynamicPIDCriteriaAdapter) GetName() string { return "" } +func (a *dynamicPIDCriteriaAdapter) GetNamespace() string { return "" } +func (a *dynamicPIDCriteriaAdapter) GetPath() services.StringMatcher { return &emptyMatcher{} } +func (a *dynamicPIDCriteriaAdapter) GetPathRegexp() services.StringMatcher { return &emptyMatcher{} } +func (a *dynamicPIDCriteriaAdapter) GetOpenPorts() *services.IntEnum { return &services.IntEnum{} } +func (a *dynamicPIDCriteriaAdapter) GetLanguages() services.StringMatcher { return &emptyMatcher{} } +func (a *dynamicPIDCriteriaAdapter) GetPIDs() ([]app.PID, bool) { return a.d.GetPIDs() } +func (a *dynamicPIDCriteriaAdapter) GetCmdArgs() services.StringMatcher { return &emptyMatcher{} } +func (a *dynamicPIDCriteriaAdapter) IsContainersOnly() bool { return false } +func (a *dynamicPIDCriteriaAdapter) RangeMetadata() iter.Seq2[string, services.StringMatcher] { + return emptyMetadataSeq2 +} + +func (a *dynamicPIDCriteriaAdapter) RangePodLabels() iter.Seq2[string, services.StringMatcher] { + return emptyMetadataSeq2 +} + +func (a *dynamicPIDCriteriaAdapter) RangePodAnnotations() iter.Seq2[string, services.StringMatcher] { + return emptyMetadataSeq2 +} + +func (a *dynamicPIDCriteriaAdapter) GetExportModes() services.ExportModes { + return services.ExportModeUnset +} + +func (a *dynamicPIDCriteriaAdapter) GetSamplerConfig() *services.SamplerConfig { return nil } +func (a *dynamicPIDCriteriaAdapter) GetRoutesConfig() *services.CustomRoutesConfig { return nil } +func (a *dynamicPIDCriteriaAdapter) MetricsConfig() perapp.SvcMetricsConfig { + return perapp.SvcMetricsConfig{} +} + +type emptyMatcher struct{} + +func (emptyMatcher) IsSet() bool { return false } +func (emptyMatcher) MatchString(_ string) bool { return false } + +func emptyMetadataSeq2(_ func(string, services.StringMatcher) bool) {} diff --git a/pkg/appolly/discover/dynamic_pid_selector_test.go b/pkg/appolly/discover/dynamic_pid_selector_test.go new file mode 100644 index 0000000000..4e02322d46 --- /dev/null +++ b/pkg/appolly/discover/dynamic_pid_selector_test.go @@ -0,0 +1,54 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/obi/pkg/appolly/app" +) + +func TestDynamicPIDSelector_AddPIDs_RemovePIDs_GetPIDs(t *testing.T) { + d := NewDynamicPIDSelector() + pids, ok := d.GetPIDs() + assert.False(t, ok) + assert.Nil(t, pids) + + d.AddPIDs(1, 2, 3) + pids, ok = d.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 2, 3}, pids) + + d.AddPIDs(2, 3, 4) + pids, ok = d.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 2, 3, 4}, pids) + + d.RemovePIDs(2, 4) + pids, ok = d.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 3}, pids) + + d.RemovePIDs(1, 3) + pids, ok = d.GetPIDs() + assert.False(t, ok) + assert.Nil(t, pids) +} + +func TestDynamicPIDSelector_RemovePIDs_Notify(t *testing.T) { + d := NewDynamicPIDSelector() + d.AddPIDs(42, 100) + ch := d.RemovedNotify() + + d.RemovePIDs(100) + got := <-ch + assert.Equal(t, []app.PID{100}, got) + + d.RemovePIDs(42) + got = <-ch + assert.Equal(t, []app.PID{42}, got) +} diff --git a/pkg/appolly/discover/finder.go b/pkg/appolly/discover/finder.go index d2366cf7a7..ad4e91d9c1 100644 --- a/pkg/appolly/discover/finder.go +++ b/pkg/appolly/discover/finder.go @@ -7,9 +7,7 @@ import ( "context" "fmt" - "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/app/request" - "go.opentelemetry.io/obi/pkg/appolly/services" "go.opentelemetry.io/obi/pkg/ebpf" ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" "go.opentelemetry.io/obi/pkg/export/imetrics" @@ -43,9 +41,8 @@ func NewProcessFinder( } type processFinderStartConfig struct { - enrichedProcessEvents *msg.Queue[[]Event[ProcessAttrs]] - pidSelector services.Selector - pidSelectorChangeNotify <-chan []app.PID + enrichedProcessEvents *msg.Queue[[]Event[ProcessAttrs]] + dynamicPIDSelector *DynamicPIDSelector } // ProcessFinderStartOpt allows overriding some internal behavior of ProcessFinder.Start method. @@ -61,20 +58,12 @@ func WithEnrichedProcessEvents(enrichedProcessEvents *msg.Queue[[]Event[ProcessA } } -// WithPIDSelector supplies an additional selector for PID-based discovery. When non-nil, it is prepended -// to the criteria from config (e.g. Instrumenter passes a GlobAttributes; use AddPIDs/RemovePIDs for runtime updates). -func WithPIDSelector(selector services.Selector) ProcessFinderStartOpt { +// WithDynamicPIDSelector supplies the OBI dynamic PID set. Caller can pass discover.NewDynamicPIDSelector() +// and add PIDs via the selector. When non-nil, the finder wires it to the matcher and removed-PID +// notifications are used for synthetic deletes. +func WithDynamicPIDSelector(selector *DynamicPIDSelector) ProcessFinderStartOpt { return func(cfg *processFinderStartConfig) { - cfg.pidSelector = selector - } -} - -// WithPIDSelectorChangeNotifier provides a channel the matcher listens to; when the caller sends the -// removed PIDs on it, the matcher emits targeted synthetic deletes so the attacher uninstruments them -// immediately instead of rescanning all tracked PIDs. -func WithPIDSelectorChangeNotifier(ch <-chan []app.PID) ProcessFinderStartOpt { - return func(cfg *processFinderStartConfig) { - cfg.pidSelectorChangeNotify = ch + cfg.dynamicPIDSelector = selector } } @@ -88,10 +77,12 @@ func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOp tracerEvents := msgh.QueueFromConfig[Event[*ebpf.Instrumentable]](pf.cfg, "tracerEvents") + configCriteria := FindingCriteria(pf.cfg, startConfig.dynamicPIDSelector != nil) + swi := swarm.Instancer{} processEvents := msgh.QueueFromConfig[[]Event[ProcessAttrs]](pf.cfg, "processEvents") - swi.Add(swarm.DirectInstance(ProcessWatcherFunc(pf.cfg, pf.ebpfEventContext, processEvents, startConfig.pidSelector)), + swi.Add(swarm.DirectInstance(ProcessWatcherFunc(pf.cfg, pf.ebpfEventContext, processEvents, configCriteria)), swarm.WithID("ProcessWatcher")) kubeEnrichedEvents := msgh.QueueFromConfig[[]Event[ProcessAttrs]](pf.cfg, "kubeEnrichedEvents") @@ -117,7 +108,7 @@ func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOp ), swarm.WithID("LanguageDecoratorProvider")) criteriaFilteredEvents := msgh.QueueFromConfig[[]Event[ProcessMatch]](pf.cfg, "criteriaFilteredEvents") - swi.Add(criteriaMatcherProvider(pf.cfg, langEnrichedEvents, criteriaFilteredEvents, startConfig.pidSelector, startConfig.pidSelectorChangeNotify), + swi.Add(criteriaMatcherProvider(pf.cfg, langEnrichedEvents, criteriaFilteredEvents, configCriteria, startConfig.dynamicPIDSelector), swarm.WithID("CriteriaMatcher")) executableTypes := msgh.QueueFromConfig[[]Event[ebpf.Instrumentable]](pf.cfg, "executableTypes") diff --git a/pkg/appolly/discover/matcher.go b/pkg/appolly/discover/matcher.go index 9e2cbb84be..723c969bf7 100644 --- a/pkg/appolly/discover/matcher.go +++ b/pkg/appolly/discover/matcher.go @@ -30,20 +30,23 @@ var ( ) // criteriaMatcherProvider filters the processes that match the discovery criteria. -// If pidSelectorChangeNotify is non-nil, the matcher listens to removed-PID notifications -// and runs synthetic deletes only when notified, not on every batch. +// When dynamicSelector is non-nil, runtime PIDs supplement config criteria and the matcher +// listens to the selector's RemovedNotify() channel for synthetic deletes. func criteriaMatcherProvider( cfg *obi.Config, input *msg.Queue[[]Event[ProcessAttrs]], output *msg.Queue[[]Event[ProcessMatch]], - pidSelector services.Selector, - pidSelectorChangeNotify <-chan []app.PID, + configCriteria []services.Selector, + dynamicSelector *DynamicPIDSelector, ) swarm.InstanceFunc { instrumenterNamespace, _ := namespaceFetcherFunc(app.PID(osPidFunc())) - criteria := FindingCriteria(cfg, pidSelector) + var removedNotify <-chan []app.PID + if dynamicSelector != nil { + removedNotify = dynamicSelector.RemovedNotify() + } m := &Matcher{ Log: slog.With("component", "discover.CriteriaMatcher"), - Criteria: criteria, + Criteria: configCriteria, ExcludeCriteria: ExcludingCriteria(cfg), LogEnricherCriteria: LogEnricherFindingCriteria(cfg), ProcessHistory: map[app.PID]ProcessMatch{}, @@ -51,7 +54,8 @@ func criteriaMatcherProvider( Output: output, Namespace: instrumenterNamespace, HasHostPidAccess: hasHostPidAccess(), - RemovedPIDsNotify: pidSelectorChangeNotify, + DynamicPIDs: dynamicSelector, + RemovedPIDsNotify: removedNotify, } return swarm.DirectInstance(m.Run) } @@ -71,6 +75,8 @@ type Matcher struct { Output *msg.Queue[[]Event[ProcessMatch]] Namespace string HasHostPidAccess bool + // DynamicPIDs, when set, supplements config criteria: a process also matches if its PID is in this set. + DynamicPIDs *DynamicPIDSelector // RemovedPIDsNotify, when set, carries the PIDs removed from the dynamic selector so the // matcher can emit targeted synthetic deletes without rescanning ProcessHistory. RemovedPIDsNotify <-chan []app.PID @@ -174,10 +180,14 @@ func (m *Matcher) alreadyMatched(pid app.PID) bool { } func (m *Matcher) matchCriteria(obj ProcessAttrs, proc *services.ProcessInfo) *ProcessMatch { - criteria := make([]services.Selector, 0, len(m.Criteria)) - for i := range m.Criteria { - if m.matchProcess(&obj, proc, m.Criteria[i]) && !m.isExcluded(&obj, proc) { - criteria = append(criteria, m.Criteria[i]) + effectiveCriteria := m.Criteria + if m.DynamicPIDs != nil { + effectiveCriteria = append([]services.Selector{m.DynamicPIDs.AsSelector()}, m.Criteria...) + } + criteria := make([]services.Selector, 0, len(effectiveCriteria)) + for i := range effectiveCriteria { + if m.matchProcess(&obj, proc, effectiveCriteria[i]) && !m.isExcluded(&obj, proc) { + criteria = append(criteria, effectiveCriteria[i]) } } @@ -433,31 +443,14 @@ func LogEnricherFindingCriteria(cfg *obi.Config) []services.Selector { return selectors } -// FindingCriteria returns discovery criteria from config. When pidSelector is non-nil -// and has PIDs (e.g. Instrumenter's GlobAttributes with target_pids or runtime AddPIDs), -// it is the only criterion: config target PIDs are prefilled into it and it is returned alone. -// When pidSelector is nil or has no PIDs (standalone config with discovery.instrument, no target_pids), -// config-based criteria (discovery.instrument, etc.) are used so processes are still discovered. -func FindingCriteria(cfg *obi.Config, pidSelector services.Selector) []services.Selector { +// FindingCriteria returns discovery criteria from config. When skipTargetPIDs is true +// (e.g. Instrumenter with DynamicPIDSelector), target_pids are not included here; the matcher +// uses the dynamic selector as a supplement. When skipTargetPIDs is false, target_pids from +// config are included as static criteria when set. +func FindingCriteria(cfg *obi.Config, skipTargetPIDs bool) []services.Selector { logDeprecationAndConflicts(cfg) - if pidSelector != nil { - if cfg.TargetPIDs.Len() > 0 { - vals := cfg.TargetPIDs.AllValues() - pids := make([]uint32, 0, len(vals)) - for _, v := range vals { - pids = append(pids, uint32(v)) - } - pidSelector.AddPIDs(pids...) - } - // Use pidSelector as the only criterion only when it has effective PID criteria. - // Otherwise (e.g. Instrumenter started with no target_pids) use config-based discovery. - if pids, ok := pidSelector.GetPIDs(); ok && len(pids) > 0 { - return []services.Selector{pidSelector} - } - } - - if cfg.TargetPIDs.Len() > 0 { + if !skipTargetPIDs && cfg.TargetPIDs.Len() > 0 { vals := cfg.TargetPIDs.AllValues() pids := make([]uint32, 0, len(vals)) for _, v := range vals { diff --git a/pkg/appolly/discover/matcher_test.go b/pkg/appolly/discover/matcher_test.go index 2c625b0d9e..f3345a7ad4 100644 --- a/pkg/appolly/discover/matcher_test.go +++ b/pkg/appolly/discover/matcher_test.go @@ -52,7 +52,7 @@ func TestCriteriaMatcher(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -103,7 +103,7 @@ func TestCriteriaMatcherLanguage(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -153,7 +153,7 @@ func TestCriteriaMatcher_Exclude(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -194,7 +194,7 @@ func TestCriteriaMatcher_Exclude_Metadata(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -242,7 +242,7 @@ func TestCriteriaMatcher_MustMatchAllAttributes(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -297,7 +297,7 @@ func TestCriteriaMatcherMissingPort(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -356,7 +356,7 @@ func TestCriteriaMatcherContainersOnly(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -495,7 +495,7 @@ func TestInstrumentation_CoexistingWithDeprecatedServices(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&tc.cfg, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&tc.cfg, discoveredProcesses, filteredProcessesQu, FindingCriteria(&tc.cfg, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -530,7 +530,7 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -561,7 +561,7 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -588,28 +588,22 @@ func TestCriteriaMatcher_TargetPIDs(t *testing.T) { } func TestCriteriaMatcher_DynamicTargetPIDs(t *testing.T) { - // Use a GlobAttributes as pidSelector and update its PIDs at runtime; matcher should see updated list. pipeConfig := obi.Config{ServiceName: "dyn-svc", ServiceNamespace: "ns"} - pidSelector := &services.GlobAttributes{ - Name: "dyn-svc", - Namespace: "ns", - PIDs: []uint32{42}, - } + dynamicSelector := NewDynamicPIDSelector() + dynamicSelector.AddPIDs(42) + configCriteria := FindingCriteria(&pipeConfig, true) discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) filteredProcesses := filteredProcessesQu.Subscribe() - // Set processInfo before starting the matcher so it uses the fake; otherwise the matcher - // may call the default (real) processInfo for PID 42 and fail when that process does not exist. processInfo = func(pp ProcessAttrs) (*services.ProcessInfo, error) { return &services.ProcessInfo{Pid: pp.pid, ExePath: "/any/exe", OpenPorts: pp.openPorts}, nil } - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, pidSelector, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, configCriteria, dynamicSelector)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() - // First batch: only PID 42 is in selector, so only 42 matches discoveredProcesses.Send([]Event[ProcessAttrs]{ {Type: EventCreated, Obj: ProcessAttrs{pid: 42, openPorts: []uint32{}}}, {Type: EventCreated, Obj: ProcessAttrs{pid: 100, openPorts: []uint32{}}}, @@ -618,10 +612,7 @@ func TestCriteriaMatcher_DynamicTargetPIDs(t *testing.T) { require.Len(t, matches, 1) assert.Equal(t, app.PID(42), matches[0].Obj.Process.Pid) - // Add PID 100 at runtime (e.g. AddTargetPIDs(100)) - pidSelector.AddPIDs(100) - - // Second batch: 100 is now in selector, so 100 matches + dynamicSelector.AddPIDs(100) discoveredProcesses.Send([]Event[ProcessAttrs]{ {Type: EventCreated, Obj: ProcessAttrs{pid: 100, openPorts: []uint32{}}}, }) @@ -629,23 +620,28 @@ func TestCriteriaMatcher_DynamicTargetPIDs(t *testing.T) { require.Len(t, matches, 1) assert.Equal(t, app.PID(100), matches[0].Obj.Process.Pid) - // Remove 100; only 42 remains in selector. Sending 100 again should not match (no event). - pidSelector.RemovePIDs(100) + dynamicSelector.RemovePIDs(100) + // Matcher sends synthetic EventDeleted for removed PIDs; drain it before asserting no re-match. + deletes := testutil.ReadChannel(t, filteredProcesses, testTimeout) + require.Len(t, deletes, 1) + assert.Equal(t, EventDeleted, deletes[0].Type) + assert.Equal(t, app.PID(100), deletes[0].Obj.Process.Pid) + discoveredProcesses.Send([]Event[ProcessAttrs]{ {Type: EventCreated, Obj: ProcessAttrs{pid: 100, openPorts: []uint32{}}}, }) testutil.ChannelEmpty(t, filteredProcesses, 100*time.Millisecond) + + // Stop matcher so next test does not race on global processInfo (close input, drain output). + discoveredProcesses.Close() + testutil.DrainUntilClosed(filteredProcesses) } func TestCriteriaMatcher_DynamicTargetPIDs_RemoveNotification(t *testing.T) { pipeConfig := obi.Config{ServiceName: "dyn-svc", ServiceNamespace: "ns"} - pidSelector := &services.GlobAttributes{ - Name: "dyn-svc", - Namespace: "ns", - PIDs: []uint32{42, 100}, - } - removedPIDs := make(chan []app.PID, 2) - pidSelector.SetPIDsChangeNotify(removedPIDs) + dynamicSelector := NewDynamicPIDSelector() + dynamicSelector.AddPIDs(42, 100) + configCriteria := FindingCriteria(&pipeConfig, true) discoveredProcesses := msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(10)) filteredProcessesQu := msg.NewQueue[[]Event[ProcessMatch]](msg.ChannelBufferLen(10)) @@ -653,7 +649,7 @@ func TestCriteriaMatcher_DynamicTargetPIDs_RemoveNotification(t *testing.T) { processInfo = func(pp ProcessAttrs) (*services.ProcessInfo, error) { return &services.ProcessInfo{Pid: pp.pid, ExePath: "/any/exe", OpenPorts: pp.openPorts}, nil } - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, pidSelector, removedPIDs)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, configCriteria, dynamicSelector)(t.Context()) require.NoError(t, err) go matcherFunc(t.Context()) defer filteredProcessesQu.Close() @@ -665,17 +661,21 @@ func TestCriteriaMatcher_DynamicTargetPIDs_RemoveNotification(t *testing.T) { matches := testutil.ReadChannel(t, filteredProcesses, testTimeout) require.Len(t, matches, 2) - pidSelector.RemovePIDs(100) + dynamicSelector.RemovePIDs(100) matches = testutil.ReadChannel(t, filteredProcesses, testTimeout) require.Len(t, matches, 1) assert.Equal(t, EventDeleted, matches[0].Type) assert.Equal(t, app.PID(100), matches[0].Obj.Process.Pid) - pidSelector.RemovePIDs(42) + dynamicSelector.RemovePIDs(42) matches = testutil.ReadChannel(t, filteredProcesses, testTimeout) require.Len(t, matches, 1) assert.Equal(t, EventDeleted, matches[0].Type) assert.Equal(t, app.PID(42), matches[0].Obj.Process.Pid) + + // Stop matcher so next test does not race on global processInfo (close input, drain output). + discoveredProcesses.Close() + testutil.DrainUntilClosed(filteredProcesses) } func TestCriteriaMatcher_Granular(t *testing.T) { @@ -702,7 +702,7 @@ func TestCriteriaMatcher_Granular(t *testing.T) { filteredProcesses := filteredProcessesQu.Subscribe() - matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, nil, nil)(t.Context()) + matcherFunc, err := criteriaMatcherProvider(&pipeConfig, discoveredProcesses, filteredProcessesQu, FindingCriteria(&pipeConfig, false), nil)(t.Context()) require.NoError(t, err) diff --git a/pkg/appolly/discover/typer_test.go b/pkg/appolly/discover/typer_test.go index e7d23bb6bd..6241a87b76 100644 --- a/pkg/appolly/discover/typer_test.go +++ b/pkg/appolly/discover/typer_test.go @@ -37,8 +37,6 @@ func (d dummyCriterion) IsContainersOnly() bool func (d dummyCriterion) GetPathRegexp() services.StringMatcher { return nil } func (d dummyCriterion) GetCmdArgs() services.StringMatcher { return nil } func (d dummyCriterion) GetPIDs() ([]app.PID, bool) { return nil, false } -func (d dummyCriterion) AddPIDs(_ ...uint32) {} -func (d dummyCriterion) RemovePIDs(_ ...uint32) {} func (d dummyCriterion) GetNamespace() string { return d.namespace } func (d dummyCriterion) GetExportModes() services.ExportModes { return d.export } func (d dummyCriterion) GetSamplerConfig() *services.SamplerConfig { return d.sampler } diff --git a/pkg/appolly/discover/watcher_kube_test.go b/pkg/appolly/discover/watcher_kube_test.go index cf6a8dfb47..800f8ed8d0 100644 --- a/pkg/appolly/discover/watcher_kube_test.go +++ b/pkg/appolly/discover/watcher_kube_test.go @@ -164,7 +164,7 @@ func TestWatcherKubeEnricherWithMatcher(t *testing.T) { version: "v[0-9]*" `), &pipeConfig)) - swi.Add(criteriaMatcherProvider(&pipeConfig, connectQueue, outputQueue, nil, nil)) + swi.Add(criteriaMatcherProvider(&pipeConfig, connectQueue, outputQueue, FindingCriteria(&pipeConfig, false), nil)) nodesRunner, err := swi.Instance(t.Context()) require.NoError(t, err) diff --git a/pkg/appolly/discover/watcher_proc.go b/pkg/appolly/discover/watcher_proc.go index 271c152842..0987f839d4 100644 --- a/pkg/appolly/discover/watcher_proc.go +++ b/pkg/appolly/discover/watcher_proc.go @@ -68,7 +68,7 @@ func wplog() *slog.Logger { // ProcessWatcherFunc polls every PollInterval for new processes and forwards either new or deleted process PIDs // as well as PIDs from processes that setup a new connection. -func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContext, output *msg.Queue[[]Event[ProcessAttrs]], pidSelector services.Selector) swarm.RunFunc { +func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContext, output *msg.Queue[[]Event[ProcessAttrs]], findingCriteria []services.Selector) swarm.RunFunc { acc := pollAccounter{ cfg: cfg, output: output, @@ -82,7 +82,7 @@ func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContex fetchPorts: true, // must be true until we've activated the bpf watcher component bpfWatcherEnabled: false, // async set by listening on the bpfWatchEvents channel stateMux: sync.Mutex{}, - findingCriteria: FindingCriteria(cfg, pidSelector), + findingCriteria: findingCriteria, ebpfContext: ebpfContext, } if acc.interval == 0 { diff --git a/pkg/appolly/discover/watcher_proc_test.go b/pkg/appolly/discover/watcher_proc_test.go index eb92584953..39c24c9d3e 100644 --- a/pkg/appolly/discover/watcher_proc_test.go +++ b/pkg/appolly/discover/watcher_proc_test.go @@ -202,7 +202,7 @@ func TestPortsFetchRequired(t *testing.T) { stateMux: sync.Mutex{}, bpfWatcherEnabled: false, fetchPorts: true, - findingCriteria: FindingCriteria(cfg, nil), + findingCriteria: FindingCriteria(cfg, false), output: msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(1)), } @@ -300,7 +300,7 @@ func TestMinProcessAge(t *testing.T) { stateMux: sync.Mutex{}, bpfWatcherEnabled: false, fetchPorts: true, - findingCriteria: FindingCriteria(cfg, nil), + findingCriteria: FindingCriteria(cfg, false), output: msg.NewQueue[[]Event[ProcessAttrs]](msg.ChannelBufferLen(1)), } diff --git a/pkg/appolly/instrumenter.go b/pkg/appolly/instrumenter.go index 0543f2b3e5..1c4427a7d7 100644 --- a/pkg/appolly/instrumenter.go +++ b/pkg/appolly/instrumenter.go @@ -251,11 +251,11 @@ func spanPtrPromGetters(cfg *obi.Config) attributes.NamedGetters[request.Span, s // Then they would be used or not for each service, based on the per-service features-. func joinMetricsConfig(cfg *obi.Config) *perapp.MetricsConfig { mc := cfg.Metrics - for i := range cfg.Discovery.Instrument { - mc.Features |= cfg.Discovery.Instrument[i].Metrics.Features + for _, d := range cfg.Discovery.Instrument { + mc.Features |= d.Metrics.Features } - for i := range cfg.Discovery.Services { - mc.Features |= cfg.Discovery.Services[i].Metrics.Features + for _, d := range cfg.Discovery.Services { + mc.Features |= d.Metrics.Features } return &mc } diff --git a/pkg/appolly/services/attr_glob.go b/pkg/appolly/services/attr_glob.go index e209f969db..0ac9ded1ed 100644 --- a/pkg/appolly/services/attr_glob.go +++ b/pkg/appolly/services/attr_glob.go @@ -6,7 +6,6 @@ package services // import "go.opentelemetry.io/obi/pkg/appolly/services" import ( "fmt" "iter" - "sync" "github.com/gobwas/glob" "github.com/invopop/jsonschema" @@ -92,13 +91,8 @@ type GlobAttributes struct { // programming language they are written in. Use lowercase names, e.g. java,go Languages GlobAttr `yaml:"languages"` - // PIDs allows selecting processes by PID. When non-empty, the process PID must be in this list (in addition to any path/port criteria). - // Updated at runtime via AddPIDs/RemovePIDs; pidsMu protects concurrent access. - PIDs []uint32 `yaml:"target_pids"` - pidsMu sync.RWMutex - // pidsChangeNotify, when set, receives the PIDs actually removed from the selector so the matcher can - // uninstrument them without rescanning all tracked PIDs. - pidsChangeNotify chan<- []app.PID + // PIDs allows selecting processes by PID (static from config). When non-empty, the process PID must be in this list. + PIDs []uint32 `yaml:"target_pids"` // Path allows defining the regular expression matching the full executable path. Path GlobAttr `yaml:"exe_path"` @@ -246,8 +240,6 @@ func (ga *GlobAttributes) GetSamplerConfig() *SamplerConfig { return ga.SamplerC func (ga *GlobAttributes) GetRoutesConfig() *CustomRoutesConfig { return ga.Routes } func (ga *GlobAttributes) pids() ([]app.PID, bool) { - ga.pidsMu.RLock() - defer ga.pidsMu.RUnlock() if len(ga.PIDs) == 0 { return nil, false } @@ -258,68 +250,6 @@ func (ga *GlobAttributes) pids() ([]app.PID, bool) { return out, true } -// AddPIDs adds PIDs to the selector's list (for runtime add; thread-safe with GetPIDs). -func (ga *GlobAttributes) AddPIDs(pids ...uint32) { - if len(pids) == 0 { - return - } - ga.pidsMu.Lock() - defer ga.pidsMu.Unlock() - existing := make(map[uint32]struct{}, len(ga.PIDs)) - for _, p := range ga.PIDs { - existing[p] = struct{}{} - } - for _, u := range pids { - if _, ok := existing[u]; !ok { - existing[u] = struct{}{} - ga.PIDs = append(ga.PIDs, u) - } - } -} - -// SetPIDsChangeNotify sets the channel to notify with the PIDs removed from the selector. -// Used by the discover matcher to emit targeted synthetic deletes when the PID list changes. -func (ga *GlobAttributes) SetPIDsChangeNotify(ch chan<- []app.PID) { - ga.pidsChangeNotify = ch -} - -func (ga *GlobAttributes) notifyPIDsChanged(removedPIDs []app.PID) { - if ga.pidsChangeNotify == nil || len(removedPIDs) == 0 { - return - } - // This channel is edge-based: it carries the exact removal that just happened. - // That avoids the matcher doing a level-based recompute by scanning all tracked - // PIDs against the selector's current state after every change. - // - // Because the payload is the removal event itself, dropping sends would lose - // information. We therefore use a blocking send instead of a best-effort wakeup. - ga.pidsChangeNotify <- removedPIDs -} - -// RemovePIDs removes PIDs from the selector's list (for runtime remove; thread-safe with GetPIDs). -func (ga *GlobAttributes) RemovePIDs(pids ...uint32) { - if len(pids) == 0 { - return - } - toRemove := make(map[uint32]struct{}) - for _, u := range pids { - toRemove[u] = struct{}{} - } - ga.pidsMu.Lock() - newPids := ga.PIDs[:0] - removedPIDs := make([]app.PID, 0, len(pids)) - for _, p := range ga.PIDs { - if _, remove := toRemove[p]; !remove { - newPids = append(newPids, p) - continue - } - removedPIDs = append(removedPIDs, app.PID(p)) - } - ga.PIDs = newPids - ga.pidsMu.Unlock() - ga.notifyPIDsChanged(removedPIDs) -} - type nilMatcher struct{} func (n nilMatcher) IsSet() bool { return false } diff --git a/pkg/appolly/services/attr_glob_test.go b/pkg/appolly/services/attr_glob_test.go deleted file mode 100644 index 708c0a6098..0000000000 --- a/pkg/appolly/services/attr_glob_test.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package services - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "go.opentelemetry.io/obi/pkg/appolly/app" -) - -func TestGlobAttributes_AddPIDs_RemovePIDs_GetPIDs(t *testing.T) { - ga := &GlobAttributes{Name: "svc", Namespace: "ns"} - - // Initially empty - pids, ok := ga.GetPIDs() - assert.False(t, ok) - assert.Nil(t, pids) - - // Add PIDs - ga.AddPIDs(1, 2, 3) - pids, ok = ga.GetPIDs() - require.True(t, ok) - assert.Equal(t, []app.PID{1, 2, 3}, pids) - - // Add duplicates and new: no duplicates added - ga.AddPIDs(2, 3, 4) - pids, ok = ga.GetPIDs() - require.True(t, ok) - assert.Equal(t, []app.PID{1, 2, 3, 4}, pids) - - // Remove PIDs - ga.RemovePIDs(2, 4) - pids, ok = ga.GetPIDs() - require.True(t, ok) - assert.Equal(t, []app.PID{1, 3}, pids) - - // Remove all - ga.RemovePIDs(1, 3) - pids, ok = ga.GetPIDs() - assert.False(t, ok) - assert.Nil(t, pids) - - // Add after empty - ga.AddPIDs(42) - pids, ok = ga.GetPIDs() - require.True(t, ok) - assert.Equal(t, []app.PID{42}, pids) -} - -func TestGlobAttributes_AddPIDs_RemovePIDs_emptyNoOp(t *testing.T) { - ga := &GlobAttributes{PIDs: []uint32{1, 2, 3}} - - // No-op when called with no args - ga.AddPIDs() - ga.RemovePIDs() - - pids, ok := ga.GetPIDs() - require.True(t, ok) - assert.Equal(t, []app.PID{1, 2, 3}, pids) -} - -func TestGlobAttributes_RemovePIDs_NotifyRemovedPIDs(t *testing.T) { - ga := &GlobAttributes{PIDs: []uint32{1, 2, 3}} - removedPIDs := make(chan []app.PID, 1) - ga.SetPIDsChangeNotify(removedPIDs) - - ga.AddPIDs(4) - select { - case got := <-removedPIDs: - t.Fatalf("unexpected add notification: %v", got) - default: - } - - ga.RemovePIDs(2, 4, 99) - - select { - case got := <-removedPIDs: - assert.Equal(t, []app.PID{2, 4}, got) - default: - t.Fatal("expected removed PID notification") - } - - ga.RemovePIDs(99) - select { - case got := <-removedPIDs: - t.Fatalf("unexpected remove notification for missing pid: %v", got) - default: - } -} diff --git a/pkg/appolly/services/attr_regex.go b/pkg/appolly/services/attr_regex.go index df6c5cdc74..1136280d51 100644 --- a/pkg/appolly/services/attr_regex.go +++ b/pkg/appolly/services/attr_regex.go @@ -202,8 +202,6 @@ func (a *RegexSelector) GetPathRegexp() StringMatcher { return &a.Path func (a *RegexSelector) GetOpenPorts() *IntEnum { return &a.OpenPorts } func (a *RegexSelector) GetPIDs() ([]app.PID, bool) { return a.pids() } func (a *RegexSelector) GetCmdArgs() StringMatcher { return &a.CmdArgs } -func (a *RegexSelector) AddPIDs(_ ...uint32) {} -func (a *RegexSelector) RemovePIDs(_ ...uint32) {} func (a *RegexSelector) IsContainersOnly() bool { return a.ContainersOnly } func (a *RegexSelector) MetricsConfig() perapp.SvcMetricsConfig { return a.Metrics } func (a *RegexSelector) RangeMetadata() iter.Seq2[string, StringMatcher] { diff --git a/pkg/appolly/services/criteria.go b/pkg/appolly/services/criteria.go index 85ed09fd9e..6f036a3e01 100644 --- a/pkg/appolly/services/criteria.go +++ b/pkg/appolly/services/criteria.go @@ -171,10 +171,6 @@ type Selector interface { GetLanguages() StringMatcher // GetPIDs returns the list of target PIDs and true when this selector has PID criteria (analogous to OpenPorts). GetPIDs() ([]app.PID, bool) - // AddPIDs adds PIDs to the selector's list; no-op for selectors that do not support dynamic PIDs. - AddPIDs(pids ...uint32) - // RemovePIDs removes PIDs from the selector's list; no-op for selectors that do not support dynamic PIDs. - RemovePIDs(pids ...uint32) GetCmdArgs() StringMatcher IsContainersOnly() bool RangeMetadata() iter.Seq2[string, StringMatcher] diff --git a/pkg/instrumenter/instrumenter.go b/pkg/instrumenter/instrumenter.go index 3548872e9b..7a96d389f5 100644 --- a/pkg/instrumenter/instrumenter.go +++ b/pkg/instrumenter/instrumenter.go @@ -87,10 +87,6 @@ func setupAppO11y(ctx context.Context, ctxInfo *global.ContextInfo, config *obi. slog.Debug("can't create new instrumenter", "error", err) return fmt.Errorf("can't create new instrumenter: %w", err) } - // Notify callers that requested it (e.g. WithTargetPIDsUpdater); they receive the App O11y instrumenter. - if ctxInfo.AppO11y.OnTargetPIDsUpdaterReady != nil { - ctxInfo.AppO11y.OnTargetPIDsUpdaterReady(instr) - } if err := instr.FindAndInstrument(ctx); err != nil { slog.Debug("can't find target process", "error", err) diff --git a/pkg/instrumenter/instrumenter_test.go b/pkg/instrumenter/instrumenter_test.go index 1093f41e13..4b7f3bd9ab 100644 --- a/pkg/instrumenter/instrumenter_test.go +++ b/pkg/instrumenter/instrumenter_test.go @@ -5,7 +5,6 @@ package instrumenter import ( "context" - "sync" "testing" "time" @@ -13,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/obi/pkg/appolly/app" + "go.opentelemetry.io/obi/pkg/appolly/discover" "go.opentelemetry.io/obi/pkg/appolly/services" "go.opentelemetry.io/obi/pkg/export/otel/otelcfg" "go.opentelemetry.io/obi/pkg/obi" @@ -46,55 +46,14 @@ func TestServiceNameTemplate(t *testing.T) { assert.Nil(t, temp) } -// TestRun_WithTargetPIDsUpdater_OnlyDynamicPIDSelector verifies that when the instrumenter -// is initialized with a dynamic PID selector (via WithTargetPIDsUpdater), that selector is -// the only discovery criterion—config-based criteria (e.g. discovery.instrument) are not -// used. The selector may be empty by default; callers add/remove PIDs via the updater. -func TestRun_WithTargetPIDsUpdater_OnlyDynamicPIDSelector(t *testing.T) { +// TestRun_WithDynamicPIDSelector verifies that when the caller passes a selector via +// WithDynamicPIDSelector, Run uses it and the caller can add/remove PIDs on it directly— +// no callback or reference to the instrumenter is needed. +func TestRun_WithDynamicPIDSelector(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // Config has discovery.instrument set, but when using WithTargetPIDsUpdater the - // dynamic PID selector is the sole criterion (this config's Instrument is ignored). - cfg := &obi.Config{ - ChannelBufferLen: 1, - Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost:0"}, - Discovery: services.DiscoveryConfig{ - Instrument: services.GlobDefinitionCriteria{ - services.GlobAttributes{Name: "ignored", OpenPorts: services.IntEnum{Ranges: []services.IntRange{{Start: 8080}}}}, - }, - }, - } - require.True(t, cfg.Enabled(obi.FeatureAppO11y), "test config must enable App O11y") - - var callbackRan bool - opts := []Option{ - WithTargetPIDsUpdater(func(u TargetPIDsUpdater) { - require.NotNil(t, u, "caller should receive non-nil TargetPIDsUpdater when using dynamic PID selector") - // Selector is the only criterion; it may be empty. Caller controls targets via Add/Remove. - callbackRan = true - }), - } - - done := make(chan error, 1) - go func() { done <- Run(ctx, cfg, opts...) }() - - time.Sleep(2 * time.Second) - cancel() - <-done - - require.True(t, callbackRan, "WithTargetPIDsUpdater callback must run; discovery then uses only the dynamic PID selector (even if empty)") -} - -// TestRun_WithTargetPIDsUpdater_DynamicUpdate shows that a caller using the public API -// (Run + WithTargetPIDsUpdater) receives the instrumenter and can dynamically add/remove -// target PIDs at runtime. Run is started with a cancelable context and stopped shortly -// after the callback runs so we don't run the full pipeline. -func TestRun_WithTargetPIDsUpdater_DynamicUpdate(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Minimal config: enable App O11y (discovery.instrument) and at least one exporter (traces endpoint). + sel := discover.NewDynamicPIDSelector() cfg := &obi.Config{ ChannelBufferLen: 1, Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost:0"}, @@ -104,42 +63,20 @@ func TestRun_WithTargetPIDsUpdater_DynamicUpdate(t *testing.T) { }, }, } - // Ensure Enabled(FeatureAppO11y) is true (Discovery.Instrument is set) require.True(t, cfg.Enabled(obi.FeatureAppO11y), "test config must enable App O11y") - var ( - receivedUpdater bool - updaterMu sync.Mutex - ) - opts := []Option{ - WithTargetPIDsUpdater(func(u TargetPIDsUpdater) { - updaterMu.Lock() - defer updaterMu.Unlock() - require.NotNil(t, u, "caller should receive non-nil TargetPIDsUpdater") - // Demonstrate dynamic update: add and remove PIDs at runtime. - u.AddTargetPIDs(42, 100) - u.AddTargetPIDs(42, 200) // duplicate 42, new 200 - u.RemoveTargetPIDs(42) - u.RemoveTargetPIDs(999) // not present, no-op - assert.Equal(t, []app.PID{100, 200}, u.TargetPIDs()) - receivedUpdater = true - }), - } - + opts := []Option{WithDynamicPIDSelector(sel)} done := make(chan error, 1) go func() { done <- Run(ctx, cfg, opts...) }() - // Give setupAppO11y time to create the instrumenter and invoke OnTargetPIDsUpdaterReady. - // We don't need to wait for the full pipeline; just for the callback. time.Sleep(2 * time.Second) + sel.AddPIDs(42, 100) + sel.AddPIDs(42, 200) + sel.RemovePIDs(42) + sel.RemovePIDs(999) + pids, ok := sel.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{100, 200}, pids) cancel() - - err := <-done - // Run typically returns context.Canceled or similar after cancel. - require.Error(t, err) - - updaterMu.Lock() - got := receivedUpdater - updaterMu.Unlock() - require.True(t, got, "WithTargetPIDsUpdater callback should be invoked with the instrumenter; caller can then add/remove PIDs at runtime") + <-done } diff --git a/pkg/instrumenter/opts.go b/pkg/instrumenter/opts.go index 2dc99d89e8..f0f5f0e5a4 100644 --- a/pkg/instrumenter/opts.go +++ b/pkg/instrumenter/opts.go @@ -4,8 +4,8 @@ package instrumenter // import "go.opentelemetry.io/obi/pkg/instrumenter" import ( - "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/app/request" + "go.opentelemetry.io/obi/pkg/appolly/discover" "go.opentelemetry.io/obi/pkg/pipe/global" "go.opentelemetry.io/obi/pkg/pipe/msg" ) @@ -13,26 +13,12 @@ import ( // Option that override the instantiation of the instrumenter type Option func(info *global.ContextInfo) -// TargetPIDsUpdater is implemented by the default App O11y instrumenter. It allows adding, -// removing, and inspecting target PIDs at runtime. Works with any config; no initial -// target_pids required. -type TargetPIDsUpdater interface { - AddTargetPIDs(pids ...int) - RemoveTargetPIDs(pids ...int) - TargetPIDs() []app.PID -} - -// WithTargetPIDsUpdater calls the given function with a TargetPIDsUpdater after the App O11y -// instrumenter is created. Callers receive the App O11y instrumenter (as this interface); store it -// and call AddTargetPIDs/RemoveTargetPIDs at runtime to change which PIDs are instrumented, or -// TargetPIDs to inspect the currently tracked set. -func WithTargetPIDsUpdater(onReady func(TargetPIDsUpdater)) Option { +// WithDynamicPIDSelector passes the given dynamic PID selector into the App O11y pipeline. The caller +// creates it with discover.NewDynamicPIDSelector(), passes it here, and then calls AddPIDs/RemovePIDs/GetPIDs +// on it directly—no callback or reference to the instrumenter is needed. +func WithDynamicPIDSelector(sel *discover.DynamicPIDSelector) Option { return func(info *global.ContextInfo) { - info.AppO11y.OnTargetPIDsUpdaterReady = func(v any) { - if u, ok := v.(TargetPIDsUpdater); ok { - onReady(u) - } - } + info.AppO11y.DynamicPIDSelector = sel } } diff --git a/pkg/internal/appolly/appolly.go b/pkg/internal/appolly/appolly.go index 48a3fcea4a..511bd8b0fa 100644 --- a/pkg/internal/appolly/appolly.go +++ b/pkg/internal/appolly/appolly.go @@ -13,11 +13,9 @@ import ( "time" "go.opentelemetry.io/obi/pkg/appolly" - "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/app/request" "go.opentelemetry.io/obi/pkg/appolly/discover" "go.opentelemetry.io/obi/pkg/appolly/discover/exec" - "go.opentelemetry.io/obi/pkg/appolly/services" "go.opentelemetry.io/obi/pkg/appolly/traces" "go.opentelemetry.io/obi/pkg/ebpf" ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" @@ -53,10 +51,9 @@ type Instrumenter struct { // global data structures for all eBPF tracers ebpfEventContext *ebpfcommon.EBPFEventContext - // pidSelector is the PID criteria passed to discovery; this Instrumenter implements TargetPIDsUpdater - // (AddTargetPIDs/RemoveTargetPIDs) so callers who use WithTargetPIDsUpdater receive this instance. - pidSelector services.Selector // *services.GlobAttributes always set; PIDs from config or empty, updated at runtime via AddPIDs/RemovePIDs - finishers []finisher + // dynamicPIDSelector is the runtime PID set; from WithDynamicPIDSelector or created in New. Finder preloads from config. + dynamicPIDSelector *discover.DynamicPIDSelector + finishers []finisher } type finisher struct { @@ -101,28 +98,24 @@ func New(ctx context.Context, ctxInfo *global.ContextInfo, config *obi.Config) ( return nil, fmt.Errorf("can't instantiate instrumentation pipeline: %w", err) } - var initialPIDs []uint32 - if config.TargetPIDs.Len() > 0 { - initial := config.TargetPIDs.AllValues() - initialPIDs = make([]uint32, len(initial)) - for i, p := range initial { - initialPIDs[i] = uint32(p) + var sel *discover.DynamicPIDSelector + if v := ctxInfo.AppO11y.DynamicPIDSelector; v != nil { + if s, ok := v.(*discover.DynamicPIDSelector); ok { + sel = s } + // If v is not a *DynamicPIDSelector, sel stays nil and we use static config target_pids. } + // When sel is nil, finder gets nil: config target_pids are used as static criteria (FindingCriteria(cfg, false)). instr := &Instrumenter{ - config: config, - ctxInfo: ctxInfo, - tracersWg: &sync.WaitGroup{}, - tracesInput: tracesInput, - processEventInput: processEventsInput, - bp: bp, - peGraphBuilder: swi, - ebpfEventContext: ebpfcommon.NewEBPFEventContext(), - pidSelector: &services.GlobAttributes{ - Name: config.ServiceName, - Namespace: config.ServiceNamespace, - PIDs: initialPIDs, - }, + config: config, + ctxInfo: ctxInfo, + tracersWg: &sync.WaitGroup{}, + tracesInput: tracesInput, + processEventInput: processEventsInput, + bp: bp, + peGraphBuilder: swi, + ebpfEventContext: ebpfcommon.NewEBPFEventContext(), + dynamicPIDSelector: sel, } return instr, nil } @@ -133,13 +126,8 @@ func New(ctx context.Context, ctxInfo *global.ContextInfo, config *obi.Config) ( // This is: when the context is cancelled, it has unloaded all the eBPF probes. func (i *Instrumenter) FindAndInstrument(ctx context.Context) error { finder := discover.NewProcessFinder(i.config, i.ctxInfo, i.tracesInput, i.ebpfEventContext) - pidSelectorChangeNotify := make(chan []app.PID, 1) opts := []discover.ProcessFinderStartOpt{ - discover.WithPIDSelector(i.pidSelector), - discover.WithPIDSelectorChangeNotifier(pidSelectorChangeNotify), - } - if ga, ok := i.pidSelector.(*services.GlobAttributes); ok { - ga.SetPIDsChangeNotify(pidSelectorChangeNotify) + discover.WithDynamicPIDSelector(i.dynamicPIDSelector), } processEvents, err := finder.Start(ctx, opts...) if err != nil { @@ -280,42 +268,3 @@ func refreshK8sInformerCache(ctx context.Context, ctxInfo *global.ContextInfo) e func (i *Instrumenter) processEventsPipeline(ctx context.Context, graph *swarm.Runner) { graph.Start(ctx, swarm.WithCancelTimeout(i.config.ShutdownTimeout)) } - -// AddTargetPIDs adds PIDs to the set of target PIDs instrumented at runtime. -// Part of TargetPIDsUpdater; callers receive this Instrumenter via WithTargetPIDsUpdater. -// Works with any config; the PID selector picks up new PIDs on the next discovery poll. -func (i *Instrumenter) AddTargetPIDs(pids ...int) { - if len(pids) == 0 { - return - } - u32 := make([]uint32, len(pids)) - for j, p := range pids { - u32[j] = uint32(p) - } - i.pidSelector.AddPIDs(u32...) -} - -// TargetPIDs returns the currently tracked target PIDs as a copy. -// Part of TargetPIDsUpdater. -func (i *Instrumenter) TargetPIDs() []app.PID { - pids, ok := i.pidSelector.GetPIDs() - if !ok || len(pids) == 0 { - return nil - } - out := make([]app.PID, len(pids)) - copy(out, pids) - return out -} - -// RemoveTargetPIDs removes PIDs from the set of target PIDs. The selector (GlobAttributes) notifies -// the matcher so the attacher uninstruments them immediately. Part of TargetPIDsUpdater. -func (i *Instrumenter) RemoveTargetPIDs(pids ...int) { - if len(pids) == 0 { - return - } - u32 := make([]uint32, len(pids)) - for j, p := range pids { - u32[j] = uint32(p) - } - i.pidSelector.RemovePIDs(u32...) -} diff --git a/pkg/internal/appolly/appolly_test.go b/pkg/internal/appolly/appolly_test.go index 5a8f0972a2..5beac431c3 100644 --- a/pkg/internal/appolly/appolly_test.go +++ b/pkg/internal/appolly/appolly_test.go @@ -47,54 +47,26 @@ func TestProcessEventsLoopDoesntBlock(t *testing.T) { assert.NoError(t, err) } -// targetPIDsUpdater is the same as instrumenter.TargetPIDsUpdater; used to avoid import cycle. -type targetPIDsUpdater interface { - AddTargetPIDs(pids ...int) - RemoveTargetPIDs(pids ...int) - TargetPIDs() []app.PID -} - -func TestInstrumenter_ImplementsTargetPIDsUpdater(t *testing.T) { - instr, err := New( - t.Context(), - &global.ContextInfo{Prometheus: &connector.PrometheusManager{}}, - &obi.Config{ChannelBufferLen: 1, Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost"}}, - ) - require.NoError(t, err) - var _ targetPIDsUpdater = instr -} - -func TestInstrumenter_AddTargetPIDs_RemoveTargetPIDs(t *testing.T) { - instr, err := New( - t.Context(), - &global.ContextInfo{Prometheus: &connector.PrometheusManager{}}, - &obi.Config{ChannelBufferLen: 1, Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost"}}, - ) - require.NoError(t, err) - - // AddTargetPIDs and RemoveTargetPIDs should not panic; instrumenter always has pidSelector - instr.AddTargetPIDs(1, 2, 3) - instr.AddTargetPIDs(2, 4) // 2 duplicate, 4 new - instr.RemoveTargetPIDs(2) - instr.RemoveTargetPIDs(99) // not present, no-op - instr.AddTargetPIDs() // no-op - instr.RemoveTargetPIDs() // no-op - - assert.Equal(t, []app.PID{1, 3, 4}, instr.TargetPIDs()) -} - -func TestInstrumenter_TargetPIDs_ReturnsCopy(t *testing.T) { - instr, err := New( +// TestInstrumenter_WithDynamicPIDSelector verifies that when the caller passes a selector via +// ContextInfo.AppO11y.DynamicPIDSelector, New uses it and the caller can add/remove PIDs on it directly. +func TestInstrumenter_WithDynamicPIDSelector(t *testing.T) { + sel := discover.NewDynamicPIDSelector() + ctxInfo := &global.ContextInfo{ + Prometheus: &connector.PrometheusManager{}, + AppO11y: global.AppO11y{DynamicPIDSelector: sel}, + } + _, err := New( t.Context(), - &global.ContextInfo{Prometheus: &connector.PrometheusManager{}}, + ctxInfo, &obi.Config{ChannelBufferLen: 1, Traces: otelcfg.TracesConfig{TracesEndpoint: "http://localhost"}}, ) require.NoError(t, err) - instr.AddTargetPIDs(7, 8) - got := instr.TargetPIDs() - require.Equal(t, []app.PID{7, 8}, got) - - got[0] = 999 - assert.Equal(t, []app.PID{7, 8}, instr.TargetPIDs()) + sel.AddPIDs(1, 2, 3) + sel.AddPIDs(2, 4) + sel.RemovePIDs(2) + sel.RemovePIDs(99) + pids, ok := sel.GetPIDs() + require.True(t, ok) + assert.Equal(t, []app.PID{1, 3, 4}, pids) } diff --git a/pkg/internal/testutil/channels.go b/pkg/internal/testutil/channels.go index d6b9196d52..719fdee19d 100644 --- a/pkg/internal/testutil/channels.go +++ b/pkg/internal/testutil/channels.go @@ -35,3 +35,13 @@ func ChannelEmpty[T any](t *testing.T, inCh <-chan T, timeout time.Duration) { // ok, channel is empty! } } + +// DrainUntilClosed reads from ch until it is closed. Use when a producer (e.g. a goroutine) closes +// its output on exit and you need to wait for it to finish before leaving the test. +func DrainUntilClosed[T any](ch <-chan T) { + for { + if _, ok := <-ch; !ok { + return + } + } +} diff --git a/pkg/pipe/global/context.go b/pkg/pipe/global/context.go index 777af6ad73..11d1223d0e 100644 --- a/pkg/pipe/global/context.go +++ b/pkg/pipe/global/context.go @@ -64,8 +64,8 @@ type ContextInfo struct { type AppO11y struct { // ReportRoutes sets whether the metrics should set the http.route attribute ReportRoutes bool - // OnTargetPIDsUpdaterReady, when set, is called with the App O11y instrumenter after it is created. - // Callers receive that instrumenter (it implements TargetPIDsUpdater by default) and may store it - // to add or remove target PIDs at runtime. - OnTargetPIDsUpdaterReady func(any) + // DynamicPIDSelector, when set, is the runtime PID set used for discovery. The caller creates it + // (e.g. discover.NewDynamicPIDSelector()), passes it via instrumenter.WithDynamicPIDSelector, and + // calls AddPIDs/RemovePIDs/GetPIDs on it directly. The instrumenter does not implement an updater interface. + DynamicPIDSelector any } From 6c84c6e7b57425a7750d83b2c01fcd270b544244 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 13 Mar 2026 14:53:21 -0400 Subject: [PATCH 4/7] Notify watcher to forget already seen pids on dynamic add --- pkg/appolly/discover/dynamic_pid_selector.go | 28 ++++++++++- .../discover/dynamic_pid_selector_test.go | 21 ++++++++ pkg/appolly/discover/finder.go | 7 ++- pkg/appolly/discover/watcher_proc.go | 50 ++++++++++++++++--- pkg/appolly/discover/watcher_proc_test.go | 47 +++++++++++++++++ 5 files changed, 143 insertions(+), 10 deletions(-) diff --git a/pkg/appolly/discover/dynamic_pid_selector.go b/pkg/appolly/discover/dynamic_pid_selector.go index c71c719f48..696ce703ea 100644 --- a/pkg/appolly/discover/dynamic_pid_selector.go +++ b/pkg/appolly/discover/dynamic_pid_selector.go @@ -20,12 +20,14 @@ type DynamicPIDSelector struct { mu sync.RWMutex pids []uint32 removedCh chan []app.PID // owned by selector; RemovedNotify() returns receive-only view + addedCh chan []app.PID // owned by selector; AddedPIDsNotify() returns receive-only view } // NewDynamicPIDSelector creates a new dynamic PID selector (initially empty). func NewDynamicPIDSelector() *DynamicPIDSelector { return &DynamicPIDSelector{ removedCh: make(chan []app.PID, 1), + addedCh: make(chan []app.PID, 1), } } @@ -35,6 +37,13 @@ func (d *DynamicPIDSelector) RemovedNotify() <-chan []app.PID { return d.removedCh } +// AddedPIDsNotify returns the channel on which newly added PIDs are sent when AddPIDs is called. +// The process watcher uses this to forget those PIDs from its tracked state so they are re-emitted +// as new on the next poll (supporting adding an already-seen process to the dynamic set). +func (d *DynamicPIDSelector) AddedPIDsNotify() <-chan []app.PID { + return d.addedCh +} + // GetPIDs returns a copy of the current PID list and true when non-empty. func (d *DynamicPIDSelector) GetPIDs() ([]app.PID, bool) { d.mu.RLock() @@ -49,23 +58,27 @@ func (d *DynamicPIDSelector) GetPIDs() ([]app.PID, bool) { return out, true } -// AddPIDs adds PIDs to the set (deduplicated). +// AddPIDs adds PIDs to the set (deduplicated). Newly added PIDs are sent on AddedPIDsNotify() +// so the process watcher can forget them and re-emit them as new on the next poll. func (d *DynamicPIDSelector) AddPIDs(pids ...uint32) { if len(pids) == 0 { return } d.mu.Lock() - defer d.mu.Unlock() existing := make(map[uint32]struct{}, len(d.pids)) for _, p := range d.pids { existing[p] = struct{}{} } + var added []app.PID for _, u := range pids { if _, ok := existing[u]; !ok { existing[u] = struct{}{} d.pids = append(d.pids, u) + added = append(added, app.PID(u)) } } + d.mu.Unlock() + d.notifyAdded(added) } // RemovePIDs removes PIDs from the set and sends them on RemovedNotify() for the matcher. @@ -103,6 +116,17 @@ func (d *DynamicPIDSelector) notifyRemoved(removedPIDs []app.PID) { } } +func (d *DynamicPIDSelector) notifyAdded(addedPIDs []app.PID) { + if len(addedPIDs) == 0 { + return + } + select { + case d.addedCh <- addedPIDs: + default: + // no receiver (e.g. watcher not running); drop so AddPIDs never blocks + } +} + // AsSelector returns a services.Selector that matches when the process PID is in this dynamic set. // The matcher uses it to treat runtime PIDs as a supplement to config criteria. func (d *DynamicPIDSelector) AsSelector() services.Selector { diff --git a/pkg/appolly/discover/dynamic_pid_selector_test.go b/pkg/appolly/discover/dynamic_pid_selector_test.go index 4e02322d46..a23494d78c 100644 --- a/pkg/appolly/discover/dynamic_pid_selector_test.go +++ b/pkg/appolly/discover/dynamic_pid_selector_test.go @@ -52,3 +52,24 @@ func TestDynamicPIDSelector_RemovePIDs_Notify(t *testing.T) { got = <-ch assert.Equal(t, []app.PID{42}, got) } + +func TestDynamicPIDSelector_AddPIDs_Notify(t *testing.T) { + d := NewDynamicPIDSelector() + ch := d.AddedPIDsNotify() + + d.AddPIDs(42, 100) + got := <-ch + assert.Equal(t, []app.PID{42, 100}, got) + + // Adding already-present PIDs does not notify + d.AddPIDs(42) + select { + case <-ch: + t.Fatal("expected no send when adding existing PID") + default: + } + // New PIDs only + d.AddPIDs(42, 99) + got = <-ch + assert.Equal(t, []app.PID{99}, got) +} diff --git a/pkg/appolly/discover/finder.go b/pkg/appolly/discover/finder.go index ad4e91d9c1..b61f3e3e88 100644 --- a/pkg/appolly/discover/finder.go +++ b/pkg/appolly/discover/finder.go @@ -7,6 +7,7 @@ import ( "context" "fmt" + "go.opentelemetry.io/obi/pkg/appolly/app" "go.opentelemetry.io/obi/pkg/appolly/app/request" "go.opentelemetry.io/obi/pkg/ebpf" ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" @@ -82,7 +83,11 @@ func (pf *ProcessFinder) Start(ctx context.Context, opts ...ProcessFinderStartOp swi := swarm.Instancer{} processEvents := msgh.QueueFromConfig[[]Event[ProcessAttrs]](pf.cfg, "processEvents") - swi.Add(swarm.DirectInstance(ProcessWatcherFunc(pf.cfg, pf.ebpfEventContext, processEvents, configCriteria)), + var addedPIDsCh <-chan []app.PID + if startConfig.dynamicPIDSelector != nil { + addedPIDsCh = startConfig.dynamicPIDSelector.AddedPIDsNotify() + } + swi.Add(swarm.DirectInstance(ProcessWatcherFunc(pf.cfg, pf.ebpfEventContext, processEvents, configCriteria, addedPIDsCh)), swarm.WithID("ProcessWatcher")) kubeEnrichedEvents := msgh.QueueFromConfig[[]Event[ProcessAttrs]](pf.cfg, "kubeEnrichedEvents") diff --git a/pkg/appolly/discover/watcher_proc.go b/pkg/appolly/discover/watcher_proc.go index 0987f839d4..ad4742c21e 100644 --- a/pkg/appolly/discover/watcher_proc.go +++ b/pkg/appolly/discover/watcher_proc.go @@ -68,7 +68,10 @@ func wplog() *slog.Logger { // ProcessWatcherFunc polls every PollInterval for new processes and forwards either new or deleted process PIDs // as well as PIDs from processes that setup a new connection. -func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContext, output *msg.Queue[[]Event[ProcessAttrs]], findingCriteria []services.Selector) swarm.RunFunc { +// When addedPIDsNotify is non-nil, the watcher receives PIDs that were added to the dynamic selector and +// forgets them from its tracked state so they are re-emitted as new on the next poll (supporting adding +// an already-seen process). +func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContext, output *msg.Queue[[]Event[ProcessAttrs]], findingCriteria []services.Selector, addedPIDsNotify <-chan []app.PID) swarm.RunFunc { acc := pollAccounter{ cfg: cfg, output: output, @@ -84,6 +87,7 @@ func ProcessWatcherFunc(cfg *obi.Config, ebpfContext *ebpfcommon.EBPFEventContex stateMux: sync.Mutex{}, findingCriteria: findingCriteria, ebpfContext: ebpfContext, + addedPIDsNotify: addedPIDsNotify, } if acc.interval == 0 { acc.interval = defaultPollInterval @@ -121,6 +125,8 @@ type pollAccounter struct { findingCriteria []services.Selector output *msg.Queue[[]Event[ProcessAttrs]] ebpfContext *ebpfcommon.EBPFEventContext + // when non-nil, PIDs received here are removed from pids/pidPorts so they are re-emitted as new on next poll + addedPIDsNotify <-chan []app.PID } func (pa *pollAccounter) run(ctx context.Context) { @@ -154,12 +160,42 @@ func (pa *pollAccounter) run(ctx context.Context) { pa.output.Send(events) } } - select { - case <-ctx.Done(): - log.Debug("context canceled. Exiting") - return - case <-time.After(pa.interval): - // poll event starting again + switch { + case pa.addedPIDsNotify != nil: + select { + case <-ctx.Done(): + log.Debug("context canceled. Exiting") + return + case pids := <-pa.addedPIDsNotify: + pa.forgetPIDs(pids) + log.Debug("forgot PIDs so they can be re-emitted as new", "pids", pids) + case <-time.After(pa.interval): + // poll again + } + default: + select { + case <-ctx.Done(): + log.Debug("context canceled. Exiting") + return + case <-time.After(pa.interval): + // poll again + } + } + } +} + +// forgetPIDs removes the given PIDs from the watcher's tracked state so they will be +// reported as new on the next poll (e.g. when added to the dynamic PID selector). +func (pa *pollAccounter) forgetPIDs(pids []app.PID) { + for _, pid := range pids { + delete(pa.pids, pid) + } + for pp := range pa.pidPorts { + for _, pid := range pids { + if pp.Pid == pid { + delete(pa.pidPorts, pp) + break + } } } } diff --git a/pkg/appolly/discover/watcher_proc_test.go b/pkg/appolly/discover/watcher_proc_test.go index 39c24c9d3e..d8448bf766 100644 --- a/pkg/appolly/discover/watcher_proc_test.go +++ b/pkg/appolly/discover/watcher_proc_test.go @@ -328,3 +328,50 @@ func TestMinProcessAge(t *testing.T) { assert.True(t, ok) assert.False(t, acc.processTooNew(process)) } + +// TestForgetPIDs_ReemitsExistingProcess verifies that when a PID was already seen by the watcher, +// sending it on addedPIDsNotify (forget) causes the next poll to emit EventCreated again. +// This supports the use case of adding an existing process to the dynamic PID selector. +func TestForgetPIDs_ReemitsExistingProcess(t *testing.T) { + p1 := ProcessAttrs{pid: 1, openPorts: []uint32{3030}} + p2 := ProcessAttrs{pid: 2, openPorts: []uint32{123}} + addedCh := make(chan []app.PID, 1) + acc := pollAccounter{ + interval: time.Hour, + cfg: &obi.Config{}, + pids: map[app.PID]ProcessAttrs{}, + pidPorts: map[pidPort]ProcessAttrs{}, + listProcesses: func(bool) (map[app.PID]ProcessAttrs, error) { + return map[app.PID]ProcessAttrs{p1.pid: p1, p2.pid: p2}, nil + }, + executableReady: func(app.PID) (string, bool) { + return "", true + }, + loadBPFWatcher: func(context.Context, *ebpfcommon.EBPFEventContext, *obi.Config, chan<- watcher.Event) error { + return nil + }, + loadBPFLogger: func(context.Context, *ebpfcommon.EBPFEventContext, *obi.Config) error { + return nil + }, + addedPIDsNotify: addedCh, + } + procs, err := acc.listProcesses(false) + require.NoError(t, err) + events := acc.snapshot(procs) + require.Len(t, events, 2) + assert.Equal(t, EventCreated, sort(events)[0].Type) + assert.Equal(t, EventCreated, sort(events)[1].Type) + // Second snapshot: no new events (already seen) + events2 := acc.snapshot(procs) + assert.Empty(t, events2) + // Forget p1 only + acc.forgetPIDs([]app.PID{1}) + // Next snapshot: p1 should appear as created again + events3 := acc.snapshot(procs) + require.Len(t, events3, 1) + assert.Equal(t, EventCreated, events3[0].Type) + assert.Equal(t, app.PID(1), events3[0].Obj.pid) + // p2 still not re-emitted + events4 := acc.snapshot(procs) + assert.Empty(t, events4) +} From 23dcb8c9f1be911632cc5b5399502b73e0eb6d1b Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 13 Mar 2026 16:13:32 -0400 Subject: [PATCH 5/7] Add/remove buffer queues and make dynamic criteria once --- pkg/appolly/discover/dynamic_pid_selector.go | 71 +++++++++++++++---- .../discover/dynamic_pid_selector_test.go | 22 ++++++ pkg/appolly/discover/matcher.go | 16 ++--- 3 files changed, 86 insertions(+), 23 deletions(-) diff --git a/pkg/appolly/discover/dynamic_pid_selector.go b/pkg/appolly/discover/dynamic_pid_selector.go index 696ce703ea..7863d1c089 100644 --- a/pkg/appolly/discover/dynamic_pid_selector.go +++ b/pkg/appolly/discover/dynamic_pid_selector.go @@ -16,19 +16,36 @@ import ( // config target_pids and updated at runtime via AddPIDs/RemovePIDs. Only the discover // matcher uses it for matching; the instrumenter (or appolly) holds a reference and // calls AddPIDs/RemovePIDs directly. +// +// Pending add/remove PIDs are accumulated in slices and drained by goroutines into +// RemovedNotify() and AddedPIDsNotify(), so callers never block and nothing is dropped. type DynamicPIDSelector struct { - mu sync.RWMutex - pids []uint32 - removedCh chan []app.PID // owned by selector; RemovedNotify() returns receive-only view - addedCh chan []app.PID // owned by selector; AddedPIDsNotify() returns receive-only view + mu sync.RWMutex + pids []uint32 + + removedCh chan []app.PID // consumer receives from this + removedPending []app.PID // PIDs to send on next drain + removedMu sync.Mutex + removedCond *sync.Cond + + addedCh chan []app.PID // consumer receives from this + addedPending []app.PID // PIDs to send on next drain + addedMu sync.Mutex + addedCond *sync.Cond } // NewDynamicPIDSelector creates a new dynamic PID selector (initially empty). +// It starts goroutines that drain pending add/remove PIDs to the notify channels. func NewDynamicPIDSelector() *DynamicPIDSelector { - return &DynamicPIDSelector{ + d := &DynamicPIDSelector{ removedCh: make(chan []app.PID, 1), addedCh: make(chan []app.PID, 1), } + d.removedCond = sync.NewCond(&d.removedMu) + d.addedCond = sync.NewCond(&d.addedMu) + go d.drainRemoved() + go d.drainAdded() + return d } // RemovedNotify returns the channel on which removed PIDs are sent when RemovePIDs is called. @@ -109,21 +126,47 @@ func (d *DynamicPIDSelector) notifyRemoved(removedPIDs []app.PID) { if len(removedPIDs) == 0 { return } - select { - case d.removedCh <- removedPIDs: - default: - // no receiver (e.g. matcher not running); drop so RemovePIDs never blocks - } + d.removedMu.Lock() + d.removedPending = append(d.removedPending, removedPIDs...) + d.removedCond.Signal() + d.removedMu.Unlock() } func (d *DynamicPIDSelector) notifyAdded(addedPIDs []app.PID) { if len(addedPIDs) == 0 { return } - select { - case d.addedCh <- addedPIDs: - default: - // no receiver (e.g. watcher not running); drop so AddPIDs never blocks + d.addedMu.Lock() + d.addedPending = append(d.addedPending, addedPIDs...) + d.addedCond.Signal() + d.addedMu.Unlock() +} + +// drainRemoved runs in a goroutine; it sends the current pending removed PIDs and clears the slice. +func (d *DynamicPIDSelector) drainRemoved() { + for { + d.removedMu.Lock() + for len(d.removedPending) == 0 { + d.removedCond.Wait() + } + batch := append([]app.PID(nil), d.removedPending...) + d.removedPending = d.removedPending[:0] + d.removedMu.Unlock() + d.removedCh <- batch + } +} + +// drainAdded runs in a goroutine; it sends the current pending added PIDs and clears the slice. +func (d *DynamicPIDSelector) drainAdded() { + for { + d.addedMu.Lock() + for len(d.addedPending) == 0 { + d.addedCond.Wait() + } + batch := append([]app.PID(nil), d.addedPending...) + d.addedPending = d.addedPending[:0] + d.addedMu.Unlock() + d.addedCh <- batch } } diff --git a/pkg/appolly/discover/dynamic_pid_selector_test.go b/pkg/appolly/discover/dynamic_pid_selector_test.go index a23494d78c..becb3f0aa0 100644 --- a/pkg/appolly/discover/dynamic_pid_selector_test.go +++ b/pkg/appolly/discover/dynamic_pid_selector_test.go @@ -73,3 +73,25 @@ func TestDynamicPIDSelector_AddPIDs_Notify(t *testing.T) { got = <-ch assert.Equal(t, []app.PID{99}, got) } + +// TestDynamicPIDSelector_QueueNoDrop verifies that rapid AddPIDs/RemovePIDs accumulate +// in a single pending slice and are sent together when the consumer drains (no drops). +func TestDynamicPIDSelector_QueueNoDrop(t *testing.T) { + d := NewDynamicPIDSelector() + d.AddPIDs(1, 2, 3, 4) + removedCh := d.RemovedNotify() + addedCh := d.AddedPIDsNotify() + + // Drain the initial AddPIDs(1,2,3,4) + <-addedCh + + d.RemovePIDs(1) + d.RemovePIDs(2, 3) + gotRemoved := <-removedCh + assert.ElementsMatch(t, []app.PID{1, 2, 3}, gotRemoved) + + d.AddPIDs(10, 20) + d.AddPIDs(30) + gotAdded := <-addedCh + assert.ElementsMatch(t, []app.PID{10, 20, 30}, gotAdded) +} diff --git a/pkg/appolly/discover/matcher.go b/pkg/appolly/discover/matcher.go index 723c969bf7..d063c00a0e 100644 --- a/pkg/appolly/discover/matcher.go +++ b/pkg/appolly/discover/matcher.go @@ -41,12 +41,14 @@ func criteriaMatcherProvider( ) swarm.InstanceFunc { instrumenterNamespace, _ := namespaceFetcherFunc(app.PID(osPidFunc())) var removedNotify <-chan []app.PID + criteria := configCriteria if dynamicSelector != nil { removedNotify = dynamicSelector.RemovedNotify() + criteria = append([]services.Selector{dynamicSelector.AsSelector()}, configCriteria...) } m := &Matcher{ Log: slog.With("component", "discover.CriteriaMatcher"), - Criteria: configCriteria, + Criteria: criteria, ExcludeCriteria: ExcludingCriteria(cfg), LogEnricherCriteria: LogEnricherFindingCriteria(cfg), ProcessHistory: map[app.PID]ProcessMatch{}, @@ -180,14 +182,10 @@ func (m *Matcher) alreadyMatched(pid app.PID) bool { } func (m *Matcher) matchCriteria(obj ProcessAttrs, proc *services.ProcessInfo) *ProcessMatch { - effectiveCriteria := m.Criteria - if m.DynamicPIDs != nil { - effectiveCriteria = append([]services.Selector{m.DynamicPIDs.AsSelector()}, m.Criteria...) - } - criteria := make([]services.Selector, 0, len(effectiveCriteria)) - for i := range effectiveCriteria { - if m.matchProcess(&obj, proc, effectiveCriteria[i]) && !m.isExcluded(&obj, proc) { - criteria = append(criteria, effectiveCriteria[i]) + criteria := make([]services.Selector, 0, len(m.Criteria)) + for i := range m.Criteria { + if m.matchProcess(&obj, proc, m.Criteria[i]) && !m.isExcluded(&obj, proc) { + criteria = append(criteria, m.Criteria[i]) } } From 218366baa3cff1c42544a1e9e54dd2214512ce49 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 13 Mar 2026 16:37:16 -0400 Subject: [PATCH 6/7] Remove redundant runWithNotify/swarms call --- pkg/appolly/discover/matcher.go | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/pkg/appolly/discover/matcher.go b/pkg/appolly/discover/matcher.go index d063c00a0e..39dd78ef37 100644 --- a/pkg/appolly/discover/matcher.go +++ b/pkg/appolly/discover/matcher.go @@ -20,7 +20,6 @@ import ( "go.opentelemetry.io/obi/pkg/obi" "go.opentelemetry.io/obi/pkg/pipe/msg" "go.opentelemetry.io/obi/pkg/pipe/swarm" - "go.opentelemetry.io/obi/pkg/pipe/swarm/swarms" ) var ( @@ -98,21 +97,6 @@ func (pm ProcessMatch) LogEnricherEnabled() bool { func (m *Matcher) Run(ctx context.Context) { defer m.Output.Close() m.Log.Debug("starting criteria matcher node") - if m.RemovedPIDsNotify != nil { - m.runWithNotify(ctx) - return - } - swarms.ForEachInput(ctx, m.Input, m.Log.Debug, func(i []Event[ProcessAttrs]) { - m.Log.Debug("filtering processes", "len", len(i)) - o := m.filter(i) - m.Log.Debug("processes matching selection criteria", "len", len(o)) - if len(o) > 0 { - m.Output.Send(o) - } - }) -} - -func (m *Matcher) runWithNotify(ctx context.Context) { for { select { case <-ctx.Done(): From c401fcd722db8b2646d5f26bcf766c04a286b7e9 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 13 Mar 2026 17:07:01 -0400 Subject: [PATCH 7/7] Refactor forgetPidsNotify --- pkg/appolly/discover/watcher_proc.go | 49 ++++++++++++++++++---------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/pkg/appolly/discover/watcher_proc.go b/pkg/appolly/discover/watcher_proc.go index ad4742c21e..291b8ecca8 100644 --- a/pkg/appolly/discover/watcher_proc.go +++ b/pkg/appolly/discover/watcher_proc.go @@ -127,6 +127,8 @@ type pollAccounter struct { ebpfContext *ebpfcommon.EBPFEventContext // when non-nil, PIDs received here are removed from pids/pidPorts so they are re-emitted as new on next poll addedPIDsNotify <-chan []app.PID + // pidsMu protects pids and pidPorts so the addedPIDsNotify goroutine can call forgetPIDs while snapshot runs + pidsMu sync.Mutex } func (pa *pollAccounter) run(ctx context.Context) { @@ -150,6 +152,10 @@ func (pa *pollAccounter) run(ctx context.Context) { go pa.watchForProcessEvents(ctx, log, bpfWatchEvents) + if pa.addedPIDsNotify != nil { + go pa.runAddedPIDsNotify(ctx, log) + } + for { procs, err := pa.listProcesses(pa.portFetchRequired()) if err != nil { @@ -160,26 +166,29 @@ func (pa *pollAccounter) run(ctx context.Context) { pa.output.Send(events) } } - switch { - case pa.addedPIDsNotify != nil: - select { - case <-ctx.Done(): - log.Debug("context canceled. Exiting") - return - case pids := <-pa.addedPIDsNotify: - pa.forgetPIDs(pids) - log.Debug("forgot PIDs so they can be re-emitted as new", "pids", pids) - case <-time.After(pa.interval): - // poll again - } - default: - select { - case <-ctx.Done(): - log.Debug("context canceled. Exiting") + select { + case <-ctx.Done(): + log.Debug("context canceled. Exiting") + return + case <-time.After(pa.interval): + // poll again + } + } +} + +// runAddedPIDsNotify runs in a goroutine; it receives PIDs added to the dynamic selector +// and calls forgetPIDs so they are re-emitted as new on the next poll. +func (pa *pollAccounter) runAddedPIDsNotify(ctx context.Context, log *slog.Logger) { + for { + select { + case <-ctx.Done(): + return + case pids, ok := <-pa.addedPIDsNotify: + if !ok { return - case <-time.After(pa.interval): - // poll again } + pa.forgetPIDs(pids) + log.Debug("forgot PIDs so they can be re-emitted as new", "pids", pids) } } } @@ -187,6 +196,8 @@ func (pa *pollAccounter) run(ctx context.Context) { // forgetPIDs removes the given PIDs from the watcher's tracked state so they will be // reported as new on the next poll (e.g. when added to the dynamic PID selector). func (pa *pollAccounter) forgetPIDs(pids []app.PID) { + pa.pidsMu.Lock() + defer pa.pidsMu.Unlock() for _, pid := range pids { delete(pa.pids, pid) } @@ -264,6 +275,8 @@ func (pa *pollAccounter) processTooNew(proc ProcessAttrs) bool { // snapshot compares the current processes with the status of the previous poll // and forwards a list of process creation/deletion events func (pa *pollAccounter) snapshot(fetchedProcs map[app.PID]ProcessAttrs) []Event[ProcessAttrs] { + pa.pidsMu.Lock() + defer pa.pidsMu.Unlock() log := wplog() var events []Event[ProcessAttrs] currentPidPorts := make(map[pidPort]ProcessAttrs, len(fetchedProcs))