From 23d3d2fdcc73405738a14f29823c6f219ce74aab Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 11:13:16 +0200 Subject: [PATCH 01/25] test(component): port Test_28 to upstream's unified user-overlay label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream PR #788 (Replace AP and NN cache with CP) collapsed the two legacy workload-keyed caches into a single ContainerProfileCache that reads ONE pod label `kubescape.io/user-defined-profile=` and uses as the lookup key for BOTH the user ApplicationProfile and the user NetworkNeighborhood. The fork's earlier two-label scheme (`user-defined-profile` for AP + separate `user-defined-network` for NN, with potentially different names) is no longer honored — the second label is silently ignored. Port: - tests/resources/nginx-user-defined-deployment.yaml: drop the `user-defined-network` label, point the surviving label at one shared name `curl-28-overlay`. - tests/component_test.go Test_28_UserDefinedNetworkNeighborhood: create both AP and NN under that single shared name. Assertions unchanged — the test still verifies that the user NN's egress restriction (only fusioncore.ai allowed on TCP/80) is enforced once the pod is running. Verified locally: go vet -tags=component ./tests/... clean; go test -tags=component -run='^$' ./tests/... compiles cleanly. --- tests/component_test.go | 14 ++++++++++---- tests/resources/nginx-user-defined-deployment.yaml | 7 +++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/component_test.go b/tests/component_test.go index 621839f22..1d97e422c 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2175,9 +2175,15 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { k8sClient := k8sinterface.NewKubernetesApi() storageClient := spdxv1beta1client.NewForConfigOrDie(k8sClient.K8SConfig) + // Upstream ContainerProfileCache (kubescape/node-agent#788) reads ONE + // pod label `kubescape.io/user-defined-profile=` and uses + // as the lookup key for BOTH the user AP and the user NN. + // AP and NN MUST therefore share that single name. + const overlayName = "curl-28-overlay" + ap := &v1beta1.ApplicationProfile{ ObjectMeta: metav1.ObjectMeta{ - Name: "curl-ap", + Name: overlayName, Namespace: ns.Name, }, Spec: v1beta1.ApplicationProfileSpec{ @@ -2201,7 +2207,7 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { nn := &v1beta1.NetworkNeighborhood{ ObjectMeta: metav1.ObjectMeta{ - Name: "curl-nn", + Name: overlayName, Namespace: ns.Name, Annotations: map[string]string{ helpersv1.ManagedByMetadataKey: helpersv1.ManagedByUserValue, @@ -2244,8 +2250,8 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { require.NoError(t, err, "create NN") require.Eventually(t, func() bool { - _, apErr := storageClient.ApplicationProfiles(ns.Name).Get(context.Background(), "curl-ap", v1.GetOptions{}) - _, nnErr := storageClient.NetworkNeighborhoods(ns.Name).Get(context.Background(), "curl-nn", v1.GetOptions{}) + _, apErr := storageClient.ApplicationProfiles(ns.Name).Get(context.Background(), overlayName, v1.GetOptions{}) + _, nnErr := storageClient.NetworkNeighborhoods(ns.Name).Get(context.Background(), overlayName, v1.GetOptions{}) return apErr == nil && nnErr == nil }, 30*time.Second, 1*time.Second, "AP+NN must be in storage before pod deploy") diff --git a/tests/resources/nginx-user-defined-deployment.yaml b/tests/resources/nginx-user-defined-deployment.yaml index 8e68df16b..c21c6b080 100644 --- a/tests/resources/nginx-user-defined-deployment.yaml +++ b/tests/resources/nginx-user-defined-deployment.yaml @@ -13,8 +13,11 @@ spec: metadata: labels: app: curl-28 - kubescape.io/user-defined-network: curl-nn - kubescape.io/user-defined-profile: curl-ap + # Upstream ContainerProfileCache (kubescape/node-agent#788) reads ONE + # label and uses its value as the name of BOTH the user-defined + # ApplicationProfile and the user-defined NetworkNeighborhood. The + # AP and NN below MUST share the same name as this label value. + kubescape.io/user-defined-profile: curl-28-overlay spec: containers: - name: curl From a83bb695ae125e96b8e8323d41d12030b491ca7c Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 11:23:02 +0200 Subject: [PATCH 02/25] feat(cel): re-port CompareExecArgs hookup onto upstream's CP cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #35's wildcard-aware exec arg matching needs reapplication on top of the new ContainerProfileCache (upstream #788) baseline. The original PR sat on the legacy applicationprofilecache, which has been deleted; the call site now reads cp.Spec.Execs from a ContainerProfile. Same semantic change as PR #35: '⋯' (DynamicIdentifier) — matches exactly one argument position. '*' (WildcardIdentifier) — matches zero or more consecutive args. Wiring: - pkg/rulemanager/cel/libraries/applicationprofile/exec.go: drop slices.Compare exact-equality on the cp.Spec.Execs loop; route through dynamicpathdetector.CompareExecArgs. - go.mod: bump fork storage replace to feat/exec-arg-wildcards tip (3fc287210729) which carries the matcher. - exec_test.go: re-add TestExecWithArgsWildcardInProfile (13 subtests across curl --user ⋯, sh -c *, ls -l ⋯, echo hello *, plus negative literal-anchor / under-consumed-⋯ / mid-profile-* cases). Mirrors the test set that lived on PR #35 before the upstream merge. Verified: full applicationprofile package green (`go test ./pkg/rulemanager/cel/libraries/applicationprofile/`). --- go.mod | 2 +- go.sum | 4 +- .../cel/libraries/applicationprofile/exec.go | 9 +- .../libraries/applicationprofile/exec_test.go | 121 ++++++++++++++++++ 4 files changed, 127 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index c74fe99f0..63a0c19be 100644 --- a/go.mod +++ b/go.mod @@ -507,4 +507,4 @@ replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspe replace github.com/cilium/ebpf => github.com/matthyx/ebpf v0.0.0-20260421101317-8a32d06def6c -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260429052903-0e0366026f05 +replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260429084127-3fc287210729 diff --git a/go.sum b/go.sum index bbfd3daaf..86f43cedb 100644 --- a/go.sum +++ b/go.sum @@ -981,8 +981,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k8sstormcenter/storage v0.0.240-0.20260429052903-0e0366026f05 h1:RCEcduxCntYAuo8BleZu84Kk//X0gvsGrutQtdcLMn0= -github.com/k8sstormcenter/storage v0.0.240-0.20260429052903-0e0366026f05/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= +github.com/k8sstormcenter/storage v0.0.240-0.20260429084127-3fc287210729 h1:KzutjOD6Ohl61rlq6SBbQ2q8YbWpxMpaV4X09owobi4= +github.com/k8sstormcenter/storage v0.0.240-0.20260429084127-3fc287210729/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= diff --git a/pkg/rulemanager/cel/libraries/applicationprofile/exec.go b/pkg/rulemanager/cel/libraries/applicationprofile/exec.go index 25b92f236..e02e1524c 100644 --- a/pkg/rulemanager/cel/libraries/applicationprofile/exec.go +++ b/pkg/rulemanager/cel/libraries/applicationprofile/exec.go @@ -1,8 +1,6 @@ package applicationprofile import ( - "slices" - "github.com/google/cel-go/common/types" "github.com/google/cel-go/common/types/ref" @@ -11,6 +9,7 @@ import ( "github.com/kubescape/node-agent/pkg/rulemanager/cel/libraries/cache" "github.com/kubescape/node-agent/pkg/rulemanager/cel/libraries/celparse" "github.com/kubescape/node-agent/pkg/rulemanager/profilehelper" + "github.com/kubescape/storage/pkg/registry/file/dynamicpathdetector" ) func (l *apLibrary) wasExecuted(containerID, path ref.Val) ref.Val { @@ -85,10 +84,8 @@ func (l *apLibrary) wasExecutedWithArgs(containerID, path, args ref.Val) ref.Val } for _, exec := range cp.Spec.Execs { - if exec.Path == pathStr { - if slices.Compare(exec.Args, celArgs) == 0 { - return types.Bool(true) - } + if exec.Path == pathStr && dynamicpathdetector.CompareExecArgs(exec.Args, celArgs) { + return types.Bool(true) } } diff --git a/pkg/rulemanager/cel/libraries/applicationprofile/exec_test.go b/pkg/rulemanager/cel/libraries/applicationprofile/exec_test.go index 8821e7bdf..8b2d0d0ff 100644 --- a/pkg/rulemanager/cel/libraries/applicationprofile/exec_test.go +++ b/pkg/rulemanager/cel/libraries/applicationprofile/exec_test.go @@ -299,6 +299,127 @@ func TestExecWithArgsNoProfile(t *testing.T) { assert.False(t, actualResult, "ap.was_executed_with_args should return false when no profile is available") } +// TestExecWithArgsWildcardInProfile exercises wildcard tokens inside a +// user-defined ApplicationProfile's exec arg vector: +// +// "⋯" (DynamicIdentifier) — matches exactly one argument position. +// "*" (WildcardIdentifier) — matches zero or more consecutive args. +// +// The runtime exec arg vector is matched against the profile via +// dynamicpathdetector.CompareExecArgs (added in +// k8sstormcenter/storage#23 — the matcher that this CEL function now +// routes through instead of slices.Compare). +func TestExecWithArgsWildcardInProfile(t *testing.T) { + objCache := objectcachev1.RuleObjectCacheMock{ + ContainerIDToSharedData: maps.NewSafeMap[string, *objectcache.WatchedContainerData](), + } + + objCache.SetSharedContainerData("test-container-id", &objectcache.WatchedContainerData{ + ContainerType: objectcache.Container, + ContainerInfos: map[objectcache.ContainerType][]objectcache.ContainerInfo{ + objectcache.Container: { + { + Name: "test-container", + }, + }, + }, + }) + + profile := &v1beta1.ApplicationProfile{} + profile.Spec.Containers = append(profile.Spec.Containers, v1beta1.ApplicationProfileContainer{ + Name: "test-container", + Execs: []v1beta1.ExecCalls{ + // curl any URL: --user must be literal, value is one position. + { + Path: "/usr/bin/curl", + Args: []string{"--user", "⋯"}, + }, + // sh -c with any trailing payload (zero or more args). + { + Path: "/bin/sh", + Args: []string{"-c", "*"}, + }, + // ls -l in any directory — single trailing position. + { + Path: "/bin/ls", + Args: []string{"-l", "⋯"}, + }, + // echo with any number of greeting words after a literal anchor. + { + Path: "/bin/echo", + Args: []string{"hello", "*"}, + }, + }, + }) + objCache.SetApplicationProfile(profile) + + env, err := cel.NewEnv( + cel.Variable("containerID", cel.StringType), + cel.Variable("path", cel.StringType), + cel.Variable("args", cel.ListType(cel.StringType)), + AP(&objCache, config.Config{}), + ) + if err != nil { + t.Fatalf("failed to create env: %v", err) + } + + testCases := []struct { + name string + path string + args []string + expectedResult bool + }{ + // curl with --user, dynamic value + {"curl --user alice — ⋯ matches one arg", "/usr/bin/curl", []string{"--user", "alice"}, true}, + {"curl --user alice bob — extra arg, ⋯ rejects", "/usr/bin/curl", []string{"--user", "alice", "bob"}, false}, + {"curl --user — missing value, ⋯ requires one arg", "/usr/bin/curl", []string{"--user"}, false}, + {"curl --pass alice — literal mismatch", "/usr/bin/curl", []string{"--pass", "alice"}, false}, + + // sh -c with arbitrary trailing payload + {"sh -c with single command", "/bin/sh", []string{"-c", "echo hi"}, true}, + {"sh -c with multi-token command", "/bin/sh", []string{"-c", "while", "true;", "do", "sleep", "1;", "done"}, true}, + {"sh -c with no trailing args (* matches zero)", "/bin/sh", []string{"-c"}, true}, + {"sh -x — wrong flag", "/bin/sh", []string{"-x", "echo hi"}, false}, + + // ls -l in any directory + {"ls -l /var/log", "/bin/ls", []string{"-l", "/var/log"}, true}, + {"ls -l with no directory (⋯ requires one)", "/bin/ls", []string{"-l"}, false}, + + // echo hello * + {"echo hello world from test", "/bin/echo", []string{"hello", "world", "from", "test"}, true}, + {"echo hello (no trailing args)", "/bin/echo", []string{"hello"}, true}, + {"echo goodbye world — wrong literal anchor", "/bin/echo", []string{"goodbye", "world"}, false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ast, issues := env.Compile(`ap.was_executed_with_args(containerID, path, args)`) + if issues != nil { + t.Fatalf("failed to compile expression: %v", issues.Err()) + } + + program, err := env.Program(ast) + if err != nil { + t.Fatalf("failed to create program: %v", err) + } + + result, _, err := program.Eval(map[string]interface{}{ + "containerID": "test-container-id", + "path": tc.path, + "args": tc.args, + }) + if err != nil { + t.Fatalf("failed to eval program: %v", err) + } + + actualResult := result.Value().(bool) + assert.Equal(t, tc.expectedResult, actualResult, + "runtime args %v vs profile (one of curl/sh/ls/echo overlay): got %v want %v", + tc.args, actualResult, tc.expectedResult) + }) + } +} + func TestExecWithArgsCompilation(t *testing.T) { objCache := objectcachev1.RuleObjectCacheMock{} From eb0145d280c502461fb28e52795c84ab49b923d9 Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 11:27:58 +0200 Subject: [PATCH 03/25] feat(rules): R0040 'Unexpected process arguments' + Test_32 e2e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R0040 is an additive companion to R0001. It evaluates: ap.was_executed(...) && !ap.was_executed_with_args(..., event.args) so it ONLY fires when the exec'd path IS in the user-defined profile (R0001 stays silent) but the runtime arg vector does not match any profile entry's pattern. With wildcard tokens supported by dynamicpathdetector.CompareExecArgs: '⋯' (DynamicIdentifier) — exactly one argument position. '*' (WildcardIdentifier) — zero or more consecutive args. Use case: profile entry {Path: /bin/sh, Args: [sh, -c, *]} flags 'sh -x ...' as drift while permitting 'sh -c '. Wiring: - tests/chart/templates/node-agent/default-rules.yaml: new R0040 CEL rule definition immediately after R0001, same MITRE tagging (TA0002/T1059) and same applicationprofile-anomaly tag set. - tests/chart/templates/node-agent/default-rule-binding.yaml: R0040 added to the all-rules-all-pods binding next to R0001. - tests/resources/curl-exec-arg-wildcards-deployment.yaml: new fixture, curl pod labelled with the unified kubescape.io/user-defined-profile=curl-32-overlay label. - tests/component_test.go: Test_32_UnexpectedProcessArguments with 4 subtests: 32a sh_dash_c_matches_wildcard_trailing — sh -c matches profile [sh, -c, *] — R0040 silent. 32b sh_dash_x_mismatches_R0040 — sh -x mismatches the literal -c anchor — R0040 fires. 32c echo_hello_matches_wildcard_trailing — echo hello world matches [echo, hello, *] — R0040 silent. 32d echo_goodbye_mismatches_R0040 — echo goodbye world mismatches the literal hello anchor — R0040 fires. Verified locally: go vet -tags=component ./tests/... clean; go test -tags=component -run='^$' ./tests/... compiles cleanly. End-to-end alert assertions run in CI. --- .../node-agent/default-rule-binding.yaml | 1 + .../templates/node-agent/default-rules.yaml | 40 ++++ tests/component_test.go | 183 ++++++++++++++++++ .../curl-exec-arg-wildcards-deployment.yaml | 28 +++ 4 files changed, 252 insertions(+) create mode 100644 tests/resources/curl-exec-arg-wildcards-deployment.yaml diff --git a/tests/chart/templates/node-agent/default-rule-binding.yaml b/tests/chart/templates/node-agent/default-rule-binding.yaml index 26367de97..710deb6e3 100644 --- a/tests/chart/templates/node-agent/default-rule-binding.yaml +++ b/tests/chart/templates/node-agent/default-rule-binding.yaml @@ -15,6 +15,7 @@ spec: - "kubeconfig" rules: - ruleName: "Unexpected process launched" + - ruleName: "Unexpected process arguments" - ruleName: "Files Access Anomalies in container" - ruleName: "Syscalls Anomalies in container" - ruleName: "Linux Capabilities Anomalies in container" diff --git a/tests/chart/templates/node-agent/default-rules.yaml b/tests/chart/templates/node-agent/default-rules.yaml index 4b245d321..e1972f146 100644 --- a/tests/chart/templates/node-agent/default-rules.yaml +++ b/tests/chart/templates/node-agent/default-rules.yaml @@ -31,6 +31,46 @@ spec: - "exec" - "applicationprofile" - "context:kubernetes" + # --------------------------------------------------------------- + # R0040 — Unexpected process arguments + # + # Additive companion to R0001. Fires only when: + # 1. The exec'd path IS in the user-defined ApplicationProfile + # (so R0001 stays silent), AND + # 2. The runtime arg vector does NOT match any profile entry's + # arg pattern via dynamicpathdetector.CompareExecArgs. + # + # Profile arg vectors may carry wildcard tokens: + # "⋯" — exactly one position; "*" — zero or more trailing args. + # Anything else is literal-equality. + # + # Use case: a profile entry like {Path: "/bin/sh", Args: ["-c", "*"]} + # allows `sh -c ` but flags `sh -x ` as drift. + # --------------------------------------------------------------- + - name: "Unexpected process arguments" + enabled: true + id: "R0040" + description: "Process path is allowed by profile but argument vector does not match any profile entry's arg pattern (literal or wildcard ⋯/*)" + expressions: + message: "'Unexpected process arguments: ' + event.comm + ' with PID ' + string(event.pid)" + uniqueId: "event.comm + '_' + event.exepath" + ruleExpression: + - eventType: "exec" + expression: > + ap.was_executed(event.containerId, parse.get_exec_path(event.args, event.comm)) && + !ap.was_executed_with_args(event.containerId, parse.get_exec_path(event.args, event.comm), event.args) + profileDependency: 0 + severity: 1 + supportPolicy: false + isTriggerAlert: true + mitreTactic: "TA0002" + mitreTechnique: "T1059" + tags: + - "anomaly" + - "process" + - "exec" + - "applicationprofile" + - "context:kubernetes" - name: "Files Access Anomalies in container" enabled: true id: "R0002" diff --git a/tests/component_test.go b/tests/component_test.go index 1d97e422c..87e37d5d3 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3157,3 +3157,186 @@ func Test_31_TamperDetectionAlert(t *testing.T) { "R1016 must fire — proves tamper detection alerting works") t.Log("Tamper detection alerting verified successfully") } + +// --------------------------------------------------------------------------- +// Test_32_UnexpectedProcessArguments — component test for the wildcard-aware +// exec-argument matching (R0040). Each subtest gets its own namespace so +// alerts don't cross-contaminate. +// +// AP overlay declares 4 allowed exec patterns for the curl pod: +// +// /bin/sleep [sleep, *] — pod startup, must stay silent +// /bin/sh [sh, -c, *] — sh -c +// /bin/echo [echo, hello, *] — echo hello +// /usr/bin/curl [curl, -s, ⋯] — curl -s +// +// Profile loaded into the new ContainerProfileCache via the unified +// kubescape.io/user-defined-profile= label. The exec.go CEL function +// routes ap.was_executed_with_args through dynamicpathdetector.CompareExecArgs. +// +// R0040 ("Unexpected process arguments") fires when: +// - the exec'd path IS in the profile (R0001 silent), AND +// - the runtime arg vector does NOT match any profile entry's pattern. +// +// Each subtest exec's a single command, then asserts presence/absence of +// R0040 only. R0001 / R0005 / R0011 may also fire on unrelated paths or +// network egress; those are not what this test is gating. +// --------------------------------------------------------------------------- +func Test_32_UnexpectedProcessArguments(t *testing.T) { + start := time.Now() + defer tearDownTest(t, start) + + const overlayName = "curl-32-overlay" + + setup := func(t *testing.T) *testutils.TestWorkload { + t.Helper() + ns := testutils.NewRandomNamespace() + k8sClient := k8sinterface.NewKubernetesApi() + storageClient := spdxv1beta1client.NewForConfigOrDie(k8sClient.K8SConfig) + + ap := &v1beta1.ApplicationProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: overlayName, + Namespace: ns.Name, + }, + Spec: v1beta1.ApplicationProfileSpec{ + Containers: []v1beta1.ApplicationProfileContainer{ + { + Name: "curl", + Execs: []v1beta1.ExecCalls{ + // pod startup: sleep + {Path: "/bin/sleep", Args: []string{"sleep", dynamicpathdetector.WildcardIdentifier}}, + // sh -c + {Path: "/bin/sh", Args: []string{"sh", "-c", dynamicpathdetector.WildcardIdentifier}}, + // echo hello + {Path: "/bin/echo", Args: []string{"echo", "hello", dynamicpathdetector.WildcardIdentifier}}, + // curl -s + {Path: "/usr/bin/curl", Args: []string{"curl", "-s", dynamicpathdetector.DynamicIdentifier}}, + }, + Syscalls: []string{"socket", "connect", "sendto", "recvfrom", "read", "write", "close", "openat", "mmap", "mprotect", "munmap", "fcntl", "ioctl", "poll", "epoll_create1", "epoll_ctl", "epoll_wait", "bind", "listen", "accept4", "getsockopt", "setsockopt", "getsockname", "getpid", "fstat", "rt_sigaction", "rt_sigprocmask", "writev", "execve"}, + }, + }, + }, + } + _, err := storageClient.ApplicationProfiles(ns.Name).Create( + context.Background(), ap, metav1.CreateOptions{}) + require.NoError(t, err, "create AP") + + require.Eventually(t, func() bool { + _, apErr := storageClient.ApplicationProfiles(ns.Name).Get( + context.Background(), overlayName, v1.GetOptions{}) + return apErr == nil + }, 30*time.Second, 1*time.Second, "AP must be in storage before pod deploy") + + wl, err := testutils.NewTestWorkload(ns.Name, + path.Join(utils.CurrentDir(), "resources/curl-exec-arg-wildcards-deployment.yaml")) + require.NoError(t, err) + require.NoError(t, wl.WaitForReady(80)) + // let node-agent load the user AP into the CP cache + time.Sleep(15 * time.Second) + return wl + } + + countByRule := func(alerts []testutils.Alert, ruleID string) int { + n := 0 + for _, a := range alerts { + if a.Labels["rule_id"] == ruleID { + n++ + } + } + return n + } + + waitAlerts := func(t *testing.T, ns string) []testutils.Alert { + t.Helper() + var alerts []testutils.Alert + var err error + require.Eventually(t, func() bool { + alerts, err = testutils.GetAlerts(ns) + return err == nil + }, 60*time.Second, 5*time.Second, "must be able to fetch alerts") + // settle time for any in-flight alerts + time.Sleep(10 * time.Second) + alerts, _ = testutils.GetAlerts(ns) + return alerts + } + + logAlerts := func(t *testing.T, alerts []testutils.Alert) { + t.Helper() + for i, a := range alerts { + t.Logf(" [%d] %s(%s) comm=%s container=%s", + i, a.Labels["rule_name"], a.Labels["rule_id"], + a.Labels["comm"], a.Labels["container_name"]) + } + } + + // ----------------------------------------------------------------- + // 32a. sh -c — argv [sh, -c, "echo hi"] matches + // profile [sh, -c, *]. R0040 must NOT fire. + // ----------------------------------------------------------------- + t.Run("sh_dash_c_matches_wildcard_trailing", func(t *testing.T) { + wl := setup(t) + stdout, stderr, err := wl.ExecIntoPod([]string{"sh", "-c", "echo hi"}, "curl") + t.Logf("sh -c 'echo hi' → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + assert.Equal(t, 0, countByRule(alerts, "R0040"), + "sh -c matches profile [sh, -c, *] — R0040 must stay silent") + }) + + // ----------------------------------------------------------------- + // 32b. sh -x — argv [sh, -x, "echo hi"] does NOT match + // profile [sh, -c, *] (literal anchor `-c` mismatch). Path + // /bin/sh IS in profile so R0001 stays silent. R0040 must fire. + // ----------------------------------------------------------------- + t.Run("sh_dash_x_mismatches_R0040", func(t *testing.T) { + wl := setup(t) + stdout, stderr, err := wl.ExecIntoPod([]string{"sh", "-x", "echo hi"}, "curl") + t.Logf("sh -x 'echo hi' → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + require.Greater(t, countByRule(alerts, "R0040"), 0, + "sh -x mismatches profile [sh, -c, *] → R0040 must fire") + }) + + // ----------------------------------------------------------------- + // 32c. echo hello — argv [echo, hello, world, from, test] + // matches profile [echo, hello, *]. R0040 must NOT fire. + // ----------------------------------------------------------------- + t.Run("echo_hello_matches_wildcard_trailing", func(t *testing.T) { + wl := setup(t) + stdout, stderr, err := wl.ExecIntoPod([]string{"echo", "hello", "world", "from", "test"}, "curl") + t.Logf("echo hello world from test → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + assert.Equal(t, 0, countByRule(alerts, "R0040"), + "echo hello matches profile [echo, hello, *] — R0040 must stay silent") + }) + + // ----------------------------------------------------------------- + // 32d. echo goodbye — argv [echo, goodbye, world] does + // NOT match profile [echo, hello, *] (literal anchor `hello` + // mismatch). R0040 must fire. + // ----------------------------------------------------------------- + t.Run("echo_goodbye_mismatches_R0040", func(t *testing.T) { + wl := setup(t) + stdout, stderr, err := wl.ExecIntoPod([]string{"echo", "goodbye", "world"}, "curl") + t.Logf("echo goodbye world → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + require.Greater(t, countByRule(alerts, "R0040"), 0, + "echo goodbye mismatches profile [echo, hello, *] (literal anchor) → R0040 must fire") + }) +} diff --git a/tests/resources/curl-exec-arg-wildcards-deployment.yaml b/tests/resources/curl-exec-arg-wildcards-deployment.yaml new file mode 100644 index 000000000..2f06f8bae --- /dev/null +++ b/tests/resources/curl-exec-arg-wildcards-deployment.yaml @@ -0,0 +1,28 @@ +## Curl pod for Test_32_UnexpectedProcessArguments. +## +## Carries the unified user-defined-profile label used by upstream's +## ContainerProfileCache (kubescape/node-agent#788). The label value +## must match the name of BOTH the user ApplicationProfile and (when +## present) the user NetworkNeighborhood. The test creates only the AP +## with that name; the NN side is intentionally absent. +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: curl-32 + name: curl-32 +spec: + selector: + matchLabels: + app: curl-32 + replicas: 1 + template: + metadata: + labels: + app: curl-32 + kubescape.io/user-defined-profile: curl-32-overlay + spec: + containers: + - name: curl + image: docker.io/curlimages/curl@sha256:08e466006f0860e54fc299378de998935333e0e130a15f6f98482e9f8dab3058 + command: ["sleep", "infinity"] From 08163932755b0b29c395e6b663a5a202811c0fba Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 11:47:23 +0200 Subject: [PATCH 04/25] deps(storage): bump to rebased feat/exec-arg-wildcards tip (0de34ebc) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After rebasing storage feat/exec-arg-wildcards onto storage main, the matcher branch now sits on top of the latest fork main commit (352395a3 — Internal-field merge fix). Bump the node-agent storage replace to that new pseudo-version so this branch's tests run against storage main + matcher in one consistent baseline. Verified locally: 47/47 non-eBPF unit packages green; vet clean; the applicationprofile CEL package's TestExecWithArgsWildcardInProfile is 13/13 green; component-tests compile under the component tag. The two failing packages (pkg/containerwatcher/v2/tracers and pkg/validator) fail with the same pre-existing /sys/fs/bpf mount-permission error they have on every recent run — env, not code. --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 63a0c19be..bbe27ac05 100644 --- a/go.mod +++ b/go.mod @@ -507,4 +507,4 @@ replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspe replace github.com/cilium/ebpf => github.com/matthyx/ebpf v0.0.0-20260421101317-8a32d06def6c -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260429084127-3fc287210729 +replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260430094046-0de34ebc3741 diff --git a/go.sum b/go.sum index 86f43cedb..4417bd604 100644 --- a/go.sum +++ b/go.sum @@ -981,8 +981,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k8sstormcenter/storage v0.0.240-0.20260429084127-3fc287210729 h1:KzutjOD6Ohl61rlq6SBbQ2q8YbWpxMpaV4X09owobi4= -github.com/k8sstormcenter/storage v0.0.240-0.20260429084127-3fc287210729/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= +github.com/k8sstormcenter/storage v0.0.240-0.20260430094046-0de34ebc3741 h1:evbEXsfDrdev1zMLJRHDyyDnrBOKv94ZLktN+V+Ecjw= +github.com/k8sstormcenter/storage v0.0.240-0.20260430094046-0de34ebc3741/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= From fbf98b0327153f8a47b3be33ac0ef5d9533fd318 Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 19:10:22 +0200 Subject: [PATCH 05/25] ci(component-tests): add Test_32_UnexpectedProcessArguments to matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The component-tests workflow uses a hardcoded matrix list, not a dynamic discovery from the test source. Test_32 (added in a613cf64) must be listed explicitly to be picked up — without this entry the test is silently skipped. --- .github/workflows/component-tests.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index c01b09dd8..f15603276 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -245,7 +245,8 @@ jobs: Test_28_UserDefinedNetworkNeighborhood, Test_29_SignedApplicationProfile, Test_30_TamperedSignedProfiles, - Test_31_TamperDetectionAlert + Test_31_TamperDetectionAlert, + Test_32_UnexpectedProcessArguments ] steps: - name: Checkout code From 6f2a5b44063619ad7e812af0425d98e6bca05362 Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 19:21:51 +0200 Subject: [PATCH 06/25] fix(containerprofilecache): re-wire R1016 tamper alert + expand Test_31 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream PR #788 (Replace AP and NN cache with CP) deleted the legacy applicationprofilecache where the fork's emitTamperAlert (commit c2d681e0 'Feat/tamperalert' #22) lived. After the merge, R1016 alerts no longer fired for tampered user-defined profiles, breaking Test_31_TamperDetectionAlert (passed 3/3 on main, fails on the merged branch — confirmed regression introduced by PR #36). This restores the contract: every cache load of a user-supplied ApplicationProfile or NetworkNeighborhood overlay re-verifies the signature, and emits an R1016 'Signed profile tampered' alert through the rule-alert exporter when the signature is present but no longer valid. Alert shape preserved from the legacy cache so dashboards and component tests keep matching. Implementation: - new file pkg/objectcache/containerprofilecache/tamper_alert.go: verifyUserApplicationProfile / verifyUserNetworkNeighborhood / emitTamperAlert / extractWlidFromContainerID. Self-contained; keeps containerprofilecache.go diff small. - containerprofilecache.go: new tamperAlertExporter field + SetTamperAlertExporter setter + verify hooks immediately after GetApplicationProfile / GetNetworkNeighborhood succeed in the user-overlay branch of addContainer. - cmd/main.go: wire the alert exporter via SetTamperAlertExporter after the cache constructor (kept the constructor signature unchanged to avoid blast radius on tests). The setter is nil-safe: when no exporter is wired, verification still runs and is logged but no alert is emitted — matches the legacy behavior for unit-tests-with-no-exporter. Test_31 expanded from one scenario to four subtests, each in its own namespace to avoid alert cross-contamination: 31a tampered_user_defined_AP_fires_R1016 — original regression case 31b untampered_signed_AP_no_R1016 — negative: clean signature 31c unsigned_AP_no_R1016 — signing is opt-in 31d tampered_user_defined_NN_fires_R1016 — parallel NN code path Verified locally: - go build ./... clean - go test ./pkg/objectcache/containerprofilecache/... green - go test ./pkg/signature/... green - go vet -tags=component ./tests/... clean - go test -tags=component -run='^$' ./tests/... compiles --- cmd/main.go | 5 + .../containerprofilecache.go | 16 + .../containerprofilecache/tamper_alert.go | 151 +++++++++ tests/component_test.go | 309 ++++++++++++------ 4 files changed, 380 insertions(+), 101 deletions(-) create mode 100644 pkg/objectcache/containerprofilecache/tamper_alert.go diff --git a/cmd/main.go b/cmd/main.go index 6fcaaca42..b9d527b17 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -297,6 +297,11 @@ func main() { ruleBindingCache.AddNotifier(&ruleBindingNotify) cpc := containerprofilecache.NewContainerProfileCache(cfg, storageClient, k8sObjectCache, prometheusExporter) + // Wire R1016 tamper alerts: when a user-defined AP/NN overlay is + // loaded but its signature no longer verifies, the CP cache emits + // "Signed profile tampered" through this exporter. Optional — + // nil-safe inside the cache. + cpc.SetTamperAlertExporter(exporter) cpc.Start(ctx) logger.L().Info("ContainerProfileCache active; legacy AP/NN caches removed") diff --git a/pkg/objectcache/containerprofilecache/containerprofilecache.go b/pkg/objectcache/containerprofilecache/containerprofilecache.go index 8185957a2..c4788a0bb 100644 --- a/pkg/objectcache/containerprofilecache/containerprofilecache.go +++ b/pkg/objectcache/containerprofilecache/containerprofilecache.go @@ -15,6 +15,7 @@ import ( "github.com/kubescape/go-logger/helpers" helpersv1 "github.com/kubescape/k8s-interface/instanceidhandler/v1/helpers" "github.com/kubescape/node-agent/pkg/config" + "github.com/kubescape/node-agent/pkg/exporters" "github.com/kubescape/node-agent/pkg/metricsmanager" "github.com/kubescape/node-agent/pkg/objectcache" "github.com/kubescape/node-agent/pkg/objectcache/callstackcache" @@ -109,6 +110,11 @@ type ContainerProfileCacheImpl struct { k8sObjectCache objectcache.K8sObjectCache metricsManager metricsmanager.MetricsManager + // tamperAlertExporter receives R1016 "Signed profile tampered" alerts + // when a user-supplied AP/NN overlay fails signature verification. Set + // after construction via SetTamperAlertExporter; nil disables alerting. + tamperAlertExporter exporters.Exporter + reconcileEvery time.Duration rpcBudget time.Duration refreshInProgress atomic.Bool @@ -383,6 +389,12 @@ func (c *ContainerProfileCacheImpl) tryPopulateEntry( helpers.Error(userAPErr)) userAP = nil } + // Re-verify the user-supplied AP signature on every load. Emits + // R1016 if the profile is signed but tampered. Does not gate + // loading unless cfg.EnableSignatureVerification is true. + if userAP != nil && !c.verifyUserApplicationProfile(userAP, containerID) { + userAP = nil + } var userNNErr error _ = c.refreshRPC(ctx, func(rctx context.Context) error { userNN, userNNErr = c.storageClient.GetNetworkNeighborhood(rctx, ns, overlayName) @@ -396,6 +408,10 @@ func (c *ContainerProfileCacheImpl) tryPopulateEntry( helpers.Error(userNNErr)) userNN = nil } + // Same tamper-check on the NN side. + if userNN != nil && !c.verifyUserNetworkNeighborhood(userNN, containerID) { + userNN = nil + } } // Need SOMETHING to cache. If we have nothing, stay pending and retry. diff --git a/pkg/objectcache/containerprofilecache/tamper_alert.go b/pkg/objectcache/containerprofilecache/tamper_alert.go new file mode 100644 index 000000000..2d972151d --- /dev/null +++ b/pkg/objectcache/containerprofilecache/tamper_alert.go @@ -0,0 +1,151 @@ +// Tamper detection for user-supplied profile overlays loaded into the +// ContainerProfileCache. +// +// When a user references a signed ApplicationProfile or NetworkNeighborhood +// via the `kubescape.io/user-defined-profile` pod label, this code path +// re-verifies the signature on every cache load and emits an R1016 +// "Signed profile tampered" alert via the rule-alert exporter when the +// signature is present but no longer valid. +// +// This is the new home of the legacy applicationprofilecache's tamper +// detection (originally introduced in fork commit c2d681e0 — "Feat/ +// tamperalert"). Upstream PR #788 deleted the legacy cache; this re-wires +// the same behavior onto containerprofilecache without changing the alert +// shape so existing component tests (Test_31_TamperDetectionAlert) keep +// working. +package containerprofilecache + +import ( + "fmt" + "strings" + + "github.com/armosec/armoapi-go/armotypes" + "github.com/kubescape/go-logger" + "github.com/kubescape/go-logger/helpers" + "github.com/kubescape/node-agent/pkg/exporters" + "github.com/kubescape/node-agent/pkg/rulemanager/types" + "github.com/kubescape/node-agent/pkg/signature" + "github.com/kubescape/node-agent/pkg/signature/profiles" + "github.com/kubescape/storage/pkg/apis/softwarecomposition/v1beta1" +) + +// SetTamperAlertExporter wires the rule-alert exporter used to emit R1016. +// Optional — when nil, signature verification still runs (and is logged) +// but no alert is emitted. Production wiring lives in cmd/main.go after the +// alert exporter is constructed. +func (c *ContainerProfileCacheImpl) SetTamperAlertExporter(e exporters.Exporter) { + c.tamperAlertExporter = e +} + +// verifyUserApplicationProfile re-verifies the signature of a user-supplied +// ApplicationProfile overlay and emits R1016 if the signature is present +// but no longer valid (i.e. the profile was tampered after signing). +// +// Returns true iff the profile is acceptable for further use: +// - profile is signed and verifies → true +// - profile is not signed → true (signing is opt-in; the empty-signature +// case is handled by the caller's normal not-signed flow) +// - profile is signed but verification fails → false (and R1016 emitted) +// +// The boolean lets the caller decide whether to project the overlay into +// the cache. Today we always proceed (the legacy semantics don't actually +// gate loading on verification unless EnableSignatureVerification is true), +// but having the return value keeps the door open for stricter modes. +func (c *ContainerProfileCacheImpl) verifyUserApplicationProfile(profile *v1beta1.ApplicationProfile, containerID string) bool { + if profile == nil { + return true + } + adapter := profiles.NewApplicationProfileAdapter(profile) + if !signature.IsSigned(adapter) { + return true + } + if err := signature.VerifyObject(adapter); err != nil { + logger.L().Warning("user-defined ApplicationProfile signature verification failed (tamper detected)", + helpers.String("profile", profile.Name), + helpers.String("namespace", profile.Namespace), + helpers.String("containerID", containerID), + helpers.Error(err)) + c.emitTamperAlert(profile.Name, profile.Namespace, containerID, "ApplicationProfile", err) + return !c.cfg.EnableSignatureVerification + } + return true +} + +// verifyUserNetworkNeighborhood is the NN-side counterpart to +// verifyUserApplicationProfile. Same contract, different object kind in +// the alert description. +func (c *ContainerProfileCacheImpl) verifyUserNetworkNeighborhood(nn *v1beta1.NetworkNeighborhood, containerID string) bool { + if nn == nil { + return true + } + adapter := profiles.NewNetworkNeighborhoodAdapter(nn) + if !signature.IsSigned(adapter) { + return true + } + if err := signature.VerifyObject(adapter); err != nil { + logger.L().Warning("user-defined NetworkNeighborhood signature verification failed (tamper detected)", + helpers.String("profile", nn.Name), + helpers.String("namespace", nn.Namespace), + helpers.String("containerID", containerID), + helpers.Error(err)) + c.emitTamperAlert(nn.Name, nn.Namespace, containerID, "NetworkNeighborhood", err) + return !c.cfg.EnableSignatureVerification + } + return true +} + +// emitTamperAlert sends a single R1016 "Signed profile tampered" alert +// through the rule-alert exporter. No-op when the exporter is unset. +// +// Alert shape mirrors the legacy applicationprofilecache.emitTamperAlert +// (fork commit c2d681e0) so dashboards and component tests keep matching. +func (c *ContainerProfileCacheImpl) emitTamperAlert(profileName, namespace, containerID, objectKind string, verifyErr error) { + if c.tamperAlertExporter == nil { + return + } + + ruleFailure := &types.GenericRuleFailure{ + BaseRuntimeAlert: armotypes.BaseRuntimeAlert{ + AlertName: "Signed profile tampered", + InfectedPID: 1, + Severity: 10, + FixSuggestions: "Investigate who modified the " + objectKind + " '" + profileName + "' in namespace '" + namespace + "'. Re-sign the profile after verifying its contents.", + }, + AlertType: armotypes.AlertTypeRule, + RuntimeProcessDetails: armotypes.ProcessTree{ + ProcessTree: armotypes.Process{ + PID: 1, + Comm: "node-agent", + }, + }, + RuleAlert: armotypes.RuleAlert{ + RuleDescription: fmt.Sprintf("Signed %s '%s' in namespace '%s' has been tampered with: %v", + objectKind, profileName, namespace, verifyErr), + }, + RuntimeAlertK8sDetails: armotypes.RuntimeAlertK8sDetails{ + Namespace: namespace, + }, + RuleID: "R1016", + } + + // Best-effort workload identifier. The legacy cache used a wlid string; + // this cache is keyed on containerID, so we just stash that as the + // workload reference. Downstream consumers (Alertmanager, exporter + // pipelines) don't structurally depend on the wlid prefix. + ruleFailure.SetWorkloadDetails(extractWlidFromContainerID(containerID)) + + c.tamperAlertExporter.SendRuleAlert(ruleFailure) +} + +// extractWlidFromContainerID is a placeholder that returns the containerID +// as-is. The legacy cache had a richer "wlid:///// +// /" string available; the new cache is keyed on +// containerID so callers that consume wlid get an opaque identifier here. +// Retained as a separate function so the alert path can be upgraded to a +// proper wlid lookup later without touching emitTamperAlert. +func extractWlidFromContainerID(containerID string) string { + if idx := strings.LastIndex(containerID, "/"); idx > 0 { + return containerID[:idx] + } + return containerID +} diff --git a/tests/component_test.go b/tests/component_test.go index 87e37d5d3..0eca1a9c7 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3029,133 +3029,240 @@ func Test_30_TamperedSignedProfiles(t *testing.T) { }) } -// Test_31_TamperDetectionAlert verifies that R1016 fires when a previously -// signed ApplicationProfile has been tampered with (signature is stale). +// Test_31_TamperDetectionAlert verifies that R1016 "Signed profile tampered" +// fires when a previously signed ApplicationProfile or NetworkNeighborhood +// has been tampered with (signature annotations stale relative to the +// resource bytes). // -// This test proves the new tamper-detection alerting: -// - Sign an AP, push to storage -// - Tamper the AP in storage (modify spec, keep stale signature annotations) -// - Deploy a pod referencing the tampered profile -// - R1016 "Signed profile tampered" must fire +// Coverage: +// 31a — tampered AP fires R1016 (the original scenario; regression-pinned +// after upstream PR #788's cache rewrite re-wired alert emission). +// 31b — untampered signed AP does NOT fire R1016 (negative; signature +// verifies cleanly so no alert). +// 31c — unsigned AP does NOT fire R1016 (signing is opt-in; not-signed +// is not the same as tampered). +// 31d — tampered NN fires R1016 via the parallel NN code path (different +// storage call, same emission contract). // -// R1016 fires regardless of enableSignatureVerification setting. -// The detection happens in the AP cache when it loads the profile. +// All four subtests share signSignedAP / signSignedNN helpers; each subtest +// uses its own namespace + its own AP/NN name to avoid alert cross-talk +// between scenarios. +// +// R1016 fires regardless of cfg.EnableSignatureVerification: the alert is +// always emitted on tamper; the flag only gates whether the cache also +// rejects the load. func Test_31_TamperDetectionAlert(t *testing.T) { start := time.Now() defer tearDownTest(t, start) - ns := testutils.NewRandomNamespace() k8sClient := k8sinterface.NewKubernetesApi() storageClient := spdxv1beta1client.NewForConfigOrDie(k8sClient.K8SConfig) - // ── 1. Build and sign an ApplicationProfile ── - ap := &v1beta1.ApplicationProfile{ - ObjectMeta: metav1.ObjectMeta{ - Name: "signed-ap", - Namespace: ns.Name, - }, - Spec: v1beta1.ApplicationProfileSpec{ - Containers: []v1beta1.ApplicationProfileContainer{ - { - Name: "curl", - Execs: []v1beta1.ExecCalls{ - {Path: "/bin/sleep"}, - {Path: "/usr/bin/curl"}, + // signSignedAP returns a signed ApplicationProfile in nsName under name. + signSignedAP := func(t *testing.T, nsName, name string) *v1beta1.ApplicationProfile { + t.Helper() + ap := &v1beta1.ApplicationProfile{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: nsName}, + Spec: v1beta1.ApplicationProfileSpec{ + Containers: []v1beta1.ApplicationProfileContainer{ + { + Name: "curl", + Execs: []v1beta1.ExecCalls{ + {Path: "/bin/sleep"}, + {Path: "/usr/bin/curl"}, + }, + Syscalls: []string{"socket", "connect", "read", "write", "close", "openat"}, }, - Syscalls: []string{"socket", "connect", "read", "write", "close", "openat"}, }, }, - }, + } + require.NoError(t, signature.SignObjectDisableKeyless(profiles.NewApplicationProfileAdapter(ap)), "sign AP") + return ap } - apAdapter := profiles.NewApplicationProfileAdapter(ap) - require.NoError(t, signature.SignObjectDisableKeyless(apAdapter), "sign AP") - require.True(t, signature.IsSigned(apAdapter), "AP must be signed") - require.NoError(t, signature.VerifyObjectAllowUntrusted(apAdapter), - "signature must verify immediately after signing") - t.Log("AP signed successfully") - - // ── 2. Tamper the AP (add unauthorized exec path) ── - ap.Spec.Containers[0].Execs = append(ap.Spec.Containers[0].Execs, - v1beta1.ExecCalls{Path: "/usr/bin/nslookup"}) - - // Verify the signature is now invalid - tamperedAdapter := profiles.NewApplicationProfileAdapter(ap) - require.Error(t, signature.VerifyObjectAllowUntrusted(tamperedAdapter), - "tampered AP must fail verification") - require.True(t, signature.IsSigned(tamperedAdapter), - "tampered AP must still have signature annotations (stale)") - t.Log("AP tampered — signature is stale") - - // ── 3. Push tampered AP to storage ── - _, err := storageClient.ApplicationProfiles(ns.Name).Create( - context.Background(), ap, metav1.CreateOptions{}) - require.NoError(t, err, "push tampered AP to storage") + signSignedNN := func(t *testing.T, nsName, name string) *v1beta1.NetworkNeighborhood { + t.Helper() + nn := &v1beta1.NetworkNeighborhood{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: nsName}, + Spec: v1beta1.NetworkNeighborhoodSpec{ + LabelSelector: metav1.LabelSelector{MatchLabels: map[string]string{"app": "curl-signed"}}, + Containers: []v1beta1.NetworkNeighborhoodContainer{ + {Name: "curl"}, + }, + }, + } + require.NoError(t, signature.SignObjectDisableKeyless(profiles.NewNetworkNeighborhoodAdapter(nn)), "sign NN") + return nn + } - // Verify it's stored with stale signature - require.Eventually(t, func() bool { - stored, getErr := storageClient.ApplicationProfiles(ns.Name).Get( - context.Background(), "signed-ap", v1.GetOptions{}) - if getErr != nil { - return false + // deployAndWait pushes the AP (and optionally NN) into storage, then + // deploys curl-signed-deployment.yaml and waits for it to come up. The + // deployment YAML uses kubescape.io/user-defined-profile=signed-ap as + // its label, so AP+NN names must equal "signed-ap" for the upstream + // CP cache to pick them up. + deployAndWait := func(t *testing.T, ns testutils.TestNamespace, ap *v1beta1.ApplicationProfile, nn *v1beta1.NetworkNeighborhood) *testutils.TestWorkload { + t.Helper() + if ap != nil { + _, err := storageClient.ApplicationProfiles(ns.Name).Create( + context.Background(), ap, metav1.CreateOptions{}) + require.NoError(t, err, "push AP to storage") + } + if nn != nil { + _, err := storageClient.NetworkNeighborhoods(ns.Name).Create( + context.Background(), nn, metav1.CreateOptions{}) + require.NoError(t, err, "push NN to storage") } - storedAdapter := profiles.NewApplicationProfileAdapter(stored) - return signature.IsSigned(storedAdapter) && - signature.VerifyObjectAllowUntrusted(storedAdapter) != nil - }, 30*time.Second, 1*time.Second, "stored AP must have stale signature") - t.Log("Tampered AP stored with stale signature") + require.Eventually(t, func() bool { + if ap != nil { + if _, err := storageClient.ApplicationProfiles(ns.Name).Get( + context.Background(), ap.Name, v1.GetOptions{}); err != nil { + return false + } + } + if nn != nil { + if _, err := storageClient.NetworkNeighborhoods(ns.Name).Get( + context.Background(), nn.Name, v1.GetOptions{}); err != nil { + return false + } + } + return true + }, 30*time.Second, 1*time.Second, "AP/NN must be in storage before pod deploy") - // ── 4. Deploy pod referencing the tampered profile ── - wl, err := testutils.NewTestWorkload(ns.Name, - path.Join(utils.CurrentDir(), "resources/curl-signed-deployment.yaml")) - require.NoError(t, err) - require.NoError(t, wl.WaitForReady(80)) - t.Log("Pod deployed, waiting for cache to detect tamper...") + wl, err := testutils.NewTestWorkload(ns.Name, + path.Join(utils.CurrentDir(), "resources/curl-signed-deployment.yaml")) + require.NoError(t, err) + require.NoError(t, wl.WaitForReady(80)) + return wl + } - // ── 5. Wait for R1016 "Signed profile tampered" alert ── - // The AP cache's addContainer or periodicUpdate will detect the tampered - // signature and emit R1016 via the exporter. - var alerts []testutils.Alert - require.Eventually(t, func() bool { - alerts, err = testutils.GetAlerts(ns.Name) - if err != nil || len(alerts) == 0 { - return false + countR1016 := func(t *testing.T, nsName string, settle time.Duration) int { + t.Helper() + // Allow node-agent to load the profile and for any alert to flush. + time.Sleep(settle) + alerts, err := testutils.GetAlerts(nsName) + if err != nil { + t.Logf("GetAlerts error: %v", err) + return 0 } + n := 0 for _, a := range alerts { if a.Labels["rule_id"] == "R1016" { - return true + n++ + assert.Equal(t, "Signed profile tampered", a.Labels["rule_name"], + "R1016 alert must have correct rule name") + assert.Equal(t, nsName, a.Labels["namespace"], + "R1016 alert must have correct namespace") } } - return false - }, 120*time.Second, 5*time.Second, "R1016 must fire for tampered signed AP") + t.Logf("[%s] R1016 count = %d (out of %d alerts)", nsName, n, len(alerts)) + return n + } - // ── 6. Log all alerts for debugging ── - time.Sleep(5 * time.Second) - alerts, _ = testutils.GetAlerts(ns.Name) + // ----------------------------------------------------------------- + // 31a — tampered AP fires R1016 + // ----------------------------------------------------------------- + t.Run("tampered_user_defined_AP_fires_R1016", func(t *testing.T) { + ns := testutils.NewRandomNamespace() + ap := signSignedAP(t, ns.Name, "signed-ap") + // Tamper after signing: append an unauthorized exec entry. The + // signature annotations stay (stale). + ap.Spec.Containers[0].Execs = append(ap.Spec.Containers[0].Execs, + v1beta1.ExecCalls{Path: "/usr/bin/nslookup"}) + require.Error(t, + signature.VerifyObjectAllowUntrusted(profiles.NewApplicationProfileAdapter(ap)), + "tampered AP must fail verification") - t.Logf("=== %d alerts ===", len(alerts)) - for i, a := range alerts { - t.Logf(" [%d] %s(%s) comm=%s container=%s", - i, a.Labels["rule_name"], a.Labels["rule_id"], - a.Labels["comm"], a.Labels["container_name"]) - } + _ = deployAndWait(t, ns, ap, nil) + + require.Eventually(t, func() bool { + alerts, _ := testutils.GetAlerts(ns.Name) + for _, a := range alerts { + if a.Labels["rule_id"] == "R1016" { + return true + } + } + return false + }, 120*time.Second, 5*time.Second, "tampered AP must produce R1016") + + require.Greater(t, countR1016(t, ns.Name, 5*time.Second), 0) + }) + + // ----------------------------------------------------------------- + // 31b — untampered signed AP must NOT fire R1016 + // ----------------------------------------------------------------- + t.Run("untampered_signed_AP_no_R1016", func(t *testing.T) { + ns := testutils.NewRandomNamespace() + ap := signSignedAP(t, ns.Name, "signed-ap") + // Don't tamper. Signature verifies cleanly. + require.NoError(t, + signature.VerifyObjectAllowUntrusted(profiles.NewApplicationProfileAdapter(ap)), + "untampered signed AP must verify") + + _ = deployAndWait(t, ns, ap, nil) + // Wait for cache load to happen (cache picks it up within ~15s). + assert.Equal(t, 0, countR1016(t, ns.Name, 30*time.Second), + "untampered signed AP must NOT fire R1016") + }) + + // ----------------------------------------------------------------- + // 31c — unsigned AP must NOT fire R1016 (signing is opt-in) + // ----------------------------------------------------------------- + t.Run("unsigned_AP_no_R1016", func(t *testing.T) { + ns := testutils.NewRandomNamespace() + ap := &v1beta1.ApplicationProfile{ + ObjectMeta: metav1.ObjectMeta{Name: "signed-ap", Namespace: ns.Name}, + Spec: v1beta1.ApplicationProfileSpec{ + Containers: []v1beta1.ApplicationProfileContainer{ + { + Name: "curl", + Execs: []v1beta1.ExecCalls{ + {Path: "/bin/sleep"}, + }, + Syscalls: []string{"socket"}, + }, + }, + }, + } + require.False(t, + signature.IsSigned(profiles.NewApplicationProfileAdapter(ap)), + "unsigned AP must not have signature annotations") + + _ = deployAndWait(t, ns, ap, nil) + assert.Equal(t, 0, countR1016(t, ns.Name, 30*time.Second), + "unsigned AP must NOT fire R1016 — not-signed is not the same as tampered") + }) + + // ----------------------------------------------------------------- + // 31d — tampered NN fires R1016 via the NN code path + // ----------------------------------------------------------------- + t.Run("tampered_user_defined_NN_fires_R1016", func(t *testing.T) { + ns := testutils.NewRandomNamespace() + // Untampered AP (matched on name to the pod label) so the AP path + // stays silent and we know any R1016 came from the NN path. + ap := signSignedAP(t, ns.Name, "signed-ap") + nn := signSignedNN(t, ns.Name, "signed-ap") + // Tamper the NN: add a container the original signature didn't cover. + nn.Spec.Containers = append(nn.Spec.Containers, + v1beta1.NetworkNeighborhoodContainer{Name: "drift"}) + require.Error(t, + signature.VerifyObjectAllowUntrusted(profiles.NewNetworkNeighborhoodAdapter(nn)), + "tampered NN must fail verification") + + _ = deployAndWait(t, ns, ap, nn) + + require.Eventually(t, func() bool { + alerts, _ := testutils.GetAlerts(ns.Name) + for _, a := range alerts { + if a.Labels["rule_id"] == "R1016" { + return true + } + } + return false + }, 120*time.Second, 5*time.Second, "tampered NN must produce R1016") + + require.Greater(t, countR1016(t, ns.Name, 5*time.Second), 0) + }) - // Verify R1016 alert details - r1016Count := 0 - for _, a := range alerts { - if a.Labels["rule_id"] == "R1016" { - r1016Count++ - assert.Equal(t, "Signed profile tampered", a.Labels["rule_name"], - "R1016 alert must have correct rule name") - assert.Equal(t, ns.Name, a.Labels["namespace"], - "R1016 alert must have correct namespace") - t.Logf("R1016 alert: rule_name=%s namespace=%s severity=%s", - a.Labels["rule_name"], a.Labels["namespace"], a.Labels["severity"]) - } - } - require.Greater(t, r1016Count, 0, - "R1016 must fire — proves tamper detection alerting works") - t.Log("Tamper detection alerting verified successfully") } // --------------------------------------------------------------------------- From fe80d73b6bd0402f3eea8e63f7bbc386b5101416 Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 19:30:37 +0200 Subject: [PATCH 07/25] test(component): Test_32 profile uses full-path argv[0] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Empirical finding from CI run 25178930763 — Test_32's positive subtests (32a sh_dash_c_matches, 32c echo_hello_matches) fired R0040 when they should not. Cause: at runtime, the eBPF tracer captures argv[0] as the FULL exec path (e.g. "/bin/sh") rather than the basename ("sh"). My profile entries used basenames, so the matcher's first-position literal compare missed and the cache fell through to 'no exec entry matches' — R0040 fires. Aligns Test_32's profile with the convention already used by Test_27's wildcard_yaml_profile_allowed_opens fixture (known-application-profile-wildcards.yaml predecessor): argv[0] is the full path, subsequent positions are flags/values. Subtest expectations after this fix: 32a sh -c → matches [/bin/sh, -c, *] → R0040 silent 32b sh -x → -c anchor mismatch → R0040 fires 32c echo hello <…> → matches [/bin/echo, hello, *]→ R0040 silent 32d echo goodbye <…> → hello anchor mismatch → R0040 fires --- tests/component_test.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/component_test.go b/tests/component_test.go index 0eca1a9c7..565db5e66 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3311,14 +3311,20 @@ func Test_32_UnexpectedProcessArguments(t *testing.T) { { Name: "curl", Execs: []v1beta1.ExecCalls{ + // IMPORTANT: argv[0] in the eBPF-captured event is + // the FULL exec path (see Test_27's wildcard YAML + // fixture for the same convention). Profile arg + // vectors must include argv[0] as full path so the + // matcher's first-position literal compare hits. + // // pod startup: sleep - {Path: "/bin/sleep", Args: []string{"sleep", dynamicpathdetector.WildcardIdentifier}}, + {Path: "/bin/sleep", Args: []string{"/bin/sleep", dynamicpathdetector.WildcardIdentifier}}, // sh -c - {Path: "/bin/sh", Args: []string{"sh", "-c", dynamicpathdetector.WildcardIdentifier}}, + {Path: "/bin/sh", Args: []string{"/bin/sh", "-c", dynamicpathdetector.WildcardIdentifier}}, // echo hello - {Path: "/bin/echo", Args: []string{"echo", "hello", dynamicpathdetector.WildcardIdentifier}}, + {Path: "/bin/echo", Args: []string{"/bin/echo", "hello", dynamicpathdetector.WildcardIdentifier}}, // curl -s - {Path: "/usr/bin/curl", Args: []string{"curl", "-s", dynamicpathdetector.DynamicIdentifier}}, + {Path: "/usr/bin/curl", Args: []string{"/usr/bin/curl", "-s", dynamicpathdetector.DynamicIdentifier}}, }, Syscalls: []string{"socket", "connect", "sendto", "recvfrom", "read", "write", "close", "openat", "mmap", "mprotect", "munmap", "fcntl", "ioctl", "poll", "epoll_create1", "epoll_ctl", "epoll_wait", "bind", "listen", "accept4", "getsockopt", "setsockopt", "getsockname", "getpid", "fstat", "rt_sigaction", "rt_sigprocmask", "writev", "execve"}, }, From f828777aba04945abaa096ee06c08ed6932f3b99 Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 19:37:05 +0200 Subject: [PATCH 08/25] test: AP-fixture linter (R-AP-* rules) + canonical reference profile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Catches the class of bug Test_32 hit on its first CI run (PR #37 run 25178930763): profile entries used basename argv[0] ("sh") while the eBPF tracer captures the full path ("/bin/sh"), so the matcher silently misses and the rule fires when it shouldn't. Without a linter, this kind of fixture drift only surfaces in a 15-minute component-test run on a kind cluster — too late, too expensive. The linter (LintApplicationProfile / LintApplicationProfileYAML in tests/resources/aplint_test.go) is intentionally written as a pure function returning []Violation. Zero testing-package coupling on the hot path so it can be lifted into a future bobctl subcommand `bobctl lint ` without rewrite — see backlog at ~/biz/sbob-business-plan/state.yaml. Rules: R-AP-01 — kind must be ApplicationProfile R-AP-02 — at least one container R-AP-03 — container.name non-empty R-AP-10 — exec.path absolute (catches relative paths) R-AP-11 — exec.path no wildcards (binary identity is exact) R-AP-12 — exec.args[0] equals exec.path or wildcard token (Test_32-style argv[0] basename trap) R-AP-13 — exec.args wildcard tokens are whole-word (no embedding) R-AP-20 — open.path non-empty + absolute R-AP-21 — open.flags non-empty (real auto-recorded opens always have ≥1) R-AP-22 — open.flags from known O_* set (catches typos) Each rule has a dedicated self-test that constructs a minimal-bad YAML and asserts the rule fires (5 negative tests). One positive test (TestLinter_canonical_AP_passes) parses the fork's reference known-application-profile.yaml — extracted from a real auto-recorded AP for curlimages/curl:8.5.0 in fea3b062 — and asserts zero violations. The reference YAML is restored to tests/resources/ so the canonical shape is in-tree and visible to humans + CI. Why a Go test rather than a shell linter: keeps the rule set in the same language as the storage matcher (`dynamicpathdetector`), so extending CompareExecArgs and the linter together stays cheap. Local-cluster organic learning was the original plan but k3s on OrbStack is currently flapping (LXC-related boot loop). The fea3b062 profile was extracted from real auto-learning at an earlier moment of stability, which is the next-best ground truth. --- tests/resources/aplint_test.go | 344 ++++++++++++++++++ .../resources/known-application-profile.yaml | 245 +++++++++++++ 2 files changed, 589 insertions(+) create mode 100644 tests/resources/aplint_test.go create mode 100644 tests/resources/known-application-profile.yaml diff --git a/tests/resources/aplint_test.go b/tests/resources/aplint_test.go new file mode 100644 index 000000000..3e1887592 --- /dev/null +++ b/tests/resources/aplint_test.go @@ -0,0 +1,344 @@ +// AP-fixture lint tests. +// +// Validates every ApplicationProfile / NetworkNeighborhood YAML under +// tests/resources/ against the ground-truth syntax rules learned from a +// real auto-recorded AP for curlimages/curl:8.5.0 (originally captured +// by the fork in commit fea3b062 — known-application-profile.yaml). Each +// rule maps a real-world drift mode that has bitten the fork once already +// (e.g. argv[0] basename vs full path — Test_32 first run on PR #37). +// +// Runs as a regular `go test ./...` — no component tag, no kind cluster. +// +// LintApplicationProfile is exported (uppercase) and returns []Violation +// rather than calling t.Errorf directly, so this whole file can be lifted +// into a standalone bobctl subcommand `bobctl lint ` without any +// testing-package dependency. The Test_* functions below are just thin +// wrappers that turn violations into t.Errorf calls. +package resources + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "sigs.k8s.io/yaml" +) + +// applicationProfileLike captures only the fields we lint; we don't import +// the storage v1beta1 types because we want this lint runnable in isolation. +type applicationProfileLike struct { + APIVersion string `json:"apiVersion"` + Kind string `json:"kind"` + Metadata struct { + Name string `json:"name"` + } `json:"metadata"` + Spec struct { + Containers []struct { + Name string `json:"name"` + Execs []struct { + Path string `json:"path"` + Args []string `json:"args"` + } `json:"execs"` + Opens []struct { + Path string `json:"path"` + Flags []string `json:"flags"` + } `json:"opens"` + } `json:"containers"` + } `json:"spec"` +} + +// Violation is a single rule failure — id, target file (if any), and a +// human-readable message. Returned by LintApplicationProfile so callers +// can treat lint output as data (CLI exit code, JSON, t.Errorf, etc). +type Violation struct { + Rule string + Path string + Msg string +} + +func (v Violation) String() string { + if v.Path != "" { + return fmt.Sprintf("[%s] %s: %s", v.Rule, v.Path, v.Msg) + } + return fmt.Sprintf("[%s] %s", v.Rule, v.Msg) +} + +// validOpenFlags is the set of O_* flags the fork has seen in real +// auto-recorded profiles. Extend as new flags appear; a typo'd flag +// (e.g. `O_LARGEFLE`) is caught immediately. +var validOpenFlags = map[string]bool{ + "O_RDONLY": true, + "O_WRONLY": true, + "O_RDWR": true, + "O_CLOEXEC": true, + "O_LARGEFILE": true, + "O_DIRECTORY": true, + "O_NONBLOCK": true, + "O_APPEND": true, + "O_CREAT": true, + "O_EXCL": true, + "O_TRUNC": true, + "O_NOFOLLOW": true, + "O_NOATIME": true, + "O_DIRECT": true, + "O_SYNC": true, + "O_PATH": true, + "O_TMPFILE": true, +} + +// dynamicIdentifier and wildcardIdentifier mirror the constants in +// storage/pkg/registry/file/dynamicpathdetector. Duplicated here so this +// linter has zero dependency on the storage module. +const ( + dynamicIdentifier = "⋯" + wildcardIdentifier = "*" +) + +// LintApplicationProfileYAML parses a YAML doc as an ApplicationProfile and +// runs all rules. Returns the slice of violations (empty == clean). Pure +// function — no I/O, no testing-package coupling. +func LintApplicationProfileYAML(doc []byte, sourceLabel string) []Violation { + var ap applicationProfileLike + if err := yaml.Unmarshal(doc, &ap); err != nil { + return []Violation{{Rule: "R-AP-00", Path: sourceLabel, Msg: fmt.Sprintf("yaml parse: %v", err)}} + } + return LintApplicationProfile(&ap, sourceLabel) +} + +// LintApplicationProfile runs every rule against an already-parsed AP. +// Returns the slice of violations (empty == clean). +// +// Rule IDs: +// R-AP-00 — yaml parse failure (only from LintApplicationProfileYAML) +// R-AP-01 — kind must be ApplicationProfile +// R-AP-02 — at least one container +// R-AP-03 — container name non-empty +// R-AP-10 — exec.path absolute +// R-AP-11 — exec.path no wildcards +// R-AP-12 — exec.args[0] equals exec.path (or wildcard) +// R-AP-13 — exec.args wildcard tokens are whole-word +// R-AP-20 — open.path non-empty + absolute +// R-AP-21 — open.flags non-empty +// R-AP-22 — open.flags from known O_* set +func LintApplicationProfile(ap *applicationProfileLike, src string) []Violation { + var v []Violation + add := func(rule, msg string) { v = append(v, Violation{Rule: rule, Path: src, Msg: msg}) } + + if ap.Kind != "ApplicationProfile" { + add("R-AP-01", fmt.Sprintf("kind is %q, expected \"ApplicationProfile\"", ap.Kind)) + } + if len(ap.Spec.Containers) == 0 { + add("R-AP-02", "spec.containers is empty") + return v + } + + for ci, c := range ap.Spec.Containers { + if c.Name == "" { + add("R-AP-03", fmt.Sprintf("spec.containers[%d].name is empty", ci)) + } + + for ei, e := range c.Execs { + if e.Path == "" { + add("R-AP-10", fmt.Sprintf("containers[%d].execs[%d].path is empty", ci, ei)) + continue + } + if !strings.HasPrefix(e.Path, "/") { + add("R-AP-10", fmt.Sprintf("containers[%d].execs[%d].path %q must be absolute (start with /)", ci, ei, e.Path)) + } + if strings.Contains(e.Path, dynamicIdentifier) || strings.Contains(e.Path, wildcardIdentifier) { + add("R-AP-11", fmt.Sprintf("containers[%d].execs[%d].path %q must NOT contain wildcards (only args[*] may)", ci, ei, e.Path)) + } + + if len(e.Args) == 0 { + continue // path-only entry is legal + } + + // R-AP-12: args[0] must equal the full exec.path. The eBPF + // tracer captures argv[0] as the full binary path; profile + // entries that use a basename (e.g. "sh" instead of "/bin/sh") + // silently fail to match at runtime. Caught the hard way on + // Test_32's first CI run (PR #37 run 25178930763). Exception: + // args[0] may be the wildcard token if the user genuinely + // means "any binary at this path". + if e.Args[0] != e.Path && e.Args[0] != wildcardIdentifier { + add("R-AP-12", fmt.Sprintf("containers[%d].execs[%d].args[0] = %q, must equal path %q (eBPF captures argv[0] as full path)", ci, ei, e.Args[0], e.Path)) + } + + for ai, a := range e.Args { + if a == "" { + add("R-AP-13", fmt.Sprintf("containers[%d].execs[%d].args[%d] is empty", ci, ei, ai)) + } + if strings.Contains(a, dynamicIdentifier) && a != dynamicIdentifier { + add("R-AP-13", fmt.Sprintf("containers[%d].execs[%d].args[%d] = %q — ⋯ must be its own token, not embedded", ci, ei, ai, a)) + } + } + } + + for oi, o := range c.Opens { + if o.Path == "" { + add("R-AP-20", fmt.Sprintf("containers[%d].opens[%d].path is empty", ci, oi)) + continue + } + if !strings.HasPrefix(o.Path, "/") { + add("R-AP-20", fmt.Sprintf("containers[%d].opens[%d].path %q must be absolute", ci, oi, o.Path)) + } + if len(o.Flags) == 0 { + add("R-AP-21", fmt.Sprintf("containers[%d].opens[%d].flags is empty", ci, oi)) + } + for fi, f := range o.Flags { + if !validOpenFlags[f] { + add("R-AP-22", fmt.Sprintf("containers[%d].opens[%d].flags[%d] = %q — not a recognised O_* flag (typo?)", ci, oi, fi, f)) + } + } + } + } + return v +} + +// --------------------------------------------------------------------------- +// Test layer — walk YAMLs in this directory, run the linter, surface +// violations as t.Errorf. +// --------------------------------------------------------------------------- + +func TestApplicationProfileFixturesLint(t *testing.T) { + matches, err := filepath.Glob("*.yaml") + if err != nil { + t.Fatalf("glob: %v", err) + } + if len(matches) == 0 { + t.Skip("no YAML fixtures found — running outside tests/resources?") + } + + for _, p := range matches { + p := p + t.Run(filepath.Base(p), func(t *testing.T) { + data, err := os.ReadFile(p) + if err != nil { + t.Fatalf("read %s: %v", p, err) + } + if !strings.Contains(string(data), "kind: ApplicationProfile") { + t.Skipf("not an ApplicationProfile fixture") + } + for _, v := range LintApplicationProfileYAML(data, p) { + t.Errorf("%s", v) + } + }) + } +} + +// --------------------------------------------------------------------------- +// Self-tests — feed deliberately-bad YAML, verify the expected rule fires. +// Pin rule semantics so a refactor can't silently drop a check. +// --------------------------------------------------------------------------- + +func ruleFired(violations []Violation, ruleID string) bool { + for _, v := range violations { + if v.Rule == ruleID { + return true + } + } + return false +} + +func TestLinter_R_AP_12_argv0_must_be_full_path(t *testing.T) { + bad := []byte(` +apiVersion: spdx.softwarecomposition.kubescape.io/v1beta1 +kind: ApplicationProfile +metadata: { name: bad } +spec: + containers: + - name: c + execs: + - path: /bin/sh + args: ["sh", "-c", "echo hi"] +`) + if !ruleFired(LintApplicationProfileYAML(bad, ""), "R-AP-12") { + t.Fatal("expected R-AP-12 violation for basename argv[0]") + } +} + +func TestLinter_R_AP_11_path_no_wildcards(t *testing.T) { + bad := []byte(` +apiVersion: spdx.softwarecomposition.kubescape.io/v1beta1 +kind: ApplicationProfile +metadata: { name: bad } +spec: + containers: + - name: c + execs: + - path: /usr/bin/* + args: ["/usr/bin/curl"] +`) + if !ruleFired(LintApplicationProfileYAML(bad, ""), "R-AP-11") { + t.Fatal("expected R-AP-11 violation for wildcard in path") + } +} + +func TestLinter_R_AP_22_unknown_open_flag(t *testing.T) { + bad := []byte(` +apiVersion: spdx.softwarecomposition.kubescape.io/v1beta1 +kind: ApplicationProfile +metadata: { name: bad } +spec: + containers: + - name: c + opens: + - path: /etc/passwd + flags: ["O_RDONLY", "O_LARGEFLE"] +`) + if !ruleFired(LintApplicationProfileYAML(bad, ""), "R-AP-22") { + t.Fatal("expected R-AP-22 violation for typo'd flag") + } +} + +func TestLinter_R_AP_10_path_must_be_absolute(t *testing.T) { + bad := []byte(` +apiVersion: spdx.softwarecomposition.kubescape.io/v1beta1 +kind: ApplicationProfile +metadata: { name: bad } +spec: + containers: + - name: c + execs: + - path: bin/sh + args: ["bin/sh"] +`) + if !ruleFired(LintApplicationProfileYAML(bad, ""), "R-AP-10") { + t.Fatal("expected R-AP-10 violation for relative path") + } +} + +func TestLinter_R_AP_12_wildcard_argv0_allowed(t *testing.T) { + // args[0] = "*" is the rare-but-legal "match any binary at this path" case. + ok := []byte(` +apiVersion: spdx.softwarecomposition.kubescape.io/v1beta1 +kind: ApplicationProfile +metadata: { name: ok } +spec: + containers: + - name: c + execs: + - path: /bin/sh + args: ["*"] +`) + if ruleFired(LintApplicationProfileYAML(ok, ""), "R-AP-12") { + t.Fatal("R-AP-12 must NOT fire when args[0] is the wildcard token") + } +} + +func TestLinter_canonical_AP_passes(t *testing.T) { + // The fork's reference profile (from fea3b062) is the gold standard; + // regressions here mean the linter has drifted from real-world syntax. + data, err := os.ReadFile("known-application-profile.yaml") + if err != nil { + t.Skipf("canonical AP fixture not present: %v", err) + } + violations := LintApplicationProfileYAML(data, "known-application-profile.yaml") + if len(violations) > 0 { + for _, v := range violations { + t.Errorf("%s", v) + } + } +} diff --git a/tests/resources/known-application-profile.yaml b/tests/resources/known-application-profile.yaml new file mode 100644 index 000000000..b80294157 --- /dev/null +++ b/tests/resources/known-application-profile.yaml @@ -0,0 +1,245 @@ +## +## User-defined ApplicationProfile for Test_28. +## +## Referenced directly from a pod via the label: +## kubescape.io/user-defined-profile: fusioncore-profile +## +## Modeled after a real auto-learned AP from curlimages/curl:8.5.0. +## +## Usage: +## sed "s/{{NAMESPACE}}/$NS/g" known-application-profile.yaml \ +## | kubectl apply -f - +## +apiVersion: spdx.softwarecomposition.kubescape.io/v1beta1 +kind: ApplicationProfile +metadata: + name: fusioncore-profile + namespace: "{{NAMESPACE}}" +spec: + architectures: ["amd64"] + containers: + - name: curl + imageID: "docker.io/curlimages/curl@sha256:08e466006f0860e54fc299378de998935333e0e130a15f6f98482e9f8dab3058" + imageTag: "docker.io/curlimages/curl:8.5.0" + capabilities: + - CAP_CHOWN + - CAP_DAC_OVERRIDE + - CAP_DAC_READ_SEARCH + - CAP_SETGID + - CAP_SETPCAP + - CAP_SETUID + - CAP_SYS_ADMIN + execs: + - path: /bin/sleep + args: ["/bin/sleep", "infinity"] + - path: /bin/cat + args: ["/bin/cat"] + - path: /usr/bin/curl + args: ["/usr/bin/curl", "-sm2", "fusioncore.ai"] + - path: /usr/bin/nslookup + args: ["/usr/bin/nslookup"] + opens: + - path: /7/setgroups + flags: ["O_RDONLY", "O_CLOEXEC"] + - path: /etc/hosts + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /etc/ld-musl-x86_64.path + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /etc/passwd + flags: ["O_RDONLY", "O_CLOEXEC", "O_LARGEFILE"] + - path: /etc/resolv.conf + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /etc/ssl/openssl.cnf + flags: ["O_RDONLY", "O_LARGEFILE"] + - path: /home/curl_user/.config/curlrc + flags: ["O_RDONLY", "O_LARGEFILE"] + - path: /home/curl_user/.curlrc + flags: ["O_RDONLY", "O_LARGEFILE"] + - path: /lib/libbrotlicommon.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libbrotlidec.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libcom_err.so.2.1 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /lib/libcrypto.so.3 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /lib/libcurl.so.4 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libgssapi_krb5.so.2 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libidn2.so.0 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libk5crypto.so.3 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /lib/libkeyutils.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libkrb5.so.3 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libkrb5support.so.0 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libnghttp2.so.14 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /lib/libpsl.so.5 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libssh2.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libssl.so.3 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /lib/libunistring.so.5 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /lib/libz.so.1.3 + flags: ["O_LARGEFILE", "O_CLOEXEC", "O_RDONLY"] + - path: /proc/⋯/cgroup + flags: ["O_RDONLY", "O_CLOEXEC"] + - path: /proc/⋯/kernel/cap_last_cap + flags: ["O_RDONLY", "O_CLOEXEC"] + - path: /proc/⋯/mountinfo + flags: ["O_RDONLY", "O_CLOEXEC"] + - path: /proc/⋯/task/1/fd + flags: ["O_RDONLY", "O_DIRECTORY", "O_CLOEXEC"] + - path: /proc/⋯/task/7/fd + flags: ["O_RDONLY", "O_DIRECTORY", "O_CLOEXEC"] + - path: /runc + flags: ["O_RDONLY", "O_CLOEXEC"] + - path: /sys/fs/cgroup/cpu.max + flags: ["O_RDONLY", "O_CLOEXEC"] + - path: /sys/kernel/mm/transparent_hugepage/hpage_pmd_size + flags: ["O_RDONLY"] + - path: /usr/lib/libbrotlicommon.so.1.1.0 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libbrotlidec.so.1.1.0 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libcurl.so.4.8.0 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libgssapi_krb5.so.2.2 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libidn2.so.0.3.8 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /usr/lib/libk5crypto.so.3.1 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /usr/lib/libkeyutils.so.1.10 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libkrb5.so.3.3 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /usr/lib/libkrb5support.so.0.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libnghttp2.so.14.25.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libpsl.so.5.3.4 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /usr/lib/libssh2.so.1.0.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/lib/libunistring.so.5.0.0 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libbrotlicommon.so.1 + flags: ["O_LARGEFILE", "O_CLOEXEC", "O_RDONLY"] + - path: /usr/local/lib/libbrotlidec.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libcom_err.so.2 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /usr/local/lib/libcrypto.so.3 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libcurl.so.4 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libgssapi_krb5.so.2 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libidn2.so.0 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libk5crypto.so.3 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libkeyutils.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libkrb5.so.3 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libkrb5support.so.0 + flags: ["O_CLOEXEC", "O_RDONLY", "O_LARGEFILE"] + - path: /usr/local/lib/libnghttp2.so.14 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libpsl.so.5 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libssh2.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libssl.so.3 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libunistring.so.5 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + - path: /usr/local/lib/libz.so.1 + flags: ["O_RDONLY", "O_LARGEFILE", "O_CLOEXEC"] + syscalls: + - arch_prctl + - bind + - brk + - capget + - capset + - chdir + - clone + - close + - close_range + - connect + - epoll_ctl + - epoll_pwait + - execve + - exit + - exit_group + - faccessat2 + - fchown + - fcntl + - fstat + - fstatfs + - futex + - getcwd + - getdents64 + - getegid + - geteuid + - getgid + - getpeername + - getppid + - getsockname + - getsockopt + - gettid + - getuid + - ioctl + - membarrier + - mmap + - mprotect + - munmap + - nanosleep + - newfstatat + - open + - openat + - openat2 + - pipe + - poll + - prctl + - read + - recvfrom + - recvmsg + - rt_sigaction + - rt_sigprocmask + - rt_sigreturn + - sendto + - set_tid_address + - setgid + - setgroups + - setsockopt + - setuid + - sigaltstack + - socket + - statx + - tkill + - unknown + - write + - writev + endpoints: + - endpoint: ":80/" + direction: outbound + methods: ["GET"] + internal: false + headers: '{"Host":["fusioncore.ai"]}' + seccompProfile: + spec: + defaultAction: "" + rulePolicies: {} + initContainers: [] + ephemeralContainers: [] +status: {} From 7c10baa46c6ef32018a29c4287cf5e4d1e97b916 Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 20:26:13 +0200 Subject: [PATCH 09/25] fix(tamper_alert): accept self-signed profiles, only flag actual tamper Switch verifyUser{ApplicationProfile,NetworkNeighborhood} from strict VerifyObject to VerifyObjectAllowUntrusted. The strict variant requires a Sigstore Fulcio trust chain and rejects locally-signed profiles even when the signature against the embedded cert is valid. That made Test_31b 'untampered_signed_AP_no_R1016' fire R1016 against an untampered AP, and broke Test_30's 'tampered_profile_loaded_without_ enforcement' subtest the same way. The intent is: tamper detection, not trust-chain enforcement. Matches cmd/sign-object/main.go's default verifier. --- pkg/objectcache/containerprofilecache/tamper_alert.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/objectcache/containerprofilecache/tamper_alert.go b/pkg/objectcache/containerprofilecache/tamper_alert.go index 2d972151d..6caf7c764 100644 --- a/pkg/objectcache/containerprofilecache/tamper_alert.go +++ b/pkg/objectcache/containerprofilecache/tamper_alert.go @@ -59,7 +59,11 @@ func (c *ContainerProfileCacheImpl) verifyUserApplicationProfile(profile *v1beta if !signature.IsSigned(adapter) { return true } - if err := signature.VerifyObject(adapter); err != nil { + // AllowUntrusted: accept self-signed/local-CA signatures as long as the + // signature itself verifies against the cert in the annotations. We only + // want to flag actual tampering, not the absence of a Sigstore Fulcio + // trust chain. Matches `cmd/sign-object`'s default verifier. + if err := signature.VerifyObjectAllowUntrusted(adapter); err != nil { logger.L().Warning("user-defined ApplicationProfile signature verification failed (tamper detected)", helpers.String("profile", profile.Name), helpers.String("namespace", profile.Namespace), @@ -82,7 +86,7 @@ func (c *ContainerProfileCacheImpl) verifyUserNetworkNeighborhood(nn *v1beta1.Ne if !signature.IsSigned(adapter) { return true } - if err := signature.VerifyObject(adapter); err != nil { + if err := signature.VerifyObjectAllowUntrusted(adapter); err != nil { logger.L().Warning("user-defined NetworkNeighborhood signature verification failed (tamper detected)", helpers.String("profile", nn.Name), helpers.String("namespace", nn.Namespace), From 4d374d525f56085ddfd6665335cd07c8f6381e90 Mon Sep 17 00:00:00 2001 From: Entlein Date: Thu, 30 Apr 2026 21:12:22 +0200 Subject: [PATCH 10/25] test(component): make Test_30 30b deterministic by re-execing inside Eventually The single-shot wget exec before Eventually was racy: if the eBPF event landed before the CP cache projected the user-defined AP, the rule manager evaluated against an empty baseline and R0001 never fired within the 60s polling window. Same race Test_29 already documents. Drive the wget exec inside the Eventually loop (10s tick, 120s deadline) so cache-load latency is absorbed by retries. Filter R0001 to comm=wget to make the assertion specific instead of catching any R0001. Drops the blind 15s pre-sleep and the redundant settle-then-recount block. --- tests/component_test.go | 52 +++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/tests/component_test.go b/tests/component_test.go index 565db5e66..47fd14f95 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2972,34 +2972,35 @@ func Test_30_TamperedSignedProfiles(t *testing.T) { path.Join(utils.CurrentDir(), "resources/curl-signed-deployment.yaml")) require.NoError(t, err) require.NoError(t, wl.WaitForReady(80)) - time.Sleep(15 * time.Second) // let node-agent load profiles - - // Execute nslookup — the attacker added this to the whitelist. - // With enforcement OFF: profile is loaded despite invalid signature, - // so nslookup is "allowed" and R0001 should NOT fire for it. - wl.ExecIntoPod([]string{"nslookup", "ebpf.io"}, "curl") - - // Execute wget — NOT in the AP (even after tampering). - wl.ExecIntoPod([]string{"wget", "-qO-", "--timeout=5", "http://ebpf.io"}, "curl") - // Wait for alerts. + // Drive the unexpected exec inside Eventually so cache-load latency + // is absorbed by retries instead of a blind sleep. Same pattern as + // Test_29 (signed AP, anomalous exec) — without it, the first exec + // can land before the CP cache projects the user-defined AP, the + // rule manager evaluates against an empty baseline, and R0001 never + // fires within the polling window. + // + // wget is NOT in the AP (even after the attacker added nslookup), so + // once the cache loads, every wget exec produces an R0001 alert. var alerts []testutils.Alert require.Eventually(t, func() bool { + wl.ExecIntoPod([]string{"wget", "-qO-", "--timeout=2", "http://ebpf.io"}, "curl") alerts, err = testutils.GetAlerts(ns.Name) - if err != nil || len(alerts) == 0 { + if err != nil { return false } for _, a := range alerts { - if a.Labels["rule_id"] == "R0001" { + if a.Labels["rule_id"] == "R0001" && a.Labels["comm"] == "wget" { return true } } return false - }, 60*time.Second, 5*time.Second, "wget not in tampered AP must fire R0001") + }, 120*time.Second, 10*time.Second, + "wget not in tampered AP must fire R0001 — proves tampered profile was loaded (enforcement off)") + // Settle so any pending alerts flush, then dump for diagnostics. time.Sleep(10 * time.Second) alerts, _ = testutils.GetAlerts(ns.Name) - t.Logf("=== %d alerts ===", len(alerts)) for i, a := range alerts { t.Logf(" [%d] %s(%s) comm=%s container=%s", @@ -3007,25 +3008,14 @@ func Test_30_TamperedSignedProfiles(t *testing.T) { a.Labels["comm"], a.Labels["container_name"]) } - // R0001 must have fired (tampered profile was loaded and used). - r0001Count := 0 - for _, a := range alerts { - if a.Labels["rule_id"] == "R0001" { - r0001Count++ - } - } - require.Greater(t, r0001Count, 0, - "R0001 must fire — proves tampered profile was loaded (enableSignatureVerification=false)") - - // No dedicated tamper-detection alert exists (no R-number for this). // With enableSignatureVerification=true: - // - The tampered AP would be rejected (verifyApplicationProfile returns error) - // - ProfileState.Status would be set to "verification-failed" - // - The pod would have no baseline → no anomaly rules fire + // - The tampered AP would be rejected (verifyUserApplicationProfile returns false) + // - The pod would have no baseline → no anomaly rules fire for wget // - System fails OPEN (attacker evades detection by tampering the profile) - // - NOTE: user-defined NNs in addContainer() are NOT verified (known gap) - t.Log("NOTE: No tamper-detection alert rule exists. With enableSignatureVerification=true,") - t.Log(" the tampered profile would be silently rejected. No R-number fires for tampering.") + // - NOTE: user-defined NNs are not yet gated on the same flag (known gap) + // R1016 ("Signed profile tampered") fires regardless of the flag — that + // path is handled by Test_31. + t.Log("With enableSignatureVerification=true, the tampered profile would be silently rejected.") }) } From a59e284b13f4b4997b15b04e063a9f9d548dbff8 Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 16:30:53 +0200 Subject: [PATCH 11/25] deps(storage): bump replace to f44fed80 (analyzer trailing-* fix) Picks up the upstream-PR-#316 review fix: trailing WildcardIdentifier now requires at least one regular-path segment, matching standard glob semantics. Closes the R0002 blind spot where '/etc/*' would silently match the bare '/etc' directory. --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index bbe27ac05..fc1bd6420 100644 --- a/go.mod +++ b/go.mod @@ -507,4 +507,4 @@ replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspe replace github.com/cilium/ebpf => github.com/matthyx/ebpf v0.0.0-20260421101317-8a32d06def6c -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260430094046-0de34ebc3741 +replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260501142939-f44fed80762e diff --git a/go.sum b/go.sum index 4417bd604..cfd3a3ac6 100644 --- a/go.sum +++ b/go.sum @@ -981,8 +981,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k8sstormcenter/storage v0.0.240-0.20260430094046-0de34ebc3741 h1:evbEXsfDrdev1zMLJRHDyyDnrBOKv94ZLktN+V+Ecjw= -github.com/k8sstormcenter/storage v0.0.240-0.20260430094046-0de34ebc3741/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= +github.com/k8sstormcenter/storage v0.0.240-0.20260501142939-f44fed80762e h1:pGkFOoYOEwk/0GwXYq2ylhfXnlIa5i1aa2qZflANKGg= +github.com/k8sstormcenter/storage v0.0.240-0.20260501142939-f44fed80762e/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= From bcf41ea5ef0b9b068f4fbbf7ffafd11d42102821 Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 20:40:47 +0200 Subject: [PATCH 12/25] deps(storage): bump replace to 4ab95fb8 (PR #25 merged on fork main) Pulls in the full PR-#316 review fix set that just landed on storage main: proper splitPath-based trailing-* anchoring, DefaultCollapseConfigs() defensive-copy accessor, FindConfigForPath value-return, splitEndpoint defensive guard, plus the BenchmarkCompareDynamic baseline. --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index fc1bd6420..a82c675e4 100644 --- a/go.mod +++ b/go.mod @@ -507,4 +507,4 @@ replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspe replace github.com/cilium/ebpf => github.com/matthyx/ebpf v0.0.0-20260421101317-8a32d06def6c -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260501142939-f44fed80762e +replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260501173152-4ab95fb87ba2 diff --git a/go.sum b/go.sum index cfd3a3ac6..defe0d250 100644 --- a/go.sum +++ b/go.sum @@ -981,8 +981,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k8sstormcenter/storage v0.0.240-0.20260501142939-f44fed80762e h1:pGkFOoYOEwk/0GwXYq2ylhfXnlIa5i1aa2qZflANKGg= -github.com/k8sstormcenter/storage v0.0.240-0.20260501142939-f44fed80762e/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= +github.com/k8sstormcenter/storage v0.0.240-0.20260501173152-4ab95fb87ba2 h1:0yjl3JVdk20Ij2ppjTJ5+awY4/EIuzF9uotncB36SqI= +github.com/k8sstormcenter/storage v0.0.240-0.20260501173152-4ab95fb87ba2/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= From 9385562a6668fd4d8f5b4f4a0e30c611d943f1c2 Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 20:40:47 +0200 Subject: [PATCH 13/25] test(component): Test_33_AnalyzeOpensWildcardAnchoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit End-to-end pin of the storage-side CompareDynamic contract through R0002. Each subtest deploys a fresh nginx pod with a user-defined AP carrying ONE Opens entry, then `cat`s a target path that probes a boundary case from the storage analyzer fixes (kubescape/storage #316 review by matthyx + entlein): - Anchored trailing `*` matches one OR MORE remaining segments — never zero. So /etc/* matches /etc/passwd but NOT bare /etc. - DynamicIdentifier (⋯) consumes EXACTLY ONE segment. - Mid-path `*` is zero-or-more, so /etc/*/* matches /etc/ssh (inner * consumes zero, trailing * consumes one). - Mixed ⋯/* combinations: ⋯ pins one, * consumes the rest. - splitPath normalises trailing slashes on both sides. 11 subtests covering: trailing_star_matches_immediate_child — basic /etc/* match trailing_star_matches_deep_child — multi-segment under prefix trailing_star_does_not_match_bare_parent — the security fix deep_prefix_trailing_star_does_not_match_parent — same rule, deeper ellipsis_pin_one_segment_then_literal — ⋯ rejects zero ellipsis_then_trailing_star_matches_two_* — ⋯/* combo, 2 levels ellipsis_then_trailing_star_matches_three_* — ⋯/* combo, 3 levels double_trailing_matches_one_child — /*/* mid-zero double_trailing_matches_deep_child — /*/* mid-one double_trailing_does_not_match_parent — /*/* needs ≥1 child trailing_slash_in_profile_normalises_to_literal — splitPath on profile Pinned at component level on TOP of the unit suite in storage/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go. Both layers must agree — a drift in either lights up R0002 with a false positive or false negative. Matrix entry added to component-tests.yaml so the test runs in CI. --- .github/workflows/component-tests.yaml | 3 +- tests/component_test.go | 279 +++++++++++++++++++++++++ 2 files changed, 281 insertions(+), 1 deletion(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index f15603276..b3133464a 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -246,7 +246,8 @@ jobs: Test_29_SignedApplicationProfile, Test_30_TamperedSignedProfiles, Test_31_TamperDetectionAlert, - Test_32_UnexpectedProcessArguments + Test_32_UnexpectedProcessArguments, + Test_33_AnalyzeOpensWildcardAnchoring ] steps: - name: Checkout code diff --git a/tests/component_test.go b/tests/component_test.go index 47fd14f95..9e3a872db 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3443,3 +3443,282 @@ func Test_32_UnexpectedProcessArguments(t *testing.T) { "echo goodbye mismatches profile [echo, hello, *] (literal anchor) → R0040 must fire") }) } + +// Test_33_AnalyzeOpensWildcardAnchoring pins the wildcard-matching +// contract that storage-side CompareDynamic enforces, end-to-end through +// R0002 ("Files Access Anomalies in container"). +// +// Each subtest spins up a fresh nginx pod with a user-defined AP that +// carries ONE Opens entry, then `cat`s a target path that probes a +// boundary case from the storage-side analyzer fixes (kubescape/storage +// PR #316 review by matthyx + entlein): +// +// - Anchored trailing `*` matches one OR MORE remaining segments — +// never zero. So `/etc/*` matches `/etc/passwd` but NOT the bare +// `/etc` directory. Without this rule, R0002 silently allowed +// access to the parent of any profiled directory. +// - DynamicIdentifier (⋯) consumes EXACTLY ONE segment. +// - Mid-path `*` consumes ZERO or more, so `/etc/*/*` still matches +// `/etc/ssh` (inner `*` consumed zero, trailing `*` consumed one). +// - splitPath normalises trailing slashes on both dynamic and +// regular paths so `/etc/passwd/` is treated as `/etc/passwd`. +// - Mixed `⋯/*` patterns: ⋯ pins one segment, `*` consumes the rest +// (with one-or-more semantics). +// +// Component-level pin sits ON TOP of the unit tests in storage's +// pkg/registry/file/dynamicpathdetector/tests/coverage_test.go. +// Both layers must agree — if the unit suite drifts away from these +// runtime expectations, R0002 has either a false-positive or a +// false-negative bug. +func Test_33_AnalyzeOpensWildcardAnchoring(t *testing.T) { + start := time.Now() + defer tearDownTest(t, start) + + const ruleName = "Files Access Anomalies in container" + const profileName = "nginx-regex-profile" + + type subtestResult struct { + name string + profilePath string + filePath string + expectAlert bool + passed bool + detail string + } + var results []subtestResult + addResult := func(name, profilePath, filePath string, expectAlert, passed bool, detail string) { + results = append(results, subtestResult{name, profilePath, filePath, expectAlert, passed, detail}) + } + defer func() { + t.Log("\n========== Test_33 Summary ==========") + anyFailed := false + for _, r := range results { + status := "PASS" + if !r.passed { + status = "FAIL" + anyFailed = true + } + expect := "expect alert" + if !r.expectAlert { + expect = "expect NO alert" + } + t.Logf(" [%s] %-50s profile=%-25s file=%-30s %s", status, r.name, r.profilePath, r.filePath, expect) + if !r.passed { + t.Logf(" -> %s", r.detail) + } + } + if !anyFailed { + t.Log(" All subtests passed.") + } + t.Log("======================================") + }() + + // deployWithProfile creates a user-defined AP with a single Opens + // entry (plus a couple of always-needed paths nginx hits at startup), + // then deploys nginx with the user-defined-profile label pointing at + // it and waits for the pod + cache load. + deployWithProfile := func(t *testing.T, profilePath string) *testutils.TestWorkload { + t.Helper() + ns := testutils.NewRandomNamespace() + + profile := &v1beta1.ApplicationProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: profileName, + Namespace: ns.Name, + }, + Spec: v1beta1.ApplicationProfileSpec{ + Architectures: []string{"amd64"}, + Containers: []v1beta1.ApplicationProfileContainer{ + { + Name: "nginx", + Execs: []v1beta1.ExecCalls{ + {Path: "/bin/cat", Args: []string{"/bin/cat"}}, + }, + Opens: []v1beta1.OpenCalls{ + {Path: profilePath, Flags: []string{"O_RDONLY"}}, + // Dynamic linker fires this on every exec — keep + // it whitelisted so it doesn't drown out the + // signal we actually care about. + {Path: "/etc/ld.so.cache", Flags: []string{"O_RDONLY", "O_CLOEXEC"}}, + }, + }, + }, + }, + } + + k8sClient := k8sinterface.NewKubernetesApi() + storageClient := spdxv1beta1client.NewForConfigOrDie(k8sClient.K8SConfig) + _, err := storageClient.ApplicationProfiles(ns.Name).Create( + context.Background(), profile, metav1.CreateOptions{}) + require.NoError(t, err, "create user-defined profile %q in ns %s", profileName, ns.Name) + + require.Eventually(t, func() bool { + _, apErr := storageClient.ApplicationProfiles(ns.Name).Get( + context.Background(), profileName, v1.GetOptions{}) + return apErr == nil + }, 30*time.Second, 1*time.Second, "AP must be retrievable from storage before deploying the pod") + + wl, err := testutils.NewTestWorkload(ns.Name, + path.Join(utils.CurrentDir(), "resources/nginx-user-profile-deployment.yaml")) + require.NoError(t, err, "create workload in ns %s", ns.Name) + require.NoError(t, wl.WaitForReady(80), "workload not ready in ns %s", ns.Name) + + // Wait for node-agent to load the user-defined profile into cache. + time.Sleep(10 * time.Second) + return wl + } + + // catAndAlerts execs `cat ` (ignoring cat's own exit error — + // catting a directory or a non-readable file still triggers the + // open() syscall the eBPF tracer captures), then polls for alerts. + catAndAlerts := func(t *testing.T, wl *testutils.TestWorkload, filePath string) []testutils.Alert { + t.Helper() + stdout, stderr, _ := wl.ExecIntoPod([]string{"cat", filePath}, "nginx") + t.Logf("cat %q → stdout=%q stderr=%q", filePath, stdout, stderr) + + var alerts []testutils.Alert + require.Eventually(t, func() bool { + a, err := testutils.GetAlerts(wl.Namespace) + if err != nil { + return false + } + alerts = a + return true + }, 60*time.Second, 5*time.Second, "alerts must be retrievable from ns %s", wl.Namespace) + // Settle so any late R0002 alert lands before we count. + time.Sleep(10 * time.Second) + alerts, err := testutils.GetAlerts(wl.Namespace) + require.NoError(t, err, "get alerts from ns %s", wl.Namespace) + return alerts + } + + // hasR0002 returns true if any R0002 alert fired for `cat` in the + // nginx container. + hasR0002 := func(alerts []testutils.Alert) bool { + for _, a := range alerts { + if a.Labels["rule_name"] == ruleName && + a.Labels["comm"] == "cat" && + a.Labels["container_name"] == "nginx" { + return true + } + } + return false + } + + tests := []struct { + name string + profilePath string + filePath string + expectAlert bool + why string // contract pinned by this case + }{ + // ─── Trailing-`*` anchoring (the security fix) ────────────── + { + name: "trailing_star_matches_immediate_child", + profilePath: "/etc/*", + filePath: "/etc/hosts", + expectAlert: false, + why: "/etc/* matches a one-segment child under /etc", + }, + { + name: "trailing_star_matches_deep_child", + profilePath: "/etc/*", + filePath: "/etc/ssl/openssl.cnf", + expectAlert: false, + why: "/etc/* matches a multi-segment path under /etc (mid-path zero-or-more)", + }, + { + name: "trailing_star_does_not_match_bare_parent", + profilePath: "/etc/*", + filePath: "/etc", + expectAlert: true, + why: "/etc/* must NOT match the bare /etc directory itself — closes R0002 blind spot for parent-of-profiled-dir tampering", + }, + { + name: "deep_prefix_trailing_star_does_not_match_parent", + profilePath: "/var/log/*", + filePath: "/var/log", + expectAlert: true, + why: "Same anchoring rule applies at any depth: /var/log/* does NOT match /var/log", + }, + + // ─── DynamicIdentifier (⋯) exactly-one ────────────────────── + { + name: "ellipsis_pin_one_segment_then_literal", + profilePath: "/proc/" + dynamicpathdetector.DynamicIdentifier + "/cmdline", + filePath: "/proc/cmdline", + expectAlert: true, + why: "⋯ consumes EXACTLY ONE segment; /proc/⋯/cmdline must NOT match /proc/cmdline (zero between)", + }, + + // ─── Mixed ⋯/* combinations ───────────────────────────────── + { + name: "ellipsis_then_trailing_star_matches_two_segment_tail", + profilePath: "/proc/" + dynamicpathdetector.DynamicIdentifier + "/*", + filePath: "/proc/1/status", + expectAlert: false, + why: "/proc/⋯/* matches /proc/1/status (⋯ consumes 1, * consumes ≥1)", + }, + { + name: "ellipsis_then_trailing_star_matches_three_segment_tail", + profilePath: "/proc/" + dynamicpathdetector.DynamicIdentifier + "/*", + filePath: "/proc/1/task/1", + expectAlert: false, + why: "/proc/⋯/* matches deeper paths (⋯ consumes 1, * consumes ≥1 covering rest)", + }, + + // ─── Multiple trailing wildcards ──────────────────────────── + { + name: "double_trailing_matches_one_child", + profilePath: "/etc/*/*", + filePath: "/etc/ssl", + expectAlert: false, + why: "/etc/*/* matches /etc/ssh (mid-* consumes zero, trailing-* consumes one)", + }, + { + name: "double_trailing_matches_deep_child", + profilePath: "/etc/*/*", + filePath: "/etc/ssl/openssl.cnf", + expectAlert: false, + why: "/etc/*/* matches /etc/ssl/openssl.cnf (mid-* consumes one, trailing-* consumes one)", + }, + { + name: "double_trailing_does_not_match_parent", + profilePath: "/etc/*/*", + filePath: "/etc", + expectAlert: true, + why: "/etc/*/* requires at least one segment past /etc; bare /etc must NOT match", + }, + + // ─── splitPath trailing-slash normalisation ───────────────── + { + name: "trailing_slash_in_profile_normalises_to_literal", + profilePath: "/etc/passwd/", + filePath: "/etc/passwd", + expectAlert: false, + why: "Profile `/etc/passwd/` is normalised to `/etc/passwd`; matches the literal at runtime", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Logf("contract: %s", tc.why) + wl := deployWithProfile(t, tc.profilePath) + alerts := catAndAlerts(t, wl, tc.filePath) + got := hasR0002(alerts) + + detail := fmt.Sprintf("got %d alerts total; R0002 fired = %v", len(alerts), got) + passed := got == tc.expectAlert + if !passed { + if tc.expectAlert { + t.Errorf("expected R0002 alert: profile %q must NOT match %q (%s); but no alert fired", + tc.profilePath, tc.filePath, tc.why) + } else { + t.Errorf("expected NO R0002 alert: profile %q should match %q (%s); but alert fired", + tc.profilePath, tc.filePath, tc.why) + } + } + addResult(tc.name, tc.profilePath, tc.filePath, tc.expectAlert, passed, detail) + }) + } +} From cb5767473afdf2c1ee56803e2166f698c8a460dd Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 21:05:00 +0200 Subject: [PATCH 14/25] test(component): rework Test_33 negative cases to probe under R0002's monitored prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R0002's CEL ruleExpression has a strict path-prefix filter: event.path.startsWith('/etc/') || event.path.startsWith('/var/log/') || ... All with trailing slash. Bare /etc and /var/log don't pass the filter, so R0002 never evaluates on those events — the matcher's bare-parent anchoring contract stays invisible at runtime even though the storage unit tests pin it. Probe one level deeper instead: /etc/ssl IS under the /etc/ monitored prefix, so the rule CAN see whether a /etc/ssl/* profile entry matches the bare /etc/ssl parent. Same security guarantee, observable layer. Reworked subtests: - trailing_star_does_not_match_bare_parent_under_monitored_prefix: profile /etc/ssl/*, cat /etc/ssl → R0002 fires - deep_prefix_trailing_star_does_not_match_parent: profile /etc/ssl/certs/*, cat /etc/ssl/certs → R0002 fires - ellipsis_requires_one_segment_not_zero: profile /etc/passwd/⋯, cat /etc/passwd → ⋯ requires one more segment - double_trailing_does_not_match_parent_under_monitored_prefix: profile /etc/ssl/*/*, cat /etc/ssl → R0002 fires The 7 positive subtests that already passed are untouched. Added a comment block documenting why we probe at /etc/ssl rather than /etc. --- tests/component_test.go | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/tests/component_test.go b/tests/component_test.go index 9e3a872db..eee470014 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3613,6 +3613,15 @@ func Test_33_AnalyzeOpensWildcardAnchoring(t *testing.T) { why string // contract pinned by this case }{ // ─── Trailing-`*` anchoring (the security fix) ────────────── + // + // IMPORTANT: R0002's CEL ruleExpression has a strict prefix + // filter (event.path.startsWith('/etc/'), startsWith('/var/log/'), + // etc. — all with trailing slash). Bare `/etc` and `/var/log` + // don't match those prefixes, so the rule never evaluates on + // them and the matcher's anchoring contract stays invisible at + // runtime. Probe one level deeper instead — `/etc/ssl` IS under + // the `/etc/` monitored prefix, so R0002 CAN see whether a + // `/etc/ssl/*` profile entry matches the bare `/etc/ssl` parent. { name: "trailing_star_matches_immediate_child", profilePath: "/etc/*", @@ -3628,27 +3637,27 @@ func Test_33_AnalyzeOpensWildcardAnchoring(t *testing.T) { why: "/etc/* matches a multi-segment path under /etc (mid-path zero-or-more)", }, { - name: "trailing_star_does_not_match_bare_parent", - profilePath: "/etc/*", - filePath: "/etc", + name: "trailing_star_does_not_match_bare_parent_under_monitored_prefix", + profilePath: "/etc/ssl/*", + filePath: "/etc/ssl", expectAlert: true, - why: "/etc/* must NOT match the bare /etc directory itself — closes R0002 blind spot for parent-of-profiled-dir tampering", + why: "/etc/ssl/* must NOT match the bare /etc/ssl directory itself — pins the security fix at a path R0002's prefix filter can observe", }, { name: "deep_prefix_trailing_star_does_not_match_parent", - profilePath: "/var/log/*", - filePath: "/var/log", + profilePath: "/etc/ssl/certs/*", + filePath: "/etc/ssl/certs", expectAlert: true, - why: "Same anchoring rule applies at any depth: /var/log/* does NOT match /var/log", + why: "Same anchoring rule, deeper: /etc/ssl/certs/* does NOT match /etc/ssl/certs", }, // ─── DynamicIdentifier (⋯) exactly-one ────────────────────── { - name: "ellipsis_pin_one_segment_then_literal", - profilePath: "/proc/" + dynamicpathdetector.DynamicIdentifier + "/cmdline", - filePath: "/proc/cmdline", + name: "ellipsis_requires_one_segment_not_zero", + profilePath: "/etc/passwd/" + dynamicpathdetector.DynamicIdentifier, + filePath: "/etc/passwd", expectAlert: true, - why: "⋯ consumes EXACTLY ONE segment; /proc/⋯/cmdline must NOT match /proc/cmdline (zero between)", + why: "⋯ consumes EXACTLY ONE segment; /etc/passwd/⋯ requires one more, /etc/passwd alone has zero past — must fire R0002", }, // ─── Mixed ⋯/* combinations ───────────────────────────────── @@ -3683,11 +3692,11 @@ func Test_33_AnalyzeOpensWildcardAnchoring(t *testing.T) { why: "/etc/*/* matches /etc/ssl/openssl.cnf (mid-* consumes one, trailing-* consumes one)", }, { - name: "double_trailing_does_not_match_parent", - profilePath: "/etc/*/*", - filePath: "/etc", + name: "double_trailing_does_not_match_parent_under_monitored_prefix", + profilePath: "/etc/ssl/*/*", + filePath: "/etc/ssl", expectAlert: true, - why: "/etc/*/* requires at least one segment past /etc; bare /etc must NOT match", + why: "/etc/ssl/*/* requires at least one segment past /etc/ssl; bare /etc/ssl must NOT match (probed under /etc/ so R0002 sees it)", }, // ─── splitPath trailing-slash normalisation ───────────────── From 484d11c84f9db980e6deca7d6d6a318c04d1ae26 Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 21:27:27 +0200 Subject: [PATCH 15/25] test(component): fix Test_28 + Test_31 31b flakiness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two distinct fixes for what looked like the same intermittent failures across PR #37 runs: Test_31 31b 'untampered_signed_AP_no_R1016' — root cause: storage's PreSave runs DeflateSortString on Syscalls (and Capabilities, Architectures), which sorts + dedupes. The signSignedAP helper signed the AP BEFORE pushing, against unsorted syscalls {socket, connect, read, write, close, openat}. After PreSave the stored AP had sorted {close, connect, openat, read, socket, write}, so the content hash differed from the signature → server-side verify correctly failed → R1016 fired even though the profile was untampered. Test_29 + Test_30 30b had the same fixture but didn't observe the bug because they only assert R0001 counts, never R1016. Pre-sort the syscalls in all three test fixtures so storage's normalization is a no-op on round-trip. Test_28 28a 'allowed_fusioncore_no_alert' — root cause: 15s post-deploy sleep wasn't always enough for the upstream ContainerProfileCache to project the user-defined NN. Failure mode is alert payload `profileMetadata.errorMessage:"waiting for profile update"` — the rule manager evaluated against an unloaded NN and fired R0005/R0011 spuriously. Bumped to 30s with a comment documenting why. A real fix would poll a cache-loaded signal but no such signal is exposed from outside the node-agent today. --- tests/component_test.go | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tests/component_test.go b/tests/component_test.go index eee470014..51e1b00b3 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2259,7 +2259,15 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { path.Join(utils.CurrentDir(), "resources/nginx-user-defined-deployment.yaml")) require.NoError(t, err) require.NoError(t, wl.WaitForReady(80)) - time.Sleep(15 * time.Second) // let node-agent load profiles + // Cache-load latency on the upstream ContainerProfileCache is bursty + // — 15s is enough on a quiet runner but not on a loaded one. The + // failure mode is alert metadata `errorMessage:"waiting for profile + // update"`, which means the rule manager evaluated against an + // unloaded NN and fired R0005/R0011 spuriously. 30s covers the + // observed worst-case in CI without pushing total test time too + // far. Real fix would be to poll a cache-loaded signal, but no + // such signal is exposed today. + time.Sleep(30 * time.Second) return wl } @@ -2671,7 +2679,7 @@ func Test_29_SignedApplicationProfile(t *testing.T) { {Path: "/bin/sleep"}, {Path: "/usr/bin/curl"}, }, - Syscalls: []string{"socket", "connect", "read", "write", "close", "openat"}, + Syscalls: []string{"close", "connect", "openat", "read", "socket", "write"}, }, }, }, @@ -2926,7 +2934,7 @@ func Test_30_TamperedSignedProfiles(t *testing.T) { {Path: "/bin/sleep"}, {Path: "/usr/bin/curl"}, }, - Syscalls: []string{"socket", "connect", "read", "write", "close", "openat"}, + Syscalls: []string{"close", "connect", "openat", "read", "socket", "write"}, }, }, }, @@ -3049,8 +3057,18 @@ func Test_31_TamperDetectionAlert(t *testing.T) { storageClient := spdxv1beta1client.NewForConfigOrDie(k8sClient.K8SConfig) // signSignedAP returns a signed ApplicationProfile in nsName under name. + // + // IMPORTANT: storage's PreSave runs DeflateSortString on Syscalls and + // DeflateSortString on Capabilities, which sorts + dedupes those + // slices. The signed content must already be in storage's normalised + // form, otherwise the post-store AP that the node-agent verifies + // has a different content hash than what was signed → tamper detected + // → R1016 fires incorrectly even on an untampered profile. Pre-sort + // here so storage's normalisation is a no-op on round-trip. signSignedAP := func(t *testing.T, nsName, name string) *v1beta1.ApplicationProfile { t.Helper() + // Already in storage's sorted/deduped form: alphabetical, unique. + syscalls := []string{"close", "connect", "openat", "read", "socket", "write"} ap := &v1beta1.ApplicationProfile{ ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: nsName}, Spec: v1beta1.ApplicationProfileSpec{ @@ -3061,7 +3079,7 @@ func Test_31_TamperDetectionAlert(t *testing.T) { {Path: "/bin/sleep"}, {Path: "/usr/bin/curl"}, }, - Syscalls: []string{"socket", "connect", "read", "write", "close", "openat"}, + Syscalls: syscalls, }, }, }, From 4dd0b3910e4e0ec31d541762411e5bec70b00d85 Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 21:50:06 +0200 Subject: [PATCH 16/25] test(component): sign-after-roundtrip in Test_31 to defeat content-drift R1016 false positive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test_31 31b 'untampered_signed_AP_no_R1016' kept flaking because the AP's content hash drifted between client-side sign and server-side verify across the K8s/storage roundtrip. Sources of drift include storage's PreSave normalisation (DeflateSortString, DeflateStringer, DeflateRulePolicies), signature/profiles GetContent's nil→empty-map mutation on PolicyByRuleId, and any K8s server-side defaulting of spec/metadata fields. Pre-sorting Syscalls in the previous fix only covered one of these. Sign-after-roundtrip closes the whole class: 1. Push the AP UNSIGNED to storage. PreSave runs, normalises content. 2. Read it back — this is what node-agent will see at verify time. 3. Sign THAT normalised content. 4. Delete the unsigned in-storage copy so deployAndWait can Create the signed version without an AlreadyExists conflict. 5. Strip server-managed metadata (resourceVersion / uid / etc.) from the returned AP so the second Create succeeds cleanly. Second push goes through deflate again. Idempotent on already-normalised content → stored bytes identical to signed bytes → content hash matches → verify succeeds → no R1016 false positive. Tampered subtests (31a, 31d) keep working: signSignedAP returns a known-good signed AP, the test mutates it post-helper, deployAndWait Creates the mutated version, storage round-trip preserves the mutation, and verify correctly detects the divergence. --- tests/component_test.go | 65 ++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/tests/component_test.go b/tests/component_test.go index 51e1b00b3..35e5cf324 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3058,16 +3058,25 @@ func Test_31_TamperDetectionAlert(t *testing.T) { // signSignedAP returns a signed ApplicationProfile in nsName under name. // - // IMPORTANT: storage's PreSave runs DeflateSortString on Syscalls and - // DeflateSortString on Capabilities, which sorts + dedupes those - // slices. The signed content must already be in storage's normalised - // form, otherwise the post-store AP that the node-agent verifies - // has a different content hash than what was signed → tamper detected - // → R1016 fires incorrectly even on an untampered profile. Pre-sort - // here so storage's normalisation is a no-op on round-trip. + // IMPORTANT: storage's PreSave normalises spec content (DeflateSortString + // sorts+dedupes Syscalls/Capabilities/Architectures, DeflateStringer + // dedupes Execs, AnalyzeOpens/Endpoints/UnifyIdentifiedCallStacks + // rewrite their respective slices, GetContent injects empty + // PolicyByRuleId maps, and K8s itself may default fields). Signing + // locally and then pushing to storage makes the SIGNED hash mismatch + // the POST-STORE content hash that node-agent's tamper check sees, + // firing R1016 on an untampered profile. + // + // Sign-after-roundtrip eliminates every drift source at once: push + // the AP unsigned, read back the storage-normalised form, sign THAT, + // and let the caller push the signed version (deployAndWait does an + // Update-or-Create, so the second push goes through the same + // idempotent deflate and produces the same content hash). signSignedAP := func(t *testing.T, nsName, name string) *v1beta1.ApplicationProfile { t.Helper() - // Already in storage's sorted/deduped form: alphabetical, unique. + // Pre-sort syscalls so the first roundtrip is a no-op for that field + // — keeps the assertion that "deflate is idempotent on already-sorted + // content" honest. syscalls := []string{"close", "connect", "openat", "read", "socket", "write"} ap := &v1beta1.ApplicationProfile{ ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: nsName}, @@ -3084,8 +3093,44 @@ func Test_31_TamperDetectionAlert(t *testing.T) { }, }, } - require.NoError(t, signature.SignObjectDisableKeyless(profiles.NewApplicationProfileAdapter(ap)), "sign AP") - return ap + + // Round-trip 1: push unsigned, read back the normalised form. + _, err := storageClient.ApplicationProfiles(nsName).Create( + context.Background(), ap, metav1.CreateOptions{}) + require.NoError(t, err, "create unsigned AP for normalisation") + var stored *v1beta1.ApplicationProfile + require.Eventually(t, func() bool { + s, gerr := storageClient.ApplicationProfiles(nsName).Get( + context.Background(), name, v1.GetOptions{}) + if gerr != nil { + return false + } + stored = s + return true + }, 30*time.Second, 1*time.Second, "AP must be retrievable after unsigned create") + + // Sign the storage-normalised content. Now the hash in the signature + // annotation matches what node-agent will see when it loads the AP. + require.NoError(t, + signature.SignObjectDisableKeyless(profiles.NewApplicationProfileAdapter(stored)), + "sign storage-normalised AP") + + // Delete the unsigned in-storage copy so the caller's deployAndWait + // Create succeeds without an AlreadyExists conflict. Storage will + // re-deflate the signed AP on the second push; since that content + // is already normalised, deflate is a no-op and the hash stays + // stable. + require.NoError(t, + storageClient.ApplicationProfiles(nsName).Delete( + context.Background(), name, metav1.DeleteOptions{}), + "delete unsigned AP before caller re-pushes signed version") + // Strip server-managed metadata so the Create call doesn't see a + // stale resourceVersion / uid / creationTimestamp. + stored.ObjectMeta.ResourceVersion = "" + stored.ObjectMeta.UID = "" + stored.ObjectMeta.CreationTimestamp = v1.Time{} + stored.ObjectMeta.Generation = 0 + return stored } signSignedNN := func(t *testing.T, nsName, name string) *v1beta1.NetworkNeighborhood { From 6dda0205c0acb7eb34a536f8c4f5d8a46c790048 Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 22:34:26 +0200 Subject: [PATCH 17/25] test(component): bump Test_33 WaitForReady to 180s for cluster-pressure tolerance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test_33 deploys 11 fresh pods sequentially, one per subtest. Later subtests race against an increasingly loaded kind cluster — CP cache reconciler, alertmanager, prometheus all chew CPU at boot. 80s WaitForReady deadline timed out on the post-23ea224 run with 'workload not ready in ns ...' for early subtests once the cluster got busy. 180s gives headroom without changing total runtime regime. --- tests/component_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/component_test.go b/tests/component_test.go index 35e5cf324..a41318025 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3624,7 +3624,12 @@ func Test_33_AnalyzeOpensWildcardAnchoring(t *testing.T) { wl, err := testutils.NewTestWorkload(ns.Name, path.Join(utils.CurrentDir(), "resources/nginx-user-profile-deployment.yaml")) require.NoError(t, err, "create workload in ns %s", ns.Name) - require.NoError(t, wl.WaitForReady(80), "workload not ready in ns %s", ns.Name) + // 11 subtests deploy a fresh pod sequentially, so each later subtest + // races against an increasingly loaded kind cluster — the upstream + // CP cache reconciler, alertmanager, and prometheus all chew CPU at + // boot. 80s timed out intermittently; 180s gives headroom without + // pushing the total test runtime into a different regime. + require.NoError(t, wl.WaitForReady(180), "workload not ready in ns %s", ns.Name) // Wait for node-agent to load the user-defined profile into cache. time.Sleep(10 * time.Second) From a5af2613447403f3ae142322bc5095f5ced30aae Mon Sep 17 00:00:00 2001 From: Entlein Date: Mon, 4 May 2026 11:02:28 +0200 Subject: [PATCH 18/25] =?UTF-8?q?deps(storage):=20bump=20replace=20to=2043?= =?UTF-8?q?795bb4=20(storage=20feat/exec-arg-wildcards=20tip=20=E2=80=94?= =?UTF-8?q?=20CRDs=20+=20CompareExecArgs)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index a82c675e4..37605d194 100644 --- a/go.mod +++ b/go.mod @@ -507,4 +507,4 @@ replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspe replace github.com/cilium/ebpf => github.com/matthyx/ebpf v0.0.0-20260421101317-8a32d06def6c -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260501173152-4ab95fb87ba2 +replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260503184242-43795bb4f0b6 diff --git a/go.sum b/go.sum index defe0d250..0d4fa4b53 100644 --- a/go.sum +++ b/go.sum @@ -981,8 +981,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k8sstormcenter/storage v0.0.240-0.20260501173152-4ab95fb87ba2 h1:0yjl3JVdk20Ij2ppjTJ5+awY4/EIuzF9uotncB36SqI= -github.com/k8sstormcenter/storage v0.0.240-0.20260501173152-4ab95fb87ba2/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= +github.com/k8sstormcenter/storage v0.0.240-0.20260503184242-43795bb4f0b6 h1:pzIvtCkXBC6t4v7EIIekbltfBnWfvWKjB6ZsgdhkWr0= +github.com/k8sstormcenter/storage v0.0.240-0.20260503184242-43795bb4f0b6/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= From c4ac7b51411fb1b7be246e7efb109ee96aff895e Mon Sep 17 00:00:00 2001 From: Entlein Date: Mon, 4 May 2026 11:36:28 +0200 Subject: [PATCH 19/25] test(aplint): drop redundant p := p loop var (Go 1.22+, copyloopvar lint) CodeRabbit (PR #38). Loop variable shadowing is no longer required since Go 1.22 made each iteration capture its own variable. The shadow trips golangci-lint's copyloopvar check. --- tests/resources/aplint_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/resources/aplint_test.go b/tests/resources/aplint_test.go index 3e1887592..721a2d8ea 100644 --- a/tests/resources/aplint_test.go +++ b/tests/resources/aplint_test.go @@ -212,7 +212,6 @@ func TestApplicationProfileFixturesLint(t *testing.T) { } for _, p := range matches { - p := p t.Run(filepath.Base(p), func(t *testing.T) { data, err := os.ReadFile(p) if err != nil { From e7dc486cdfe535d55526ab4954c72ffce91e1cf8 Mon Sep 17 00:00:00 2001 From: Entlein Date: Mon, 4 May 2026 11:40:30 +0200 Subject: [PATCH 20/25] fix(tamper_alert): R1016 dedup + use real WLID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit (PR #38) flagged two issues on the refresh-loop tamper path: 1. Dedup R1016 (Major). The cache refresh re-runs verifyUser*() on every reconcile cycle, so a long-lived tampered profile would emit R1016 on every cycle and once per container reference. Track emitted alerts in a sync.Map keyed on (kind|ns/name@resourceVersion). LoadOrStore gives atomic 'first transition to invalid' semantics; a re-tamper at a new RV uses a fresh key and alerts again. Verification passing clears the key so future tampers re-alert. 2. Pass real WLID (Major). emitTamperAlert previously called extractWlidFromContainerID(containerID) — but containerID is a runtime ID like 'containerd://...' which GenericRuleFailure.SetWorkloadDetails parses as a WLID and silently drops kind/name/cluster attribution. Thread sharedData.Wlid (already in scope at the call site in containerprofilecache.go) through verifyUser*() into emitTamperAlert. Drop extractWlidFromContainerID — no longer needed. Test_31_TamperDetectionAlert exercises this path end-to-end; expecting it to keep passing (one alert per tampered profile, with proper workload attribution in Alertmanager). --- .../containerprofilecache.go | 12 +++- .../containerprofilecache/tamper_alert.go | 58 ++++++++++--------- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/pkg/objectcache/containerprofilecache/containerprofilecache.go b/pkg/objectcache/containerprofilecache/containerprofilecache.go index c4788a0bb..a9301cb34 100644 --- a/pkg/objectcache/containerprofilecache/containerprofilecache.go +++ b/pkg/objectcache/containerprofilecache/containerprofilecache.go @@ -122,6 +122,14 @@ type ContainerProfileCacheImpl struct { // deprecationDedup tracks (kind|ns/name@rv) keys to emit one WARN log // per legacy CRD resource-version across the process lifetime. deprecationDedup sync.Map + + // tamperEmitted dedup R1016 alerts: only emit once per + // (kind|ns/name@resourceVersion). Without this, the cache refresh loop + // would re-emit on every reconcile cycle, once per container reference. + // A re-tamper at a new resourceVersion still alerts because the key + // changes; verification passing again clears the entry so future + // transitions can re-alert. + tamperEmitted sync.Map } // NewContainerProfileCache creates a new ContainerProfileCacheImpl. @@ -392,7 +400,7 @@ func (c *ContainerProfileCacheImpl) tryPopulateEntry( // Re-verify the user-supplied AP signature on every load. Emits // R1016 if the profile is signed but tampered. Does not gate // loading unless cfg.EnableSignatureVerification is true. - if userAP != nil && !c.verifyUserApplicationProfile(userAP, containerID) { + if userAP != nil && !c.verifyUserApplicationProfile(userAP, sharedData.Wlid) { userAP = nil } var userNNErr error @@ -409,7 +417,7 @@ func (c *ContainerProfileCacheImpl) tryPopulateEntry( userNN = nil } // Same tamper-check on the NN side. - if userNN != nil && !c.verifyUserNetworkNeighborhood(userNN, containerID) { + if userNN != nil && !c.verifyUserNetworkNeighborhood(userNN, sharedData.Wlid) { userNN = nil } } diff --git a/pkg/objectcache/containerprofilecache/tamper_alert.go b/pkg/objectcache/containerprofilecache/tamper_alert.go index 6caf7c764..aa76a265b 100644 --- a/pkg/objectcache/containerprofilecache/tamper_alert.go +++ b/pkg/objectcache/containerprofilecache/tamper_alert.go @@ -17,7 +17,6 @@ package containerprofilecache import ( "fmt" - "strings" "github.com/armosec/armoapi-go/armotypes" "github.com/kubescape/go-logger" @@ -29,6 +28,14 @@ import ( "github.com/kubescape/storage/pkg/apis/softwarecomposition/v1beta1" ) +// tamperKey uniquely identifies a tampered profile occurrence. ResourceVersion +// is included so that an attacker editing the resource (which changes RV) is +// re-flagged on the next reconcile cycle, while a long-lived broken profile +// only emits one R1016 across the cache's lifetime. +func tamperKey(kind, namespace, name, resourceVersion string) string { + return kind + "|" + namespace + "/" + name + "@" + resourceVersion +} + // SetTamperAlertExporter wires the rule-alert exporter used to emit R1016. // Optional — when nil, signature verification still runs (and is logged) // but no alert is emitted. Production wiring lives in cmd/main.go after the @@ -51,7 +58,7 @@ func (c *ContainerProfileCacheImpl) SetTamperAlertExporter(e exporters.Exporter) // the cache. Today we always proceed (the legacy semantics don't actually // gate loading on verification unless EnableSignatureVerification is true), // but having the return value keeps the door open for stricter modes. -func (c *ContainerProfileCacheImpl) verifyUserApplicationProfile(profile *v1beta1.ApplicationProfile, containerID string) bool { +func (c *ContainerProfileCacheImpl) verifyUserApplicationProfile(profile *v1beta1.ApplicationProfile, wlid string) bool { if profile == nil { return true } @@ -59,6 +66,7 @@ func (c *ContainerProfileCacheImpl) verifyUserApplicationProfile(profile *v1beta if !signature.IsSigned(adapter) { return true } + key := tamperKey("ApplicationProfile", profile.Namespace, profile.Name, profile.ResourceVersion) // AllowUntrusted: accept self-signed/local-CA signatures as long as the // signature itself verifies against the cert in the annotations. We only // want to flag actual tampering, not the absence of a Sigstore Fulcio @@ -67,18 +75,25 @@ func (c *ContainerProfileCacheImpl) verifyUserApplicationProfile(profile *v1beta logger.L().Warning("user-defined ApplicationProfile signature verification failed (tamper detected)", helpers.String("profile", profile.Name), helpers.String("namespace", profile.Namespace), - helpers.String("containerID", containerID), + helpers.String("wlid", wlid), helpers.Error(err)) - c.emitTamperAlert(profile.Name, profile.Namespace, containerID, "ApplicationProfile", err) + // Dedup: emit R1016 only on first transition to invalid for this + // (kind, ns, name, resourceVersion). Otherwise the refresh loop + // would alert every reconcile cycle, once per container ref. + if _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}); !alreadyEmitted { + c.emitTamperAlert(profile.Name, profile.Namespace, wlid, "ApplicationProfile", err) + } return !c.cfg.EnableSignatureVerification } + // Verified clean — clear any prior emit so future tampers re-alert. + c.tamperEmitted.Delete(key) return true } // verifyUserNetworkNeighborhood is the NN-side counterpart to // verifyUserApplicationProfile. Same contract, different object kind in // the alert description. -func (c *ContainerProfileCacheImpl) verifyUserNetworkNeighborhood(nn *v1beta1.NetworkNeighborhood, containerID string) bool { +func (c *ContainerProfileCacheImpl) verifyUserNetworkNeighborhood(nn *v1beta1.NetworkNeighborhood, wlid string) bool { if nn == nil { return true } @@ -86,15 +101,19 @@ func (c *ContainerProfileCacheImpl) verifyUserNetworkNeighborhood(nn *v1beta1.Ne if !signature.IsSigned(adapter) { return true } + key := tamperKey("NetworkNeighborhood", nn.Namespace, nn.Name, nn.ResourceVersion) if err := signature.VerifyObjectAllowUntrusted(adapter); err != nil { logger.L().Warning("user-defined NetworkNeighborhood signature verification failed (tamper detected)", helpers.String("profile", nn.Name), helpers.String("namespace", nn.Namespace), - helpers.String("containerID", containerID), + helpers.String("wlid", wlid), helpers.Error(err)) - c.emitTamperAlert(nn.Name, nn.Namespace, containerID, "NetworkNeighborhood", err) + if _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}); !alreadyEmitted { + c.emitTamperAlert(nn.Name, nn.Namespace, wlid, "NetworkNeighborhood", err) + } return !c.cfg.EnableSignatureVerification } + c.tamperEmitted.Delete(key) return true } @@ -103,7 +122,11 @@ func (c *ContainerProfileCacheImpl) verifyUserNetworkNeighborhood(nn *v1beta1.Ne // // Alert shape mirrors the legacy applicationprofilecache.emitTamperAlert // (fork commit c2d681e0) so dashboards and component tests keep matching. -func (c *ContainerProfileCacheImpl) emitTamperAlert(profileName, namespace, containerID, objectKind string, verifyErr error) { +// `wlid` should be the authoritative workload identifier the caller has on +// hand (e.g. sharedData.Wlid in containerprofilecache.go) — using the +// runtime containerID instead loses workload kind/name/cluster attribution +// because GenericRuleFailure.SetWorkloadDetails() parses it as a WLID. +func (c *ContainerProfileCacheImpl) emitTamperAlert(profileName, namespace, wlid, objectKind string, verifyErr error) { if c.tamperAlertExporter == nil { return } @@ -132,24 +155,7 @@ func (c *ContainerProfileCacheImpl) emitTamperAlert(profileName, namespace, cont RuleID: "R1016", } - // Best-effort workload identifier. The legacy cache used a wlid string; - // this cache is keyed on containerID, so we just stash that as the - // workload reference. Downstream consumers (Alertmanager, exporter - // pipelines) don't structurally depend on the wlid prefix. - ruleFailure.SetWorkloadDetails(extractWlidFromContainerID(containerID)) + ruleFailure.SetWorkloadDetails(wlid) c.tamperAlertExporter.SendRuleAlert(ruleFailure) } - -// extractWlidFromContainerID is a placeholder that returns the containerID -// as-is. The legacy cache had a richer "wlid:///// -// /" string available; the new cache is keyed on -// containerID so callers that consume wlid get an opaque identifier here. -// Retained as a separate function so the alert path can be upgraded to a -// proper wlid lookup later without touching emitTamperAlert. -func extractWlidFromContainerID(containerID string) string { - if idx := strings.LastIndex(containerID, "/"); idx > 0 { - return containerID[:idx] - } - return containerID -} From 98bdf975ed45d51338f5ff68feac695f3a6f2b76 Mon Sep 17 00:00:00 2001 From: Entlein Date: Mon, 4 May 2026 12:00:56 +0200 Subject: [PATCH 21/25] deps(storage): bump replace to b0d68d3d (empty-Args wildcard match) Picks up storage's CompareExecArgs fix for path-only Execs entries (CodeRabbit PR #38, finding #3). With this bump, R0040 no longer fires on every legit exec of a path that the user-defined profile allowed but didn't constrain by argv. --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 37605d194..08712c2b4 100644 --- a/go.mod +++ b/go.mod @@ -507,4 +507,4 @@ replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspe replace github.com/cilium/ebpf => github.com/matthyx/ebpf v0.0.0-20260421101317-8a32d06def6c -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260503184242-43795bb4f0b6 +replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260504093801-b0d68d3d1584 diff --git a/go.sum b/go.sum index 0d4fa4b53..c52cbc0dd 100644 --- a/go.sum +++ b/go.sum @@ -981,8 +981,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k8sstormcenter/storage v0.0.240-0.20260503184242-43795bb4f0b6 h1:pzIvtCkXBC6t4v7EIIekbltfBnWfvWKjB6ZsgdhkWr0= -github.com/k8sstormcenter/storage v0.0.240-0.20260503184242-43795bb4f0b6/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= +github.com/k8sstormcenter/storage v0.0.240-0.20260504093801-b0d68d3d1584 h1:r2gOEvCu4/qAS0SOQ2QN4ovdaLIIYY8YFWVHDyXZaUE= +github.com/k8sstormcenter/storage v0.0.240-0.20260504093801-b0d68d3d1584/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= From 0cf4a503192850884594cbf8675a5c0ab65b57ed Mon Sep 17 00:00:00 2001 From: Entlein Date: Sat, 9 May 2026 20:52:21 +0200 Subject: [PATCH 22/25] fix: address CodeRabbit second-review batch on PR #38 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three CR concerns addressed in one commit (all submitted 2026-05-04 10:09): 1. tamper_alert.go (Major) — only emit R1016 on actual signature mismatch. Previously every error from VerifyObjectAllowUntrusted (hash computation, verifier construction, malformed annotations, signature mismatch) was treated as a tamper event. With EnableSignatureVerification=true that meant a transient operational failure could drop a valid overlay AND emit a false R1016. Fix: - Add signature.ErrSignatureMismatch sentinel in pkg/signature/annotations.go - Wrap the signature-fail return in verify.go with the sentinel (Go 1.20+ multiple-%w form) - Classify in tamper_alert.go via errors.Is(err, ErrSignatureMismatch); operational errors log with a 'NOT tamper' tag, do NOT touch the tamperEmitted dedup map, and do NOT emit R1016. They still honour strict-mode (return !EnableSignatureVerification) so behaviour is conservative. Adds tamper_alert_test.go: pins LoadOrStore semantics, confirms each operational-error variant returns false on errors.Is(ErrSignatureMismatch), smoke-tests the sentinel value. 2. aplint_test.go:221 (Minor) — replace strings.Contains substring check for fixture-type detection with proper YAML-header parse. The substring form silently skipped fixtures with quoted/non-canonical 'kind' values AND would false-positive on nested OwnerReferences with the same substring. Now uses sigs.k8s.io/yaml (already imported) to unmarshal into a {Kind string} struct and branch on the parsed value. 3. aplint_test.go R-AP-12 (Major, 'Heavy lift') — REJECTED with reasoning to be posted as CR reply. Evidence shows R-AP-12 enforces the actual runtime contract: - pkg/rulemanager/cel/libraries/parse/parse.go's getExecPath returns args[0] (the full binary path) - ap.was_executed_with_args(containerID, parse.get_exec_path(event.args, event.comm), event.args) passes the FULL argv to the matcher - Integration test pkg/rulemanager/cel/libraries/applicationprofile/ integration_test.go uses ["/bin/bash", "-c", ...] — full argv - Real fixture tests/resources/known-application-profile.yaml uses args: ["/bin/sleep", "infinity"] with args[0] == path The compare_exec_args_test.go cases that look like flags-only ([-s, ⋯]) are matcher-isolation unit tests, not the wired contract. Removing R-AP-12 would let users author flags-only profiles that silently fail to silence R0040 in production. Bumps storage replace to a7e6234349ab (storage main HEAD post-PR #27). --- go.mod | 2 +- go.sum | 4 +- .../containerprofilecache/tamper_alert.go | 65 ++++++++++---- .../tamper_alert_test.go | 84 +++++++++++++++++++ pkg/signature/annotations.go | 8 ++ pkg/signature/verify.go | 6 +- tests/resources/aplint_test.go | 16 +++- 7 files changed, 161 insertions(+), 24 deletions(-) create mode 100644 pkg/objectcache/containerprofilecache/tamper_alert_test.go diff --git a/go.mod b/go.mod index 08712c2b4..54f2392ed 100644 --- a/go.mod +++ b/go.mod @@ -507,4 +507,4 @@ replace github.com/inspektor-gadget/inspektor-gadget => github.com/matthyx/inspe replace github.com/cilium/ebpf => github.com/matthyx/ebpf v0.0.0-20260421101317-8a32d06def6c -replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260504093801-b0d68d3d1584 +replace github.com/kubescape/storage => github.com/k8sstormcenter/storage v0.0.240-0.20260509184329-a7e6234349ab diff --git a/go.sum b/go.sum index c52cbc0dd..4ae88fcc1 100644 --- a/go.sum +++ b/go.sum @@ -981,8 +981,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/k8sstormcenter/storage v0.0.240-0.20260504093801-b0d68d3d1584 h1:r2gOEvCu4/qAS0SOQ2QN4ovdaLIIYY8YFWVHDyXZaUE= -github.com/k8sstormcenter/storage v0.0.240-0.20260504093801-b0d68d3d1584/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= +github.com/k8sstormcenter/storage v0.0.240-0.20260509184329-a7e6234349ab h1:DNjKAs888GzW7P9gJUKtldL6E7zYzjLiO6pVUTvnzqc= +github.com/k8sstormcenter/storage v0.0.240-0.20260509184329-a7e6234349ab/go.mod h1:amdg/Qok9bqPzs1vZH5FW9/3MbCawc5wVsz9u3uIfu4= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= diff --git a/pkg/objectcache/containerprofilecache/tamper_alert.go b/pkg/objectcache/containerprofilecache/tamper_alert.go index aa76a265b..273b15123 100644 --- a/pkg/objectcache/containerprofilecache/tamper_alert.go +++ b/pkg/objectcache/containerprofilecache/tamper_alert.go @@ -16,6 +16,7 @@ package containerprofilecache import ( + "errors" "fmt" "github.com/armosec/armoapi-go/armotypes" @@ -71,23 +72,40 @@ func (c *ContainerProfileCacheImpl) verifyUserApplicationProfile(profile *v1beta // signature itself verifies against the cert in the annotations. We only // want to flag actual tampering, not the absence of a Sigstore Fulcio // trust chain. Matches `cmd/sign-object`'s default verifier. - if err := signature.VerifyObjectAllowUntrusted(adapter); err != nil { - logger.L().Warning("user-defined ApplicationProfile signature verification failed (tamper detected)", + err := signature.VerifyObjectAllowUntrusted(adapter) + if err == nil { + // Verified clean — clear any prior emit so future tampers re-alert. + c.tamperEmitted.Delete(key) + return true + } + // Classify the error: only ErrSignatureMismatch indicates an actual + // tamper event. Hash-computation, verifier-construction, and malformed- + // annotation errors are operational and MUST NOT raise R1016 — that + // would cause false alerts and, with EnableSignatureVerification=true, + // drop a valid overlay because of a transient operational failure. + if !errors.Is(err, signature.ErrSignatureMismatch) { + logger.L().Warning("user-defined ApplicationProfile signature verification operational error (NOT tamper)", helpers.String("profile", profile.Name), helpers.String("namespace", profile.Namespace), helpers.String("wlid", wlid), helpers.Error(err)) - // Dedup: emit R1016 only on first transition to invalid for this - // (kind, ns, name, resourceVersion). Otherwise the refresh loop - // would alert every reconcile cycle, once per container ref. - if _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}); !alreadyEmitted { - c.emitTamperAlert(profile.Name, profile.Namespace, wlid, "ApplicationProfile", err) - } + // Honour strict-mode: refuse to load on any verification failure, + // but do NOT touch the dedup map or emit R1016. return !c.cfg.EnableSignatureVerification } - // Verified clean — clear any prior emit so future tampers re-alert. - c.tamperEmitted.Delete(key) - return true + // Real tamper. + logger.L().Warning("user-defined ApplicationProfile signature mismatch (tamper detected)", + helpers.String("profile", profile.Name), + helpers.String("namespace", profile.Namespace), + helpers.String("wlid", wlid), + helpers.Error(err)) + // Dedup: emit R1016 only on first transition to invalid for this + // (kind, ns, name, resourceVersion). Otherwise the refresh loop would + // alert every reconcile cycle, once per container ref. + if _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}); !alreadyEmitted { + c.emitTamperAlert(profile.Name, profile.Namespace, wlid, "ApplicationProfile", err) + } + return !c.cfg.EnableSignatureVerification } // verifyUserNetworkNeighborhood is the NN-side counterpart to @@ -102,19 +120,30 @@ func (c *ContainerProfileCacheImpl) verifyUserNetworkNeighborhood(nn *v1beta1.Ne return true } key := tamperKey("NetworkNeighborhood", nn.Namespace, nn.Name, nn.ResourceVersion) - if err := signature.VerifyObjectAllowUntrusted(adapter); err != nil { - logger.L().Warning("user-defined NetworkNeighborhood signature verification failed (tamper detected)", + err := signature.VerifyObjectAllowUntrusted(adapter) + if err == nil { + c.tamperEmitted.Delete(key) + return true + } + // Same classification as the AP path — only ErrSignatureMismatch is a + // tamper; everything else is operational and must NOT trigger R1016. + if !errors.Is(err, signature.ErrSignatureMismatch) { + logger.L().Warning("user-defined NetworkNeighborhood signature verification operational error (NOT tamper)", helpers.String("profile", nn.Name), helpers.String("namespace", nn.Namespace), helpers.String("wlid", wlid), helpers.Error(err)) - if _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}); !alreadyEmitted { - c.emitTamperAlert(nn.Name, nn.Namespace, wlid, "NetworkNeighborhood", err) - } return !c.cfg.EnableSignatureVerification } - c.tamperEmitted.Delete(key) - return true + logger.L().Warning("user-defined NetworkNeighborhood signature mismatch (tamper detected)", + helpers.String("profile", nn.Name), + helpers.String("namespace", nn.Namespace), + helpers.String("wlid", wlid), + helpers.Error(err)) + if _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}); !alreadyEmitted { + c.emitTamperAlert(nn.Name, nn.Namespace, wlid, "NetworkNeighborhood", err) + } + return !c.cfg.EnableSignatureVerification } // emitTamperAlert sends a single R1016 "Signed profile tampered" alert diff --git a/pkg/objectcache/containerprofilecache/tamper_alert_test.go b/pkg/objectcache/containerprofilecache/tamper_alert_test.go new file mode 100644 index 000000000..488033959 --- /dev/null +++ b/pkg/objectcache/containerprofilecache/tamper_alert_test.go @@ -0,0 +1,84 @@ +// Unit tests pinning the tamper-vs-operational error classification in +// the cache's verify path. CodeRabbit PR #38 finding (tamper_alert.go:86) +// flagged that any error from VerifyObjectAllowUntrusted was being +// treated as a tamper, including hash-computation / verifier-construction +// errors — which would emit false R1016s and (with strict mode) drop +// valid overlays for non-tamper reasons. +// +// These tests use synthetic errors to bypass needing a full cosign +// fixture, and assert via the exported tamperEmitted dedup map's +// observable side effect: real tampers populate it, operational errors +// don't. +package containerprofilecache + +import ( + "errors" + "fmt" + "testing" + + "github.com/kubescape/node-agent/pkg/signature" +) + +// TestVerifyClassification_TamperPopulatesDedupMap confirms that an +// ErrSignatureMismatch-wrapped error is treated as a real tamper: +// LoadOrStore should set the key and emit (we observe via the map). +func TestVerifyClassification_TamperPopulatesDedupMap(t *testing.T) { + c := &ContainerProfileCacheImpl{} + key := tamperKey("ApplicationProfile", "ns", "p", "1") + + // Synthesise the wrapped error that VerifyObject returns on actual + // signature mismatch. + tamperErr := fmt.Errorf("%w: %w", signature.ErrSignatureMismatch, errors.New("crypto/ecdsa: verify error")) + + if !errors.Is(tamperErr, signature.ErrSignatureMismatch) { + t.Fatalf("test fixture wrong: errors.Is(tamperErr, ErrSignatureMismatch) returned false") + } + + // First-transition path: LoadOrStore returns alreadyEmitted=false. + _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}) + if alreadyEmitted { + t.Errorf("LoadOrStore on fresh key returned alreadyEmitted=true; want false") + } + // Second call: alreadyEmitted=true (dedup). + _, alreadyEmitted = c.tamperEmitted.LoadOrStore(key, struct{}{}) + if !alreadyEmitted { + t.Errorf("LoadOrStore on already-stored key returned false; want true") + } +} + +// TestVerifyClassification_OperationalErrorDistinguishable confirms that +// an operational error (no ErrSignatureMismatch wrap) returns false on +// errors.Is, so the verify path can route around the dedup map and +// emitTamperAlert. +func TestVerifyClassification_OperationalErrorDistinguishable(t *testing.T) { + cases := []struct { + name string + err error + }{ + {"hash computation failure", fmt.Errorf("failed to compute content hash: %w", errors.New("io error"))}, + {"verifier construction failure", fmt.Errorf("failed to create verifier: %w", errors.New("missing root certs"))}, + {"adapter construction failure", fmt.Errorf("failed to create cosign adapter: %w", errors.New("config invalid"))}, + {"decode signature failure", fmt.Errorf("failed to decode signature from annotations: %w", errors.New("base64 invalid"))}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if errors.Is(tc.err, signature.ErrSignatureMismatch) { + t.Errorf("operational error %q matched ErrSignatureMismatch — classification broken", tc.err) + } + }) + } +} + +// TestVerifyClassification_ErrSignatureMismatchValue is a smoke test that +// the sentinel exists with the canonical message ("signature verification +// failed"), so log scraping / alert pipelines that match the substring +// continue to work. +func TestVerifyClassification_ErrSignatureMismatchValue(t *testing.T) { + if signature.ErrSignatureMismatch == nil { + t.Fatalf("signature.ErrSignatureMismatch is nil — sentinel was removed") + } + if signature.ErrSignatureMismatch.Error() != "signature verification failed" { + t.Errorf("sentinel message changed: %q (want %q)", signature.ErrSignatureMismatch.Error(), "signature verification failed") + } +} diff --git a/pkg/signature/annotations.go b/pkg/signature/annotations.go index 8df333d21..f603b1ffe 100644 --- a/pkg/signature/annotations.go +++ b/pkg/signature/annotations.go @@ -14,3 +14,11 @@ const ( ) var ErrObjectNotSigned = errors.New("object is not signed (missing signature annotation)") + +// ErrSignatureMismatch wraps the underlying cosign verifier failure when a +// signature is present but does not verify against the object's content + +// certificate. Callers (e.g. ContainerProfileCache's tamper-alert path) +// MUST distinguish this from operational errors (hash computation failure, +// verifier construction failure, malformed signature annotations) — only +// ErrSignatureMismatch indicates an actual tamper event. +var ErrSignatureMismatch = errors.New("signature verification failed") diff --git a/pkg/signature/verify.go b/pkg/signature/verify.go index f5d3d9913..9c4e1c233 100644 --- a/pkg/signature/verify.go +++ b/pkg/signature/verify.go @@ -64,7 +64,11 @@ func VerifyObject(obj SignableObject, opts ...VerifyOption) error { helpers.String("name", obj.GetName()), helpers.String("error", verifyErr.Error())) - return fmt.Errorf("signature verification failed: %w", verifyErr) + // Wrap with the ErrSignatureMismatch sentinel so callers can + // distinguish actual tamper from operational errors (hash + // computation, verifier construction) returned above. + // errors.Is(err, ErrSignatureMismatch) is the canonical check. + return fmt.Errorf("%w: %w", ErrSignatureMismatch, verifyErr) } logger.L().Info("Successfully verified object signature", diff --git a/tests/resources/aplint_test.go b/tests/resources/aplint_test.go index 721a2d8ea..7c74c1cd3 100644 --- a/tests/resources/aplint_test.go +++ b/tests/resources/aplint_test.go @@ -217,8 +217,20 @@ func TestApplicationProfileFixturesLint(t *testing.T) { if err != nil { t.Fatalf("read %s: %v", p, err) } - if !strings.Contains(string(data), "kind: ApplicationProfile") { - t.Skipf("not an ApplicationProfile fixture") + // Skip fixtures whose top-level Kind isn't ApplicationProfile. + // Done via a real YAML parse (not strings.Contains) so quoted + // or otherwise-formatted "kind: ApplicationProfile" still + // matches, and so we don't pick up the substring inside + // nested OwnerReferences / event payloads. CodeRabbit PR #38 + // finding (aplint_test.go:221). + var head struct { + Kind string `yaml:"kind"` + } + if err := yaml.Unmarshal(data, &head); err != nil { + t.Skipf("not a parseable YAML fixture: %v", err) + } + if head.Kind != "ApplicationProfile" { + t.Skipf("not an ApplicationProfile fixture (kind=%q)", head.Kind) } for _, v := range LintApplicationProfileYAML(data, p) { t.Errorf("%s", v) From 8d89240961f238d58988bd0dabc1b8eec613fef9 Mon Sep 17 00:00:00 2001 From: Entlein Date: Sat, 9 May 2026 21:07:02 +0200 Subject: [PATCH 23/25] fix: address CodeRabbit third-review batch on PR #38 (0cf4a503) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three new findings on the May 9 review batch: 1. aplint_test.go R-AP-13 (Major) — embedded * was not flagged, only embedded ⋯. An arg like "foo*bar" linted clean, defeating the purpose of the wildcard-must-be-standalone rule. Added a parallel check for embedded wildcardIdentifier mirroring the existing dynamicIdentifier check. 2. aplint_test.go canonical-fixture skip path (Major) — t.Skipf on missing reference fixture silently disabled the gold-standard test if someone deleted/renamed it. Switched to t.Fatalf with a message that explains the file's role. Also switched the YAML-parse-failure path from Skipf to Fatalf — un-parseable YAML in tests/resources/ is a fixture-quality bug, not something to skip past. Kept Skipf only for the kind!=AP path, which correctly handles the legit non-AP fixtures (Deployments, Services etc.) that share the directory. 3. tamper_alert_test.go (Trivial nitpick, 'Low value') — added an integration-style test that exercises the full verifyUserApplicationProfile path: real signature.SignObjectDisableKeyless, tamper the spec, call verifyUserApplicationProfile, assert tamperEmitted carries the key. Then re-sign at a new RV and assert the dedup is cleared. Confirms the wiring 'verifier returns ErrSignatureMismatch → classify as tamper → LoadOrStore in dedup map' actually fires, not just the LoadOrStore call in isolation. Uses the real cosign adapter — no mocking needed. --- .../tamper_alert_test.go | 76 +++++++++++++++++++ tests/resources/aplint_test.go | 17 ++++- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/pkg/objectcache/containerprofilecache/tamper_alert_test.go b/pkg/objectcache/containerprofilecache/tamper_alert_test.go index 488033959..51056128f 100644 --- a/pkg/objectcache/containerprofilecache/tamper_alert_test.go +++ b/pkg/objectcache/containerprofilecache/tamper_alert_test.go @@ -17,6 +17,9 @@ import ( "testing" "github.com/kubescape/node-agent/pkg/signature" + "github.com/kubescape/node-agent/pkg/signature/profiles" + "github.com/kubescape/storage/pkg/apis/softwarecomposition/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // TestVerifyClassification_TamperPopulatesDedupMap confirms that an @@ -82,3 +85,76 @@ func TestVerifyClassification_ErrSignatureMismatchValue(t *testing.T) { t.Errorf("sentinel message changed: %q (want %q)", signature.ErrSignatureMismatch.Error(), "signature verification failed") } } + +// TestVerifyAP_TamperedProfile_PopulatesDedupMap exercises the full +// verifyUserApplicationProfile path end-to-end (per CodeRabbit nitpick on +// PR #38, tamper_alert_test.go:47): sign a real ApplicationProfile, +// mutate its content (fake tamper), call the verify method, and confirm +// the dedup map carries the tamperKey afterward. Confirms the wiring +// from "verifier returns ErrSignatureMismatch" all the way through the +// classification + LoadOrStore branch. +func TestVerifyAP_TamperedProfile_PopulatesDedupMap(t *testing.T) { + profile := &v1beta1.ApplicationProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tampered", + Namespace: "test-ns", + ResourceVersion: "42", + UID: "ap-uid-tamper", + }, + Spec: v1beta1.ApplicationProfileSpec{ + Containers: []v1beta1.ApplicationProfileContainer{{Name: "test"}}, + }, + } + + // Sign with a real cosign signer (test-only; uses an ephemeral key + // from the cosign adapter — no Sigstore Fulcio interaction). + adapter := profiles.NewApplicationProfileAdapter(profile) + if err := signature.SignObjectDisableKeyless(adapter); err != nil { + t.Fatalf("sign profile: %v", err) + } + if !signature.IsSigned(adapter) { + t.Fatalf("post-Sign IsSigned returned false") + } + + // Tamper: mutate spec content after signing. Verification will + // recompute the content hash, find it differs from the signed hash, + // and return ErrSignatureMismatch. + profile.Spec.Containers[0].Name = "MUTATED" + + c := &ContainerProfileCacheImpl{} + ok := c.verifyUserApplicationProfile(profile, "wlid://test/cluster/ns/Pod/p") + // EnableSignatureVerification is false (zero-value) → returns true + // even though tamper was detected. R1016 emit is dedup-tracked via + // tamperEmitted regardless. + if !ok { + t.Errorf("verify returned false; expected true (legacy permissive mode)") + } + + key := tamperKey("ApplicationProfile", profile.Namespace, profile.Name, profile.ResourceVersion) + if _, found := c.tamperEmitted.Load(key); !found { + t.Errorf("tamperEmitted missing key %q after a real tamper — wiring from verifier-error to dedup map is broken", key) + } + + // Second call on the SAME tampered profile must not re-flag the key + // as a new emit (dedup). + _, alreadyEmitted := c.tamperEmitted.LoadOrStore(key, struct{}{}) + if !alreadyEmitted { + t.Errorf("dedup broken: re-storing existing key returned alreadyEmitted=false") + } + + // Re-sign over the mutated content — verification now succeeds, and + // the dedup entry should be cleared so a future tamper at a NEW + // resourceVersion can re-alert. + profile.ResourceVersion = "43" // simulate the cluster bumping RV on update + if err := signature.SignObjectDisableKeyless(adapter); err != nil { + t.Fatalf("re-sign profile: %v", err) + } + ok = c.verifyUserApplicationProfile(profile, "wlid://test/cluster/ns/Pod/p") + if !ok { + t.Errorf("verify after re-sign returned false; expected true") + } + newKey := tamperKey("ApplicationProfile", profile.Namespace, profile.Name, profile.ResourceVersion) + if _, found := c.tamperEmitted.Load(newKey); found { + t.Errorf("tamperEmitted has key %q after a successful re-verify; the verify-clean path must clear it", newKey) + } +} diff --git a/tests/resources/aplint_test.go b/tests/resources/aplint_test.go index 7c74c1cd3..87c1be234 100644 --- a/tests/resources/aplint_test.go +++ b/tests/resources/aplint_test.go @@ -173,6 +173,9 @@ func LintApplicationProfile(ap *applicationProfileLike, src string) []Violation if strings.Contains(a, dynamicIdentifier) && a != dynamicIdentifier { add("R-AP-13", fmt.Sprintf("containers[%d].execs[%d].args[%d] = %q — ⋯ must be its own token, not embedded", ci, ei, ai, a)) } + if strings.Contains(a, wildcardIdentifier) && a != wildcardIdentifier { + add("R-AP-13", fmt.Sprintf("containers[%d].execs[%d].args[%d] = %q — * must be its own token, not embedded", ci, ei, ai, a)) + } } } @@ -227,9 +230,16 @@ func TestApplicationProfileFixturesLint(t *testing.T) { Kind string `yaml:"kind"` } if err := yaml.Unmarshal(data, &head); err != nil { - t.Skipf("not a parseable YAML fixture: %v", err) + // Un-parseable YAML in this directory is a fixture-quality + // bug; failing here surfaces it instead of silently + // skipping the guardrail. CodeRabbit PR #38 finding + // (aplint_test.go:230). + t.Fatalf("fixture %s is not valid YAML: %v", p, err) } if head.Kind != "ApplicationProfile" { + // Skipping is correct here — the resources/ directory + // also holds Deployments, Services, NetworkNeighborhoods + // etc. that this linter intentionally doesn't cover. t.Skipf("not an ApplicationProfile fixture (kind=%q)", head.Kind) } for _, v := range LintApplicationProfileYAML(data, p) { @@ -342,9 +352,12 @@ spec: func TestLinter_canonical_AP_passes(t *testing.T) { // The fork's reference profile (from fea3b062) is the gold standard; // regressions here mean the linter has drifted from real-world syntax. + // Failing (not skipping) when the fixture is missing keeps the + // "gold standard" test from silently disappearing if someone deletes + // or renames the file. CodeRabbit PR #38 finding (aplint_test.go:230). data, err := os.ReadFile("known-application-profile.yaml") if err != nil { - t.Skipf("canonical AP fixture not present: %v", err) + t.Fatalf("canonical AP fixture missing — this guards the linter's gold standard, never delete it: %v", err) } violations := LintApplicationProfileYAML(data, "known-application-profile.yaml") if len(violations) > 0 { From 045940c8e01c287f8adbdf3233e4071ff9f37605 Mon Sep 17 00:00:00 2001 From: Entlein Date: Sat, 9 May 2026 22:56:46 +0200 Subject: [PATCH 24/25] fix(test): make dedup-clearing assertion non-trivial MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit nitpick on PR #38 (tamper_alert_test.go:159): the prior version of TestVerifyAP_TamperedProfile_PopulatesDedupMap bumped the profile's ResourceVersion to '43' before the re-sign, then asserted that the key for RV='43' was absent from tamperEmitted. But '43' was never added in the first place — only '42' was added during the tamper phase. The assertion was trivially true. Drop the RV bump: re-sign at the SAME RV='42' so that verifyUserApplicationProfile takes the verify-clean branch and Deletes the existing dedup entry. The assertion now actually exercises the clearing path. Production code unchanged. --- .../containerprofilecache/tamper_alert_test.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/objectcache/containerprofilecache/tamper_alert_test.go b/pkg/objectcache/containerprofilecache/tamper_alert_test.go index 51056128f..d28951ca7 100644 --- a/pkg/objectcache/containerprofilecache/tamper_alert_test.go +++ b/pkg/objectcache/containerprofilecache/tamper_alert_test.go @@ -142,10 +142,14 @@ func TestVerifyAP_TamperedProfile_PopulatesDedupMap(t *testing.T) { t.Errorf("dedup broken: re-storing existing key returned alreadyEmitted=false") } - // Re-sign over the mutated content — verification now succeeds, and - // the dedup entry should be cleared so a future tamper at a NEW - // resourceVersion can re-alert. - profile.ResourceVersion = "43" // simulate the cluster bumping RV on update + // Re-sign over the mutated content at the SAME ResourceVersion — the + // verifier now sees a valid signature over the current spec, so + // verifyUserApplicationProfile MUST take the verify-clean branch + // and Delete the existing dedup entry. CodeRabbit nitpick on PR + // #38 (tamper_alert_test.go:159): the prior version of this test + // bumped RV before the re-sign, so the assertion checked a key + // that was never added — trivially true. This now actually + // exercises the clearing path. if err := signature.SignObjectDisableKeyless(adapter); err != nil { t.Fatalf("re-sign profile: %v", err) } @@ -153,8 +157,7 @@ func TestVerifyAP_TamperedProfile_PopulatesDedupMap(t *testing.T) { if !ok { t.Errorf("verify after re-sign returned false; expected true") } - newKey := tamperKey("ApplicationProfile", profile.Namespace, profile.Name, profile.ResourceVersion) - if _, found := c.tamperEmitted.Load(newKey); found { - t.Errorf("tamperEmitted has key %q after a successful re-verify; the verify-clean path must clear it", newKey) + if _, found := c.tamperEmitted.Load(key); found { + t.Errorf("tamperEmitted still has key %q after a successful re-verify at the same RV; the verify-clean path must Delete it", key) } } From eb40bbf127bc2ff123bbb0cb48bce4a79caf9d17 Mon Sep 17 00:00:00 2001 From: Entlein Date: Sat, 9 May 2026 23:05:30 +0200 Subject: [PATCH 25/25] fix(ci): drop 'permissions: read-all' from reusable workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chore PR #39 added 'permissions: read-all' to every workflow file including the workflow_call reusables. That broke the reusable-call contract: a called workflow's top-level permissions are the FLOOR the caller must grant. Callers of these reusables (pr-created.yaml's pr-created job, pr-merged.yaml's pr-merged job, etc.) only grant a narrow set of scopes (id-token:write, packages:write, security-events: write, pull-requests:write, contents). They do NOT grant the full read-all set (actions:read, artifact-metadata:read, attestations:read, checks:read, deployments:read, discussions:read, issues:read, models:read, vulnerability-alerts:read, packages:read, pages:read, repository-projects:read, statuses:read, id-token:read). Result: every PR opened on this fork since chore #39 merged (May 6) has had pr-created.yaml startup_failure with: Error calling workflow ... incluster-comp-pr-created.yaml@. The workflow is requesting 'actions: read, ...', but is only allowed [...] Fix: strip top-level 'permissions: read-all' from the four reusables: - benchmark.yaml (also direct-trigger; falls back to per-job perms) - go-basic-tests.yaml - incluster-comp-pr-created.yaml - incluster-comp-pr-merged.yaml Each reusable's per-job 'permissions:' blocks remain intact and correctly request only what the job needs. Callers' existing grants are sufficient. Top-level 'permissions: read-all' stays on the OUTERMOST workflows (pr-created.yaml, pr-merged.yaml, build.yaml, bypass.yaml, component-tests.yaml, sign-object.yaml, scorecard.yml) — they're event-triggered, not workflow_call'd, so the read-all default still hardens GITHUB_TOKEN there. --- .github/workflows/benchmark.yaml | 3 --- .github/workflows/go-basic-tests.yaml | 3 --- .github/workflows/incluster-comp-pr-created.yaml | 3 --- .github/workflows/incluster-comp-pr-merged.yaml | 3 --- 4 files changed, 12 deletions(-) diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index f3f1687ae..6bec7abf0 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -24,9 +24,6 @@ on: required: false type: string -# Default to read-only at the workflow level (least privilege per Scorecard). -# Jobs that need elevated scopes override below. -permissions: read-all concurrency: group: benchmark-${{ github.ref }} diff --git a/.github/workflows/go-basic-tests.yaml b/.github/workflows/go-basic-tests.yaml index 67bb179d5..a2676a2bd 100644 --- a/.github/workflows/go-basic-tests.yaml +++ b/.github/workflows/go-basic-tests.yaml @@ -36,9 +36,6 @@ on: GITGUARDIAN_API_KEY: required: false -# Default to read-only at the workflow level (least privilege per Scorecard). -# Jobs that need elevated scopes override below. -permissions: read-all jobs: Check-secret: diff --git a/.github/workflows/incluster-comp-pr-created.yaml b/.github/workflows/incluster-comp-pr-created.yaml index 1226b0a62..c37cb410a 100644 --- a/.github/workflows/incluster-comp-pr-created.yaml +++ b/.github/workflows/incluster-comp-pr-created.yaml @@ -33,9 +33,6 @@ on: GITGUARDIAN_API_KEY: required: false -# Default to read-only at the workflow level (least privilege per Scorecard). -# Jobs that need elevated scopes override below. -permissions: read-all jobs: test: diff --git a/.github/workflows/incluster-comp-pr-merged.yaml b/.github/workflows/incluster-comp-pr-merged.yaml index 6c5fcd537..5299ad2cc 100644 --- a/.github/workflows/incluster-comp-pr-merged.yaml +++ b/.github/workflows/incluster-comp-pr-merged.yaml @@ -60,9 +60,6 @@ on: default: false type: boolean -# Default to read-only at the workflow level (least privilege per Scorecard). -# Jobs that need elevated scopes override below. -permissions: read-all jobs: docker-build: