From 2e17372e739d3de89d10fb3d8dcc3a6b31f09b98 Mon Sep 17 00:00:00 2001 From: William Findlay Date: Fri, 24 Jan 2025 11:36:49 -0500 Subject: [PATCH 1/3] observertesthelper: move docker into separate package A subsequent commit in this series needs to user observertesthelper's docker-related helpers in a test, but that creates an import cycle. Resolve the import cycle by moving docker-related helpers into a separate package and maintain backward compatibility by introducing deprecated stubs in observertesthelper that call into that new package. Signed-off-by: William Findlay --- .../observertesthelper/docker/docker.go | 73 +++++++++++++++++++ .../observer_test_helper.go | 63 ---------------- pkg/sensors/exec/exec_test.go | 9 ++- 3 files changed, 78 insertions(+), 67 deletions(-) create mode 100644 pkg/observer/observertesthelper/docker/docker.go diff --git a/pkg/observer/observertesthelper/docker/docker.go b/pkg/observer/observertesthelper/docker/docker.go new file mode 100644 index 00000000000..9b7dcb6403a --- /dev/null +++ b/pkg/observer/observertesthelper/docker/docker.go @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package docker + +import ( + "os/exec" + "strings" + "testing" +) + +// Create creates a new docker container in the background. The container will +// be killed and removed on test cleanup. +// It returns the containerId on success, or an error if spawning the container failed. +func Create(tb testing.TB, args ...string) (containerId string) { + // note: we are not using `--rm` so we can choose to wait on the container + // with `docker wait`. We remove it manually below in t.Cleanup instead + args = append([]string{"create"}, args...) + id, err := exec.Command("docker", args...).Output() + if err != nil { + tb.Fatalf("failed to spawn docker container %v: %s", args, err) + } + + containerId = strings.TrimSpace(string(id)) + tb.Cleanup(func() { + err := exec.Command("docker", "rm", "--force", containerId).Run() + if err != nil { + tb.Logf("failed to remove container %s: %s", containerId, err) + } + }) + + return containerId +} + +// Start starts a new docker container with a given ID. +func Start(tb testing.TB, id string) { + err := exec.Command("docker", "start", id).Run() + if err != nil { + tb.Fatalf("failed to start docker container %s: %s", id, err) + } +} + +// dockerRun starts a new docker container in the background. The container will +// be killed and removed on test cleanup. +// It returns the containerId on success, or an error if spawning the container failed. +func Run(tb testing.TB, args ...string) (containerId string) { + // note: we are not using `--rm` so we can choose to wait on the container + // with `docker wait`. We remove it manually below in t.Cleanup instead + args = append([]string{"run", "--detach"}, args...) + id, err := exec.Command("docker", args...).Output() + if err != nil { + tb.Fatalf("failed to spawn docker container %v: %s", args, err) + } + + containerId = strings.TrimSpace(string(id)) + tb.Cleanup(func() { + err := exec.Command("docker", "rm", "--force", containerId).Run() + if err != nil { + tb.Logf("failed to remove container %s: %s", containerId, err) + } + }) + + return containerId +} + +// dockerExec executes a command in a container. +func Exec(tb testing.TB, id string, args ...string) { + args = append([]string{"exec", id}, args...) + err := exec.Command("docker", args...).Run() + if err != nil { + tb.Fatalf("failed to exec in docker container %v: %s", args, err) + } +} diff --git a/pkg/observer/observertesthelper/observer_test_helper.go b/pkg/observer/observertesthelper/observer_test_helper.go index ea5b92bd84d..63fd315b181 100644 --- a/pkg/observer/observertesthelper/observer_test_helper.go +++ b/pkg/observer/observertesthelper/observer_test_helper.go @@ -521,69 +521,6 @@ func ExecWGCurl(readyWG *sync.WaitGroup, retries uint, args ...string) error { return err } -// DockerCreate creates a new docker container in the background. The container will -// be killed and removed on test cleanup. -// It returns the containerId on success, or an error if spawning the container failed. -func DockerCreate(tb testing.TB, args ...string) (containerId string) { - // note: we are not using `--rm` so we can choose to wait on the container - // with `docker wait`. We remove it manually below in t.Cleanup instead - args = append([]string{"create"}, args...) - id, err := exec.Command("docker", args...).Output() - if err != nil { - tb.Fatalf("failed to spawn docker container %v: %s", args, err) - } - - containerId = strings.TrimSpace(string(id)) - tb.Cleanup(func() { - err := exec.Command("docker", "rm", "--force", containerId).Run() - if err != nil { - tb.Logf("failed to remove container %s: %s", containerId, err) - } - }) - - return containerId -} - -// DockerStart starts a new docker container with a given ID. -func DockerStart(tb testing.TB, id string) { - err := exec.Command("docker", "start", id).Run() - if err != nil { - tb.Fatalf("failed to start docker container %s: %s", id, err) - } -} - -// dockerRun starts a new docker container in the background. The container will -// be killed and removed on test cleanup. -// It returns the containerId on success, or an error if spawning the container failed. -func DockerRun(tb testing.TB, args ...string) (containerId string) { - // note: we are not using `--rm` so we can choose to wait on the container - // with `docker wait`. We remove it manually below in t.Cleanup instead - args = append([]string{"run", "--detach"}, args...) - id, err := exec.Command("docker", args...).Output() - if err != nil { - tb.Fatalf("failed to spawn docker container %v: %s", args, err) - } - - containerId = strings.TrimSpace(string(id)) - tb.Cleanup(func() { - err := exec.Command("docker", "rm", "--force", containerId).Run() - if err != nil { - tb.Logf("failed to remove container %s: %s", containerId, err) - } - }) - - return containerId -} - -// dockerExec executes a command in a container. -func DockerExec(tb testing.TB, id string, args ...string) { - args = append([]string{"exec", id}, args...) - err := exec.Command("docker", args...).Run() - if err != nil { - tb.Fatalf("failed to exec in docker container %v: %s", args, err) - } -} - type fakeK8sWatcher struct { fakePod, fakeNamespace string } diff --git a/pkg/sensors/exec/exec_test.go b/pkg/sensors/exec/exec_test.go index 0a58d5093e2..9a83ff16d30 100644 --- a/pkg/sensors/exec/exec_test.go +++ b/pkg/sensors/exec/exec_test.go @@ -32,6 +32,7 @@ import ( sm "github.com/cilium/tetragon/pkg/matchers/stringmatcher" "github.com/cilium/tetragon/pkg/observer" "github.com/cilium/tetragon/pkg/observer/observertesthelper" + "github.com/cilium/tetragon/pkg/observer/observertesthelper/docker" "github.com/cilium/tetragon/pkg/option" proc "github.com/cilium/tetragon/pkg/process" "github.com/cilium/tetragon/pkg/reader/caps" @@ -597,7 +598,7 @@ func TestDocker(t *testing.T) { observertesthelper.LoopEvents(ctx, t, &doneWG, &readyWG, obs) readyWG.Wait() - serverDockerID := observertesthelper.DockerRun(t, "--name", "fgs-test-server", "--entrypoint", "nc", "quay.io/cilium/alpine-curl:v1.6.0", "-nvlp", "8081", "-s", "0.0.0.0") + serverDockerID := docker.Run(t, "--name", "fgs-test-server", "--entrypoint", "nc", "quay.io/cilium/alpine-curl:v1.6.0", "-nvlp", "8081", "-s", "0.0.0.0") time.Sleep(1 * time.Second) // Tetragon sends 31 bytes + \0 to user-space. Since it might have an arbitrary prefix, @@ -637,7 +638,7 @@ func TestInInitTree(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), tus.Conf().CmdWaitTime) defer cancel() - containerID := observertesthelper.DockerCreate(t, "--name", "in-init-tree-test", "bash", "bash", "-c", "sleep infinity") + containerID := docker.Create(t, "--name", "in-init-tree-test", "bash", "bash", "-c", "sleep infinity") // Tetragon sends 31 bytes + \0 to user-space. Since it might have an arbitrary prefix, // match only on the first 24 bytes. trimmedContainerID := sm.Prefix(containerID[:24]) @@ -650,9 +651,9 @@ func TestInInitTree(t *testing.T) { observertesthelper.LoopEvents(ctx, t, &doneWG, &readyWG, obs) readyWG.Wait() - observertesthelper.DockerStart(t, "in-init-tree-test") + docker.Start(t, "in-init-tree-test") time.Sleep(1 * time.Second) - observertesthelper.DockerExec(t, "in-init-tree-test", "ls") + docker.Exec(t, "in-init-tree-test", "ls") // This is the initial cmd, so inInitTree should be true entrypointChecker := ec.NewProcessChecker(). From f386c2b30f6f52996baab0a27301906ce7376127 Mon Sep 17 00:00:00 2001 From: William Findlay Date: Fri, 24 Jan 2025 11:38:23 -0500 Subject: [PATCH 2/3] proc_reader: handle in_init_tree Recently we introduced the in_init_tree flag into execve map values to indicate whether a process is a member of the initial process tree for a container. This worked well for containers started after Tetragon, but broke for cases where the container was started before Tetragon, since our procfs walk did not account for the in_init_tree flag. Fix this behaviour by introducing logic in the procfs walk to account for this. Signed-off-by: William Findlay --- pkg/sensors/exec/procevents/proc_reader.go | 68 ++++++++++++------- .../exec/procevents/proc_reader_test.go | 39 +++++++++++ 2 files changed, 82 insertions(+), 25 deletions(-) diff --git a/pkg/sensors/exec/procevents/proc_reader.go b/pkg/sensors/exec/procevents/proc_reader.go index 6e3f0d3aba9..7580507b315 100644 --- a/pkg/sensors/exec/procevents/proc_reader.go +++ b/pkg/sensors/exec/procevents/proc_reader.go @@ -4,10 +4,12 @@ package procevents import ( + "cmp" "fmt" "os" "path/filepath" "regexp" + "slices" "sort" "strconv" "strings" @@ -320,6 +322,41 @@ func updateExecveMapStats(procs int64) { } } +func procToKeyValue(p procs, inInitTree map[uint32]struct{}) (*execvemap.ExecveKey, *execvemap.ExecveValue) { + k := &execvemap.ExecveKey{Pid: p.pid} + v := &execvemap.ExecveValue{} + + v.Parent.Pid = p.ppid + v.Parent.Ktime = p.pktime + v.Process.Pid = p.pid + v.Process.Ktime = p.ktime + v.Flags = 0 + v.Nspid = p.nspid + v.Capabilities.Permitted = p.permitted + v.Capabilities.Effective = p.effective + v.Capabilities.Inheritable = p.inheritable + v.Namespaces.UtsInum = p.uts_ns + v.Namespaces.IpcInum = p.ipc_ns + v.Namespaces.MntInum = p.mnt_ns + v.Namespaces.PidInum = p.pid_ns + v.Namespaces.PidChildInum = p.pid_for_children_ns + v.Namespaces.NetInum = p.net_ns + v.Namespaces.TimeInum = p.time_ns + v.Namespaces.TimeChildInum = p.time_for_children_ns + v.Namespaces.CgroupInum = p.cgroup_ns + v.Namespaces.UserInum = p.user_ns + pathLength := copy(v.Binary.Path[:], p.exe) + v.Binary.PathLength = int32(pathLength) + + _, parentInInitTree := inInitTree[p.ppid] + if v.Nspid == 1 || parentInInitTree { + v.Flags |= api.EventInInitTree + inInitTree[p.pid] = struct{}{} + } + + return k, v +} + func writeExecveMap(procs []procs) { mapDir := bpf.MapPrefixPath() @@ -335,32 +372,9 @@ func writeExecveMap(procs []procs) { panic(err) } } + inInitTree := make(map[uint32]struct{}) for _, p := range procs { - k := &execvemap.ExecveKey{Pid: p.pid} - v := &execvemap.ExecveValue{} - - v.Parent.Pid = p.ppid - v.Parent.Ktime = p.pktime - v.Process.Pid = p.pid - v.Process.Ktime = p.ktime - v.Flags = 0 - v.Nspid = p.nspid - v.Capabilities.Permitted = p.permitted - v.Capabilities.Effective = p.effective - v.Capabilities.Inheritable = p.inheritable - v.Namespaces.UtsInum = p.uts_ns - v.Namespaces.IpcInum = p.ipc_ns - v.Namespaces.MntInum = p.mnt_ns - v.Namespaces.PidInum = p.pid_ns - v.Namespaces.PidChildInum = p.pid_for_children_ns - v.Namespaces.NetInum = p.net_ns - v.Namespaces.TimeInum = p.time_ns - v.Namespaces.TimeChildInum = p.time_for_children_ns - v.Namespaces.CgroupInum = p.cgroup_ns - v.Namespaces.UserInum = p.user_ns - pathLength := copy(v.Binary.Path[:], p.exe) - v.Binary.PathLength = int32(pathLength) - + k, v := procToKeyValue(p, inInitTree) err := m.Put(k, v) if err != nil { logger.GetLogger().WithField("value", v).WithError(err).Warn("failed to put value in execve_map") @@ -647,6 +661,10 @@ func listRunningProcs(procPath string) ([]procs, error) { logger.GetLogger().Infof("Read ProcFS %s appended %d/%d entries", option.Config.ProcFS, len(processes), len(procFS)) + slices.SortFunc(processes, func(a, b procs) int { + return cmp.Compare(a.pid, b.pid) + }) + return processes, nil } diff --git a/pkg/sensors/exec/procevents/proc_reader_test.go b/pkg/sensors/exec/procevents/proc_reader_test.go index c0e514e15b3..88a66aacbfb 100644 --- a/pkg/sensors/exec/procevents/proc_reader_test.go +++ b/pkg/sensors/exec/procevents/proc_reader_test.go @@ -4,8 +4,15 @@ package procevents import ( + "os/exec" + "strconv" + "strings" "testing" + "time" + "github.com/cilium/tetragon/pkg/api" + "github.com/cilium/tetragon/pkg/observer/observertesthelper/docker" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -20,3 +27,35 @@ func TestListRunningProcs(t *testing.T) { require.Equal(t, p.pid, p.tid) } } + +func TestInInitTreeProcfs(t *testing.T) { + if err := exec.Command("docker", "version").Run(); err != nil { + t.Skipf("docker not available. skipping test: %s", err) + } + + containerID := docker.Create(t, "--name", "procfs-in-init-tree-test", "bash", "bash", "-c", "sleep infinity") + + docker.Start(t, "procfs-in-init-tree-test") + time.Sleep(1 * time.Second) + + rootPidOutput, err := exec.Command("docker", "inspect", "-f", "{{.State.Pid}}", containerID).Output() + require.NoError(t, err, "root pid should fetch") + rootPid, err := strconv.Atoi(strings.TrimSpace(string(rootPidOutput))) + require.NoError(t, err, "root pid should parse") + + procs, err := listRunningProcs("/proc") + require.NoError(t, err) + require.NotNil(t, procs) + require.NotEqual(t, 0, len(procs)) + + inInitTree := make(map[uint32]struct{}) + for _, p := range procs { + require.NotZero(t, p.pid) + require.Equal(t, p.pid, p.tid) + _, v := procToKeyValue(p, inInitTree) + if v.Process.Pid == uint32(rootPid) || v.Parent.Pid == uint32(rootPid) { + isInInitTree := v.Flags&api.EventInInitTree == api.EventInInitTree + assert.True(t, isInInitTree) + } + } +} From df89bd1ccf6501b6efb2988679224438c6a41732 Mon Sep 17 00:00:00 2001 From: William Findlay Date: Fri, 24 Jan 2025 14:39:26 -0500 Subject: [PATCH 3/3] proc: handle docker container id format in CI In our CI environment, docker cgroups do not contain the key word docker. This caused the procfs walker to fail to identify the container ID's of docker container processes started before Tetragon. Add some naive logic to fall back to so that we can handle this case. Signed-off-by: William Findlay --- pkg/sensors/exec/procevents/proc.go | 12 ++++++++++++ pkg/sensors/exec/procevents/proc_reader.go | 6 ------ pkg/sensors/exec/procevents/proc_test.go | 5 +++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pkg/sensors/exec/procevents/proc.go b/pkg/sensors/exec/procevents/proc.go index 0901c94c5e1..3d41d298fc2 100644 --- a/pkg/sensors/exec/procevents/proc.go +++ b/pkg/sensors/exec/procevents/proc.go @@ -5,6 +5,7 @@ package procevents import ( "bytes" + "encoding/hex" "fmt" "os" "path/filepath" @@ -136,6 +137,17 @@ func procsFindDockerId(cgroups string) (string, int) { return container, i } } + // In some environments, such as the GitHub Ubuntu CI runner, docker cgroups do not contain the docker keyword but do end with a hex ID in their last component. Fall back to a naive approach here to handle that case. + components := strings.Split(s, "/") + if len(components) > 0 { + id := components[len(components)-1] + _, err := hex.DecodeString(id) + if err == nil { + if len(id) >= 31 { + return id[:31], len(strings.Join(components[:len(components)-1], "")) + 1 + } + } + } } return "", 0 } diff --git a/pkg/sensors/exec/procevents/proc_reader.go b/pkg/sensors/exec/procevents/proc_reader.go index 7580507b315..7917338cc84 100644 --- a/pkg/sensors/exec/procevents/proc_reader.go +++ b/pkg/sensors/exec/procevents/proc_reader.go @@ -4,12 +4,10 @@ package procevents import ( - "cmp" "fmt" "os" "path/filepath" "regexp" - "slices" "sort" "strconv" "strings" @@ -661,10 +659,6 @@ func listRunningProcs(procPath string) ([]procs, error) { logger.GetLogger().Infof("Read ProcFS %s appended %d/%d entries", option.Config.ProcFS, len(processes), len(procFS)) - slices.SortFunc(processes, func(a, b procs) int { - return cmp.Compare(a.pid, b.pid) - }) - return processes, nil } diff --git a/pkg/sensors/exec/procevents/proc_test.go b/pkg/sensors/exec/procevents/proc_test.go index 452282ab5a2..0da8fb17a70 100644 --- a/pkg/sensors/exec/procevents/proc_test.go +++ b/pkg/sensors/exec/procevents/proc_test.go @@ -165,6 +165,11 @@ func TestProcsFindContainerId(t *testing.T) { assert.Equal(t, i, 80, "ContainerId offset wrong") assert.Equal(t, d, "0ca2b3cd20e5f55a2bbe8d4aa3f811c", "ContainerId wrong") + p = "11:pids:/actions_job/ec5fd62ba68d0b75a3cbdb7f7f78b526440b7969e22b2b362fb6f429ded42fdc" + d, i = procsFindDockerId(p) + assert.Equal(t, i, 20, "ContainerId offset wrong") + assert.Equal(t, d, "ec5fd62ba68d0b75a3cbdb7f7f78b52", "ContainerId wrong") + p = "" d, i = procsFindDockerId(p) assert.Equal(t, d, "", "Expect output '' empty string")