diff --git a/process/process.go b/process/process.go index e7502492c..54878b8d1 100644 --- a/process/process.go +++ b/process/process.go @@ -10,8 +10,10 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path" + "path/filepath" "regexp" "strconv" "strings" @@ -181,6 +183,62 @@ func extractContainerID(pid libpf.PID) (libpf.String, error) { return parseContainerID(cgroupFile), nil } +// CgroupRootInode returns the inode of /proc//root/sys/fs/cgroup, which identifies +// the cgroup namespace root visible to the given process, unaffected by namespace masking. +func CgroupRootInode(pid libpf.PID) (uint64, error) { + var st unix.Stat_t + if err := unix.Stat(fmt.Sprintf("/proc/%d/root/sys/fs/cgroup", pid), &st); err != nil { + return 0, err + } + return st.Ino, nil +} + +// DetectSelfContainerIDViaInode detects the current process's container ID by matching +// cgroup directory inodes. When the process runs in a private cgroup namespace (cgroup v2), +// /proc/self/cgroup returns a path relative to the namespace root (e.g. "0::/"), making it +// impossible to extract the container ID via the standard path. However, stat("/sys/fs/cgroup") +// returns the inode of the process's actual cgroup directory on the host, unaffected by +// namespace masking. This function walks the host's cgroup tree (via +// /proc/1/root/sys/fs/cgroup) to find the directory whose inode matches, then extracts +// the container ID from its path. 
+func DetectSelfContainerIDViaInode() (libpf.String, uint64, error) { + const hostCgroupRoot = "/proc/1/root/sys/fs/cgroup" + + var selfStat unix.Stat_t + if err := unix.Stat("/sys/fs/cgroup", &selfStat); err != nil { + return libpf.NullString, 0, fmt.Errorf("failed to stat /sys/fs/cgroup: %w", err) + } + selfIno := selfStat.Ino + + var matched libpf.String + err := filepath.WalkDir(hostCgroupRoot, func(path string, d fs.DirEntry, err error) error { + if err != nil { + if d == nil { + return err // root is inaccessible + } + return nil // skip inaccessible subdirectories + } + if !d.IsDir() { + return nil + } + var st unix.Stat_t + if err := unix.Stat(path, &st); err != nil { + return nil + } + if st.Ino == selfIno { + if parts := expContainerID.FindStringSubmatch(path); len(parts) == 2 { + matched = libpf.Intern(parts[1]) + } + return filepath.SkipAll + } + return nil + }) + if err != nil { + return libpf.NullString, 0, fmt.Errorf("failed to walk host cgroup tree: %w", err) + } + return matched, selfIno, nil +} + func trimMappingPath(path string) string { // Trim the deleted indication from the path. 
// See path_with_deleted in linux/fs/d_path.c diff --git a/processmanager/manager.go b/processmanager/manager.go index f280a4fbd..ce2661cee 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -24,6 +24,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/metrics" "go.opentelemetry.io/ebpf-profiler/nativeunwind" "go.opentelemetry.io/ebpf-profiler/periodiccaller" + "go.opentelemetry.io/ebpf-profiler/process" pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" @@ -94,6 +95,11 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter interpreters := make(map[libpf.PID]map[util.OnDiskFileIdentifier]interpreter.Instance) + selfContainerID, selfCgroupIno, err := process.DetectSelfContainerIDViaInode() + if err != nil { + log.Debugf("Failed to detect self container ID via inode: %v", err) + } + pm := &ProcessManager{ interpreterTracerEnabled: em.NumInterpreterLoaders() > 0, eim: em, @@ -108,6 +114,8 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter metricsAddSlice: metrics.AddSlice, filterErrorFrames: filterErrorFrames, includeEnvVars: includeEnvVars, + selfCgroupIno: selfCgroupIno, + selfContainerID: selfContainerID, } collectInterpreterMetrics(ctx, pm, monitorInterval) diff --git a/processmanager/processinfo.go b/processmanager/processinfo.go index cf95cfa9b..f0d5966c9 100644 --- a/processmanager/processinfo.go +++ b/processmanager/processinfo.go @@ -129,8 +129,10 @@ func (pm *ProcessManager) getPidInformation(pid libpf.PID, pr process.Process, return nil } + meta := pr.GetProcessMeta(process.MetaConfig{IncludeEnvVars: pm.includeEnvVars}) + pm.fillSelfContainerID(pid, &meta) info := &processInfo{ - meta: pr.GetProcessMeta(process.MetaConfig{IncludeEnvVars: pm.includeEnvVars}), + meta: meta, libcInfo: nil, } pm.pidToProcessInfo[pid] = info @@ -138,6 +140,23 @@ func 
(pm *ProcessManager) getPidInformation(pid libpf.PID, pr process.Process, return info } +// fillSelfContainerID sets the container ID on meta if the process has the same cgroup +// directory root as the profiler and the standard cgroup-based detection returned no result. +func (pm *ProcessManager) fillSelfContainerID(pid libpf.PID, meta *process.ProcessMeta) { + if meta.ContainerID != libpf.NullString || pm.selfContainerID == libpf.NullString { + return + } + ino, err := process.CgroupRootInode(pid) + if err != nil { + return + } + if ino == pm.selfCgroupIno { + meta.ContainerID = pm.selfContainerID + } else { + log.Debugf("Process %d cgroup inode (%d) doesn't match profiler (%d)", pid, ino, pm.selfCgroupIno) + } +} + // assignInterpreter will update the interpreters maps with given interpreter.Instance. // Caller is responsible to hold pm.mu write lock to avoid race conditions. func (pm *ProcessManager) assignInterpreter(pid libpf.PID, key util.OnDiskFileIdentifier, @@ -648,6 +667,7 @@ func (pm *ProcessManager) SynchronizeProcess(pr process.Process) { var meta process.ProcessMeta if updateProcessMeta { meta = pr.GetProcessMeta(process.MetaConfig{IncludeEnvVars: pm.includeEnvVars}) + pm.fillSelfContainerID(pid, &meta) } // Sort and publish the new mappings and meta diff --git a/processmanager/types.go b/processmanager/types.go index ffe8e4ec5..c06dd0e5d 100644 --- a/processmanager/types.go +++ b/processmanager/types.go @@ -113,6 +113,16 @@ type ProcessManager struct { // includeEnvVars holds a list of env vars that should be captured from processes includeEnvVars libpf.Set[string] + + // selfCgroupIno is the inode of the profiler's cgroup directory + // (stat("/sys/fs/cgroup")). Used to identify processes whose cgroup root + // matches the profiler's, which need the selfContainerID fallback. + selfCgroupIno uint64 + + // selfContainerID is the profiler's own container ID, detected once at startup. 
+ // Used as a fallback when /proc/<pid>/cgroup yields no container ID for processes + // that share the profiler's cgroup directory (e.g., private cgroup namespace). + selfContainerID libpf.String } // Mapping represents an executable memory mapping of a process.