From 499551ba7273c88aa9546499131be8f94afc0213 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Wed, 16 Jul 2025 16:30:53 +0700 Subject: [PATCH 01/12] process: allow compiling on nonlinux systems --- process/debug.go | 2 ++ process/debug_amd64.go | 4 +--- process/debug_arm64.go | 4 +--- process/debug_other.go | 18 ++++++++++++++++++ process/machine_amd64.go | 10 ++++++++++ process/machine_arm64.go | 10 ++++++++++ process/process_test.go | 4 ++++ 7 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 process/debug_other.go create mode 100644 process/machine_amd64.go create mode 100644 process/machine_arm64.go diff --git a/process/debug.go b/process/debug.go index c3efa6573..cf5c498a7 100644 --- a/process/debug.go +++ b/process/debug.go @@ -1,3 +1,5 @@ +//go:build linux + // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 diff --git a/process/debug_amd64.go b/process/debug_amd64.go index 769ce8772..bcd0ea1e5 100644 --- a/process/debug_amd64.go +++ b/process/debug_amd64.go @@ -1,4 +1,4 @@ -//go:build amd64 +//go:build linux && amd64 // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 @@ -11,8 +11,6 @@ import ( "fmt" ) -const currentMachine = elf.EM_X86_64 - func (sp *ptraceProcess) getThreadInfo(tid int) (ThreadInfo, error) { prStatus := make([]byte, 28*8) if err := ptraceGetRegset(tid, int(elf.NT_PRSTATUS), prStatus); err != nil { diff --git a/process/debug_arm64.go b/process/debug_arm64.go index 17024e252..30a91393e 100644 --- a/process/debug_arm64.go +++ b/process/debug_arm64.go @@ -1,4 +1,4 @@ -//go:build arm64 +//go:build linux && arm64 // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 @@ -11,8 +11,6 @@ import ( "fmt" ) -const currentMachine = elf.EM_AARCH64 - func (sp *ptraceProcess) GetMachineData() MachineData { pacMask := make([]byte, 16) _ = ptraceGetRegset(int(sp.pid), int(NT_ARM_PAC_MASK), pacMask) diff --git a/process/debug_other.go b/process/debug_other.go new file mode 100644 index 000000000..81e6586fb --- /dev/null +++ b/process/debug_other.go @@ -0,0 +1,18 @@ +//go:build !linux + +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package process // import "go.opentelemetry.io/ebpf-profiler/process" + +import ( + "fmt" + "go.opentelemetry.io/ebpf-profiler/libpf" + "runtime" +) + +// NewPtrace is the stub implementation, allowing to compile the process +// package on non linux systems, always failing at runtime with an error if used. +func NewPtrace(_ libpf.PID) (Process, error) { + return nil, fmt.Errorf("unsupported os %s", runtime.GOOS) +} diff --git a/process/machine_amd64.go b/process/machine_amd64.go new file mode 100644 index 000000000..6313e9a49 --- /dev/null +++ b/process/machine_amd64.go @@ -0,0 +1,10 @@ +//go:build amd64 + +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package process // import "go.opentelemetry.io/ebpf-profiler/process" + +import "debug/elf" + +const currentMachine = elf.EM_X86_64 diff --git a/process/machine_arm64.go b/process/machine_arm64.go new file mode 100644 index 000000000..9c4caa689 --- /dev/null +++ b/process/machine_arm64.go @@ -0,0 +1,10 @@ +//go:build arm64 + +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package process // import "go.opentelemetry.io/ebpf-profiler/process" + +import "debug/elf" + +const currentMachine = elf.EM_AARCH64 diff --git a/process/process_test.go b/process/process_test.go index 57bd87949..b6313550f 100644 --- a/process/process_test.go +++ b/process/process_test.go @@ -6,6 +6,7 @@ package process import ( "debug/elf" "os" + "runtime" "strings" "testing" @@ -113,6 +114,9 @@ func TestParseMappings(t *testing.T) { } func TestNewPIDOfSelf(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skipf("unsupported os %s", runtime.GOOS) + } pid := libpf.PID(os.Getpid()) pr := New(pid, pid) assert.NotNil(t, pr) From d1c72d937e4a81ec7829e3913a8ec349f34fab25 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Wed, 16 Jul 2025 18:58:28 +0700 Subject: [PATCH 02/12] rename debug.go to debug_linuxx.go --- process/{debug.go => debug_linux.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename process/{debug.go => debug_linux.go} (100%) diff --git a/process/debug.go b/process/debug_linux.go similarity index 100% rename from process/debug.go rename to process/debug_linux.go From 66ffc02fd41adf4a5f654f875917ee32efb06cb3 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 25 Jul 2025 16:38:58 +0700 Subject: [PATCH 03/12] split processmanager/ebpf --- processmanager/ebpf/ebpf.go | 683 +---------------- processmanager/ebpf/{ => impl}/asyncupdate.go | 2 +- .../asyncupdate_integration_test.go | 2 +- processmanager/ebpf/impl/ebpf.go | 691 ++++++++++++++++++ .../ebpf/{ => impl}/ebpf_integration_test.go | 2 +- processmanager/ebpf/{ => impl}/ebpf_test.go | 2 +- processmanager/ebpf/impl/types.go | 4 + processmanager/ebpf/types.go | 12 - processmanager/execinfomanager/manager.go | 10 +- processmanager/manager.go | 4 +- processmanager/manager_test.go | 6 +- processmanager/types.go | 4 +- tools/coredump/ebpfmaps.go | 6 +- tracer/tracer.go | 2 +- 14 files changed, 723 insertions(+), 707 deletions(-) rename processmanager/ebpf/{ => impl}/asyncupdate.go (97%) rename processmanager/ebpf/{ => impl}/asyncupdate_integration_test.go (99%) create mode 100644 processmanager/ebpf/impl/ebpf.go rename processmanager/ebpf/{ => impl}/ebpf_integration_test.go (99%) rename processmanager/ebpf/{ => impl}/ebpf_test.go (98%) create mode 100644 processmanager/ebpf/impl/types.go delete mode 100644 processmanager/ebpf/types.go diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index 72a79b6fd..9a29b1cb7 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -4,35 +4,15 @@ package ebpf // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" import ( - "context" - "errors" "fmt" - "math/bits" - "reflect" - "sync" - "unsafe" - - cebpf "github.com/cilium/ebpf" - "github.com/cilium/ebpf/features" - log "github.com/sirupsen/logrus" "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/lpm" "go.opentelemetry.io/ebpf-profiler/metrics" - sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" - "go.opentelemetry.io/ebpf-profiler/rlimit" + "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/util" - "golang.org/x/exp/constraints" -) - -const ( - // updatePoolWorkers decides how many background workers we spawn to - // process map-in-map updates. - updatePoolWorkers = 16 - // updatePoolQueueCap decides the work queue capacity of each worker. - updatePoolQueueCap = 8 ) // EbpfHandler provides the functionality to interact with eBPF maps. @@ -44,7 +24,7 @@ type EbpfHandler interface { RemoveReportedPID(pid libpf.PID) // UpdateUnwindInfo writes UnwindInfo to given unwind info array index - UpdateUnwindInfo(index uint16, info sdtypes.UnwindInfo) error + UpdateUnwindInfo(index uint16, info stackdeltatypes.UnwindInfo) error // UpdateExeIDToStackDeltas defines a function that updates the eBPF map exe_id_to_stack_deltas // for host.FileID with the elements of StackDeltaEBPF. It returns the mapID used. @@ -84,101 +64,12 @@ type EbpfHandler interface { SupportsLPMTrieBatchOperations() bool } -type ebpfMapsImpl struct { - // Interpreter related eBPF maps - InterpreterOffsets *cebpf.Map `name:"interpreter_offsets"` - DotnetProcs *cebpf.Map `name:"dotnet_procs"` - PerlProcs *cebpf.Map `name:"perl_procs"` - PyProcs *cebpf.Map `name:"py_procs"` - HotspotProcs *cebpf.Map `name:"hotspot_procs"` - PhpProcs *cebpf.Map `name:"php_procs"` - RubyProcs *cebpf.Map `name:"ruby_procs"` - V8Procs *cebpf.Map `name:"v8_procs"` - ApmIntProcs *cebpf.Map `name:"apm_int_procs"` - GoLabelsProcs *cebpf.Map `name:"go_labels_procs"` - - // Stackdelta and process related eBPF maps - ExeIDToStackDeltaMaps []*cebpf.Map - StackDeltaPageToInfo *cebpf.Map `name:"stack_delta_page_to_info"` - PidPageToMappingInfo *cebpf.Map `name:"pid_page_to_mapping_info"` - UnwindInfoArray *cebpf.Map `name:"unwind_info_array"` - ReportedPIDs *cebpf.Map `name:"reported_pids"` - - errCounterLock sync.Mutex - errCounter map[metrics.MetricID]int64 - - hasGenericBatchOperations bool - hasLPMTrieBatchOperations bool - - updateWorkers *asyncMapUpdaterPool -} - -// Compile time check to make sure ebpfMapsImpl satisfies the interface . -var _ EbpfHandler = &ebpfMapsImpl{} - -// LoadMaps checks if the needed maps for the process manager are available -// and loads their references into a package-internal structure. -// -// It further spawns background workers for deferred map updates; the given -// context can be used to terminate them on shutdown. -func LoadMaps(ctx context.Context, maps map[string]*cebpf.Map) (EbpfHandler, error) { - impl := &ebpfMapsImpl{} - impl.errCounter = make(map[metrics.MetricID]int64) - - implRefVal := reflect.ValueOf(impl).Elem() - implRefType := reflect.TypeOf(impl).Elem() - for i := 0; i < implRefType.NumField(); i++ { - fieldType := implRefType.Field(i) - nameTag, ok := fieldType.Tag.Lookup("name") - if !ok { - continue - } - mapVal, ok := maps[nameTag] - if !ok { - log.Fatalf("Map %v is not available", nameTag) - } - implRefVal.Field(i).Set(reflect.ValueOf(mapVal)) - } - - numBuckets := support.StackDeltaBucketLargest - support.StackDeltaBucketSmallest + 1 - impl.ExeIDToStackDeltaMaps = make([]*cebpf.Map, numBuckets) - for i := support.StackDeltaBucketSmallest; i <= support.StackDeltaBucketLargest; i++ { - deltasMapName := fmt.Sprintf("exe_id_to_%d_stack_deltas", i) - deltasMap, ok := maps[deltasMapName] - if !ok { - log.Fatalf("Map %s is not available", deltasMapName) - } - impl.ExeIDToStackDeltaMaps[i-support.StackDeltaBucketSmallest] = deltasMap - } - - if err := probeBatchOperations(cebpf.Hash); err == nil { - log.Infof("Supports generic eBPF map batch operations") - impl.hasGenericBatchOperations = true - } - - if err := probeBatchOperations(cebpf.LPMTrie); err == nil { - log.Infof("Supports LPM trie eBPF map batch operations") - impl.hasLPMTrieBatchOperations = true - } - - impl.updateWorkers = newAsyncMapUpdaterPool(ctx, updatePoolWorkers, updatePoolQueueCap) - - return impl, nil -} - -// UpdateInterpreterOffsets adds the given moduleRanges to the eBPF map interpreterOffsets. -func (impl *ebpfMapsImpl) UpdateInterpreterOffsets(ebpfProgIndex uint16, fileID host.FileID, - offsetRanges []util.Range) error { - key, value, err := InterpreterOffsetKeyValue(ebpfProgIndex, fileID, offsetRanges) - if err != nil { - return err - } - if err := impl.InterpreterOffsets.Update(unsafe.Pointer(&key), unsafe.Pointer(&value), - cebpf.UpdateAny); err != nil { - log.Fatalf("Failed to place interpreter range in map: %v", err) - } - - return nil +// StackDeltaEBPF represents stack deltas preprocessed by the ProcessManager which are +// then loaded to the eBPF map. This is Go equivalent of 'struct StackDelta' in eBPF types.h. +// See the eBPF header file for details. +type StackDeltaEBPF struct { + AddressLow uint16 + UnwindInfo uint16 } func InterpreterOffsetKeyValue(ebpfProgIndex uint16, fileID host.FileID, @@ -209,561 +100,3 @@ func InterpreterOffsetKeyValue(ebpfProgIndex uint16, fileID host.FileID, } return key, value, nil } - -// getInterpreterTypeMap returns the eBPF map for the given typ -// or an error if typ is not supported. -func (impl *ebpfMapsImpl) getInterpreterTypeMap(typ libpf.InterpreterType) (*cebpf.Map, error) { - switch typ { - case libpf.Dotnet: - return impl.DotnetProcs, nil - case libpf.Perl: - return impl.PerlProcs, nil - case libpf.Python: - return impl.PyProcs, nil - case libpf.HotSpot: - return impl.HotspotProcs, nil - case libpf.PHP: - return impl.PhpProcs, nil - case libpf.Ruby: - return impl.RubyProcs, nil - case libpf.V8: - return impl.V8Procs, nil - case libpf.APMInt: - return impl.ApmIntProcs, nil - case libpf.GoLabels: - return impl.GoLabelsProcs, nil - default: - return nil, fmt.Errorf("type %d is not (yet) supported", typ) - } -} - -// UpdateProcData adds the given PID specific data to the specified interpreter data eBPF map. -func (impl *ebpfMapsImpl) UpdateProcData(typ libpf.InterpreterType, pid libpf.PID, - data unsafe.Pointer) error { - log.Debugf("Loading symbol addresses into eBPF map for PID %d type %d", - pid, typ) - ebpfMap, err := impl.getInterpreterTypeMap(typ) - if err != nil { - return err - } - - pid32 := uint32(pid) - if err := ebpfMap.Update(unsafe.Pointer(&pid32), data, cebpf.UpdateAny); err != nil { - return fmt.Errorf("failed to add %v info: %s", typ, err) - } - return nil -} - -// DeleteProcData removes the given PID specific data of the specified interpreter data eBPF map. -func (impl *ebpfMapsImpl) DeleteProcData(typ libpf.InterpreterType, pid libpf.PID) error { - log.Debugf("Removing symbol addresses from eBPF map for PID %d type %d", - pid, typ) - ebpfMap, err := impl.getInterpreterTypeMap(typ) - if err != nil { - return err - } - - pid32 := uint32(pid) - if err := ebpfMap.Delete(unsafe.Pointer(&pid32)); err != nil { - return fmt.Errorf("failed to remove info: %v", err) - } - return nil -} - -// getPIDPage initializes a PIDPage instance. -func getPIDPage(pid libpf.PID, key uint64, length uint32) support.PIDPage { - // pid_page_to_mapping_info is an LPM trie and expects the pid and page - // to be in big endian format. - return support.PIDPage{ - Pid: bits.ReverseBytes32(uint32(pid)), - Page: bits.ReverseBytes64(key), - PrefixLen: support.BitWidthPID + length, - } -} - -// getPIDPageFromPrefix initializes a PIDPage instance from a PID and lpm.Prefix. -func getPIDPageFromPrefix(pid libpf.PID, prefix lpm.Prefix) support.PIDPage { - return getPIDPage(pid, prefix.Key, prefix.Length) -} - -// UpdatePidInterpreterMapping updates the eBPF map pidPageToMappingInfo with the -// data required to call the correct interpreter unwinder for that memory region. -func (impl *ebpfMapsImpl) UpdatePidInterpreterMapping(pid libpf.PID, prefix lpm.Prefix, - interpreterProgram uint8, fileID host.FileID, bias uint64) error { - cKey := getPIDPageFromPrefix(pid, prefix) - biasAndUnwindProgram, err := support.EncodeBiasAndUnwindProgram(bias, interpreterProgram) - if err != nil { - return err - } - - cValue := support.PIDPageMappingInfo{ - File_id: uint64(fileID), - Bias_and_unwind_program: biasAndUnwindProgram, - } - - return impl.PidPageToMappingInfo.Update(unsafe.Pointer(&cKey), unsafe.Pointer(&cValue), - cebpf.UpdateNoExist) -} - -// DeletePidInterpreterMapping removes the element specified by pid, prefix and a corresponding -// mapping size from the eBPF map pidPageToMappingInfo. It is normally used when an -// interpreter process dies or a region that formerly required interpreter-based unwinding is no -// longer needed. -func (impl *ebpfMapsImpl) DeletePidInterpreterMapping(pid libpf.PID, prefix lpm.Prefix) error { - cKey := getPIDPageFromPrefix(pid, prefix) - return impl.PidPageToMappingInfo.Delete(unsafe.Pointer(&cKey)) -} - -// trackMapError is a wrapper to report issues with changes to eBPF maps. -func (impl *ebpfMapsImpl) trackMapError(id metrics.MetricID, err error) error { - if err != nil { - impl.errCounterLock.Lock() - impl.errCounter[id]++ - impl.errCounterLock.Unlock() - } - return err -} - -// CollectMetrics returns gathered errors for changes to eBPF maps. -func (impl *ebpfMapsImpl) CollectMetrics() []metrics.Metric { - impl.errCounterLock.Lock() - defer impl.errCounterLock.Unlock() - - counts := make([]metrics.Metric, 0, 7) - for id, value := range impl.errCounter { - counts = append(counts, metrics.Metric{ - ID: id, - Value: metrics.MetricValue(value), - }) - // As we don't want to report metrics with zero values on the next call, - // we delete the entries from the map instead of just resetting them. - delete(impl.errCounter, id) - } - - return counts -} - -// poolPIDPage caches reusable heap-allocated PIDPage instances -// to avoid excessive heap allocations. -var poolPIDPage = sync.Pool{ - New: func() any { - return new(support.PIDPage) - }, -} - -// getPIDPagePooled returns a heap-allocated and initialized PIDPage instance. -// After usage, put the instance back into the pool with poolPIDPage.Put(). -func getPIDPagePooled(pid libpf.PID, prefix lpm.Prefix) *support.PIDPage { - cPIDPage := poolPIDPage.Get().(*support.PIDPage) - *cPIDPage = getPIDPageFromPrefix(pid, prefix) - return cPIDPage -} - -// poolPIDPageMappingInfo caches reusable heap-allocated PIDPageMappingInfo instances -// to avoid excessive heap allocations. -var poolPIDPageMappingInfo = sync.Pool{ - New: func() any { - return new(support.PIDPageMappingInfo) - }, -} - -// getPIDPageMappingInfo returns a heap-allocated and initialized PIDPageMappingInfo instance. -// After usage, put the instance back into the pool with poolPIDPageMappingInfo.Put(). -func getPIDPageMappingInfo(fileID, biasAndUnwindProgram uint64) *support.PIDPageMappingInfo { - cInfo := poolPIDPageMappingInfo.Get().(*support.PIDPageMappingInfo) - cInfo.File_id = fileID - cInfo.Bias_and_unwind_program = biasAndUnwindProgram - - return cInfo -} - -// probeBatchOperations tests if the BPF syscall accepts batch operations. It -// returns nil if batch operations are supported for mapType or an error otherwise. -func probeBatchOperations(mapType cebpf.MapType) error { - restoreRlimit, err := rlimit.MaximizeMemlock() - if err != nil { - // In environment like github action runners, we can not adjust rlimit. - // Therefore we just return false here and do not use batch operations. - return fmt.Errorf("failed to adjust rlimit: %w", err) - } - defer restoreRlimit() - - updates := 5 - mapSpec := &cebpf.MapSpec{ - Type: mapType, - KeySize: 8, - ValueSize: 8, - MaxEntries: uint32(updates), - Flags: features.BPF_F_NO_PREALLOC, - } - - var keys any - switch mapType { - case cebpf.Array: - // KeySize for Array maps always needs to be 4. - mapSpec.KeySize = 4 - // Array maps are always preallocated. - mapSpec.Flags = 0 - keys = generateSlice[uint32](updates) - default: - keys = generateSlice[uint64](updates) - } - - probeMap, err := cebpf.NewMap(mapSpec) - if err != nil { - return fmt.Errorf("failed to create %s map for batch probing: %v", - mapType, err) - } - defer probeMap.Close() - - values := generateSlice[uint64](updates) - - n, err := probeMap.BatchUpdate(keys, values, nil) - if err != nil { - // Older kernel do not support batch operations on maps. - // This is just fine and we return here. - return err - } - if n != updates { - return fmt.Errorf("unexpected batch update return: expected %d but got %d", - updates, n) - } - - // Remove the probe entries from the map. - m, err := probeMap.BatchDelete(keys, nil) - if err != nil { - return err - } - if m != updates { - return fmt.Errorf("unexpected batch delete return: expected %d but got %d", - updates, m) - } - return nil -} - -// getMapID returns the mapID number to use for given number of stack deltas. -func getMapID(numDeltas uint32) (uint16, error) { - significantBits := 32 - bits.LeadingZeros32(numDeltas) - if significantBits <= support.StackDeltaBucketSmallest { - return support.StackDeltaBucketSmallest, nil - } - if significantBits > support.StackDeltaBucketLargest { - return 0, fmt.Errorf("no map available for %d stack deltas", numDeltas) - } - return uint16(significantBits), nil -} - -// getOuterMap is a helper function to select the correct outer map for -// storing the stack deltas based on the mapID. -func (impl *ebpfMapsImpl) getOuterMap(mapID uint16) *cebpf.Map { - if mapID < support.StackDeltaBucketSmallest || - mapID > support.StackDeltaBucketLargest { - return nil - } - return impl.ExeIDToStackDeltaMaps[mapID-support.StackDeltaBucketSmallest] -} - -// RemoveReportedPID removes a PID from the reported_pids eBPF map. The kernel component will -// place a PID in this map before it reports it to Go for further processing. -func (impl *ebpfMapsImpl) RemoveReportedPID(pid libpf.PID) { - key := uint32(pid) - _ = impl.ReportedPIDs.Delete(unsafe.Pointer(&key)) -} - -// UpdateUnwindInfo writes UnwindInfo into the unwind info array at the given index -func (impl *ebpfMapsImpl) UpdateUnwindInfo(index uint16, info sdtypes.UnwindInfo) error { - if uint32(index) >= impl.UnwindInfoArray.MaxEntries() { - return fmt.Errorf("unwind info array full (%d/%d items)", - index, impl.UnwindInfoArray.MaxEntries()) - } - - key := uint32(index) - value := support.UnwindInfo{ - Opcode: info.Opcode, - FpOpcode: info.FPOpcode, - MergeOpcode: info.MergeOpcode, - Param: info.Param, - FpParam: info.FPParam, - } - return impl.trackMapError(metrics.IDUnwindInfoArrayUpdate, - impl.UnwindInfoArray.Update(unsafe.Pointer(&key), unsafe.Pointer(&value), - cebpf.UpdateAny)) -} - -// UpdateExeIDToStackDeltas creates a nested map for fileID in the eBPF map exeIDTostack_deltas -// and inserts the elements of the deltas array in this nested map. Returns mapID or error. -func (impl *ebpfMapsImpl) UpdateExeIDToStackDeltas(fileID host.FileID, deltas []StackDeltaEBPF) ( - uint16, error) { - numDeltas := len(deltas) - mapID, err := getMapID(uint32(numDeltas)) - if err != nil { - return 0, err - } - outerMap := impl.getOuterMap(mapID) - - restoreRlimit, err := rlimit.MaximizeMemlock() - if err != nil { - return 0, fmt.Errorf("failed to increase rlimit: %v", err) - } - defer restoreRlimit() - innerMap, err := cebpf.NewMap(&cebpf.MapSpec{ - Type: cebpf.Array, - KeySize: 4, - ValueSize: support.Sizeof_StackDelta, - MaxEntries: 1 << mapID, - }) - if err != nil { - return 0, fmt.Errorf("failed to create inner map: %v", err) - } - defer func() { - if err = innerMap.Close(); err != nil { - log.Errorf("Failed to close FD of inner map for 0x%x: %v", fileID, err) - } - }() - - // We continue updating the inner map after enqueueing the update to the - // outer map. Both the async update pool and our code below need an open - // file descriptor to work, and we don't know which will complete first. - // We thus clone the FD, transfer ownership of the clone to the update - // pool and continue using our original FD whose lifetime is now no longer - // tied to the FD used in the updater pool. - innerMapCloned, err := innerMap.Clone() - if err != nil { - return 0, fmt.Errorf("failed to clone inner map: %v", err) - } - - impl.updateWorkers.EnqueueUpdate(outerMap, fileID, innerMapCloned) - - if impl.hasGenericBatchOperations { - innerKeys := make([]uint32, numDeltas) - stackDeltas := make([]support.StackDelta, numDeltas) - - // Prepare values for batch update. - for index, delta := range deltas { - innerKeys[index] = uint32(index) - stackDeltas[index].AddrLow = delta.AddressLow - stackDeltas[index].UnwindInfo = delta.UnwindInfo - } - - _, err := innerMap.BatchUpdate( - ptrCastMarshaler[uint32](innerKeys), - ptrCastMarshaler[support.StackDelta](stackDeltas), - &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateAny)}) - if err != nil { - return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasBatchUpdate, - fmt.Errorf("failed to batch insert %d elements for 0x%x "+ - "into exeIDTostack_deltas: %v", - numDeltas, fileID, err)) - } - return mapID, nil - } - - innerKey := uint32(0) - stackDelta := support.StackDelta{} - for index, delta := range deltas { - stackDelta.AddrLow = delta.AddressLow - stackDelta.UnwindInfo = delta.UnwindInfo - innerKey = uint32(index) - if err := innerMap.Update(unsafe.Pointer(&innerKey), unsafe.Pointer(&stackDelta), - cebpf.UpdateAny); err != nil { - return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasUpdate, fmt.Errorf( - "failed to insert element %d for 0x%x into exeIDTostack_deltas: %v", - index, fileID, err)) - } - } - - return mapID, nil -} - -// DeleteExeIDToStackDeltas removes all eBPF stack delta entries for given fileID and mapID number. -func (impl *ebpfMapsImpl) DeleteExeIDToStackDeltas(fileID host.FileID, mapID uint16) error { - outerMap := impl.getOuterMap(mapID) - if outerMap == nil { - return fmt.Errorf("invalid mapID %d", mapID) - } - - // Deleting the entry from the outer maps deletes also the entries of the inner - // map associated with this outer key. - impl.updateWorkers.EnqueueUpdate(outerMap, fileID, nil) - - return nil -} - -// UpdateStackDeltaPages adds fileID/page with given information to eBPF map. If the entry exists, -// it will return an error. Otherwise the key/value pairs will be appended to the hash. -func (impl *ebpfMapsImpl) UpdateStackDeltaPages(fileID host.FileID, numDeltasPerPage []uint16, - mapID uint16, firstPageAddr uint64) error { - firstDelta := uint32(0) - keys := make([]support.StackDeltaPageKey, len(numDeltasPerPage)) - values := make([]support.StackDeltaPageInfo, len(numDeltasPerPage)) - - // Prepare the key/value combinations that will be loaded. - for pageNumber, numDeltas := range numDeltasPerPage { - pageAddr := firstPageAddr + uint64(pageNumber)< support.StackDeltaBucketLargest { + return 0, fmt.Errorf("no map available for %d stack deltas", numDeltas) + } + return uint16(significantBits), nil +} + +// getOuterMap is a helper function to select the correct outer map for +// storing the stack deltas based on the mapID. +func (impl *ebpfMapsImpl) getOuterMap(mapID uint16) *cebpf.Map { + if mapID < support.StackDeltaBucketSmallest || + mapID > support.StackDeltaBucketLargest { + return nil + } + return impl.ExeIDToStackDeltaMaps[mapID-support.StackDeltaBucketSmallest] +} + +// RemoveReportedPID removes a PID from the reported_pids eBPF map. The kernel component will +// place a PID in this map before it reports it to Go for further processing. +func (impl *ebpfMapsImpl) RemoveReportedPID(pid libpf.PID) { + key := uint32(pid) + _ = impl.ReportedPIDs.Delete(unsafe.Pointer(&key)) +} + +// UpdateUnwindInfo writes UnwindInfo into the unwind info array at the given index +func (impl *ebpfMapsImpl) UpdateUnwindInfo(index uint16, info sdtypes.UnwindInfo) error { + if uint32(index) >= impl.UnwindInfoArray.MaxEntries() { + return fmt.Errorf("unwind info array full (%d/%d items)", + index, impl.UnwindInfoArray.MaxEntries()) + } + + key := uint32(index) + value := support.UnwindInfo{ + Opcode: info.Opcode, + FpOpcode: info.FPOpcode, + MergeOpcode: info.MergeOpcode, + Param: info.Param, + FpParam: info.FPParam, + } + return impl.trackMapError(metrics.IDUnwindInfoArrayUpdate, + impl.UnwindInfoArray.Update(unsafe.Pointer(&key), unsafe.Pointer(&value), + cebpf.UpdateAny)) +} + +// UpdateExeIDToStackDeltas creates a nested map for fileID in the eBPF map exeIDTostack_deltas +// and inserts the elements of the deltas array in this nested map. Returns mapID or error. +func (impl *ebpfMapsImpl) UpdateExeIDToStackDeltas(fileID host.FileID, deltas []ebpf.StackDeltaEBPF) ( + uint16, error) { + numDeltas := len(deltas) + mapID, err := getMapID(uint32(numDeltas)) + if err != nil { + return 0, err + } + outerMap := impl.getOuterMap(mapID) + + restoreRlimit, err := rlimit.MaximizeMemlock() + if err != nil { + return 0, fmt.Errorf("failed to increase rlimit: %v", err) + } + defer restoreRlimit() + innerMap, err := cebpf.NewMap(&cebpf.MapSpec{ + Type: cebpf.Array, + KeySize: 4, + ValueSize: support.Sizeof_StackDelta, + MaxEntries: 1 << mapID, + }) + if err != nil { + return 0, fmt.Errorf("failed to create inner map: %v", err) + } + defer func() { + if err = innerMap.Close(); err != nil { + log.Errorf("Failed to close FD of inner map for 0x%x: %v", fileID, err) + } + }() + + // We continue updating the inner map after enqueueing the update to the + // outer map. Both the async update pool and our code below need an open + // file descriptor to work, and we don't know which will complete first. + // We thus clone the FD, transfer ownership of the clone to the update + // pool and continue using our original FD whose lifetime is now no longer + // tied to the FD used in the updater pool. + innerMapCloned, err := innerMap.Clone() + if err != nil { + return 0, fmt.Errorf("failed to clone inner map: %v", err) + } + + impl.updateWorkers.EnqueueUpdate(outerMap, fileID, innerMapCloned) + + if impl.hasGenericBatchOperations { + innerKeys := make([]uint32, numDeltas) + stackDeltas := make([]support.StackDelta, numDeltas) + + // Prepare values for batch update. + for index, delta := range deltas { + innerKeys[index] = uint32(index) + stackDeltas[index].AddrLow = delta.AddressLow + stackDeltas[index].UnwindInfo = delta.UnwindInfo + } + + _, err := innerMap.BatchUpdate( + ptrCastMarshaler[uint32](innerKeys), + ptrCastMarshaler[support.StackDelta](stackDeltas), + &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateAny)}) + if err != nil { + return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasBatchUpdate, + fmt.Errorf("failed to batch insert %d elements for 0x%x "+ + "into exeIDTostack_deltas: %v", + numDeltas, fileID, err)) + } + return mapID, nil + } + + innerKey := uint32(0) + stackDelta := support.StackDelta{} + for index, delta := range deltas { + stackDelta.AddrLow = delta.AddressLow + stackDelta.UnwindInfo = delta.UnwindInfo + innerKey = uint32(index) + if err := innerMap.Update(unsafe.Pointer(&innerKey), unsafe.Pointer(&stackDelta), + cebpf.UpdateAny); err != nil { + return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasUpdate, fmt.Errorf( + "failed to insert element %d for 0x%x into exeIDTostack_deltas: %v", + index, fileID, err)) + } + } + + return mapID, nil +} + +// DeleteExeIDToStackDeltas removes all eBPF stack delta entries for given fileID and mapID number. +func (impl *ebpfMapsImpl) DeleteExeIDToStackDeltas(fileID host.FileID, mapID uint16) error { + outerMap := impl.getOuterMap(mapID) + if outerMap == nil { + return fmt.Errorf("invalid mapID %d", mapID) + } + + // Deleting the entry from the outer maps deletes also the entries of the inner + // map associated with this outer key. + impl.updateWorkers.EnqueueUpdate(outerMap, fileID, nil) + + return nil +} + +// UpdateStackDeltaPages adds fileID/page with given information to eBPF map. If the entry exists, +// it will return an error. Otherwise the key/value pairs will be appended to the hash. +func (impl *ebpfMapsImpl) UpdateStackDeltaPages(fileID host.FileID, numDeltasPerPage []uint16, + mapID uint16, firstPageAddr uint64) error { + firstDelta := uint32(0) + keys := make([]support.StackDeltaPageKey, len(numDeltasPerPage)) + values := make([]support.StackDeltaPageInfo, len(numDeltasPerPage)) + + // Prepare the key/value combinations that will be loaded. + for pageNumber, numDeltas := range numDeltasPerPage { + pageAddr := firstPageAddr + uint64(pageNumber)<0. So to fake this behavior, we return // parts of the fileID. diff --git a/processmanager/types.go b/processmanager/types.go index 9807aaae8..af77e7609 100644 --- a/processmanager/types.go +++ b/processmanager/types.go @@ -4,6 +4,7 @@ package processmanager // import "go.opentelemetry.io/ebpf-profiler/processmanager" import ( + ebpf2 "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "sync" "sync/atomic" @@ -14,7 +15,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/metrics" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" @@ -57,7 +57,7 @@ type ProcessManager struct { exitEvents map[libpf.PID]times.KTime // ebpf contains the interface to manipulate ebpf maps - ebpf pmebpf.EbpfHandler + ebpf ebpf2.EbpfHandler // FileIDMapper provides a cache that implements the FileIDMapper interface. The tracer writes // the 64-bit to 128-bit file ID mapping to the cache, as this is where the two values are diff --git a/tools/coredump/ebpfmaps.go b/tools/coredump/ebpfmaps.go index 412864b4f..4c644e4e4 100644 --- a/tools/coredump/ebpfmaps.go +++ b/tools/coredump/ebpfmaps.go @@ -5,6 +5,7 @@ package main import ( "fmt" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "math/bits" "unsafe" @@ -14,7 +15,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/lpm" "go.opentelemetry.io/ebpf-profiler/metrics" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/util" ) @@ -44,7 +44,7 @@ func (emc *ebpfMapsCoredump) CollectMetrics() []metrics.Metric { func (emc *ebpfMapsCoredump) UpdateInterpreterOffsets(ebpfProgIndex uint16, fileID host.FileID, offsetRanges []util.Range) error { - key, value, err := pmebpf.InterpreterOffsetKeyValue(ebpfProgIndex, fileID, offsetRanges) + key, value, err := ebpf.InterpreterOffsetKeyValue(ebpfProgIndex, fileID, offsetRanges) if err != nil { return err } @@ -166,7 +166,7 @@ func (emc *ebpfMapsCoredump) UpdateUnwindInfo(index uint16, info sdtypes.UnwindI // Stack delta management func (emc *ebpfMapsCoredump) UpdateExeIDToStackDeltas(fileID host.FileID, - deltaArrays []pmebpf.StackDeltaEBPF) (uint16, error) { + deltaArrays []ebpf.StackDeltaEBPF) (uint16, error) { entSize := C.sizeof_StackDelta deltas := C.malloc(C.size_t(len(deltaArrays) * entSize)) for index, delta := range deltaArrays { diff --git a/tracer/tracer.go b/tracer/tracer.go index fcca906b6..44359f575 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -10,6 +10,7 @@ import ( "context" "errors" "fmt" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf/impl" "math" "math/rand/v2" "strings" @@ -32,7 +33,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind/elfunwindinfo" "go.opentelemetry.io/ebpf-profiler/periodiccaller" pm "go.opentelemetry.io/ebpf-profiler/processmanager" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/rlimit" "go.opentelemetry.io/ebpf-profiler/support" From f215f8da4965affc4533fe7f4752e6cc6ca0a07e Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 25 Jul 2025 17:02:40 +0700 Subject: [PATCH 04/12] revert some changes --- processmanager/ebpf/ebpf.go | 8 -------- processmanager/ebpf/impl/types.go | 4 ---- processmanager/ebpf/types.go | 12 ++++++++++++ 3 files changed, 12 insertions(+), 12 deletions(-) delete mode 100644 processmanager/ebpf/impl/types.go create mode 100644 processmanager/ebpf/types.go diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index 9a29b1cb7..aa13ed1c4 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -64,14 +64,6 @@ type EbpfHandler interface { SupportsLPMTrieBatchOperations() bool } -// StackDeltaEBPF represents stack deltas preprocessed by the ProcessManager which are -// then loaded to the eBPF map. This is Go equivalent of 'struct StackDelta' in eBPF types.h. -// See the eBPF header file for details. -type StackDeltaEBPF struct { - AddressLow uint16 - UnwindInfo uint16 -} - func InterpreterOffsetKeyValue(ebpfProgIndex uint16, fileID host.FileID, offsetRanges []util.Range) (key uint64, value support.OffsetRange, err error) { rLen := len(offsetRanges) diff --git a/processmanager/ebpf/impl/types.go b/processmanager/ebpf/impl/types.go deleted file mode 100644 index a995a5cb8..000000000 --- a/processmanager/ebpf/impl/types.go +++ /dev/null @@ -1,4 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package impl // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf/impl" diff --git a/processmanager/ebpf/types.go b/processmanager/ebpf/types.go new file mode 100644 index 000000000..bd5de07d0 --- /dev/null +++ b/processmanager/ebpf/types.go @@ -0,0 +1,12 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ebpf // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + +// StackDeltaEBPF represents stack deltas preprocessed by the ProcessManager which are +// then loaded to the eBPF map. This is Go equivalent of 'struct StackDelta' in eBPF types.h. +// See the eBPF header file for details. +type StackDeltaEBPF struct { + AddressLow uint16 + UnwindInfo uint16 +} From d67372a612b0391ad27891aa51ced76c4f077eb0 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 25 Jul 2025 17:08:45 +0700 Subject: [PATCH 05/12] lint --- processmanager/ebpf/ebpf.go | 1 + processmanager/ebpf/impl/ebpf.go | 5 +++-- processmanager/execinfomanager/manager.go | 6 +++--- processmanager/manager.go | 5 ++--- processmanager/manager_test.go | 8 ++++---- processmanager/types.go | 4 ++-- support/ebpf/errors.h | 3 +-- tools/coredump/ebpfmaps.go | 2 +- tracer/tracer.go | 4 ++-- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index aa13ed1c4..db3b4302b 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -5,6 +5,7 @@ package ebpf // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" import ( "fmt" + "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" diff --git a/processmanager/ebpf/impl/ebpf.go b/processmanager/ebpf/impl/ebpf.go index 4dd6fcaac..925e229d6 100644 --- a/processmanager/ebpf/impl/ebpf.go +++ b/processmanager/ebpf/impl/ebpf.go @@ -7,7 +7,6 @@ import ( "context" "errors" "fmt" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "math/bits" "reflect" "sync" @@ -21,6 +20,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/lpm" "go.opentelemetry.io/ebpf-profiler/metrics" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/rlimit" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/util" @@ -414,7 +414,8 @@ func (impl *ebpfMapsImpl) UpdateUnwindInfo(index uint16, info sdtypes.UnwindInfo // UpdateExeIDToStackDeltas creates a nested map for fileID in the eBPF map exeIDTostack_deltas // and inserts the elements of the deltas array in this nested map. Returns mapID or error. -func (impl *ebpfMapsImpl) UpdateExeIDToStackDeltas(fileID host.FileID, deltas []ebpf.StackDeltaEBPF) ( +func (impl *ebpfMapsImpl) UpdateExeIDToStackDeltas(fileID host.FileID, + deltas []ebpf.StackDeltaEBPF) ( uint16, error) { numDeltas := len(deltas) mapID, err := getMapID(uint32(numDeltas)) diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index 6793e724a..8bd2b32cc 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -6,13 +6,10 @@ package execinfomanager // import "go.opentelemetry.io/ebpf-profiler/processmana import ( "errors" "fmt" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "os" "time" log "github.com/sirupsen/logrus" - "go.opentelemetry.io/ebpf-profiler/libpf" - "go.opentelemetry.io/ebpf-profiler/tracer/types" lru "github.com/elastic/go-freelru" @@ -28,13 +25,16 @@ import ( "go.opentelemetry.io/ebpf-profiler/interpreter/php" "go.opentelemetry.io/ebpf-profiler/interpreter/python" "go.opentelemetry.io/ebpf-profiler/interpreter/ruby" + "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/libpf/xsync" "go.opentelemetry.io/ebpf-profiler/metrics" "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/tpbase" + "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/processmanager/manager.go b/processmanager/manager.go index e167a98a1..a1a61d3b8 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -8,14 +8,11 @@ import ( "context" "errors" "fmt" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "time" lru "github.com/elastic/go-freelru" log "github.com/sirupsen/logrus" - "go.opentelemetry.io/ebpf-profiler/tracer/types" - "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/interpreter/apmint" @@ -25,9 +22,11 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/periodiccaller" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" + "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/traceutil" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/processmanager/manager_test.go b/processmanager/manager_test.go index 341ad8384..a2c93435d 100644 --- a/processmanager/manager_test.go +++ b/processmanager/manager_test.go @@ -9,13 +9,15 @@ import ( "context" "errors" "fmt" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "math/rand/v2" "os" "testing" "time" "unsafe" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" @@ -25,15 +27,13 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/process" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/support" tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/traceutil" "go.opentelemetry.io/ebpf-profiler/util" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) // dummyProcess implements pfelf.Process for testing purposes diff --git a/processmanager/types.go b/processmanager/types.go index af77e7609..d5e908c86 100644 --- a/processmanager/types.go +++ b/processmanager/types.go @@ -4,7 +4,6 @@ package processmanager // import "go.opentelemetry.io/ebpf-profiler/processmanager" import ( - ebpf2 "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "sync" "sync/atomic" @@ -15,6 +14,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/metrics" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" @@ -57,7 +57,7 @@ type ProcessManager struct { exitEvents map[libpf.PID]times.KTime // ebpf contains the interface to manipulate ebpf maps - ebpf ebpf2.EbpfHandler + ebpf ebpf.EbpfHandler // FileIDMapper provides a cache that implements the FileIDMapper interface. The tracer writes // the 64-bit to 128-bit file ID mapping to the cache, as this is where the two values are diff --git a/support/ebpf/errors.h b/support/ebpf/errors.h index 16837d2fd..4dd60ba8e 100644 --- a/support/ebpf/errors.h +++ b/support/ebpf/errors.h @@ -121,8 +121,7 @@ typedef enum ErrorCode { // Native: Unable to read the IRQ stack link ERR_NATIVE_CHASE_IRQ_STACK_LINK = 4010, - // Native: Unexpectedly encountered a kernel mode pointer while attempting to unwind user-mode - // stack + // Native: Unexpectedly encountered a kernel mode pointer while attempting to unwind user-mode stack ERR_NATIVE_UNEXPECTED_KERNEL_ADDRESS = 4011, // Native: Unable to locate the PID page mapping for the current instruction pointer diff --git a/tools/coredump/ebpfmaps.go b/tools/coredump/ebpfmaps.go index 4c644e4e4..5b9c372c6 100644 --- a/tools/coredump/ebpfmaps.go +++ b/tools/coredump/ebpfmaps.go @@ -5,7 +5,6 @@ package main import ( "fmt" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "math/bits" "unsafe" @@ -15,6 +14,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/lpm" "go.opentelemetry.io/ebpf-profiler/metrics" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/tracer/tracer.go b/tracer/tracer.go index 44359f575..2e4afe3f7 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -10,7 +10,6 @@ import ( "context" "errors" "fmt" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf/impl" "math" "math/rand/v2" "strings" @@ -24,7 +23,7 @@ import ( "github.com/elastic/go-perf" log "github.com/sirupsen/logrus" "github.com/zeebo/xxh3" - + "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/kallsyms" "go.opentelemetry.io/ebpf-profiler/libpf" @@ -33,6 +32,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind/elfunwindinfo" "go.opentelemetry.io/ebpf-profiler/periodiccaller" pm "go.opentelemetry.io/ebpf-profiler/processmanager" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf/impl" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/rlimit" "go.opentelemetry.io/ebpf-profiler/support" From 9b9d13545a8f3855fbb499b16445d9cc2aad178b Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 25 Jul 2025 17:12:19 +0700 Subject: [PATCH 06/12] revert some changes --- processmanager/execinfomanager/manager.go | 10 +++++----- processmanager/manager.go | 4 ++-- processmanager/manager_test.go | 6 +++--- tools/coredump/ebpfmaps.go | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index 8bd2b32cc..45c467e87 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -31,7 +31,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/metrics" "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/tpbase" "go.opentelemetry.io/ebpf-profiler/tracer/types" @@ -100,7 +100,7 @@ type ExecutableInfoManager struct { // NewExecutableInfoManager creates a new instance of the executable info manager. func NewExecutableInfoManager( sdp nativeunwind.StackDeltaProvider, - ebpf ebpf.EbpfHandler, + ebpf pmebpf.EbpfHandler, includeTracers types.IncludedTracers, ) (*ExecutableInfoManager, error) { // Initialize interpreter loaders. @@ -322,7 +322,7 @@ type executableInfoManagerState struct { interpreterLoaders []interpreter.Loader // ebpf provides the interface to manipulate eBPF maps. - ebpf ebpf.EbpfHandler + ebpf pmebpf.EbpfHandler // executables is the primary mapping from file ID to executable information. Entries are // managed with reference counting and are synchronized with various eBPF maps: @@ -392,7 +392,7 @@ func (state *executableInfoManagerState) loadDeltas( // Index the unwind-info. var unwindInfo sdtypes.UnwindInfo - ebpfDeltas := make([]ebpf.StackDeltaEBPF, 0, numDeltas) + ebpfDeltas := make([]pmebpf.StackDeltaEBPF, 0, numDeltas) for index, delta := range deltas { if unwindInfo.MergeOpcode != 0 { // This delta was merged in the previous iteration. @@ -420,7 +420,7 @@ func (state *executableInfoManagerState) loadDeltas( if err != nil { return mapRef{}, nil, err } - ebpfDeltas = append(ebpfDeltas, ebpf.StackDeltaEBPF{ + ebpfDeltas = append(ebpfDeltas, pmebpf.StackDeltaEBPF{ AddressLow: uint16(delta.Address), UnwindInfo: unwindInfoIndex, }) diff --git a/processmanager/manager.go b/processmanager/manager.go index a1a61d3b8..bcfbe0154 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -22,7 +22,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/periodiccaller" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" @@ -66,7 +66,7 @@ var ( // fileIDMapper and symbolReporter. Specify nil for fileIDMapper to use the default // implementation. func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInterval time.Duration, - ebpf ebpf.EbpfHandler, fileIDMapper FileIDMapper, symbolReporter reporter.SymbolReporter, + ebpf pmebpf.EbpfHandler, fileIDMapper FileIDMapper, symbolReporter reporter.SymbolReporter, sdp nativeunwind.StackDeltaProvider, filterErrorFrames bool, includeEnvVars libpf.Set[string]) (*ProcessManager, error) { if fileIDMapper == nil { diff --git a/processmanager/manager_test.go b/processmanager/manager_test.go index a2c93435d..70c674f67 100644 --- a/processmanager/manager_test.go +++ b/processmanager/manager_test.go @@ -27,7 +27,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/process" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/support" @@ -155,7 +155,7 @@ type mappingArgs struct { type ebpfMapsMockup struct { updateProcCount, deleteProcCount uint8 - stackDeltaMemory []ebpf.StackDeltaEBPF + stackDeltaMemory []pmebpf.StackDeltaEBPF // deleteStackDeltaRangesCount reflects the number of times // the deleteStackDeltaRanges to update the eBPF map was called. deleteStackDeltaRangesCount uint8 @@ -202,7 +202,7 @@ func (mockup *ebpfMapsMockup) DeletePidInterpreterMapping(libpf.PID, lpm.Prefix) func (mockup *ebpfMapsMockup) UpdateUnwindInfo(uint16, sdtypes.UnwindInfo) error { return nil } func (mockup *ebpfMapsMockup) UpdateExeIDToStackDeltas(fileID host.FileID, - deltaArrays []ebpf.StackDeltaEBPF) (uint16, error) { + deltaArrays []pmebpf.StackDeltaEBPF) (uint16, error) { mockup.stackDeltaMemory = append(mockup.stackDeltaMemory, deltaArrays...) // execinfomanager expects a mapID >0. So to fake this behavior, we return // parts of the fileID. diff --git a/tools/coredump/ebpfmaps.go b/tools/coredump/ebpfmaps.go index 5b9c372c6..412864b4f 100644 --- a/tools/coredump/ebpfmaps.go +++ b/tools/coredump/ebpfmaps.go @@ -14,7 +14,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/lpm" "go.opentelemetry.io/ebpf-profiler/metrics" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/util" ) @@ -44,7 +44,7 @@ func (emc *ebpfMapsCoredump) CollectMetrics() []metrics.Metric { func (emc *ebpfMapsCoredump) UpdateInterpreterOffsets(ebpfProgIndex uint16, fileID host.FileID, offsetRanges []util.Range) error { - key, value, err := ebpf.InterpreterOffsetKeyValue(ebpfProgIndex, fileID, offsetRanges) + key, value, err := pmebpf.InterpreterOffsetKeyValue(ebpfProgIndex, fileID, offsetRanges) if err != nil { return err } @@ -166,7 +166,7 @@ func (emc *ebpfMapsCoredump) UpdateUnwindInfo(index uint16, info sdtypes.UnwindI // Stack delta management func (emc *ebpfMapsCoredump) UpdateExeIDToStackDeltas(fileID host.FileID, - deltaArrays []ebpf.StackDeltaEBPF) (uint16, error) { + deltaArrays []pmebpf.StackDeltaEBPF) (uint16, error) { entSize := C.sizeof_StackDelta deltas := C.malloc(C.size_t(len(deltaArrays) * entSize)) for index, delta := range deltaArrays { From 30e6ea96c95cef4d0d8f252d73d6ff9b6ee03a39 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 25 Jul 2025 17:17:23 +0700 Subject: [PATCH 07/12] revert some changes --- process/debug_other.go | 3 ++- processmanager/execinfomanager/manager.go | 4 ++-- processmanager/manager.go | 3 ++- processmanager/manager_test.go | 6 +++--- processmanager/types.go | 4 ++-- support/ebpf/errors.h | 3 ++- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/process/debug_other.go b/process/debug_other.go index 81e6586fb..8234b6041 100644 --- a/process/debug_other.go +++ b/process/debug_other.go @@ -7,8 +7,9 @@ package process // import "go.opentelemetry.io/ebpf-profiler/process" import ( "fmt" - "go.opentelemetry.io/ebpf-profiler/libpf" "runtime" + + "go.opentelemetry.io/ebpf-profiler/libpf" ) // NewPtrace is the stub implementation, allowing to compile the process diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index 45c467e87..bee2fee7e 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -10,6 +10,8 @@ import ( "time" log "github.com/sirupsen/logrus" + "go.opentelemetry.io/ebpf-profiler/libpf" + "go.opentelemetry.io/ebpf-profiler/tracer/types" lru "github.com/elastic/go-freelru" @@ -25,7 +27,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/interpreter/php" "go.opentelemetry.io/ebpf-profiler/interpreter/python" "go.opentelemetry.io/ebpf-profiler/interpreter/ruby" - "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/libpf/xsync" "go.opentelemetry.io/ebpf-profiler/metrics" @@ -34,7 +35,6 @@ import ( pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/tpbase" - "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/processmanager/manager.go b/processmanager/manager.go index bcfbe0154..fc0fbe610 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -13,6 +13,8 @@ import ( lru "github.com/elastic/go-freelru" log "github.com/sirupsen/logrus" + "go.opentelemetry.io/ebpf-profiler/tracer/types" + "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/interpreter/apmint" @@ -26,7 +28,6 @@ import ( eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" - "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/traceutil" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/processmanager/manager_test.go b/processmanager/manager_test.go index 70c674f67..50a85e418 100644 --- a/processmanager/manager_test.go +++ b/processmanager/manager_test.go @@ -15,9 +15,6 @@ import ( "time" "unsafe" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" @@ -34,6 +31,9 @@ import ( tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/traceutil" "go.opentelemetry.io/ebpf-profiler/util" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // dummyProcess implements pfelf.Process for testing purposes diff --git a/processmanager/types.go b/processmanager/types.go index d5e908c86..9807aaae8 100644 --- a/processmanager/types.go +++ b/processmanager/types.go @@ -14,7 +14,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/metrics" - "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" @@ -57,7 +57,7 @@ type ProcessManager struct { exitEvents map[libpf.PID]times.KTime // ebpf contains the interface to manipulate ebpf maps - ebpf ebpf.EbpfHandler + ebpf pmebpf.EbpfHandler // FileIDMapper provides a cache that implements the FileIDMapper interface. The tracer writes // the 64-bit to 128-bit file ID mapping to the cache, as this is where the two values are diff --git a/support/ebpf/errors.h b/support/ebpf/errors.h index 4dd60ba8e..16837d2fd 100644 --- a/support/ebpf/errors.h +++ b/support/ebpf/errors.h @@ -121,7 +121,8 @@ typedef enum ErrorCode { // Native: Unable to read the IRQ stack link ERR_NATIVE_CHASE_IRQ_STACK_LINK = 4010, - // Native: Unexpectedly encountered a kernel mode pointer while attempting to unwind user-mode stack + // Native: Unexpectedly encountered a kernel mode pointer while attempting to unwind user-mode + // stack ERR_NATIVE_UNEXPECTED_KERNEL_ADDRESS = 4011, // Native: Unable to locate the PID page mapping for the current instruction pointer From a34798f78534cbc6000de27d54cbc0b77958d5a9 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 25 Jul 2025 17:36:11 +0700 Subject: [PATCH 08/12] add macos job --- .../workflows/unit-test-on-pull-request.yml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index a48f6e6b4..f06fa631d 100644 --- a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -137,6 +137,28 @@ jobs: name: integration-test-binaries-${{ matrix.target_arch }} path: support/*.test + coredump-test-macos: + name: Coredump tests (macOS) + runs-on: macos-latest + steps: + - name: Clone code + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Set up Go + uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0 + with: + go-version-file: go.mod + cache-dependency-path: go.sum + - name: Cache coredump modules + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: tools/coredump/modulecache + key: coredumps-macos-${{ hashFiles('tools/coredump/testdata/*/*.json') }} + restore-keys: | + coredumps-macos + coredumps- + - name: Run coredump tests + run: GODEBUG=asyncpreemptoff=1 go test -v ./tools/coredump/ + integration-tests: name: Integration tests (v${{ matrix.kernel }} ${{ matrix.target_arch }}) runs-on: ubuntu-24.04 From 47b34733c522fd6c14f2b5b10796b6dec344b012 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 25 Jul 2025 17:41:53 +0700 Subject: [PATCH 09/12] fmt --- tracer/tracer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracer/tracer.go b/tracer/tracer.go index 2e4afe3f7..7dbd0bc03 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -23,7 +23,7 @@ import ( "github.com/elastic/go-perf" log "github.com/sirupsen/logrus" "github.com/zeebo/xxh3" - + "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/kallsyms" "go.opentelemetry.io/ebpf-profiler/libpf" From f500d40bc20cb07a4542569ca95962a63f633fd2 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Tue, 29 Jul 2025 10:20:21 +0700 Subject: [PATCH 10/12] ebpfapi - the other way around --- processmanager/ebpf/{impl => }/asyncupdate.go | 3 +- .../asyncupdate_integration_test.go | 2 +- processmanager/ebpf/ebpf.go | 754 ++++++++++++++++-- .../ebpf/{impl => }/ebpf_integration_test.go | 2 +- processmanager/ebpf/{impl => }/ebpf_test.go | 3 +- processmanager/ebpf/impl/ebpf.go | 692 ---------------- processmanager/ebpfapi/ebpf.go | 95 +++ processmanager/{ebpf => ebpfapi}/types.go | 2 +- processmanager/execinfomanager/manager.go | 9 +- processmanager/manager.go | 5 +- processmanager/manager_test.go | 2 +- processmanager/types.go | 2 +- tools/coredump/ebpfmaps.go | 2 +- tracer/tracer.go | 2 +- 14 files changed, 788 insertions(+), 787 deletions(-) rename processmanager/ebpf/{impl => }/asyncupdate.go (97%) rename processmanager/ebpf/{impl => }/asyncupdate_integration_test.go (99%) rename processmanager/ebpf/{impl => }/ebpf_integration_test.go (99%) rename processmanager/ebpf/{impl => }/ebpf_test.go (98%) delete mode 100644 processmanager/ebpf/impl/ebpf.go create mode 100644 processmanager/ebpfapi/ebpf.go rename processmanager/{ebpf => ebpfapi}/types.go (81%) diff --git a/processmanager/ebpf/impl/asyncupdate.go b/processmanager/ebpf/asyncupdate.go similarity index 97% rename from processmanager/ebpf/impl/asyncupdate.go rename to processmanager/ebpf/asyncupdate.go index fa63d66ce..944200f5b 100644 --- a/processmanager/ebpf/impl/asyncupdate.go +++ b/processmanager/ebpf/asyncupdate.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package impl // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf/impl" +package ebpf // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" import ( "context" @@ -10,6 +10,7 @@ import ( cebpf "github.com/cilium/ebpf" log "github.com/sirupsen/logrus" + "go.opentelemetry.io/ebpf-profiler/host" ) diff --git a/processmanager/ebpf/impl/asyncupdate_integration_test.go b/processmanager/ebpf/asyncupdate_integration_test.go similarity index 99% rename from processmanager/ebpf/impl/asyncupdate_integration_test.go rename to processmanager/ebpf/asyncupdate_integration_test.go index 5bc4438fa..9c469d288 100644 --- a/processmanager/ebpf/impl/asyncupdate_integration_test.go +++ b/processmanager/ebpf/asyncupdate_integration_test.go @@ -3,7 +3,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package impl +package ebpf import ( "context" diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index db3b4302b..047242753 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -4,92 +4,690 @@ package ebpf // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" import ( + "context" + "errors" "fmt" + "math/bits" + "reflect" + "sync" + "unsafe" + + cebpf "github.com/cilium/ebpf" + "github.com/cilium/ebpf/features" + log "github.com/sirupsen/logrus" + "golang.org/x/exp/constraints" "go.opentelemetry.io/ebpf-profiler/host" - "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/lpm" "go.opentelemetry.io/ebpf-profiler/metrics" - "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" + sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" + "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" + "go.opentelemetry.io/ebpf-profiler/rlimit" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/util" ) -// EbpfHandler provides the functionality to interact with eBPF maps. -type EbpfHandler interface { - // Embed interpreter.EbpfHandler as subset of this interface. - interpreter.EbpfHandler - - // RemoveReportedPID removes a PID from the reported_pids eBPF map. - RemoveReportedPID(pid libpf.PID) - - // UpdateUnwindInfo writes UnwindInfo to given unwind info array index - UpdateUnwindInfo(index uint16, info stackdeltatypes.UnwindInfo) error - - // UpdateExeIDToStackDeltas defines a function that updates the eBPF map exe_id_to_stack_deltas - // for host.FileID with the elements of StackDeltaEBPF. It returns the mapID used. - UpdateExeIDToStackDeltas(fileID host.FileID, deltas []StackDeltaEBPF) (uint16, error) - - // DeleteExeIDToStackDeltas defines a function that removes the entries from the outer eBPF - // map exe_id_to_stack_deltas and its associated inner map entries. - DeleteExeIDToStackDeltas(fileID host.FileID, mapID uint16) error - - // UpdateStackDeltaPages defines a function that updates the mapping in a eBPF map from - // a FileID and page to its stack delta lookup information. - UpdateStackDeltaPages(fileID host.FileID, numDeltasPerPage []uint16, - mapID uint16, firstPageAddr uint64) error - - // DeleteStackDeltaPage defines a function that removes the element specified by fileID and page - // from the eBPF map. - DeleteStackDeltaPage(fileID host.FileID, page uint64) error - - // UpdatePidPageMappingInfo defines a function that updates the eBPF map - // pid_page_to_mapping_info with the given pidAndPage and fileIDAndOffset encoded values - // as key/value pair. - UpdatePidPageMappingInfo(pid libpf.PID, prefix lpm.Prefix, fileID, bias uint64) error - - // DeletePidPageMappingInfo removes the elements specified by prefixes from eBPF map - // pid_page_to_mapping_info and returns the number of elements removed. - DeletePidPageMappingInfo(pid libpf.PID, prefixes []lpm.Prefix) (int, error) - - // CollectMetrics returns gathered errors for changes to eBPF maps. - CollectMetrics() []metrics.Metric - - // SupportsGenericBatchOperations returns true if the kernel supports eBPF batch operations - // on hash and array maps. - SupportsGenericBatchOperations() bool - - // SupportsLPMTrieBatchOperations returns true if the kernel supports eBPF batch operations - // on LPM trie maps. - SupportsLPMTrieBatchOperations() bool -} - -func InterpreterOffsetKeyValue(ebpfProgIndex uint16, fileID host.FileID, - offsetRanges []util.Range) (key uint64, value support.OffsetRange, err error) { - rLen := len(offsetRanges) - if rLen < 1 || rLen > 2 { - return 0, support.OffsetRange{}, fmt.Errorf("invalid ranges %v", offsetRanges) - } - // The keys of this map are executable-id-and-offset-into-text entries, and - // the offset_range associated with them gives the precise area in that page - // where the main interpreter loop is located. This is required to unwind - // nicely from native code into interpreted code. - key = uint64(fileID) - first := offsetRanges[0] - value = support.OffsetRange{ - Lower_offset1: first.Start, - Upper_offset1: first.End, - Program_index: ebpfProgIndex, - } - if len(offsetRanges) == 2 { - // Fields {lower,upper}_offset2 may be used to specify an optional second range - // of an interpreter function. This may be useful if the interpreter function - // consists of two non-contiguous memory ranges, which may happen due to Hot/Cold - // split compiler optimization - second := offsetRanges[1] - value.Lower_offset2 = second.Start - value.Upper_offset2 = second.End - } - return key, value, nil +const ( + // updatePoolWorkers decides how many background workers we spawn to + // process map-in-map updates. + updatePoolWorkers = 16 + // updatePoolQueueCap decides the work queue capacity of each worker. + updatePoolQueueCap = 8 +) + +type ebpfMapsImpl struct { + // Interpreter related eBPF maps + InterpreterOffsets *cebpf.Map `name:"interpreter_offsets"` + DotnetProcs *cebpf.Map `name:"dotnet_procs"` + PerlProcs *cebpf.Map `name:"perl_procs"` + PyProcs *cebpf.Map `name:"py_procs"` + HotspotProcs *cebpf.Map `name:"hotspot_procs"` + PhpProcs *cebpf.Map `name:"php_procs"` + RubyProcs *cebpf.Map `name:"ruby_procs"` + V8Procs *cebpf.Map `name:"v8_procs"` + ApmIntProcs *cebpf.Map `name:"apm_int_procs"` + GoLabelsProcs *cebpf.Map `name:"go_labels_procs"` + + // Stackdelta and process related eBPF maps + ExeIDToStackDeltaMaps []*cebpf.Map + StackDeltaPageToInfo *cebpf.Map `name:"stack_delta_page_to_info"` + PidPageToMappingInfo *cebpf.Map `name:"pid_page_to_mapping_info"` + UnwindInfoArray *cebpf.Map `name:"unwind_info_array"` + ReportedPIDs *cebpf.Map `name:"reported_pids"` + + errCounterLock sync.Mutex + errCounter map[metrics.MetricID]int64 + + hasGenericBatchOperations bool + hasLPMTrieBatchOperations bool + + updateWorkers *asyncMapUpdaterPool +} + +// Compile time check to make sure ebpfMapsImpl satisfies the interface . +var _ ebpfapi.EbpfHandler = &ebpfMapsImpl{} + +// LoadMaps checks if the needed maps for the process manager are available +// and loads their references into a package-internal structure. +// +// It further spawns background workers for deferred map updates; the given +// context can be used to terminate them on shutdown. +func LoadMaps(ctx context.Context, maps map[string]*cebpf.Map) (ebpfapi.EbpfHandler, error) { + impl := &ebpfMapsImpl{} + impl.errCounter = make(map[metrics.MetricID]int64) + + implRefVal := reflect.ValueOf(impl).Elem() + implRefType := reflect.TypeOf(impl).Elem() + for i := 0; i < implRefType.NumField(); i++ { + fieldType := implRefType.Field(i) + nameTag, ok := fieldType.Tag.Lookup("name") + if !ok { + continue + } + mapVal, ok := maps[nameTag] + if !ok { + log.Fatalf("Map %v is not available", nameTag) + } + implRefVal.Field(i).Set(reflect.ValueOf(mapVal)) + } + + numBuckets := support.StackDeltaBucketLargest - support.StackDeltaBucketSmallest + 1 + impl.ExeIDToStackDeltaMaps = make([]*cebpf.Map, numBuckets) + for i := support.StackDeltaBucketSmallest; i <= support.StackDeltaBucketLargest; i++ { + deltasMapName := fmt.Sprintf("exe_id_to_%d_stack_deltas", i) + deltasMap, ok := maps[deltasMapName] + if !ok { + log.Fatalf("Map %s is not available", deltasMapName) + } + impl.ExeIDToStackDeltaMaps[i-support.StackDeltaBucketSmallest] = deltasMap + } + + if err := probeBatchOperations(cebpf.Hash); err == nil { + log.Infof("Supports generic eBPF map batch operations") + impl.hasGenericBatchOperations = true + } + + if err := probeBatchOperations(cebpf.LPMTrie); err == nil { + log.Infof("Supports LPM trie eBPF map batch operations") + impl.hasLPMTrieBatchOperations = true + } + + impl.updateWorkers = newAsyncMapUpdaterPool(ctx, updatePoolWorkers, updatePoolQueueCap) + + return impl, nil +} + +// UpdateInterpreterOffsets adds the given moduleRanges to the eBPF map interpreterOffsets. +func (impl *ebpfMapsImpl) UpdateInterpreterOffsets(ebpfProgIndex uint16, fileID host.FileID, + offsetRanges []util.Range) error { + key, value, err := ebpfapi.InterpreterOffsetKeyValue(ebpfProgIndex, fileID, offsetRanges) + if err != nil { + return err + } + if err := impl.InterpreterOffsets.Update(unsafe.Pointer(&key), unsafe.Pointer(&value), + cebpf.UpdateAny); err != nil { + log.Fatalf("Failed to place interpreter range in map: %v", err) + } + + return nil +} + +// getInterpreterTypeMap returns the eBPF map for the given typ +// or an error if typ is not supported. +func (impl *ebpfMapsImpl) getInterpreterTypeMap(typ libpf.InterpreterType) (*cebpf.Map, error) { + switch typ { + case libpf.Dotnet: + return impl.DotnetProcs, nil + case libpf.Perl: + return impl.PerlProcs, nil + case libpf.Python: + return impl.PyProcs, nil + case libpf.HotSpot: + return impl.HotspotProcs, nil + case libpf.PHP: + return impl.PhpProcs, nil + case libpf.Ruby: + return impl.RubyProcs, nil + case libpf.V8: + return impl.V8Procs, nil + case libpf.APMInt: + return impl.ApmIntProcs, nil + case libpf.GoLabels: + return impl.GoLabelsProcs, nil + default: + return nil, fmt.Errorf("type %d is not (yet) supported", typ) + } +} + +// UpdateProcData adds the given PID specific data to the specified interpreter data eBPF map. +func (impl *ebpfMapsImpl) UpdateProcData(typ libpf.InterpreterType, pid libpf.PID, + data unsafe.Pointer) error { + log.Debugf("Loading symbol addresses into eBPF map for PID %d type %d", + pid, typ) + ebpfMap, err := impl.getInterpreterTypeMap(typ) + if err != nil { + return err + } + + pid32 := uint32(pid) + if err := ebpfMap.Update(unsafe.Pointer(&pid32), data, cebpf.UpdateAny); err != nil { + return fmt.Errorf("failed to add %v info: %s", typ, err) + } + return nil +} + +// DeleteProcData removes the given PID specific data of the specified interpreter data eBPF map. +func (impl *ebpfMapsImpl) DeleteProcData(typ libpf.InterpreterType, pid libpf.PID) error { + log.Debugf("Removing symbol addresses from eBPF map for PID %d type %d", + pid, typ) + ebpfMap, err := impl.getInterpreterTypeMap(typ) + if err != nil { + return err + } + + pid32 := uint32(pid) + if err := ebpfMap.Delete(unsafe.Pointer(&pid32)); err != nil { + return fmt.Errorf("failed to remove info: %v", err) + } + return nil +} + +// getPIDPage initializes a PIDPage instance. +func getPIDPage(pid libpf.PID, key uint64, length uint32) support.PIDPage { + // pid_page_to_mapping_info is an LPM trie and expects the pid and page + // to be in big endian format. + return support.PIDPage{ + Pid: bits.ReverseBytes32(uint32(pid)), + Page: bits.ReverseBytes64(key), + PrefixLen: support.BitWidthPID + length, + } +} + +// getPIDPageFromPrefix initializes a PIDPage instance from a PID and lpm.Prefix. +func getPIDPageFromPrefix(pid libpf.PID, prefix lpm.Prefix) support.PIDPage { + return getPIDPage(pid, prefix.Key, prefix.Length) +} + +// UpdatePidInterpreterMapping updates the eBPF map pidPageToMappingInfo with the +// data required to call the correct interpreter unwinder for that memory region. +func (impl *ebpfMapsImpl) UpdatePidInterpreterMapping(pid libpf.PID, prefix lpm.Prefix, + interpreterProgram uint8, fileID host.FileID, bias uint64) error { + cKey := getPIDPageFromPrefix(pid, prefix) + biasAndUnwindProgram, err := support.EncodeBiasAndUnwindProgram(bias, interpreterProgram) + if err != nil { + return err + } + + cValue := support.PIDPageMappingInfo{ + File_id: uint64(fileID), + Bias_and_unwind_program: biasAndUnwindProgram, + } + + return impl.PidPageToMappingInfo.Update(unsafe.Pointer(&cKey), unsafe.Pointer(&cValue), + cebpf.UpdateNoExist) +} + +// DeletePidInterpreterMapping removes the element specified by pid, prefix and a corresponding +// mapping size from the eBPF map pidPageToMappingInfo. It is normally used when an +// interpreter process dies or a region that formerly required interpreter-based unwinding is no +// longer needed. +func (impl *ebpfMapsImpl) DeletePidInterpreterMapping(pid libpf.PID, prefix lpm.Prefix) error { + cKey := getPIDPageFromPrefix(pid, prefix) + return impl.PidPageToMappingInfo.Delete(unsafe.Pointer(&cKey)) +} + +// trackMapError is a wrapper to report issues with changes to eBPF maps. +func (impl *ebpfMapsImpl) trackMapError(id metrics.MetricID, err error) error { + if err != nil { + impl.errCounterLock.Lock() + impl.errCounter[id]++ + impl.errCounterLock.Unlock() + } + return err +} + +// CollectMetrics returns gathered errors for changes to eBPF maps. +func (impl *ebpfMapsImpl) CollectMetrics() []metrics.Metric { + impl.errCounterLock.Lock() + defer impl.errCounterLock.Unlock() + + counts := make([]metrics.Metric, 0, 7) + for id, value := range impl.errCounter { + counts = append(counts, metrics.Metric{ + ID: id, + Value: metrics.MetricValue(value), + }) + // As we don't want to report metrics with zero values on the next call, + // we delete the entries from the map instead of just resetting them. + delete(impl.errCounter, id) + } + + return counts +} + +// poolPIDPage caches reusable heap-allocated PIDPage instances +// to avoid excessive heap allocations. +var poolPIDPage = sync.Pool{ + New: func() any { + return new(support.PIDPage) + }, +} + +// getPIDPagePooled returns a heap-allocated and initialized PIDPage instance. +// After usage, put the instance back into the pool with poolPIDPage.Put(). +func getPIDPagePooled(pid libpf.PID, prefix lpm.Prefix) *support.PIDPage { + cPIDPage := poolPIDPage.Get().(*support.PIDPage) + *cPIDPage = getPIDPageFromPrefix(pid, prefix) + return cPIDPage +} + +// poolPIDPageMappingInfo caches reusable heap-allocated PIDPageMappingInfo instances +// to avoid excessive heap allocations. +var poolPIDPageMappingInfo = sync.Pool{ + New: func() any { + return new(support.PIDPageMappingInfo) + }, +} + +// getPIDPageMappingInfo returns a heap-allocated and initialized PIDPageMappingInfo instance. +// After usage, put the instance back into the pool with poolPIDPageMappingInfo.Put(). +func getPIDPageMappingInfo(fileID, biasAndUnwindProgram uint64) *support.PIDPageMappingInfo { + cInfo := poolPIDPageMappingInfo.Get().(*support.PIDPageMappingInfo) + cInfo.File_id = fileID + cInfo.Bias_and_unwind_program = biasAndUnwindProgram + + return cInfo +} + +// probeBatchOperations tests if the BPF syscall accepts batch operations. It +// returns nil if batch operations are supported for mapType or an error otherwise. +func probeBatchOperations(mapType cebpf.MapType) error { + restoreRlimit, err := rlimit.MaximizeMemlock() + if err != nil { + // In environment like github action runners, we can not adjust rlimit. + // Therefore we just return false here and do not use batch operations. + return fmt.Errorf("failed to adjust rlimit: %w", err) + } + defer restoreRlimit() + + updates := 5 + mapSpec := &cebpf.MapSpec{ + Type: mapType, + KeySize: 8, + ValueSize: 8, + MaxEntries: uint32(updates), + Flags: features.BPF_F_NO_PREALLOC, + } + + var keys any + switch mapType { + case cebpf.Array: + // KeySize for Array maps always needs to be 4. + mapSpec.KeySize = 4 + // Array maps are always preallocated. + mapSpec.Flags = 0 + keys = generateSlice[uint32](updates) + default: + keys = generateSlice[uint64](updates) + } + + probeMap, err := cebpf.NewMap(mapSpec) + if err != nil { + return fmt.Errorf("failed to create %s map for batch probing: %v", + mapType, err) + } + defer probeMap.Close() + + values := generateSlice[uint64](updates) + + n, err := probeMap.BatchUpdate(keys, values, nil) + if err != nil { + // Older kernel do not support batch operations on maps. + // This is just fine and we return here. + return err + } + if n != updates { + return fmt.Errorf("unexpected batch update return: expected %d but got %d", + updates, n) + } + + // Remove the probe entries from the map. + m, err := probeMap.BatchDelete(keys, nil) + if err != nil { + return err + } + if m != updates { + return fmt.Errorf("unexpected batch delete return: expected %d but got %d", + updates, m) + } + return nil +} + +// getMapID returns the mapID number to use for given number of stack deltas. +func getMapID(numDeltas uint32) (uint16, error) { + significantBits := 32 - bits.LeadingZeros32(numDeltas) + if significantBits <= support.StackDeltaBucketSmallest { + return support.StackDeltaBucketSmallest, nil + } + if significantBits > support.StackDeltaBucketLargest { + return 0, fmt.Errorf("no map available for %d stack deltas", numDeltas) + } + return uint16(significantBits), nil +} + +// getOuterMap is a helper function to select the correct outer map for +// storing the stack deltas based on the mapID. +func (impl *ebpfMapsImpl) getOuterMap(mapID uint16) *cebpf.Map { + if mapID < support.StackDeltaBucketSmallest || + mapID > support.StackDeltaBucketLargest { + return nil + } + return impl.ExeIDToStackDeltaMaps[mapID-support.StackDeltaBucketSmallest] +} + +// RemoveReportedPID removes a PID from the reported_pids eBPF map. The kernel component will +// place a PID in this map before it reports it to Go for further processing. +func (impl *ebpfMapsImpl) RemoveReportedPID(pid libpf.PID) { + key := uint32(pid) + _ = impl.ReportedPIDs.Delete(unsafe.Pointer(&key)) +} + +// UpdateUnwindInfo writes UnwindInfo into the unwind info array at the given index +func (impl *ebpfMapsImpl) UpdateUnwindInfo(index uint16, info sdtypes.UnwindInfo) error { + if uint32(index) >= impl.UnwindInfoArray.MaxEntries() { + return fmt.Errorf("unwind info array full (%d/%d items)", + index, impl.UnwindInfoArray.MaxEntries()) + } + + key := uint32(index) + value := support.UnwindInfo{ + Opcode: info.Opcode, + FpOpcode: info.FPOpcode, + MergeOpcode: info.MergeOpcode, + Param: info.Param, + FpParam: info.FPParam, + } + return impl.trackMapError(metrics.IDUnwindInfoArrayUpdate, + impl.UnwindInfoArray.Update(unsafe.Pointer(&key), unsafe.Pointer(&value), + cebpf.UpdateAny)) +} + +// UpdateExeIDToStackDeltas creates a nested map for fileID in the eBPF map exeIDTostack_deltas +// and inserts the elements of the deltas array in this nested map. Returns mapID or error. +func (impl *ebpfMapsImpl) UpdateExeIDToStackDeltas(fileID host.FileID, + deltas []ebpfapi.StackDeltaEBPF) ( + uint16, error) { + numDeltas := len(deltas) + mapID, err := getMapID(uint32(numDeltas)) + if err != nil { + return 0, err + } + outerMap := impl.getOuterMap(mapID) + + restoreRlimit, err := rlimit.MaximizeMemlock() + if err != nil { + return 0, fmt.Errorf("failed to increase rlimit: %v", err) + } + defer restoreRlimit() + innerMap, err := cebpf.NewMap(&cebpf.MapSpec{ + Type: cebpf.Array, + KeySize: 4, + ValueSize: support.Sizeof_StackDelta, + MaxEntries: 1 << mapID, + }) + if err != nil { + return 0, fmt.Errorf("failed to create inner map: %v", err) + } + defer func() { + if err = innerMap.Close(); err != nil { + log.Errorf("Failed to close FD of inner map for 0x%x: %v", fileID, err) + } + }() + + // We continue updating the inner map after enqueueing the update to the + // outer map. Both the async update pool and our code below need an open + // file descriptor to work, and we don't know which will complete first. + // We thus clone the FD, transfer ownership of the clone to the update + // pool and continue using our original FD whose lifetime is now no longer + // tied to the FD used in the updater pool. + innerMapCloned, err := innerMap.Clone() + if err != nil { + return 0, fmt.Errorf("failed to clone inner map: %v", err) + } + + impl.updateWorkers.EnqueueUpdate(outerMap, fileID, innerMapCloned) + + if impl.hasGenericBatchOperations { + innerKeys := make([]uint32, numDeltas) + stackDeltas := make([]support.StackDelta, numDeltas) + + // Prepare values for batch update. + for index, delta := range deltas { + innerKeys[index] = uint32(index) + stackDeltas[index].AddrLow = delta.AddressLow + stackDeltas[index].UnwindInfo = delta.UnwindInfo + } + + _, err := innerMap.BatchUpdate( + ptrCastMarshaler[uint32](innerKeys), + ptrCastMarshaler[support.StackDelta](stackDeltas), + &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateAny)}) + if err != nil { + return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasBatchUpdate, + fmt.Errorf("failed to batch insert %d elements for 0x%x "+ + "into exeIDTostack_deltas: %v", + numDeltas, fileID, err)) + } + return mapID, nil + } + + innerKey := uint32(0) + stackDelta := support.StackDelta{} + for index, delta := range deltas { + stackDelta.AddrLow = delta.AddressLow + stackDelta.UnwindInfo = delta.UnwindInfo + innerKey = uint32(index) + if err := innerMap.Update(unsafe.Pointer(&innerKey), unsafe.Pointer(&stackDelta), + cebpf.UpdateAny); err != nil { + return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasUpdate, fmt.Errorf( + "failed to insert element %d for 0x%x into exeIDTostack_deltas: %v", + index, fileID, err)) + } + } + + return mapID, nil +} + +// DeleteExeIDToStackDeltas removes all eBPF stack delta entries for given fileID and mapID number. +func (impl *ebpfMapsImpl) DeleteExeIDToStackDeltas(fileID host.FileID, mapID uint16) error { + outerMap := impl.getOuterMap(mapID) + if outerMap == nil { + return fmt.Errorf("invalid mapID %d", mapID) + } + + // Deleting the entry from the outer maps deletes also the entries of the inner + // map associated with this outer key. + impl.updateWorkers.EnqueueUpdate(outerMap, fileID, nil) + + return nil +} + +// UpdateStackDeltaPages adds fileID/page with given information to eBPF map. If the entry exists, +// it will return an error. Otherwise the key/value pairs will be appended to the hash. +func (impl *ebpfMapsImpl) UpdateStackDeltaPages(fileID host.FileID, numDeltasPerPage []uint16, + mapID uint16, firstPageAddr uint64) error { + firstDelta := uint32(0) + keys := make([]support.StackDeltaPageKey, len(numDeltasPerPage)) + values := make([]support.StackDeltaPageInfo, len(numDeltasPerPage)) + + // Prepare the key/value combinations that will be loaded. + for pageNumber, numDeltas := range numDeltasPerPage { + pageAddr := firstPageAddr + uint64(pageNumber)< support.StackDeltaBucketLargest { - return 0, fmt.Errorf("no map available for %d stack deltas", numDeltas) - } - return uint16(significantBits), nil -} - -// getOuterMap is a helper function to select the correct outer map for -// storing the stack deltas based on the mapID. -func (impl *ebpfMapsImpl) getOuterMap(mapID uint16) *cebpf.Map { - if mapID < support.StackDeltaBucketSmallest || - mapID > support.StackDeltaBucketLargest { - return nil - } - return impl.ExeIDToStackDeltaMaps[mapID-support.StackDeltaBucketSmallest] -} - -// RemoveReportedPID removes a PID from the reported_pids eBPF map. The kernel component will -// place a PID in this map before it reports it to Go for further processing. -func (impl *ebpfMapsImpl) RemoveReportedPID(pid libpf.PID) { - key := uint32(pid) - _ = impl.ReportedPIDs.Delete(unsafe.Pointer(&key)) -} - -// UpdateUnwindInfo writes UnwindInfo into the unwind info array at the given index -func (impl *ebpfMapsImpl) UpdateUnwindInfo(index uint16, info sdtypes.UnwindInfo) error { - if uint32(index) >= impl.UnwindInfoArray.MaxEntries() { - return fmt.Errorf("unwind info array full (%d/%d items)", - index, impl.UnwindInfoArray.MaxEntries()) - } - - key := uint32(index) - value := support.UnwindInfo{ - Opcode: info.Opcode, - FpOpcode: info.FPOpcode, - MergeOpcode: info.MergeOpcode, - Param: info.Param, - FpParam: info.FPParam, - } - return impl.trackMapError(metrics.IDUnwindInfoArrayUpdate, - impl.UnwindInfoArray.Update(unsafe.Pointer(&key), unsafe.Pointer(&value), - cebpf.UpdateAny)) -} - -// UpdateExeIDToStackDeltas creates a nested map for fileID in the eBPF map exeIDTostack_deltas -// and inserts the elements of the deltas array in this nested map. Returns mapID or error. -func (impl *ebpfMapsImpl) UpdateExeIDToStackDeltas(fileID host.FileID, - deltas []ebpf.StackDeltaEBPF) ( - uint16, error) { - numDeltas := len(deltas) - mapID, err := getMapID(uint32(numDeltas)) - if err != nil { - return 0, err - } - outerMap := impl.getOuterMap(mapID) - - restoreRlimit, err := rlimit.MaximizeMemlock() - if err != nil { - return 0, fmt.Errorf("failed to increase rlimit: %v", err) - } - defer restoreRlimit() - innerMap, err := cebpf.NewMap(&cebpf.MapSpec{ - Type: cebpf.Array, - KeySize: 4, - ValueSize: support.Sizeof_StackDelta, - MaxEntries: 1 << mapID, - }) - if err != nil { - return 0, fmt.Errorf("failed to create inner map: %v", err) - } - defer func() { - if err = innerMap.Close(); err != nil { - log.Errorf("Failed to close FD of inner map for 0x%x: %v", fileID, err) - } - }() - - // We continue updating the inner map after enqueueing the update to the - // outer map. Both the async update pool and our code below need an open - // file descriptor to work, and we don't know which will complete first. - // We thus clone the FD, transfer ownership of the clone to the update - // pool and continue using our original FD whose lifetime is now no longer - // tied to the FD used in the updater pool. - innerMapCloned, err := innerMap.Clone() - if err != nil { - return 0, fmt.Errorf("failed to clone inner map: %v", err) - } - - impl.updateWorkers.EnqueueUpdate(outerMap, fileID, innerMapCloned) - - if impl.hasGenericBatchOperations { - innerKeys := make([]uint32, numDeltas) - stackDeltas := make([]support.StackDelta, numDeltas) - - // Prepare values for batch update. - for index, delta := range deltas { - innerKeys[index] = uint32(index) - stackDeltas[index].AddrLow = delta.AddressLow - stackDeltas[index].UnwindInfo = delta.UnwindInfo - } - - _, err := innerMap.BatchUpdate( - ptrCastMarshaler[uint32](innerKeys), - ptrCastMarshaler[support.StackDelta](stackDeltas), - &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateAny)}) - if err != nil { - return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasBatchUpdate, - fmt.Errorf("failed to batch insert %d elements for 0x%x "+ - "into exeIDTostack_deltas: %v", - numDeltas, fileID, err)) - } - return mapID, nil - } - - innerKey := uint32(0) - stackDelta := support.StackDelta{} - for index, delta := range deltas { - stackDelta.AddrLow = delta.AddressLow - stackDelta.UnwindInfo = delta.UnwindInfo - innerKey = uint32(index) - if err := innerMap.Update(unsafe.Pointer(&innerKey), unsafe.Pointer(&stackDelta), - cebpf.UpdateAny); err != nil { - return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasUpdate, fmt.Errorf( - "failed to insert element %d for 0x%x into exeIDTostack_deltas: %v", - index, fileID, err)) - } - } - - return mapID, nil -} - -// DeleteExeIDToStackDeltas removes all eBPF stack delta entries for given fileID and mapID number. -func (impl *ebpfMapsImpl) DeleteExeIDToStackDeltas(fileID host.FileID, mapID uint16) error { - outerMap := impl.getOuterMap(mapID) - if outerMap == nil { - return fmt.Errorf("invalid mapID %d", mapID) - } - - // Deleting the entry from the outer maps deletes also the entries of the inner - // map associated with this outer key. - impl.updateWorkers.EnqueueUpdate(outerMap, fileID, nil) - - return nil -} - -// UpdateStackDeltaPages adds fileID/page with given information to eBPF map. If the entry exists, -// it will return an error. Otherwise the key/value pairs will be appended to the hash. -func (impl *ebpfMapsImpl) UpdateStackDeltaPages(fileID host.FileID, numDeltasPerPage []uint16, - mapID uint16, firstPageAddr uint64) error { - firstDelta := uint32(0) - keys := make([]support.StackDeltaPageKey, len(numDeltasPerPage)) - values := make([]support.StackDeltaPageInfo, len(numDeltasPerPage)) - - // Prepare the key/value combinations that will be loaded. - for pageNumber, numDeltas := range numDeltasPerPage { - pageAddr := firstPageAddr + uint64(pageNumber)< 2 { + return 0, support.OffsetRange{}, fmt.Errorf("invalid ranges %v", offsetRanges) + } + // The keys of this map are executable-id-and-offset-into-text entries, and + // the offset_range associated with them gives the precise area in that page + // where the main interpreter loop is located. This is required to unwind + // nicely from native code into interpreted code. + key = uint64(fileID) + first := offsetRanges[0] + value = support.OffsetRange{ + Lower_offset1: first.Start, + Upper_offset1: first.End, + Program_index: ebpfProgIndex, + } + if len(offsetRanges) == 2 { + // Fields {lower,upper}_offset2 may be used to specify an optional second range + // of an interpreter function. This may be useful if the interpreter function + // consists of two non-contiguous memory ranges, which may happen due to Hot/Cold + // split compiler optimization + second := offsetRanges[1] + value.Lower_offset2 = second.Start + value.Upper_offset2 = second.End + } + return key, value, nil +} diff --git a/processmanager/ebpf/types.go b/processmanager/ebpfapi/types.go similarity index 81% rename from processmanager/ebpf/types.go rename to processmanager/ebpfapi/types.go index bd5de07d0..2f802c852 100644 --- a/processmanager/ebpf/types.go +++ b/processmanager/ebpfapi/types.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package ebpf // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" +package ebpfapi // import "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" // StackDeltaEBPF represents stack deltas preprocessed by the ProcessManager which are // then loaded to the eBPF map. This is Go equivalent of 'struct StackDelta' in eBPF types.h. diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index bee2fee7e..bf3e1e44a 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -9,11 +9,8 @@ import ( "os" "time" - log "github.com/sirupsen/logrus" - "go.opentelemetry.io/ebpf-profiler/libpf" - "go.opentelemetry.io/ebpf-profiler/tracer/types" - lru "github.com/elastic/go-freelru" + log "github.com/sirupsen/logrus" "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" @@ -27,14 +24,16 @@ import ( "go.opentelemetry.io/ebpf-profiler/interpreter/php" "go.opentelemetry.io/ebpf-profiler/interpreter/python" "go.opentelemetry.io/ebpf-profiler/interpreter/ruby" + "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/libpf/xsync" "go.opentelemetry.io/ebpf-profiler/metrics" "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/tpbase" + "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/processmanager/manager.go b/processmanager/manager.go index fc0fbe610..da58e3101 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -13,8 +13,6 @@ import ( lru "github.com/elastic/go-freelru" log "github.com/sirupsen/logrus" - "go.opentelemetry.io/ebpf-profiler/tracer/types" - "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/interpreter/apmint" @@ -24,10 +22,11 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/periodiccaller" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" + "go.opentelemetry.io/ebpf-profiler/tracer/types" "go.opentelemetry.io/ebpf-profiler/traceutil" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/processmanager/manager_test.go b/processmanager/manager_test.go index 50a85e418..8df1dcbba 100644 --- a/processmanager/manager_test.go +++ b/processmanager/manager_test.go @@ -24,7 +24,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/process" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/support" diff --git a/processmanager/types.go b/processmanager/types.go index 9807aaae8..f21279727 100644 --- a/processmanager/types.go +++ b/processmanager/types.go @@ -14,7 +14,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/metrics" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" eim "go.opentelemetry.io/ebpf-profiler/processmanager/execinfomanager" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" diff --git a/tools/coredump/ebpfmaps.go b/tools/coredump/ebpfmaps.go index 412864b4f..2f5dd390b 100644 --- a/tools/coredump/ebpfmaps.go +++ b/tools/coredump/ebpfmaps.go @@ -14,7 +14,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/lpm" "go.opentelemetry.io/ebpf-profiler/metrics" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpfapi" "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/util" ) diff --git a/tracer/tracer.go b/tracer/tracer.go index 7dbd0bc03..fcca906b6 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -32,7 +32,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/nativeunwind/elfunwindinfo" "go.opentelemetry.io/ebpf-profiler/periodiccaller" pm "go.opentelemetry.io/ebpf-profiler/processmanager" - pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf/impl" + pmebpf "go.opentelemetry.io/ebpf-profiler/processmanager/ebpf" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/rlimit" "go.opentelemetry.io/ebpf-profiler/support" From 72c0c756deb190ac64ed9df4a64f78d1ada6b90a Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Tue, 29 Jul 2025 10:32:56 +0700 Subject: [PATCH 11/12] move machine --- {process => libpf/pfelf}/machine_amd64.go | 4 ++-- {process => libpf/pfelf}/machine_arm64.go | 4 ++-- process/process.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename {process => libpf/pfelf}/machine_amd64.go (51%) rename {process => libpf/pfelf}/machine_arm64.go (51%) diff --git a/process/machine_amd64.go b/libpf/pfelf/machine_amd64.go similarity index 51% rename from process/machine_amd64.go rename to libpf/pfelf/machine_amd64.go index 6313e9a49..b0c78ed70 100644 --- a/process/machine_amd64.go +++ b/libpf/pfelf/machine_amd64.go @@ -3,8 +3,8 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package process // import "go.opentelemetry.io/ebpf-profiler/process" +package pfelf // import "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" import "debug/elf" -const currentMachine = elf.EM_X86_64 +const CurrentMachine = elf.EM_X86_64 diff --git a/process/machine_arm64.go b/libpf/pfelf/machine_arm64.go similarity index 51% rename from process/machine_arm64.go rename to libpf/pfelf/machine_arm64.go index 9c4caa689..84a72e40b 100644 --- a/process/machine_arm64.go +++ b/libpf/pfelf/machine_arm64.go @@ -3,8 +3,8 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package process // import "go.opentelemetry.io/ebpf-profiler/process" +package pfelf // import "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" import "debug/elf" -const currentMachine = elf.EM_AARCH64 +const CurrentMachine = elf.EM_AARCH64 diff --git a/process/process.go b/process/process.go index e39159a25..ac9c91351 100644 --- a/process/process.go +++ b/process/process.go @@ -72,7 +72,7 @@ func (sp *systemProcess) PID() libpf.PID { } func (sp *systemProcess) GetMachineData() MachineData { - return MachineData{Machine: currentMachine} + return MachineData{Machine: pfelf.CurrentMachine} } func trimMappingPath(path string) string { From c5568a74518581a6c2c0c28612473203d3b0b378 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Tue, 29 Jul 2025 10:50:35 +0700 Subject: [PATCH 12/12] update gh actions cache key for macos job --- .github/workflows/unit-test-on-pull-request.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index 0013195bd..04bcc8446 100644 --- a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -152,9 +152,9 @@ jobs: uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: tools/coredump/modulecache - key: coredumps-macos-${{ hashFiles('tools/coredump/testdata/*/*.json') }} + key: coredumps-arm64-${{ hashFiles('tools/coredump/testdata/*/*.json') }} restore-keys: | - coredumps-macos + coredumps-arm64 coredumps- - name: Run coredump tests run: GODEBUG=asyncpreemptoff=1 go test -v ./tools/coredump/