diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index 150ce557d..054214395 100644 --- a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -145,7 +145,6 @@ jobs: # https://github.com/cilium/ci-kernels/pkgs/container/ci-kernels/versions?filters%5Bversion_type%5D=tagged # AMD64 - - { target_arch: amd64, kernel: 5.4.276 } - { target_arch: amd64, kernel: 5.10.217 } - { target_arch: amd64, kernel: 5.15.159 } - { target_arch: amd64, kernel: 6.1.91 } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 130eb1a3c..010fe205b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,7 +20,7 @@ slack channel for discussions and questions. ## Pre-requisites -- Linux (5.4+ for x86-64, 5.5+ for ARM64) with eBPF enabled (the profiler currently only runs on Linux) +- Linux (5.10+) with eBPF enabled (the profiler currently only runs on Linux) - Go as specified in [go.mod](https://github.com/open-telemetry/opentelemetry-ebpf-profiler/blob/main/go.mod) - docker - Rust as specified in [Cargo.toml](https://github.com/open-telemetry/opentelemetry-ebpf-profiler/blob/main/Cargo.toml) diff --git a/README.md b/README.md index f3cc46006..79134b9a5 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,10 @@ Since the profiler is Linux-only, macOS and Windows users need to set up a Linux ## Supported Linux kernel version -[7ddc23ea](https://github.com/open-telemetry/opentelemetry-ebpf-profiler/commit/7ddc23ea135a2e00fffc17850ab90534e9b63108) is the last commit with support for 4.19. Changes after this commit may require a minimal Linux kernel version of 5.4. +The minimum required Linux kernel version has increased with certain commits. Specifically: + +- Commit [8047150e](https://github.com/open-telemetry/opentelemetry-ebpf-profiler/commit/8047150e3f325f852874591356c69d0487b67d7c) was the last to support kernel version 5.4. 
Subsequent changes may require a minimal Linux kernel version of 5.10 or greater. +- Commit [7ddc23ea](https://github.com/open-telemetry/opentelemetry-ebpf-profiler/commit/7ddc23ea135a2e00fffc17850ab90534e9b63108) was the last to support kernel version 4.19. Subsequent changes may require a minimal Linux kernel version of at least 5.4. ### Updating the supported Linux kernel version diff --git a/collector/config/config.go b/collector/config/config.go index 220e1908f..1f4ebe96d 100644 --- a/collector/config/config.go +++ b/collector/config/config.go @@ -6,7 +6,6 @@ package config // import "go.opentelemetry.io/ebpf-profiler/collector/config" import ( "errors" "fmt" - "runtime" "strings" "time" @@ -122,17 +121,7 @@ func (cfg *Config) Validate() error { } var minMajor, minMinor uint32 - switch runtime.GOARCH { - case "amd64": - minMajor, minMinor = 5, 2 - case "arm64": - // Older ARM64 kernel versions have broken bpf_probe_read. - // https://github.com/torvalds/linux/commit/6ae08ae3dea2cfa03dd3665a3c8475c2d429ef47 - minMajor, minMinor = 5, 5 - default: - return fmt.Errorf("unsupported architecture: %s", runtime.GOARCH) - } - + minMajor, minMinor = 5, 10 if major < minMajor || (major == minMajor && minor < minMinor) { return fmt.Errorf("host Agent requires kernel version "+ "%d.%d or newer but got %d.%d.%d", minMajor, minMinor, major, minor, patch) diff --git a/doc/KNOWN_KERNEL_LIMITATIONS.md b/doc/KNOWN_KERNEL_LIMITATIONS.md deleted file mode 100644 index 030f0fd81..000000000 --- a/doc/KNOWN_KERNEL_LIMITATIONS.md +++ /dev/null @@ -1,33 +0,0 @@ -Known limitations -================= -The Linux kernel is constantly evolving and so is eBPF. To be able to load our eBPF code with older kernel versions we have to write code to avoid some limitations. This file documents the restrictions we ran into while writing the code. - -Number of tracepoints ---------------------- -Affects kernel < 4.15. - -There was a limit of 1 eBPF program per tracepoint/kprobe. 
-This limit no longer holds and was removed with commit [e87c6bc3852b981e71c757be20771546ce9f76f3](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e87c6bc3852b981e71c757be20771546ce9f76f3). - - -Kernel version check --------------------- -Affects kernel < 5.0. - -As part of the verification of eBPF programs, the `kern_version` attribute was checked and it needed to match with the currently running kernel version. -This check was removed with commit [6c4fc209fcf9d27efbaa48368773e4d2bfbd59aa](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6c4fc209fcf9d27efbaa48368773e4d2bfbd59aa). - - -eBPF instruction limit ----------------------- -Affects kernel < 5.2. - -The number of eBPF instructions per program was limited to 4096 instructions. -This limit was raised to 1 million eBPF instructions with commit [c04c0d2b968ac45d6ef020316808ef6c82325a82](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c04c0d2b968ac45d6ef020316808ef6c82325a82). - - -eBPF inner arrays (map-in-map) must be of same size ---------------------------------------------------- -Affects kernel < 5.10. - -This restriction was removed with commit[4a8f87e60f6db40e640f1db555d063b2c4dea5f1](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4a8f87e60f6db40e640f1db555d063b2c4dea5f1). diff --git a/doc/internals.md b/doc/internals.md index a0a52fc2e..299698a97 100644 --- a/doc/internals.md +++ b/doc/internals.md @@ -206,16 +206,10 @@ of these limitations are significantly relaxed in newer kernel versions, but we still have to stick to the old limits because we wish to continue supporting older kernels. -The minimum supported Linux kernel versions are -- 5.4 for amd64/x86_64 -- 5.5 for arm64/aarch64 +The minimum supported Linux kernel version is 5.10 for amd64/x86_64 and arm64/aarch64. 
The most notable limitations are the following two: -- **4096 instructions per program**\ - A single BPF program can consist of a maximum of 4096 instructions, otherwise - older kernels will refuse to load it. Since BPF does not allow for loops, they - instead need to be unrolled. - **32 tail-calls**\ Linux allows BPF programs to do a tail-call to another BPF program. A tail call is essentially a `jmp` into another BPF program, ending execution of the diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index 0302f9d4d..5bbfbfaab 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -66,9 +66,9 @@ type ebpfMapsImpl struct { errCounterLock sync.Mutex errCounter map[metrics.MetricID]int64 - hasGenericBatchOperations bool - hasGenericBatchLookupAndDelete bool - hasLPMTrieBatchOperations bool + // Support for batch operations on LPM eBPF maps was only + // introduced with Linux kernel 5.13. + hasLPMTrieBatchOperations bool updateWorkers *asyncMapUpdaterPool } @@ -117,16 +117,6 @@ func LoadMaps(ctx context.Context, includeTracers types.IncludedTracers, impl.ExeIDToStackDeltaMaps[i-support.StackDeltaBucketSmallest] = deltasMap } - if err := probeBatchOperations(cebpf.Hash); err == nil { - log.Infof("Supports generic eBPF map batch operations") - impl.hasGenericBatchOperations = true - } - - if err := probeBatchLookupAndDelete(cebpf.Hash); err == nil { - log.Infof("Supports generic eBPF map batch lookup-and-delete") - impl.hasGenericBatchLookupAndDelete = true - } - if err := probeBatchOperations(cebpf.LPMTrie); err == nil { log.Infof("Supports LPM trie eBPF map batch operations") impl.hasLPMTrieBatchOperations = true @@ -449,6 +439,12 @@ func probeBatchOperations(mapType cebpf.MapType) error { return probeMapOperations(mapType, probeBatchOperationsInner[uint64]) } +// SupportsLPMTrieBatchOperations returns true if the kernel supports eBPF batch operations +// on LPM trie maps. 
+func (impl *ebpfMapsImpl) SupportsLPMTrieBatchOperations() bool { + return impl.hasLPMTrieBatchOperations +} + // getMapID returns the mapID number to use for given number of stack deltas. func getMapID(numDeltas uint32) (uint16, error) { significantBits := 32 - bits.LeadingZeros32(numDeltas) @@ -538,44 +534,26 @@ func (impl *ebpfMapsImpl) UpdateExeIDToStackDeltas(fileID host.FileID, impl.updateWorkers.EnqueueUpdate(outerMap, fileID, innerMapCloned) - if impl.hasGenericBatchOperations { - innerKeys := make([]uint32, numDeltas) - stackDeltas := make([]support.StackDelta, numDeltas) - - // Prepare values for batch update. - for index, delta := range deltas { - innerKeys[index] = uint32(index) - stackDeltas[index].AddrLow = delta.AddressLow - stackDeltas[index].UnwindInfo = delta.UnwindInfo - } - - _, err := innerMap.BatchUpdate( - ptrCastMarshaler[uint32](innerKeys), - ptrCastMarshaler[support.StackDelta](stackDeltas), - &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateAny)}) - if err != nil { - return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasBatchUpdate, - fmt.Errorf("failed to batch insert %d elements for 0x%x "+ - "into exeIDTostack_deltas: %v", - numDeltas, fileID, err)) - } - return mapID, nil - } + innerKeys := make([]uint32, numDeltas) + stackDeltas := make([]support.StackDelta, numDeltas) - innerKey := uint32(0) - stackDelta := support.StackDelta{} + // Prepare values for batch update. 
for index, delta := range deltas { - stackDelta.AddrLow = delta.AddressLow - stackDelta.UnwindInfo = delta.UnwindInfo - innerKey = uint32(index) - if err := innerMap.Update(unsafe.Pointer(&innerKey), unsafe.Pointer(&stackDelta), - cebpf.UpdateAny); err != nil { - return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasUpdate, fmt.Errorf( - "failed to insert element %d for 0x%x into exeIDTostack_deltas: %v", - index, fileID, err)) - } + innerKeys[index] = uint32(index) + stackDeltas[index].AddrLow = delta.AddressLow + stackDeltas[index].UnwindInfo = delta.UnwindInfo } + _, err = innerMap.BatchUpdate( + ptrCastMarshaler[uint32](innerKeys), + ptrCastMarshaler[support.StackDelta](stackDeltas), + &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateAny)}) + if err != nil { + return 0, impl.trackMapError(metrics.IDExeIDToStackDeltasBatchUpdate, + fmt.Errorf("failed to batch insert %d elements for 0x%x "+ + "into exeIDTostack_deltas: %v", + numDeltas, fileID, err)) + } return mapID, nil } @@ -617,22 +595,12 @@ func (impl *ebpfMapsImpl) UpdateStackDeltaPages(fileID host.FileID, numDeltasPer firstDelta += uint32(numDeltas) } - if impl.hasGenericBatchOperations { - _, err := impl.StackDeltaPageToInfo.BatchUpdate( - ptrCastMarshaler[support.StackDeltaPageKey](keys), - ptrCastMarshaler[support.StackDeltaPageInfo](values), - &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateNoExist)}) - return impl.trackMapError(metrics.IDStackDeltaPageToInfoBatchUpdate, err) - } + _, err := impl.StackDeltaPageToInfo.BatchUpdate( + ptrCastMarshaler[support.StackDeltaPageKey](keys), + ptrCastMarshaler[support.StackDeltaPageInfo](values), + &cebpf.BatchOptions{Flags: uint64(cebpf.UpdateNoExist)}) + return impl.trackMapError(metrics.IDStackDeltaPageToInfoBatchUpdate, err) - for index := range keys { - if err := impl.trackMapError(metrics.IDStackDeltaPageToInfoUpdate, - impl.StackDeltaPageToInfo.Update(unsafe.Pointer(&keys[index]), - unsafe.Pointer(&values[index]), cebpf.UpdateNoExist)); err != nil { - 
return err - } - } - return nil } // DeleteStackDeltaPage removes the entry specified by fileID and page from the eBPF map. @@ -737,24 +705,6 @@ func (impl *ebpfMapsImpl) LookupPidPageInformation(pid libpf.PID, page uint64) ( return host.FileID(cValue.File_id), bias, nil } -// SupportsGenericBatchOperations returns true if the kernel supports eBPF batch operations -// on hash and array maps. -func (impl *ebpfMapsImpl) SupportsGenericBatchOperations() bool { - return impl.hasGenericBatchOperations -} - -// SupportsGenericBatchLookupAndDelete returns true if the kernel supports eBPF batch -// lookup-and-delete operations on hash and array maps. -func (impl *ebpfMapsImpl) SupportsGenericBatchLookupAndDelete() bool { - return impl.hasGenericBatchLookupAndDelete -} - -// SupportsLPMTrieBatchOperations returns true if the kernel supports eBPF batch operations -// on LPM trie maps. -func (impl *ebpfMapsImpl) SupportsLPMTrieBatchOperations() bool { - return impl.hasLPMTrieBatchOperations -} - // ptrCastMarshaler is a small wrapper type intended to be used with cilium's BatchUpdate and // BackDelete functions. 
// diff --git a/processmanager/ebpf/ebpf_integration_test.go b/processmanager/ebpf/ebpf_integration_test.go index 8d7915dbc..59a7f7861 100644 --- a/processmanager/ebpf/ebpf_integration_test.go +++ b/processmanager/ebpf/ebpf_integration_test.go @@ -15,6 +15,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/lpm" + "go.opentelemetry.io/ebpf-profiler/metrics" "go.opentelemetry.io/ebpf-profiler/rlimit" "go.opentelemetry.io/ebpf-profiler/support" ) @@ -34,6 +35,7 @@ func loadTracers(t *testing.T) *ebpfMapsImpl { return &ebpfMapsImpl{ PidPageToMappingInfo: pidPageToMappingInfo, + errCounter: make(map[metrics.MetricID]int64), } } diff --git a/processmanager/ebpfapi/ebpf.go b/processmanager/ebpfapi/ebpf.go index a750d2fef..0f8398138 100644 --- a/processmanager/ebpfapi/ebpf.go +++ b/processmanager/ebpfapi/ebpf.go @@ -56,14 +56,6 @@ type EbpfHandler interface { // CollectMetrics returns gathered errors for changes to eBPF maps. CollectMetrics() []metrics.Metric - // SupportsGenericBatchOperations returns true if the kernel supports eBPF batch operations - // on hash and array maps. - SupportsGenericBatchOperations() bool - - // SupportsGenericBatchLookupAndDelete returns true if the kernel supports eBPF batch - // lookup-and-delete operations on hash and array maps. - SupportsGenericBatchLookupAndDelete() bool - // SupportsLPMTrieBatchOperations returns true if the kernel supports eBPF batch operations // on LPM trie maps. SupportsLPMTrieBatchOperations() bool diff --git a/support/ebpf/bpfdefs.h b/support/ebpf/bpfdefs.h index 0a786df80..b153c8dc0 100644 --- a/support/ebpf/bpfdefs.h +++ b/support/ebpf/bpfdefs.h @@ -122,12 +122,9 @@ static long (*bpf_probe_read_user)(void *dst, int size, const void *unsafe_ptr) static long (*bpf_probe_read_kernel)(void *dst, int size, const void *unsafe_ptr) = (void *) BPF_FUNC_probe_read_kernel; - // The sizeof in bpf_trace_printk() must include \0, else no output - // is generated. 
The \n is not needed on 5.8+ kernels, but definitely on - // 5.4 kernels. #define printt(fmt, ...) \ ({ \ - const char ____fmt[] = fmt "\n"; \ + const char ____fmt[] = fmt; \ bpf_trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \ }) diff --git a/support/ebpf/tracer.ebpf.amd64 b/support/ebpf/tracer.ebpf.amd64 index f24cb4dd0..2b04fdc2a 100644 Binary files a/support/ebpf/tracer.ebpf.amd64 and b/support/ebpf/tracer.ebpf.amd64 differ diff --git a/support/ebpf/tracer.ebpf.arm64 b/support/ebpf/tracer.ebpf.arm64 index 42aa86950..6c77b07d0 100644 Binary files a/support/ebpf/tracer.ebpf.arm64 and b/support/ebpf/tracer.ebpf.arm64 differ diff --git a/tracer/tracer.go b/tracer/tracer.go index 5a626f71b..f64d9ff39 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -96,10 +96,6 @@ type Tracer struct { // tracePool is cache of libpf.EbpfTrace to avoid GC pressure tracePool sync.Pool - // monitorPIDEventsMap iterates over the eBPF map pid_events, collects PIDs and - // writes them to the keys slice. The implementation is selected on creation. - monitorPIDEventsMapMethod func(keys *[]libpf.PIDTID) error - // triggerPIDProcessing is used as manual trigger channel to request immediate // processing of pending PIDs. This is requested on notifications from eBPF code // when process events take place (new, exit, unknown PC). @@ -114,8 +110,6 @@ type Tracer struct { // intervals provides access to globally configured timers and counters. intervals Intervals - hasBatchOperations bool - // samplesPerSecond holds the configured number of samples per second. 
samplesPerSecond int @@ -252,9 +246,6 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) { return nil, fmt.Errorf("failed to load eBPF maps: %v", err) } - hasBatchOperations := ebpfHandler.SupportsGenericBatchOperations() - hasBatchLookupAndDelete := ebpfHandler.SupportsGenericBatchLookupAndDelete() - processManager, err := pm.New(ctx, cfg.IncludeTracers, cfg.Intervals.MonitorInterval(), cfg.Intervals.ExecutableUnloadDelay(), ebpfHandler, cfg.TraceReporter, cfg.ExecutableReporter, elfunwindinfo.NewStackDeltaProvider(), @@ -275,7 +266,6 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) { ebpfProgs: ebpfProgs, hooks: make(map[hookPoint]link.Link), intervals: cfg.Intervals, - hasBatchOperations: hasBatchOperations, perfEntrypoints: xsync.NewRWMutex(perfEventList), samplesPerSecond: cfg.SamplesPerSecond, probabilisticInterval: cfg.ProbabilisticInterval, @@ -283,13 +273,6 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) { done: make(chan libpf.Void), } - // Use an optimized version if available - if hasBatchLookupAndDelete { - tracer.monitorPIDEventsMapMethod = (*tracer).monitorPIDEventsMapBatch - } else { - tracer.monitorPIDEventsMapMethod = (*tracer).monitorPIDEventsMapSingle - } - return tracer, nil } @@ -887,72 +870,9 @@ func (t *Tracer) enableEvent(eventType int) { _ = inhibitEventsMap.Delete(unsafe.Pointer(&et)) } -// monitorPIDEventsMapSingle iterates over the eBPF map pid_events, collects PIDs -// and writes them to the keys slice. -func (t *Tracer) monitorPIDEventsMapSingle(keys *[]libpf.PIDTID) error { - eventsMap := t.ebpfMaps["pid_events"] - var key, nextKey uint64 - var value bool - keyFound := true - deleteBatch := make(libpf.Set[uint64]) - - // Key 0 retrieves the very first element in the hash map as - // it is guaranteed not to exist in pid_events. 
- key = 0 - if err := eventsMap.NextKey(unsafe.Pointer(&key), unsafe.Pointer(&nextKey)); err != nil { - if errors.Is(err, cebpf.ErrKeyNotExist) { - return nil - } - return fmt.Errorf("Failed to read from pid_events map: %v", err) - } - - for keyFound { - key = nextKey - - if err := eventsMap.Lookup(unsafe.Pointer(&key), unsafe.Pointer(&value)); err != nil { - return fmt.Errorf("Failed to lookup '%v' in pid_events: %v", key, err) - } - - // Lookup the next map entry before deleting the current one. - if err := eventsMap.NextKey(unsafe.Pointer(&key), unsafe.Pointer(&nextKey)); err != nil { - if !errors.Is(err, cebpf.ErrKeyNotExist) { - return fmt.Errorf("Failed to read from pid_events map: %v", err) - } - keyFound = false - } - - if !t.hasBatchOperations { - // Now that we have the next key, we can delete the current one. - if err := eventsMap.Delete(unsafe.Pointer(&key)); err != nil { - return fmt.Errorf("Failed to delete '%v' from pid_events: %v", key, err) - } - } else { - // Store to-be-deleted keys in a map so we can delete them all with a single - // bpf syscall. - deleteBatch[key] = libpf.Void{} - } - - // If we process keys inline with iteration (e.g. by sending them to t.pidEvents at this - // exact point), we may block sending to the channel, delay the iteration and may introduce - // race conditions (related to deletion). For that reason, keys are first collected and, - // after the iteration has finished, sent to the channel. 
- *keys = append(*keys, libpf.PIDTID(key)) - } - - keysToDelete := len(deleteBatch) - if keysToDelete != 0 { - keys := libpf.MapKeysToSlice(deleteBatch) - if _, err := eventsMap.BatchDelete(keys, nil); err != nil { - return fmt.Errorf("Failed to batch delete %d entries from pid_events map: %v", - keysToDelete, err) - } - } - return nil -} - -// monitorPIDEventsMapBatch iterates over the eBPF map pid_events in batches, +// monitorPIDEventsMap iterates over the eBPF map pid_events in batches, // collects PIDs and writes them to the keys slice. -func (t *Tracer) monitorPIDEventsMapBatch(keys *[]libpf.PIDTID) error { +func (t *Tracer) monitorPIDEventsMap(keys *[]libpf.PIDTID) error { eventsMap := t.ebpfMaps["pid_events"] removed := make([]uint64, 128) @@ -1132,7 +1052,7 @@ func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *libp periodiccaller.StartWithManualTrigger(ctx, t.intervals.MonitorInterval(), t.triggerPIDProcessing, func(_ bool) bool { t.enableEvent(support.EventTypeGenericPID) - err := t.monitorPIDEventsMapMethod(&pidEvents) + err := t.monitorPIDEventsMap(&pidEvents) if err != nil { log.Errorf("Failed to monitor PID events: %v", err) t.signalDone()