diff --git a/docs/sources/reference/components/pyroscope/pyroscope.ebpf.md b/docs/sources/reference/components/pyroscope/pyroscope.ebpf.md index 62054d1408d..fa6155d50de 100644 --- a/docs/sources/reference/components/pyroscope/pyroscope.ebpf.md +++ b/docs/sources/reference/components/pyroscope/pyroscope.ebpf.md @@ -111,6 +111,63 @@ Several arguments are marked as "Deprecated (no-op)". These arguments were previ * `pyroscope_ebpf_profiling_sessions_total` (counter): Number of profiling sessions completed. * `pyroscope_fanout_latency` (histogram): Write latency for sending to direct and indirect components. +### eBPF profiler internal metrics + +The component also exposes internal metrics from the embedded eBPF profiler. +These metrics are only emitted after they have been recorded at least once, so not all metrics appear on every host. +The metrics carry an `otel_scope_name="pyroscope.ebpf"` label. + +Notable metrics include: + +#### Native unwinding + +* `UnwindNativeAttempts_total` (counter): Unwind attempts since the previous check. +* `UnwindNativeFrames_total` (counter): Unwound frames since the previous check. +* `UnwindNativeStackDeltaStop_total` (counter): Number of stop stack deltas in the native unwinder (success). +* `UnwindNativeSmallPC_total` (counter): Number of times PC held a value smaller than 0x1000. +* `UnwindErrStackLengthExceeded_total` (counter): Number of times MAX_FRAME_UNWINDS has been exceeded. + +#### Interpreter unwinding + +* `UnwindPythonAttempts_total` (counter): Number of attempted Python unwinds. +* `UnwindPythonFrames_total` (counter): Number of unwound Python frames. +* `UnwindHotspotAttempts_total` (counter): Number of attempted Hotspot JVM unwinds. +* `UnwindHotspotFrames_total` (counter): Number of unwound Hotspot JVM frames. +* `UnwindRubyAttempts_total` (counter): Number of attempted Ruby unwinds. +* `UnwindRubyFrames_total` (counter): Number of unwound Ruby frames. 
+* `UnwindPHPAttempts_total` (counter): Number of attempted PHP unwinds. +* `UnwindPHPFrames_total` (counter): Number of unwound PHP frames. +* `UnwindPerlAttempts_total` (counter): Number of attempted Perl unwinds. +* `UnwindPerlFrames_total` (counter): Number of unwound Perl frames. +* `UnwindV8Attempts_total` (counter): Number of attempted V8 unwinds. +* `UnwindV8Frames_total` (counter): Number of unwound V8 frames. +* `UnwindDotnetAttempts_total` (counter): Number of attempted .NET unwinds. +* `UnwindDotnetFrames_total` (counter): Number of unwound .NET frames. + +#### Symbolization + +* `PythonSymbolizationSuccesses_total` (counter): Number of successfully symbolized Python frames. +* `PythonSymbolizationFailures_total` (counter): Number of Python frames that failed symbolization. +* `HotspotSymbolizationSuccesses_total` (counter): Number of successfully symbolized Hotspot frames. +* `HotspotSymbolizationFailures_total` (counter): Number of Hotspot frames that failed symbolization. +* `RubySymbolizationSuccess_total` (counter): Number of successfully symbolized Ruby frames. +* `RubySymbolizationFailure_total` (counter): Number of Ruby frames that failed symbolization. + +#### Process management + +* `NumProcNew_total` (counter): Number of new PID events. +* `NumProcExit_total` (counter): Number of exit PID events. +* `NumGenericPID_total` (counter): Number of generic PID events. + +#### eBPF map state + +* `NumExeIDLoadedToEBPF` (gauge): Number of executables loaded into eBPF maps. +* `HashmapPidPageToMappingInfo` (gauge): Current size of the `pid_page_to_mapping_info` hash map. +* `HashmapNumStackDeltaPages` (gauge): Current size of the stack delta pages hash map. +* `UnwindInfoArraySize` (gauge): Current size of the unwind info array. + +The full list of more than 200 metrics is defined in the [`opentelemetry-ebpf-profiler` metrics.json](https://github.com/grafana/opentelemetry-ebpf-profiler/blob/main/metrics/metrics.json). 
+ ## Profile collecting behavior The `pyroscope.ebpf` component collects stack traces associated with a process running on the current host. diff --git a/internal/component/pyroscope/ebpf/ebpf_linux.go b/internal/component/pyroscope/ebpf/ebpf_linux.go index 593586ccce1..39712e5e0a6 100644 --- a/internal/component/pyroscope/ebpf/ebpf_linux.go +++ b/internal/component/pyroscope/ebpf/ebpf_linux.go @@ -4,6 +4,7 @@ package ebpf import ( "context" + "fmt" "os" "path" "path/filepath" @@ -29,7 +30,8 @@ import ( "go.opentelemetry.io/ebpf-profiler/pyroscope/internalshim/controller" reporter2 "go.opentelemetry.io/ebpf-profiler/reporter" - metricnoop "go.opentelemetry.io/otel/metric/noop" + sdkprometheus "go.opentelemetry.io/otel/exporters/prometheus" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" "github.com/grafana/alloy/internal/component" "github.com/grafana/alloy/internal/component/pyroscope" @@ -53,11 +55,37 @@ func init() { }, }) python.NoContinueWithNextUnwinder.Store(true) - // Disable ebpf profiler metrics - ebpfmetrics.Start(metricnoop.Meter{}) } +var ( + ebpfMetricsOnce sync.Once + ebpfMetricsRegistry *prometheus.Registry // reused by all instances + ebpfMetricsErr error // stored for all instances to check +) + func New(logger log.Logger, reg prometheus.Registerer, id string, args Arguments) (*Component, error) { + // ebpfmetrics.Start writes to package-level globals in the upstream library, + // so it must only be called once. All instances share the same OTel registry. 
+ ebpfMetricsOnce.Do(func() { + ebpfMetricsRegistry = prometheus.NewRegistry() + promExporter, err := sdkprometheus.New( + sdkprometheus.WithRegisterer(ebpfMetricsRegistry), + sdkprometheus.WithoutTargetInfo(), + ) + if err != nil { + ebpfMetricsErr = fmt.Errorf("creating OTel prometheus exporter: %w", err) + return + } + mp := sdkmetric.NewMeterProvider(sdkmetric.WithReader(promExporter)) + ebpfmetrics.Start(mp.Meter("pyroscope.ebpf")) + }) + if ebpfMetricsErr != nil { + return nil, ebpfMetricsErr + } + if reg != nil { + reg.MustRegister(ebpfMetricsRegistry) + } + cfg, err := args.Convert() if err != nil { return nil, err