diff --git a/cli_flags.go b/cli_flags.go index fbc770127..bb2282ad4 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -12,7 +12,6 @@ import ( "github.com/peterbourgon/ff/v3" "go.opentelemetry.io/ebpf-profiler/internal/controller" - "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/tracer" ) @@ -64,10 +63,10 @@ var ( "If zero, monotonic-realtime clock sync will be performed once, " + "on agent startup, but not periodically." sendErrorFramesHelp = "Send error frames (devfiler only, breaks Kibana)" - offCPUThresholdHelp = fmt.Sprintf("The per-mille chance for an off-cpu event being recorded. "+ - "Valid values are in the range [1..%d], and 0 to disable off-cpu profiling."+ + offCPUThresholdHelp = fmt.Sprintf("The probability for an off-cpu event being recorded. "+ + "Valid values are in the range [0..1]. 0 disables off-cpu profiling. "+ "Default is %d.", - support.OffCPUThresholdMax, defaultOffCPUThreshold) + defaultOffCPUThreshold) envVarsHelp = "Comma separated list of environment variables that will be reported with the" + "captured profiling samples." 
) @@ -123,7 +122,7 @@ func parseArgs() (*controller.Config, error) { fs.BoolVar(&args.VerboseMode, "verbose", false, verboseModeHelp) fs.BoolVar(&args.Version, "version", false, versionHelp) - fs.UintVar(&args.OffCPUThreshold, "off-cpu-threshold", + fs.Float64Var(&args.OffCPUThreshold, "off-cpu-threshold", defaultOffCPUThreshold, offCPUThresholdHelp) fs.StringVar(&args.IncludeEnvVars, "env-vars", defaultEnvVarsValue, envVarsHelp) diff --git a/internal/controller/config.go b/internal/controller/config.go index cf457100a..16c85321c 100644 --- a/internal/controller/config.go +++ b/internal/controller/config.go @@ -10,7 +10,6 @@ import ( log "github.com/sirupsen/logrus" "go.opentelemetry.io/ebpf-profiler/reporter" - "go.opentelemetry.io/ebpf-profiler/support" "go.opentelemetry.io/ebpf-profiler/tracer" ) @@ -32,7 +31,7 @@ type Config struct { Tracers string VerboseMode bool Version bool - OffCPUThreshold uint + OffCPUThreshold float64 Reporter reporter.Reporter @@ -89,12 +88,10 @@ func (cfg *Config) Validate() error { ) } - if cfg.OffCPUThreshold > support.OffCPUThresholdMax { - return fmt.Errorf( - "invalid argument for off-cpu-threshold. Value "+ - "should be between 1 and %d, or 0 to disable off-cpu profiling", - support.OffCPUThresholdMax, - ) + if cfg.OffCPUThreshold < 0.0 || cfg.OffCPUThreshold > 1.0 { + return errors.New( + "invalid argument for off-cpu-threshold. The value " + + "should be in the range [0..1]. 
0 disables off-cpu profiling") } if !cfg.NoKernelVersionCheck { diff --git a/internal/controller/controller.go b/internal/controller/controller.go index bb3b0e8ae..f9eeac858 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -3,6 +3,7 @@ package controller // import "go.opentelemetry.io/ebpf-profiler/internal/control import ( "context" "fmt" + "math" "strings" "time" @@ -95,7 +96,7 @@ func (c *Controller) Start(ctx context.Context) error { BPFVerifierLogLevel: uint32(c.config.BpfVerifierLogLevel), ProbabilisticInterval: c.config.ProbabilisticInterval, ProbabilisticThreshold: c.config.ProbabilisticThreshold, - OffCPUThreshold: uint32(c.config.OffCPUThreshold), + OffCPUThreshold: uint32(c.config.OffCPUThreshold * float64(math.MaxUint32)), IncludeEnvVars: envVars, }) if err != nil { @@ -117,11 +118,11 @@ func (c *Controller) Start(ctx context.Context) error { } log.Info("Attached tracer program") - if c.config.OffCPUThreshold > 0 { + if c.config.OffCPUThreshold > 0.0 { if err := trc.StartOffCPUProfiling(); err != nil { return fmt.Errorf("failed to start off-cpu profiling: %v", err) } - log.Printf("Enabled off-cpu profiling") + log.Printf("Enabled off-cpu profiling with p=%f", c.config.OffCPUThreshold) } if c.config.ProbabilisticThreshold < tracer.ProbabilisticThresholdMax { diff --git a/support/ebpf/off_cpu.ebpf.c b/support/ebpf/off_cpu.ebpf.c index 156c7850d..d260bc2fb 100644 --- a/support/ebpf/off_cpu.ebpf.c +++ b/support/ebpf/off_cpu.ebpf.c @@ -37,7 +37,7 @@ int tracepoint__sched_switch(void *ctx) return ERR_UNREACHABLE; } - if (bpf_get_prandom_u32() % OFF_CPU_THRESHOLD_MAX >= syscfg->off_cpu_threshold) { + if (bpf_get_prandom_u32() > syscfg->off_cpu_threshold) { return 0; } diff --git a/support/ebpf/tracer.ebpf.amd64 b/support/ebpf/tracer.ebpf.amd64 index b8d948e91..93676d910 100644 Binary files a/support/ebpf/tracer.ebpf.amd64 and b/support/ebpf/tracer.ebpf.amd64 differ diff --git a/support/ebpf/tracer.ebpf.arm64 
b/support/ebpf/tracer.ebpf.arm64 index d980ed81b..8da947615 100644 Binary files a/support/ebpf/tracer.ebpf.arm64 and b/support/ebpf/tracer.ebpf.arm64 differ diff --git a/support/ebpf/types.h b/support/ebpf/types.h index 4fd4a575f..c4635f4e3 100644 --- a/support/ebpf/types.h +++ b/support/ebpf/types.h @@ -346,9 +346,6 @@ typedef enum TraceOrigin { TRACE_OFF_CPU, } TraceOrigin; -// OFF_CPU_THRESHOLD_MAX defines the maximum threshold. -#define OFF_CPU_THRESHOLD_MAX 1000 - // MAX_FRAME_UNWINDS defines the maximum number of frames per // Trace we can unwind and respect the limit of eBPF instructions, // limit of tail calls and limit of stack size per eBPF program. diff --git a/support/types.go b/support/types.go index 836f10a2e..e21b8fc53 100644 --- a/support/types.go +++ b/support/types.go @@ -85,8 +85,6 @@ const ( TraceOriginOffCPU = 0x2 ) -const OffCPUThresholdMax = 0x3e8 - type ApmIntProcInfo struct { Offset uint64 } diff --git a/support/types_def.go b/support/types_def.go index d04b153e1..a141a2340 100644 --- a/support/types_def.go +++ b/support/types_def.go @@ -95,8 +95,6 @@ const ( TraceOriginOffCPU = C.TRACE_OFF_CPU ) -const OffCPUThresholdMax = C.OFF_CPU_THRESHOLD_MAX - type ApmIntProcInfo C.ApmIntProcInfo type DotnetProcInfo C.DotnetProcInfo type PHPProcInfo C.PHPProcInfo diff --git a/testutils/helpers.go b/testutils/helpers.go index 3cc04847a..7d68323a7 100644 --- a/testutils/helpers.go +++ b/testutils/helpers.go @@ -5,6 +5,7 @@ import ( "context" "errors" "io" + "math" "os" "strings" "testing" @@ -12,6 +13,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" + "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/reporter" @@ -49,6 +51,7 @@ func StartTracer(ctx context.Context, t *testing.T, et tracertypes.IncludedTrace SamplesPerSecond: 20, ProbabilisticInterval: 100, ProbabilisticThreshold: 100, + OffCPUThreshold: uint32(math.MaxUint32 / 100), DebugTracer: true, 
}) require.NoError(t, err) diff --git a/tracer/ebpf_integration_test.go b/tracer/ebpf_integration_test.go index 9e85ea6bd..eb7bb202d 100644 --- a/tracer/ebpf_integration_test.go +++ b/tracer/ebpf_integration_test.go @@ -7,6 +7,7 @@ package tracer_test import ( "context" + "math" "runtime" "sync" "testing" @@ -114,7 +115,7 @@ func TestTraceTransmissionAndParsing(t *testing.T) { BPFVerifierLogLevel: 0, ProbabilisticInterval: 100, ProbabilisticThreshold: 100, - OffCPUThreshold: support.OffCPUThresholdMax, + OffCPUThreshold: 1 * math.MaxUint32, DebugTracer: true, }) require.NoError(t, err) diff --git a/tracer/tracer.go b/tracer/tracer.go index e8754e4d1..20e5b8ff7 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -9,6 +9,7 @@ import ( "context" "errors" "fmt" + "math" "math/rand/v2" "strings" "sync/atomic" @@ -448,11 +449,7 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config, adaption["stack_delta_page_to_info"] = 1 << uint32(stackDeltaPageToInfoSize+cfg.MapScaleFactor) - // To not lose too many scheduling events but also not oversize sched_times, - // calculate a size based on an assumed upper bound of scheduler events per - // second (1000hz) multiplied by an average time a task remains off CPU (3s), - // scaled by the probability of capturing a trace. - adaption["sched_times"] = (4096 * cfg.OffCPUThreshold) / support.OffCPUThresholdMax + adaption["sched_times"] = schedTimesSize(cfg.OffCPUThreshold) for i := support.StackDeltaBucketSmallest; i <= support.StackDeltaBucketLargest; i++ { mapName := fmt.Sprintf("exe_id_to_%d_stack_deltas", i) @@ -478,6 +475,25 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config, return nil } +// schedTimesSize calculates the size of the sched_times map based on the +// configured off-cpu threshold. 
+// To not lose too many scheduling events but also not oversize sched_times,
+// calculate a size based on an assumed upper bound of scheduler events per
+// second (1000hz) multiplied by an average time a task remains off CPU (3s),
+// scaled by the probability of capturing a trace.
+func schedTimesSize(threshold uint32) uint32 {
+	const (
+		minEntries = 16   // Guarantee a minimal size of 16.
+		maxEntries = 4096 // Guarantee a maximum size of 4096.
+	)
+
+	// Multiply in 64 bit so the product cannot wrap for large thresholds;
+	// the quotient is at most maxEntries and therefore fits into uint32.
+	scaled := uint32((maxEntries * uint64(threshold)) / math.MaxUint32)
+
+	return min(max(scaled, minEntries), maxEntries)
+}
+
 // loadPerfUnwinders loads all perf eBPF Programs and their tail call targets.
 func loadPerfUnwinders(coll *cebpf.CollectionSpec, ebpfProgs map[string]*cebpf.Program,
 	tailcallMap *cebpf.Map, tailCallProgs []progLoaderHelper,