Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions cli_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"github.com/peterbourgon/ff/v3"

"go.opentelemetry.io/ebpf-profiler/internal/controller"
"go.opentelemetry.io/ebpf-profiler/support"
"go.opentelemetry.io/ebpf-profiler/tracer"
)

Expand Down Expand Up @@ -64,10 +63,10 @@ var (
"If zero, monotonic-realtime clock sync will be performed once, " +
"on agent startup, but not periodically."
sendErrorFramesHelp = "Send error frames (devfiler only, breaks Kibana)"
offCPUThresholdHelp = fmt.Sprintf("The per-mille chance for an off-cpu event being recorded. "+
"Valid values are in the range [1..%d], and 0 to disable off-cpu profiling."+
offCPUThresholdHelp = fmt.Sprintf("The probability for an off-cpu event being recorded. "+
"Valid values are in the range [0..1]. 0 disables off-cpu profiling. "+
"Default is %d.",
support.OffCPUThresholdMax, defaultOffCPUThreshold)
defaultOffCPUThreshold)
envVarsHelp = "Comma separated list of environment variables that will be reported with the" +
"captured profiling samples."
)
Expand Down Expand Up @@ -123,7 +122,7 @@ func parseArgs() (*controller.Config, error) {
fs.BoolVar(&args.VerboseMode, "verbose", false, verboseModeHelp)
fs.BoolVar(&args.Version, "version", false, versionHelp)

fs.UintVar(&args.OffCPUThreshold, "off-cpu-threshold",
fs.Float64Var(&args.OffCPUThreshold, "off-cpu-threshold",
defaultOffCPUThreshold, offCPUThresholdHelp)

fs.StringVar(&args.IncludeEnvVars, "env-vars", defaultEnvVarsValue, envVarsHelp)
Expand Down
13 changes: 5 additions & 8 deletions internal/controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
log "github.com/sirupsen/logrus"

"go.opentelemetry.io/ebpf-profiler/reporter"
"go.opentelemetry.io/ebpf-profiler/support"
"go.opentelemetry.io/ebpf-profiler/tracer"
)

Expand All @@ -32,7 +31,7 @@ type Config struct {
Tracers string
VerboseMode bool
Version bool
OffCPUThreshold uint
OffCPUThreshold float64

Reporter reporter.Reporter

Expand Down Expand Up @@ -89,12 +88,10 @@ func (cfg *Config) Validate() error {
)
}

if cfg.OffCPUThreshold > support.OffCPUThresholdMax {
return fmt.Errorf(
"invalid argument for off-cpu-threshold. Value "+
"should be between 1 and %d, or 0 to disable off-cpu profiling",
support.OffCPUThresholdMax,
)
if cfg.OffCPUThreshold < 0.0 || cfg.OffCPUThreshold > 1.0 {
return errors.New(
"invalid argument for off-cpu-threshold. The value " +
"should be in the range [0..1]. 0 disables off-cpu profiling")
}

if !cfg.NoKernelVersionCheck {
Expand Down
7 changes: 4 additions & 3 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package controller // import "go.opentelemetry.io/ebpf-profiler/internal/control
import (
"context"
"fmt"
"math"
"strings"
"time"

Expand Down Expand Up @@ -95,7 +96,7 @@ func (c *Controller) Start(ctx context.Context) error {
BPFVerifierLogLevel: uint32(c.config.BpfVerifierLogLevel),
ProbabilisticInterval: c.config.ProbabilisticInterval,
ProbabilisticThreshold: c.config.ProbabilisticThreshold,
OffCPUThreshold: uint32(c.config.OffCPUThreshold),
OffCPUThreshold: uint32(c.config.OffCPUThreshold * float64(math.MaxUint32)),
IncludeEnvVars: envVars,
})
if err != nil {
Expand All @@ -117,11 +118,11 @@ func (c *Controller) Start(ctx context.Context) error {
}
log.Info("Attached tracer program")

if c.config.OffCPUThreshold > 0 {
if c.config.OffCPUThreshold > 0.0 {
if err := trc.StartOffCPUProfiling(); err != nil {
return fmt.Errorf("failed to start off-cpu profiling: %v", err)
}
log.Printf("Enabled off-cpu profiling")
log.Printf("Enabled off-cpu profiling with p=%f", c.config.OffCPUThreshold)
}

if c.config.ProbabilisticThreshold < tracer.ProbabilisticThresholdMax {
Expand Down
2 changes: 1 addition & 1 deletion support/ebpf/off_cpu.ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int tracepoint__sched_switch(void *ctx)
return ERR_UNREACHABLE;
}

if (bpf_get_prandom_u32() % OFF_CPU_THRESHOLD_MAX >= syscfg->off_cpu_threshold) {
if (bpf_get_prandom_u32() > syscfg->off_cpu_threshold) {
return 0;
}

Expand Down
Binary file modified support/ebpf/tracer.ebpf.amd64
Binary file not shown.
Binary file modified support/ebpf/tracer.ebpf.arm64
Binary file not shown.
3 changes: 0 additions & 3 deletions support/ebpf/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,6 @@ typedef enum TraceOrigin {
TRACE_OFF_CPU,
} TraceOrigin;

// OFF_CPU_THRESHOLD_MAX defines the maximum threshold.
#define OFF_CPU_THRESHOLD_MAX 1000

// MAX_FRAME_UNWINDS defines the maximum number of frames per
// Trace we can unwind and respect the limit of eBPF instructions,
// limit of tail calls and limit of stack size per eBPF program.
Expand Down
2 changes: 0 additions & 2 deletions support/types.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions support/types_def.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,6 @@ const (
TraceOriginOffCPU = C.TRACE_OFF_CPU
)

const OffCPUThresholdMax = C.OFF_CPU_THRESHOLD_MAX

type ApmIntProcInfo C.ApmIntProcInfo
type DotnetProcInfo C.DotnetProcInfo
type PHPProcInfo C.PHPProcInfo
Expand Down
3 changes: 3 additions & 0 deletions testutils/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ import (
"context"
"errors"
"io"
"math"
"os"
"strings"
"testing"
"time"

log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"

"go.opentelemetry.io/ebpf-profiler/host"
"go.opentelemetry.io/ebpf-profiler/libpf"
"go.opentelemetry.io/ebpf-profiler/reporter"
Expand Down Expand Up @@ -49,6 +51,7 @@ func StartTracer(ctx context.Context, t *testing.T, et tracertypes.IncludedTrace
SamplesPerSecond: 20,
ProbabilisticInterval: 100,
ProbabilisticThreshold: 100,
OffCPUThreshold: uint32(math.MaxUint32 / 100),
DebugTracer: true,
})
require.NoError(t, err)
Expand Down
3 changes: 2 additions & 1 deletion tracer/ebpf_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package tracer_test

import (
"context"
"math"
"runtime"
"sync"
"testing"
Expand Down Expand Up @@ -114,7 +115,7 @@ func TestTraceTransmissionAndParsing(t *testing.T) {
BPFVerifierLogLevel: 0,
ProbabilisticInterval: 100,
ProbabilisticThreshold: 100,
OffCPUThreshold: support.OffCPUThresholdMax,
OffCPUThreshold: 1 * math.MaxUint32,
DebugTracer: true,
})
require.NoError(t, err)
Expand Down
26 changes: 21 additions & 5 deletions tracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"context"
"errors"
"fmt"
"math"
"math/rand/v2"
"strings"
"sync/atomic"
Expand Down Expand Up @@ -448,11 +449,7 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config,
adaption["stack_delta_page_to_info"] =
1 << uint32(stackDeltaPageToInfoSize+cfg.MapScaleFactor)

// To not lose too many scheduling events but also not oversize sched_times,
// calculate a size based on an assumed upper bound of scheduler events per
// second (1000hz) multiplied by an average time a task remains off CPU (3s),
// scaled by the probability of capturing a trace.
adaption["sched_times"] = (4096 * cfg.OffCPUThreshold) / support.OffCPUThresholdMax
adaption["sched_times"] = schedTimesSize(cfg.OffCPUThreshold)

for i := support.StackDeltaBucketSmallest; i <= support.StackDeltaBucketLargest; i++ {
mapName := fmt.Sprintf("exe_id_to_%d_stack_deltas", i)
Expand All @@ -478,6 +475,25 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config,
return nil
}

// schedTimesSize calculates the size of the sched_times map based on the
// configured off-cpu threshold.
// To not lose too many scheduling events but also not oversize sched_times,
// calculate a size based on an assumed upper bound of scheduler events per
// second (1000hz) multiplied by an average time a task remains off CPU (3s),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[..] an average time a task remains off CPU (3s) - is there some evidence for this number? From local workloads I see significantly different (lower) values.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't change the text, just moved it. Maybe @christos68k can give some background on how exactly he measured this number.

Copy link
Copy Markdown
Contributor Author

@rockdaboot rockdaboot May 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Otherwise, what is your preferred number here @florianl?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went for relaxed rather than tight sizing here.

// scaled by the probability of capturing a trace.
func schedTimesSize(threshold uint32) uint32 {
size := uint32((4096 * uint64(threshold)) / math.MaxUint32)
if size < 16 {
// Guarantee a minimal size of 16.
return 16
}
if size > 4096 {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With a given probability value of 1.0 I would expect every scheduling event to show up. With this change, this is not possible, as the size of sched_times becomes the limiting factor.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The behavior for 1.0 is the same as for 1000 before.
Before: adaption["sched_times"] = (4096 * cfg.OffCPUThreshold) / support.OffCPUThresholdMax results in 4096 for the maximum threshold (=1000).
This PR: User enters 1.0, which is threshold = math.MaxUint32, so the result is also 4096.

// Guarantee a maximum size of 4096.
return 4096
}
return size
}

// loadPerfUnwinders loads all perf eBPF Programs and their tail call targets.
func loadPerfUnwinders(coll *cebpf.CollectionSpec, ebpfProgs map[string]*cebpf.Program,
tailcallMap *cebpf.Map, tailCallProgs []progLoaderHelper,
Expand Down