Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions interpreter/python/python.go
Original file line number Diff line number Diff line change
Expand Up @@ -483,10 +483,13 @@ func (p *pythonInstance) getCodeObject(addr libpf.Address,
if addr == 0 {
return nil, errors.New("failed to read code object: null pointer")
}
if value, ok := p.addrToCodeObject.Get(addr); ok {
m := value
if m.ebpfChecksum == ebpfChecksum {
return m, nil
if ebpfChecksum != 0 {
// A zero checksum indicates code object read failed in the kernel (e.g. paged out).
if value, ok := p.addrToCodeObject.Get(addr); ok {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if value, ok := p.addrToCodeObject.Get(addr); ok {
// A zero checksum indicates code object read failed during the kernel read attempt (e.g. paged out).
if value, ok := p.addrToCodeObject.Get(addr); ok {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the previous suggestion got wiped in the rebase?

Suggested change
if value, ok := p.addrToCodeObject.Get(addr); ok {
// A zero checksum indicates code object read failed in the kernel (e.g. paged out).
if value, ok := p.addrToCodeObject.Get(addr); ok {

m := value
if m.ebpfChecksum == ebpfChecksum {
return m, nil
}
}
}

Expand Down Expand Up @@ -541,7 +544,7 @@ func (p *pythonInstance) getCodeObject(addr libpf.Address,

ebpfChecksumCalculated := (argCount << 25) + (kwonlyArgCount << 18) +
(flags << 10) + firstLineNo
if ebpfChecksum != ebpfChecksumCalculated {
if ebpfChecksum != 0 && ebpfChecksum != ebpfChecksumCalculated {
return nil, fmt.Errorf("read code object was stale: %x != %x",
ebpfChecksum, ebpfChecksumCalculated)
}
Expand All @@ -562,7 +565,7 @@ func (p *pythonInstance) getCodeObject(addr libpf.Address,
sourceFileName: libpf.Intern(sourceFileName),
firstLineNo: firstLineNo,
lineTable: lineTable,
ebpfChecksum: ebpfChecksum,
ebpfChecksum: ebpfChecksumCalculated,
}
p.addrToCodeObject.Add(addr, pco)
return pco, nil
Expand Down
8 changes: 8 additions & 0 deletions support/ebpf/bpfdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ static inline long bpf_probe_read_user(void *buf, u32 sz, const void *ptr)
return __bpf_probe_read_user(__cgo_ctx->id, buf, sz, ptr);
}

// Test-harness variant of bpf_probe_read_user. It forwards the read request to
// the externally defined __bpf_probe_read_user_with_test_fault, passing the id
// of the current cgo context together with the destination buffer, size and
// source pointer, and returns that function's result unchanged.
static inline long bpf_probe_read_user_with_test_fault(void *buf, u32 sz, const void *ptr)
{
// Local forward declaration of the helper implemented outside this header.
long __bpf_probe_read_user_with_test_fault(u64, void *, u32, const void *);
return __bpf_probe_read_user_with_test_fault(__cgo_ctx->id, buf, sz, ptr);
}

static inline long bpf_probe_read_kernel(UNUSED void *buf, UNUSED u32 sz, UNUSED const void *ptr)
{
return -1;
Expand Down Expand Up @@ -122,6 +128,8 @@ static long (*bpf_probe_read_user)(void *dst, int size, const void *unsafe_ptr)
static long (*bpf_probe_read_kernel)(void *dst, int size, const void *unsafe_ptr) = (void *)
BPF_FUNC_probe_read_kernel;

// In this branch there is no fault injection: the *_with_test_fault name is a
// plain alias for bpf_probe_read_user, so call sites can use it unconditionally.
#define bpf_probe_read_user_with_test_fault bpf_probe_read_user

#define printt(fmt, ...) \
({ \
const char ____fmt[] = fmt; \
Expand Down
9 changes: 7 additions & 2 deletions support/ebpf/python_tracer.ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,15 @@ static EBPF_INLINE ErrorCode process_python_frame(
}

// Read PyCodeObject
if (bpf_probe_read_user(pss->code, sizeof(pss->code), py_codeobject)) {
if (bpf_probe_read_user_with_test_fault(pss->code, sizeof(pss->code), py_codeobject)) {
DEBUG_PRINT("Failed to read PyCodeObject at 0x%lx", (unsigned long)(py_codeobject));
increment_metric(metricID_UnwindPythonErrBadCodeObjectArgCountAddr);
return ERR_PYTHON_BAD_CODE_OBJECT_ADDR;
// Push the frame with the code object address so the agent can try to
// read it in userspace (which can take page faults unlike BPF).
// codeobject_id=0 distinguishes this from a successful read.
file_id = (u64)py_codeobject;
lineno = py_encode_lineno(0, (u32)py_f_lasti);
goto push_frame;
}

int py_argcount = *(int *)(&pss->code[pyinfo->PyCodeObject_co_argcount]);
Expand Down
Binary file modified support/ebpf/tracer.ebpf.amd64
Binary file not shown.
Binary file modified support/ebpf/tracer.ebpf.arm64
Binary file not shown.
2 changes: 1 addition & 1 deletion tools/coredump/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (cmd *analyzeCmd) exec(context.Context, []string) (err error) {
}
defer proc.Close()

threads, err := ExtractTraces(context.Background(), proc, cmd.debugEbpf, lwpFilter)
threads, err := ExtractTraces(context.Background(), proc, cmd.debugEbpf, lwpFilter, nil)
if err != nil {
return fmt.Errorf("failed to extract traces: %w", err)
}
Expand Down
4 changes: 2 additions & 2 deletions tools/coredump/coredump.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func (t *traceReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.Trace
}

func ExtractTraces(ctx context.Context, pr process.Process, debug bool,
lwpFilter libpf.Set[libpf.PID]) ([]ThreadInfo, error) {
lwpFilter libpf.Set[libpf.PID], faultAddresses map[uintptr]int) ([]ThreadInfo, error) {
todo, cancel := context.WithCancel(ctx)
defer cancel()

Expand Down Expand Up @@ -159,7 +159,7 @@ func ExtractTraces(ctx context.Context, pr process.Process, debug bool,
}

// Interfaces for the managers
ebpfCtx := newEBPFContext(pr)
ebpfCtx := newEBPFContext(pr, faultAddresses)
defer ebpfCtx.release()

inverse_pac_mask := ^(pr.GetMachineData().CodePACMask)
Expand Down
35 changes: 34 additions & 1 deletion tools/coredump/coredump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,33 @@
package main

import (
"strconv"
"testing"

"github.com/stretchr/testify/require"
"go.opentelemetry.io/ebpf-profiler/tools/coredump/cloudstore"
"go.opentelemetry.io/ebpf-profiler/tools/coredump/modulestore"
)

// parseFaultAddresses turns the address strings of a test case's
// "fault-addresses" list into a map keyed by uintptr whose values are hit
// counters, all starting at 0. Parsing uses strconv.ParseUint with base 0, so
// "0x"-prefixed hex as well as plain decimal strings are accepted. A string
// that does not parse fails the test immediately. An empty or nil input
// yields a nil map.
//
// The caller is expected to inspect the counters after the unwind (see the
// loop in TestCoreDumps) to verify that every listed address was visited.
func parseFaultAddresses(t *testing.T, raw []string) map[uintptr]int {
	t.Helper()
	if len(raw) == 0 {
		return nil
	}
	counters := make(map[uintptr]int, len(raw))
	for i := range raw {
		parsed, err := strconv.ParseUint(raw[i], 0, 64)
		require.NoErrorf(t, err, "invalid fault-address %q", raw[i])
		// Register the address with a zeroed hit counter.
		counters[uintptr(parsed)] = 0
	}
	return counters
}

func TestCoreDumps(t *testing.T) {
cases, err := findTestCases(true)
require.NoError(t, err)
Expand All @@ -34,10 +54,23 @@ func TestCoreDumps(t *testing.T) {
require.NoError(t, err)
defer core.Close()

data, err := ExtractTraces(t.Context(), core, false, nil)
faults := parseFaultAddresses(t, testCase.FaultAddresses)
data, err := ExtractTraces(t.Context(), core, false, nil, faults)

require.NoError(t, err)
require.Equal(t, testCase.Threads, data)

// Every fault address listed in the test case must have been
// visited at least once by bpf_probe_read_user_with_test_fault;
// otherwise the test isn't actually exercising the recovery path
// it claims to (e.g. a stale address that the unwinder no longer
// reads). The map is mutated in place by the helper, so we can
// just iterate the post-run state.
for addr, hits := range faults {
require.Greaterf(t, hits, 0,
"fault address 0x%x was never visited by "+
"bpf_probe_read_user_with_test_fault", addr)
}
})
}
}
16 changes: 14 additions & 2 deletions tools/coredump/ebpfcontext.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ type ebpfContext struct {

// stackDeltaFileID is context variable for nested map lookups
stackDeltaFileID C.u64

// faultAddresses maps each user-space address at which
// bpf_probe_read_user_with_test_fault should simulate a failed kernel read
// (by returning -1) to a hit counter. The presence of a key (regardless of
// value) is what triggers the fault; the int value records how many times
// the helper visited that address during the unwind so tests can assert
// every injected fault actually exercised the code path under test.
faultAddresses map[uintptr]int
}

// ebpfContextMap is global mapping of EBPFContext id (PIDandTGID) to the actual data.
Expand All @@ -65,8 +73,11 @@ type ebpfContext struct {
// passed directly to the C code).
var ebpfContextMap = map[C.u64]*ebpfContext{}

// newEBPFContext creates new EBPF Context from given core dump image
func newEBPFContext(pr process.Process) *ebpfContext {
// newEBPFContext creates new EBPF Context from given core dump image. The
// faultAddresses map, if non-empty, instructs bpf_probe_read_user_with_test_fault
// to return -1 for those addresses; the int value of each entry is incremented
// each time the helper visits the address.
func newEBPFContext(pr process.Process, faultAddresses map[uintptr]int) *ebpfContext {
pid := pr.PID()
ctx := &ebpfContext{
trace: libpf.EbpfTrace{PID: pid},
Expand All @@ -78,6 +89,7 @@ func newEBPFContext(pr process.Process) *ebpfContext {
maps: make(map[unsafe.Pointer]map[any]unsafe.Pointer),
perCPURecord: C.malloc(C.sizeof_PerCPURecord),
unwindInfoArray: C.malloc(C.sizeof_UnwindInfo * C.ulong(support.UnwindInfoMaxEntries)),
faultAddresses: faultAddresses,
}
ebpfContextMap[ctx.PIDandTGID] = ctx
return ctx
Expand Down
25 changes: 25 additions & 0 deletions tools/coredump/ebpfhelpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,31 @@ func __bpf_probe_read_user(id C.u64, buf unsafe.Pointer, sz C.int, ptr unsafe.Po
return 0
}

// __bpf_probe_read_user_with_test_fault is the Go implementation behind the C
// helper of the same name. It looks up the ebpfContext registered under id and
// then either injects a fault or performs the read:
//
//   - If ptr is a key of the context's faultAddresses map, the hit counter for
//     that address is incremented and -1 is returned without touching buf.
//   - Otherwise the data is read from the context's remoteMemory at ptr into
//     the destination obtained from sliceBuffer(buf, sz) (presumably a
//     sz-byte view of buf; see sliceBuffer). It returns 0 on success and -1
//     if ReadAt reports an error.
//
// Every call is logged at Debug level so test authors can find candidate
// addresses; injected faults are logged at Info level so they show up in CI.
//
//export __bpf_probe_read_user_with_test_fault
func __bpf_probe_read_user_with_test_fault(
	id C.u64, buf unsafe.Pointer, sz C.int, ptr unsafe.Pointer,
) C.long {
	ectx := ebpfContextMap[id]
	target := uintptr(ptr)
	log.Debugf("bpf_probe_read_user_with_test_fault: sz=%d ptr=0x%x", int(sz), target)

	if _, faulty := ectx.faultAddresses[target]; !faulty {
		// Regular path: serve the read from the process memory image.
		out := sliceBuffer(buf, sz)
		_, err := ectx.remoteMemory.ReadAt(out, int64(target))
		if err != nil {
			return -1
		}
		return 0
	}

	// Fault-injection path: record the visit and pretend the read failed.
	log.Infof("bpf_probe_read_user_with_test_fault: injecting fault at 0x%x (sz=%d)",
		target, int(sz))
	ectx.faultAddresses[target]++
	return -1
}

// stackDeltaInnerMap is a special map returned to C code to indicate that
// we are accessing one of nested maps in the exe_id_to_X_stack_deltas maps
var stackDeltaInnerMap = C.malloc(1)
Expand Down
5 changes: 5 additions & 0 deletions tools/coredump/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ type CoredumpTestCase struct {
Skip string `json:"skip,omitempty"`
Threads []ThreadInfo `json:"threads"`
Modules []ModuleInfo `json:"modules"`
// FaultAddresses is an optional list of user-space addresses, written as
// strings in "0x"-prefixed hex (e.g. "0x7f1234567000") or in decimal, at
// which the test harness should make bpf_probe_read_user_with_test_fault
// return -1, simulating a BPF read failure. Used to exercise recovery paths.
FaultAddresses []string `json:"fault-addresses,omitempty"`
}

// ModuleInfo stores information about a module that was loaded when the coredump was created.
Expand Down
2 changes: 1 addition & 1 deletion tools/coredump/new.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func (cmd *newCmd) exec(context.Context, []string) (err error) {

testCase := &CoredumpTestCase{}

testCase.Threads, err = ExtractTraces(context.Background(), core, cmd.debugEbpf, nil)
testCase.Threads, err = ExtractTraces(context.Background(), core, cmd.debugEbpf, nil, nil)
if err != nil {
return fmt.Errorf("failed to extract traces: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion tools/coredump/rebase.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func (cmd *rebaseCmd) exec(context.Context, []string) (err error) {
return fmt.Errorf("failed to open coredump: %w", err)
}

testCase.Threads, err = ExtractTraces(context.Background(), core, false, nil)
testCase.Threads, err = ExtractTraces(context.Background(), core, false, nil, nil)
_ = core.Close()
if err != nil {
return fmt.Errorf("failed to extract traces: %w", err)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"coredump-ref": "a69bc3dea27b197295177ab1645783ddf27e682fafc9102ef7723b05c528285c",
"fault-addresses": [
"0x7fb421b7cb70",
"0x7fb421b51c30"
],
"threads": [
{
"lwp": 28509,
"frames": [
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"recur_fibo+4 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:7",
"<module>+9 in /tmp/opentelemetry-ebpf-profiler/tools/coredump/testsources/python/fib.py:10",
"?+0x0",
"python3.14+0x5454c6",
"python3.14+0x53ffa0",
"python3.14+0x6b741e",
"python3.14+0x6b3dcf",
"python3.14+0x6b377d",
"python3.14+0x6b3271",
"python3.14+0x6b0552",
"python3.14+0x65ea68",
"libc.so.6+0x29f74",
"libc.so.6+0x2a026",
"python3.14+0x65de70"
]
}
],
"modules": [
{
"ref": "85590dd58edf5445e18bc7193e5ebc01ac5841f1ae187e97705a662e90c6421e",
"local-path": "/usr/lib/x86_64-linux-gnu/libz.so.1.3.1"
},
{
"ref": "e9a55da498abf2f190abd96115c7b9381d51968729b35e9d351e6a16cc951e5c",
"local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6"
},
{
"ref": "f5ffb9a6143905c445c980b7430ea94ad800b3c15dc83d5ec7667c6807f595dd",
"local-path": "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2"
},
{
"ref": "af6ab77528a4732280b9e158c987893a492ae315929277493fb60f0b4d381d8c",
"local-path": "/usr/bin/python3.14"
},
{
"ref": "7d771b3a3182ca9193afafa461e099af06ec929b6171bb1c20920c11af3e5850",
"local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6"
},
{
"ref": "01ed980b9420653bfa242cdddcfed0b59c5dc066c79d95b8637a52d0135a64f1",
"local-path": "/usr/lib/x86_64-linux-gnu/libexpat.so.1.11.1"
}
]
}
Loading