diff --git a/LICENSES/golang.org/x/arch/arm64/arm64asm/LICENSE b/LICENSES/golang.org/x/arch/LICENSE similarity index 100% rename from LICENSES/golang.org/x/arch/arm64/arm64asm/LICENSE rename to LICENSES/golang.org/x/arch/LICENSE diff --git a/asm/amd/insn.go b/asm/amd/insn.go new file mode 100644 index 000000000..784b1d931 --- /dev/null +++ b/asm/amd/insn.go @@ -0,0 +1,20 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" +import "bytes" + +// https://www.felixcloutier.com/x86/endbr64 +var opcodeEndBr64 = []byte{0xf3, 0x0f, 0x1e, 0xfa} + +// DecodeSkippable decodes an instruction that we don't care much about and are going to skip, +// as golang.org/x/arch/x86/x86asm fails to decode it. +// The second returned argument is the size of the decoded instruction to skip. +func DecodeSkippable(code []byte) (ok bool, size int) { + switch { + case bytes.HasPrefix(code, opcodeEndBr64): + return true, len(opcodeEndBr64) + default: + return false, 0 + } +} diff --git a/asm/amd/insn_test.go b/asm/amd/insn_test.go new file mode 100644 index 000000000..3dd3708f4 --- /dev/null +++ b/asm/amd/insn_test.go @@ -0,0 +1,19 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package amd + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestEndBr64(t *testing.T) { + res, n := DecodeSkippable([]byte{0xF3, 0x0F, 0x1E, 0xFA}) + assert.True(t, res) + assert.Equal(t, 4, n) + + res, _ = DecodeSkippable([]byte{}) + assert.False(t, res) +} diff --git a/asm/amd/regs_state.go b/asm/amd/regs_state.go new file mode 100644 index 000000000..92b36ee39 --- /dev/null +++ b/asm/amd/regs_state.go @@ -0,0 +1,66 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" + +import "golang.org/x/arch/x86/x86asm" + +// regIndex returns index into RegsState.regs +func regIndex(reg x86asm.Reg) int { + switch reg { + case x86asm.RAX, x86asm.EAX: + return 1 + case x86asm.RBX, x86asm.EBX: + return 2 + case x86asm.RCX, x86asm.ECX: + return 3 + case x86asm.RDX, x86asm.EDX: + return 4 + case x86asm.RDI, x86asm.EDI: + return 5 + case x86asm.RSI, x86asm.ESI: + return 6 + case x86asm.RBP, x86asm.EBP: + return 7 + case x86asm.R8, x86asm.R8L: + return 8 + case x86asm.R9, x86asm.R9L: + return 9 + case x86asm.R10, x86asm.R10L: + return 10 + case x86asm.R11, x86asm.R11L: + return 11 + case x86asm.R12, x86asm.R12L: + return 12 + case x86asm.R13, x86asm.R13L: + return 13 + case x86asm.R14, x86asm.R14L: + return 14 + case x86asm.R15, x86asm.R15L: + return 15 + case x86asm.RSP, x86asm.ESP: + return 16 + case x86asm.RIP: + return 17 + default: + return 0 + } +} + +type RegsState struct { + regs [18]regState +} + +func (r *RegsState) Set(reg x86asm.Reg, value, loadedFrom uint64) { + r.regs[regIndex(reg)].Value = value + r.regs[regIndex(reg)].LoadedFrom = loadedFrom +} + +func (r *RegsState) Get(reg x86asm.Reg) (value, loadedFrom uint64) { + return r.regs[regIndex(reg)].Value, r.regs[regIndex(reg)].LoadedFrom +} + +type regState struct { + LoadedFrom uint64 + Value uint64 +} diff --git a/interpreter/python/decode.go b/interpreter/python/decode.go index 9412aa594..820ab0ffc 100644 --- a/interpreter/python/decode.go +++ b/interpreter/python/decode.go @@ -4,16 +4,22 @@ package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( - ah "go.opentelemetry.io/ebpf-profiler/armhelpers" - aa "golang.org/x/arch/arm64/arm64asm" + "errors" + "fmt" + "runtime" + ah "go.opentelemetry.io/ebpf-profiler/armhelpers" + "go.opentelemetry.io/ebpf-profiler/asm/amd" "go.opentelemetry.io/ebpf-profiler/libpf" + aa "golang.org/x/arch/arm64/arm64asm" + "golang.org/x/arch/x86/x86asm" ) -// decodeStubArgumentWrapperARM64 disassembles arm64 code and decodes the assumed value +// decodeStubArgumentARM64 disassembles arm64 code and decodes the assumed value // of requested argument. -func decodeStubArgumentWrapperARM64(code []byte, argNumber uint8, _, +func decodeStubArgumentARM64(code []byte, addrBase libpf.SymbolValue) libpf.SymbolValue { + const argNumber uint8 = 0 // The concept is to track the latest load offset for all X0..X30 registers. // These registers are used as the function arguments. Once the first branch // instruction (function call/tail jump) is found, the state of the requested @@ -100,3 +106,96 @@ func decodeStubArgumentWrapperARM64(code []byte, argNumber uint8, _, return libpf.SymbolValueInvalid } + +func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) ( + libpf.SymbolValue, error) { + targetRegister := x86asm.RDI + + instructionOffset := 0 + regs := amd.RegsState{} + + for instructionOffset < len(code) { + rem := code[instructionOffset:] + if ok, insnLen := amd.DecodeSkippable(rem); ok { + instructionOffset += insnLen + continue + } + + inst, err := x86asm.Decode(rem, 64) + if err != nil { + return 0, fmt.Errorf("failed to decode instruction at 0x%x : %w", + instructionOffset, err) + } + instructionOffset += inst.Len + regs.Set(x86asm.RIP, codeAddress+uint64(instructionOffset), 0) + + if inst.Op == x86asm.CALL || inst.Op == x86asm.JMP { + value, loadedFrom := regs.Get(targetRegister) + if loadedFrom != 0 { + return libpf.SymbolValue(loadedFrom), nil + } + return libpf.SymbolValue(value), nil + } + + if (inst.Op == x86asm.LEA || inst.Op == x86asm.MOV) && inst.Args[0] != nil { + if reg, ok := inst.Args[0].(x86asm.Reg); ok { + var value uint64 + var loadedFrom uint64 + + switch src := inst.Args[1].(type) { + case x86asm.Imm: + value = uint64(src) + case x86asm.Mem: + baseAddr, _ := regs.Get(src.Base) + displacement := uint64(src.Disp) + + if inst.Op == x86asm.MOV { + value = memoryBase + loadedFrom = baseAddr + displacement + if src.Index != 0 { + indexValue, _ := regs.Get(src.Index) + loadedFrom += indexValue * uint64(src.Scale) + } + } else if inst.Op == x86asm.LEA { + value = baseAddr + displacement + if src.Index != 0 { + indexValue, _ := regs.Get(src.Index) + value += indexValue * uint64(src.Scale) + } + } + + case x86asm.Reg: + value, _ = regs.Get(src) + } + + regs.Set(reg, value, loadedFrom) + } + } + + if inst.Op == x86asm.ADD && inst.Args[0] != nil && inst.Args[1] != nil { + if reg, ok0 := inst.Args[0].(x86asm.Reg); ok0 { + if _, ok1 := inst.Args[1].(x86asm.Mem); ok1 { + oldValue, _ := regs.Get(reg) + value := oldValue + memoryBase + regs.Set(reg, value, 0) + } + } + } + } + return 0, errors.New("no call/jump instructions found") +} + +func decodeStubArgumentWrapper( + code []byte, + codeAddress libpf.SymbolValue, + memoryBase libpf.SymbolValue, +) (libpf.SymbolValue, error) { + switch runtime.GOARCH { + case "arm64": + return decodeStubArgumentARM64(code, memoryBase), nil + case "amd64": + return decodeStubArgumentAMD64(code, uint64(codeAddress), uint64(memoryBase)) + default: + return libpf.SymbolValueInvalid, fmt.Errorf("unsupported arch %s", runtime.GOARCH) + } +} diff --git a/interpreter/python/decode_amd64.c b/interpreter/python/decode_amd64.c deleted file mode 100644 index 1cf7c2dd8..000000000 --- a/interpreter/python/decode_amd64.c +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build amd64 - -#include "../../zydis/Zydis.h" -#include "decode_amd64.h" - -#include - -// decode_stub_argument() will decode instructions from given code blob until an assignment -// for the given argument register is found. The value loaded is then determined from the -// opcode. A call/jump instruction will terminate the finding as we are finding the argument -// to first function call (or tail call). -// Currently the following addressing schemes for the assignment are supported: -// 1) Loading virtual address with immediate value. This happens for non-PIC globals. -// 2) Loading RIP-relative virtual address. Happens for PIC/PIE globals. -// 3) Loading via pointer + displacement. Happens when the main state is given as argument, -// and the value is loaded from it. In this case 'memory_base' should be the address of -// the global state variable. -uint64_t decode_stub_argument(const uint8_t* code, size_t codesz, uint8_t argument_no, - uint64_t rip_base, uint64_t memory_base) { - ZydisDecoder decoder; - ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); - - // Argument number to x86_64 calling convention register mapping. - ZydisRegister target_register64, target_register32; - switch (argument_no) { - case 0: - target_register64 = ZYDIS_REGISTER_RDI; - target_register32 = ZYDIS_REGISTER_EDI; - break; - case 1: - target_register64 = ZYDIS_REGISTER_RSI; - target_register32 = ZYDIS_REGISTER_ESI; - break; - case 2: - target_register64 = ZYDIS_REGISTER_RDX; - target_register32 = ZYDIS_REGISTER_EDX; - break; - default: - return 0; - } - - // Iterate instructions - ZydisDecodedInstruction instr; - ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - ZyanUSize instruction_offset = 0; - while (ZYAN_SUCCESS(ZydisDecoderDecodeFull(&decoder, code + instruction_offset, - codesz - instruction_offset, &instr, operands))) { - instruction_offset += instr.length; - if (instr.mnemonic == ZYDIS_MNEMONIC_CALL || - instr.mnemonic == ZYDIS_MNEMONIC_JMP) { - // Unexpected call/jmp indicating end of stub code - return 0; - } - if (!(instr.mnemonic == ZYDIS_MNEMONIC_LEA || - instr.mnemonic == ZYDIS_MNEMONIC_MOV) || - operands[0].type != ZYDIS_OPERAND_TYPE_REGISTER || - (operands[0].reg.value != target_register64 && - operands[0].reg.value != target_register32)) { - // Only "LEA/MOV target_reg, ..." meaningful - continue; - } - if (operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) { - // MOV target_reg, immediate - return operands[1].imm.value.u; - } - if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && - operands[1].mem.disp.has_displacement) { - if (operands[1].mem.base == ZYDIS_REGISTER_RIP) { - // MOV/LEA target_reg, [RIP + XXXX] - return rip_base + instruction_offset + operands[1].mem.disp.value; - } else if (memory_base) { - // MOV/LEA target_reg, [REG + XXXX] - return memory_base + operands[1].mem.disp.value; - } - continue; - } - } - - return 0; -} diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go deleted file mode 100644 index 55080f246..000000000 --- a/interpreter/python/decode_amd64.go +++ /dev/null @@ -1,30 +0,0 @@ -//go:build amd64 - -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" - -import ( - "unsafe" - - "go.opentelemetry.io/ebpf-profiler/libpf" - _ "go.opentelemetry.io/ebpf-profiler/zydis" // links Zydis -) - -// #cgo CFLAGS: -g -Wall -// #include "decode_amd64.h" -// #include "../../support/ebpf/types.h" -import "C" - -func decodeStubArgumentWrapperX64(code []byte, argNumber uint8, symbolValue, - addrBase libpf.SymbolValue) libpf.SymbolValue { - return libpf.SymbolValue(C.decode_stub_argument( - (*C.uint8_t)(unsafe.Pointer(&code[0])), C.size_t(len(code)), - C.uint8_t(argNumber), C.uint64_t(symbolValue), C.uint64_t(addrBase))) -} - -func decodeStubArgumentWrapper(code []byte, argNumber uint8, symbolValue, - addrBase libpf.SymbolValue) libpf.SymbolValue { - return decodeStubArgumentWrapperX64(code, argNumber, symbolValue, addrBase) -} diff --git a/interpreter/python/decode_amd64.h b/interpreter/python/decode_amd64.h deleted file mode 100644 index ffa9e737a..000000000 --- a/interpreter/python/decode_amd64.h +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build amd64 - -#ifndef __PYTHON_DECODE_X86_64__ -#define __PYTHON_DECODE_X86_64__ - -#include - -uint64_t decode_stub_argument(const uint8_t* code, size_t codesz, uint8_t argument_no, uint64_t rip_base, uint64_t memory_base); - -#endif diff --git a/interpreter/python/decode_arm64.go b/interpreter/python/decode_arm64.go deleted file mode 100644 index a2461af11..000000000 --- a/interpreter/python/decode_arm64.go +++ /dev/null @@ -1,15 +0,0 @@ -//go:build arm64 - -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" - -import ( - "go.opentelemetry.io/ebpf-profiler/libpf" -) - -func decodeStubArgumentWrapper(code []byte, argNumber uint8, symbolValue, - addrBase libpf.SymbolValue) libpf.SymbolValue { - return decodeStubArgumentWrapperARM64(code, argNumber, symbolValue, addrBase) -} diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index 96ae9dabb..e2cec192d 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -1,34 +1,36 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +//nolint:lll package python import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.opentelemetry.io/ebpf-profiler/libpf" ) func TestAnalyzeArm64Stubs(t *testing.T) { - val := decodeStubArgumentWrapperARM64( + val := decodeStubArgumentARM64( []byte{ 0x40, 0x0a, 0x00, 0x90, 0x01, 0xd4, 0x43, 0xf9, 0x22, 0x60, 0x17, 0x91, 0x40, 0x00, 0x40, 0xf9, 0xa2, 0xff, 0xff, 0x17}, - 0, 0, 0) + 0) assert.Equal(t, libpf.SymbolValue(1496), val, "PyEval_ReleaseLock stub test") - val = decodeStubArgumentWrapperARM64( + val = decodeStubArgumentARM64( []byte{ 0x80, 0x12, 0x00, 0xb0, 0x02, 0xd4, 0x43, 0xf9, 0x41, 0xf4, 0x42, 0xf9, 0x61, 0x00, 0x00, 0xb4, 0x40, 0xc0, 0x17, 0x91, 0xad, 0xe4, 0xfe, 0x17}, - 0, 0, 0) + 0) assert.Equal(t, libpf.SymbolValue(1520), val, "PyGILState_GetThisThreadState test") // Python 3.10.12 on ARM64 Nix - val = decodeStubArgumentWrapperARM64( + val = decodeStubArgumentARM64( []byte{ 0x40, 0x1a, 0x00, 0xd0, // adrp x0, 0xffffa0eff000 0x00, 0xa0, 0x46, 0xf9, // ldr x0, [x0, #3392] @@ -39,6 +41,259 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0x00, 0x00, 0x80, 0xd2, // mov x0, #0x0 0xc0, 0x03, 0x5f, 0xd6, // ret }, - 0, 0, 0) + 0) assert.Equal(t, libpf.SymbolValue(604), val, "PyGILState_GetThisThreadState test") } + +func BenchmarkDecodeAmd64(b *testing.B) { + for i := 0; i < b.N; i++ { + code := []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 + 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> + 0x00, // 1bbbab: + 0x74, 0x0b, // 1bbbac: je 1bbbb9 + 0x8b, 0x3d, 0x78, 0x90, 0x1e, 0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> + 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 + } + rip := uint64(0x1bbba0) + val, _ := decodeStubArgumentAMD64( + code, + rip, + 0, + ) + if val != 0x3a4c2c { + b.Fail() + } + } +} + +func TestAmd64DecodeStub(t *testing.T) { + testdata := []struct { + name string + code []byte + rip uint64 + expected uint64 + expectedError string + }{ + { + name: "3.10.16 gcc12 enable-optimizations disable-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 + 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> + 0x00, // 1bbbab: + 0x74, 0x0b, // 1bbbac: je 1bbbb9 + 0x8b, 0x3d, 0x78, 0x90, 0x1e, 0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> + 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 + }, + rip: 0x1bbba0, + expected: 0x3a4c2c, + }, + { + name: "3.10.16 gcc12 disable-optimizations disable-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 172e50: endbr64 + 0x48, 0x83, 0x3d, 0x04, 0xef, 0x24, 0x00, // 172e54: cmp QWORD PTR [rip+0x24ef04],0x0 # 3c1d60 <_PyRuntime+0x240> + 0x00, // 172e5b: + 0x74, 0x12, // 172e5c: je 172e70 + 0x48, 0x8d, 0x3d, 0x03, 0xef, 0x24, 0x00, // 172e5e: lea rdi,[rip+0x24ef03] # 3c1d68 <_PyRuntime+0x248> + 0xe9, 0x86, 0x1e, 0x01, 0x00, // 172e65: jmp 184cf0 + }, + rip: 0x172e50, + expected: 0x3c1d68, + }, + { + name: "3.10.16 clang16 disable-optimizations enabled-shared", + code: []byte{ + 0x48, 0x8b, 0x05, 0x99, 0x70, 0x16, 0x00, // 1adc90: mov rax,QWORD PTR [rip+0x167099] # 314d30 <_PyRuntime@@Base-0x33668> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1adc97: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 1adc9e: + 0x74, 0x11, // 1adc9f: je 1adcb2 + 0xbf, 0x48, 0x02, 0x00, 0x00, // 1adca1: mov edi,0x248 + 0x48, 0x03, 0x3d, 0x83, 0x70, 0x16, 0x00, // 1adca6: add rdi,QWORD PTR [rip+0x167083] # 314d30 <_PyRuntime@@Base-0x33668> + 0xe9, 0x2e, 0x41, 0xeb, 0xff, // 1adcad: jmp 61de0 + }, + rip: 0x1adc90, + expected: 0x248, + }, + { + name: "3.12.8 gcc12 disable-optimizations enabled-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 2e25d0: endbr64 + 0x48, 0x8b, 0x05, 0x25, 0x27, 0x27, 0x00, // 2e25d4: mov rax,QWORD PTR [rip+0x272725] # 554d00 <_PyRuntime@@Base-0x1004e0> + 0x53, // 2e25db: push rbx + 0x48, 0x8d, 0x98, 0x08, 0x06, 0x00, 0x00, // 2e25dc: lea rbx,[rax+0x608] + 0x48, 0x89, 0xdf, // 2e25e3: mov rdi,rbx + 0xe8, 0x95, 0x78, 0xe2, 0xff, // 2e25e6: call 109e80 + }, + rip: 0x2e25d0, + expected: 0x608, + }, + { + name: "3.10.16 clang18 enable-optimizations enabled-shared", + code: []byte{ + 0x48, 0x8b, 0x05, 0xd9, 0x80, 0x31, 0x00, // cac50: mov rax,QWORD PTR [rip+0x3180d9] # 3e2d30 <_PyRuntime@@Base-0x32c28> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // cac57: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // cac5e: + 0x74, 0x0b, // cac5f: je cac6c + 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // cac61: mov edi,DWORD PTR [rax+0x24c] + 0xe9, 0x24, 0x55, 0xf9, 0xff, // cac67: jmp 60190 + }, + rip: 0xcac50, + expected: 0x24c, + }, + { + name: "3.10.16 clang18 enable-optimizations disable-shared", + code: []byte{ + 0x48, 0x83, 0x3d, 0x98, 0xc5, 0x36, 0x00, // 92000: cmp QWORD PTR [rip+0x36c598],0x0 # 3fe5a0 <_PyRuntime+0x240> + 0x00, // 92007: + 0x74, 0x0b, // 92008: je 92015 + 0x8b, 0x3d, 0x9c, 0xc5, 0x36, 0x00, // 9200a: mov edi,DWORD PTR [rip+0x36c59c] # 3fe5ac <_PyRuntime+0x24c> + 0xe9, 0x4b, 0x70, 0xfc, 0xff, // 92010: jmp 59060 + }, + rip: 0x92000, + expected: 0x3fe5ac, + }, + { + name: "3.10.16 clang16 disable-optimizations disable-shared", + code: []byte{ + 0x48, 0x8d, 0x05, 0x69, 0x19, 0x21, 0x00, // 129bc0: lea rax,[rip+0x211969] # 33b530 <_PyRuntime> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 129bc7: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 129bce: + 0x74, 0x11, // 129bcf: je 129be2 + 0xbf, 0x48, 0x02, 0x00, 0x00, // 129bd1: mov edi,0x248 + 0x48, 0x03, 0x3d, 0x53, 0x03, 0x1e, 0x00, // 129bd6: add rdi,QWORD PTR [rip+0x1e0353] # 309f30 <_DYNAMIC+0x328> + 0xe9, 0x8e, 0xec, 0x00, 0x00, // 129bdd: jmp 138870 + }, + rip: 0x129bc0, + expected: 0x248, + }, + { + name: "3.12.8 clang16 disable-optimizations disable-shared", + code: []byte{ + 0x53, // 2a20d0: push rbx + 0xbb, 0x08, 0x06, 0x00, 0x00, // 2a20d1: mov ebx,0x608 + 0x48, 0x03, 0x1d, 0x0b, 0x1e, 0x25, 0x00, // 2a20d6: add rbx,QWORD PTR [rip+0x251e0b] # 4f3ee8 <_DYNAMIC+0x368> + 0x48, 0x89, 0xdf, // 2a20dd: mov rdi,rbx + 0xe8, 0x7b, 0x41, 0x01, 0x00, // 2a20e0: call 2b6260 + }, + rip: 0x2a20d0, + expected: 0x608, + }, + { + name: "3.10.16 clang16 disable-optimizations enabled-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 2079c0: endbr64 + 0x48, 0x8b, 0x05, 0x65, 0x03, 0x18, 0x00, // 2079c4: mov rax,QWORD PTR [rip+0x180365] # 387d30 <_PyRuntime@@Base-0x34950> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 2079cb: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 2079d2: + 0x74, 0x13, // 2079d3: je 2079e8 + 0x48, 0x8d, 0xb8, 0x48, 0x02, 0x00, 0x00, // 2079d5: lea rdi,[rax+0x248] + 0xe9, 0x8f, 0x1f, 0xe6, 0xff, // 2079dc: jmp 69970 + }, + rip: 0x2079c0, + expected: 0x248, + }, + { + name: "3.12.8 gcc12 disable-optimizations disable-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 2eb960: endbr64 + 0x53, // 2eb964: push rbx + 0x48, 0x8d, 0x1d, 0xbc, 0x21, 0x37, 0x00, // 2eb965: lea rbx,[rip+0x3721bc] # 65db28 <_PyRuntime+0x608> + 0x48, 0x89, 0xdf, // 2eb96c: mov rdi,rbx + 0xe8, 0x0c, 0x7f, 0x01, 0x00, // 2eb96f: call 303880 + }, + rip: 0x2eb960, + expected: 0x65db28, + }, + { + name: "3.10.16 gcc12 enable-optimizations enabled-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 1c03c0: endbr64 + 0x48, 0x8b, 0x05, 0x5d, 0x69, 0x1b, 0x00, // 1c03c4: mov rax,QWORD PTR [rip+0x1b695d] # 376d28 <_PyRuntime@@Base-0x32758> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1c03cb: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 1c03d2: + 0x74, 0x0b, // 1c03d3: je 1c03e0 + 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // 1c03d5: mov edi,DWORD PTR [rax+0x24c] + 0xe9, 0x10, 0xb4, 0xe9, 0xff, // 1c03db: jmp 5b7f0 + }, + rip: 0x1c03c0, + expected: 0x24c, + }, + { + name: "empty code", + code: nil, + expectedError: "no call/jump instructions found", + }, + { + name: "no call/jump instructions found", + code: []byte{ + 0x48, 0xC7, 0xC7, 0xEF, 0xEF, 0xEF, 0x00, // mov rdi, 0xefefef + }, + expectedError: "no call/jump instructions found", + }, + { + name: "bad instruction", + code: []byte{ + 0x48, 0xC7, 0xC7, 0xEF, 0xEF, 0xEF, 0x00, // mov rdi, 0xefefef + 0xea, // :shrug: + }, + expectedError: "failed to decode instruction at 0x7", + }, + { + name: "synthetic mov scale index", + code: []byte{ + 0x48, 0xC7, 0xC0, 0xCA, 0xCA, 0x00, 0x00, // mov rax, 0xcaca + 0xBB, 0x00, 0x00, 0x00, 0x5E, // mov ebx, 0x5e000000 + 0x67, 0x48, 0x8B, 0x7C, 0x43, 0x05, // mov rdi, qword ptr [ebx + eax*2 + 5] + 0xEB, 0x00, // jmp 0x14 + }, + expected: 0xCACA*2 + 0x5E000000 + 5, + }, + { + name: "synthetic lea scale index", + code: []byte{ + 0x48, 0xC7, 0xC0, 0xFE, 0xCA, 0x00, 0x00, // mov rax, 0xcafe + 0xBB, 0x00, 0x00, 0x00, 0x6E, // mov ebx, 0x6e000000 + 0x67, 0x48, 0x8D, 0x7C, 0x43, 0x07, // lea rdi, [ebx + eax*2 + 7] + 0xE8, 0xFB, 0xFF, 0xFF, 0xFF, // call 0x12 + }, + expected: 0xCAFE*2 + 0x6E000000 + 7, + }, + { + name: "synthetic lea edi, ... scale index", + code: []byte{ + 0xB8, 0xEF, 0x00, 0x00, 0x00, // mov eax, 0xef + 0xBB, 0x2A, 0x00, 0x00, 0x00, // mov ebx, 0x2a + 0x67, 0x8D, 0x7C, 0x43, 0x07, // lea edi, [ebx + eax*2 + 7] + 0xEB, 0xEF, // jmp 0 + }, + expected: 0xEF*2 + 0x2a + 7, + }, + } + + for _, td := range testdata { + t.Run(td.name, func(t *testing.T) { + val, err := decodeStubArgumentAMD64( + td.code, + td.rip, + 0, // NULL pointer as mem + ) + if td.expectedError != "" { + require.Error(t, err) + require.Contains(t, err.Error(), td.expectedError) + } else { + require.NoError(t, err) + } + assert.Equal(t, td.expected, uint64(val)) + }) + } +} + +func FuzzDecodeAmd(f *testing.F) { + f.Fuzz(func(_ *testing.T, code []byte, rip uint64) { + _, err := decodeStubArgumentAMD64(code, rip, 0) + if err != nil { + return + } + }) +} diff --git a/interpreter/python/python.go b/interpreter/python/python.go index e5db89604..0020615f6 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -6,6 +6,7 @@ package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( "bytes" "debug/elf" + "encoding/hex" "errors" "fmt" "hash/fnv" @@ -655,33 +656,39 @@ func (d *pythonData) readIntrospectionData(ef *pfelf.File, symbol libpf.SymbolNa // decodeStub will resolve a given symbol, extract the code for it, and analyze // the code to resolve specified argument parameter to the first jump/call. -func decodeStub(ef *pfelf.File, addrBase libpf.SymbolValue, symbolName libpf.SymbolName, - argNumber uint8) libpf.SymbolValue { - symbolValue, err := ef.LookupSymbolAddress(symbolName) +func decodeStub( + ef *pfelf.File, + memoryBase libpf.SymbolValue, + symbolName libpf.SymbolName, +) (libpf.SymbolValue, error) { + codeAddress, err := ef.LookupSymbolAddress(symbolName) if err != nil { - return libpf.SymbolValueInvalid + return libpf.SymbolValueInvalid, fmt.Errorf("lookup %s failed: %v", + symbolName, err) } code := make([]byte, 64) - if _, err := ef.ReadVirtualMemory(code, int64(symbolValue)); err != nil { - return libpf.SymbolValueInvalid + if _, err = ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { + return libpf.SymbolValueInvalid, fmt.Errorf("reading %s 0x%x code failed: %v", + symbolName, codeAddress, err) } - - value := decodeStubArgumentWrapper(code, argNumber, symbolValue, addrBase) + value, err := decodeStubArgumentWrapper(code, codeAddress, memoryBase) // Sanity check the value range and alignment - if value%4 != 0 { - return libpf.SymbolValueInvalid + if err != nil || value%4 != 0 { + return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s 0x%x %s failed (0x%x): %v", + symbolName, codeAddress, hex.Dump(code), value, err) } // If base symbol (_PyRuntime) is not provided, accept any found value. - if addrBase == 0 && value != 0 { - return value + if memoryBase == 0 && value != 0 { + return value, nil } // Check that the found value is within reasonable distance from the given symbol. - if value > addrBase && value < addrBase+4096 { - return value + if value > memoryBase && value < memoryBase+4096 { + return value, nil } - return libpf.SymbolValueInvalid + return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s 0x%x %s failed (0x%x)", + symbolName, codeAddress, hex.Dump(code), value) } func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) { @@ -736,9 +743,9 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr } // Calls first: PyThread_tss_get(autoTSSKey) - autoTLSKey = decodeStub(ef, pyruntimeAddr, "PyGILState_GetThisThreadState", 0) + autoTLSKey, err = decodeStub(ef, pyruntimeAddr, "PyGILState_GetThisThreadState") if autoTLSKey == libpf.SymbolValueInvalid { - return nil, errors.New("unable to resolve autoTLSKey") + return nil, fmt.Errorf("unable to resolve autoTLSKey %v", err) } if version >= pythonVer(3, 7) && autoTLSKey%8 == 0 { // On Python 3.7+, the call is to PyThread_tss_get, but can get optimized to diff --git a/tools/coredump/testdata/amd64/gcloud_sdk_502.0.0_slim_3.11.9_clang_18.1.8.json b/tools/coredump/testdata/amd64/gcloud_sdk_502.0.0_slim_3.11.9_clang_18.1.8.json new file mode 100644 index 000000000..1cf798807 --- /dev/null +++ b/tools/coredump/testdata/amd64/gcloud_sdk_502.0.0_slim_3.11.9_clang_18.1.8.json @@ -0,0 +1,76 @@ +{ + "coredump-ref": "1c875546fb8c3b22bb7ed7b86c3a7f4da9a68a3ec93bb4732f0bff6ee793d23a", + "threads": [ + { + "lwp": 11, + "frames": [ + "fib+1 in /mnt/trash/fib.py:2", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "+6 in /mnt/trash/fib.py:7", + "libpython3.11.so.1.0+0x32b580", + "libpython3.11.so.1.0+0x3259e3", + "libpython3.11.so.1.0+0x37a8f9", + "libpython3.11.so.1.0+0x378e32", + "libpython3.11.so.1.0+0x378972", + "libpython3.11.so.1.0+0x397899", + "libpython3.11.so.1.0+0x397102", + "libpython3.11.so.1.0+0x39739e", + "libpython3.11.so.1.0+0x3973fb", + "libc.so.6+0x27249", + "libc.so.6+0x27304", + "python3+0x1088" + ] + } + ], + "modules": [ + { + "ref": "11ce00a6490d5e4ef941e1f51faaddf40c088a1376f028cbc001985b779397ce", + "local-path": "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/lib/libpython3.11.so.1.0" + }, + { + "ref": "df8e371a04bcf4ea2d455277ecc9cd47fc9b4c58ed27a7f4e6c8343122a4d270", + "local-path": "/usr/lib/x86_64-linux-gnu/libpthread.so.0" + }, + { + "ref": "067650d84b8f554cedf0b9ff26137bdd10cd03d4bbcdba1029a543c59d1798e5", + "local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6" + }, + { + "ref": "fe279657c804dcec88728eeb27187f983f6e5dc0c89575c4bd01aa6a8147b3a1", + "local-path": "/usr/lib/x86_64-linux-gnu/libutil.so.1" + }, + { + "ref": "2a7334caf9516a482110c769e92985b30e9d7d9d96a4227b93d04cce8af0701e", + "local-path": "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/bin/python3" + }, + { + "ref": "6445c275f2477ebf619b1e4ec6fe5a0e460b9745e360ef9b671cb5a2f9f362ae", + "local-path": "/usr/lib/x86_64-linux-gnu/librt.so.1" + }, + { + "ref": "1d25fd63234b59e4c581564c7a6d8f5c6cf36eee757e3d26f4b0808dd36a4896", + "local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6" + }, + { + "ref": "582f2d3d4edab86d601c54b37f04bd18fa2cda28be30e9f8c87df73c1c581354", + "local-path": "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + }, + { + "ref": "d71263682766154c159a63504fec543e3ea64a932e5f30d5f50758fab0405fa2", + "local-path": "/usr/lib/x86_64-linux-gnu/libdl.so.2" + } + ] +} diff --git a/tools/coredump/testdata/amd64/pyenv_3.12.9_gcc_13.3.0.json b/tools/coredump/testdata/amd64/pyenv_3.12.9_gcc_13.3.0.json new file mode 100644 index 000000000..7d7f152fd --- /dev/null +++ b/tools/coredump/testdata/amd64/pyenv_3.12.9_gcc_13.3.0.json @@ -0,0 +1,66 @@ +{ + "coredump-ref": "b21ceabeabae6be900f44f56bb52dd89065c71903603938135fb19b128575c6b", + "threads": [ + { + "lwp": 144534, + "frames": [ + "fib+1 in /home/korniltsev/trash/fib.py:2", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "+6 in /home/korniltsev/trash/fib.py:7", + "+0 in :1", + "libpython3.12.so.1.0+0x11243a", + "libpython3.12.so.1.0+0x28e7de", + "libpython3.12.so.1.0+0x2e9dc5", + "libpython3.12.so.1.0+0x2e9ed8", + "libpython3.12.so.1.0+0x2ecdbf", + "libpython3.12.so.1.0+0x2ed39f", + "libpython3.12.so.1.0+0x314d53", + "libpython3.12.so.1.0+0x315199", + "libpython3.12.so.1.0+0x31535d", + "libc.so.6+0x2a1c9", + "libc.so.6+0x2a28a", + "python3.12+0x1094" + ] + } + ], + "modules": [ + { + "ref": "474c778ae8a8baf4d26717c9e1011846268d7f0a3767f73b30a31d124a65d169", + "local-path": "/home/korniltsev/.pyenv/versions/3.12.9/bin/python3.12" + }, + { + "ref": "1a2eb220c22ae7ba8aaf8b243e57dbc25542f8c9c269ed6100c7ad5aea7c3ada", + "local-path": "/home/korniltsev/.pyenv/versions/3.12.9/lib/libpython3.12.so.1.0" + }, + { + "ref": "e7a914a33fd4f6d25057b8d48c7c5f3d55ab870ec4ee27693d6c5f3a532e6226", + "local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6" + }, + { + "ref": "c09178edd7fbc58aa8415f4bbe54dd76c5ff6c6398ba3e56e5a4743fd7e9adfc", + "local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6" + }, + { + "ref": "6c5e1b4528b704dc7081aa45b5037bda4ea9cad78ca562b4fb6b0dbdbfc7e7e7", + "local-path": "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + } + ] +} diff --git a/tools/coredump/testdata/amd64/pyenv_3.13.2_clang_21.0.0.json b/tools/coredump/testdata/amd64/pyenv_3.13.2_clang_21.0.0.json new file mode 100644 index 000000000..c7bb497be --- /dev/null +++ b/tools/coredump/testdata/amd64/pyenv_3.13.2_clang_21.0.0.json @@ -0,0 +1,62 @@ +{ + "coredump-ref": "dc8dd740e0456edc70077e8e453facfae0775a8bf50387f05e9f4e948c3ae700", + "threads": [ + { + "lwp": 167127, + "frames": [ + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "+6 in /home/korniltsev/trash/fib.py:7", + "ffffffffffffffffffffffffffffffff+0x0", + "libpython3.13.so.1.0+0x1f9740", + "libpython3.13.so.1.0+0x1f450c", + "libpython3.13.so.1.0+0x260fdc", + "libpython3.13.so.1.0+0x260cd7", + "libpython3.13.so.1.0+0x25e751", + "libpython3.13.so.1.0+0x25e262", + "libpython3.13.so.1.0+0x283a63", + "libpython3.13.so.1.0+0x2832a2", + "libpython3.13.so.1.0+0x283589", + "libpython3.13.so.1.0+0x2835dc", + "libc.so.6+0x2a1c9", + "libc.so.6+0x2a28a", + "python3.13+0x1074" + ] + } + ], + "modules": [ + { + "ref": "b76cc07b46f4a2f32a16f3a4df617353d454f7890ebd92f49a96f8f7410613f4", + "local-path": "/home/korniltsev/.pyenv/versions/3.13.2/bin/python3.13" + }, + { + "ref": "e7a914a33fd4f6d25057b8d48c7c5f3d55ab870ec4ee27693d6c5f3a532e6226", + "local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6" + }, + { + "ref": "c09178edd7fbc58aa8415f4bbe54dd76c5ff6c6398ba3e56e5a4743fd7e9adfc", + "local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6" + }, + { + "ref": "67997ac257675599247dc0445f4d2705f67e203678fb9920162bc2cd7f9d0009", + "local-path": "/home/korniltsev/.pyenv/versions/3.13.2/lib/libpython3.13.so.1.0" + }, + { + "ref": "6c5e1b4528b704dc7081aa45b5037bda4ea9cad78ca562b4fb6b0dbdbfc7e7e7", + "local-path": "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + } + ] +}