From b8f926a4081711a69447acaec0da043c1948572d Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Thu, 20 Mar 2025 11:39:13 +0700 Subject: [PATCH 01/26] python stub decoding improvements --- interpreter/python/decode_amd64.c | 152 ++++++++++++++++++++--------- interpreter/python/decode_amd64.go | 13 +-- interpreter/python/decode_amd64.h | 2 +- interpreter/python/decode_arm64.go | 4 +- interpreter/python/decode_test.go | 102 +++++++++++++++++++ interpreter/python/python.go | 7 +- 6 files changed, 219 insertions(+), 61 deletions(-) diff --git a/interpreter/python/decode_amd64.c b/interpreter/python/decode_amd64.c index 1cf7c2dd8..684dcf9e0 100644 --- a/interpreter/python/decode_amd64.c +++ b/interpreter/python/decode_amd64.c @@ -6,7 +6,40 @@ #include "../../zydis/Zydis.h" #include "decode_amd64.h" +// #define DECODE_AMD_DEBUG + +#if defined(DECODE_AMD_DEBUG) #include +#endif + +static int reg_index(ZydisRegister reg) +{ + switch (reg) { + case ZYDIS_REGISTER_RAX: + case ZYDIS_REGISTER_EAX: return 1; + case ZYDIS_REGISTER_RBX: + case ZYDIS_REGISTER_EBX: return 2; + case ZYDIS_REGISTER_RCX: + case ZYDIS_REGISTER_ECX: return 3; + case ZYDIS_REGISTER_RDX: + case ZYDIS_REGISTER_EDX: return 4; + case ZYDIS_REGISTER_RDI: + case ZYDIS_REGISTER_EDI: return 5; + case ZYDIS_REGISTER_RSI: + case ZYDIS_REGISTER_ESI: return 6; + case ZYDIS_REGISTER_RBP: + case ZYDIS_REGISTER_EBP: return 7; + case ZYDIS_REGISTER_RSP: + case ZYDIS_REGISTER_ESP: return 8; + case ZYDIS_REGISTER_RIP: return 9; + default: return 0; + } +} + +struct reg_state { + ZyanU64 loaded_from; + ZyanU64 value; +}; // decode_stub_argument() will decode instructions from given code blob until an assignment // for the given argument register is found. The value loaded is then determined from the @@ -18,66 +51,89 @@ // 3) Loading via pointer + displacement. Happens when the main state is given as argument, // and the value is loaded from it. 
In this case 'memory_base' should be the address of // the global state variable. -uint64_t decode_stub_argument(const uint8_t* code, size_t codesz, uint8_t argument_no, - uint64_t rip_base, uint64_t memory_base) { +// todo update comment +// todo rewrite in go +// todo add coredump tests +uint64_t decode_stub_argument( + const uint8_t *code, + size_t code_sz, + uint64_t code_address, + uint64_t memory_base) +{ ZydisDecoder decoder; ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); // Argument number to x86_64 calling convention register mapping. - ZydisRegister target_register64, target_register32; - switch (argument_no) { - case 0: - target_register64 = ZYDIS_REGISTER_RDI; - target_register32 = ZYDIS_REGISTER_EDI; - break; - case 1: - target_register64 = ZYDIS_REGISTER_RSI; - target_register32 = ZYDIS_REGISTER_ESI; - break; - case 2: - target_register64 = ZYDIS_REGISTER_RDX; - target_register32 = ZYDIS_REGISTER_EDX; - break; - default: - return 0; - } + ZydisRegister target_register64 = ZYDIS_REGISTER_RDI; // Iterate instructions ZydisDecodedInstruction instr; ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; ZyanUSize instruction_offset = 0; - while (ZYAN_SUCCESS(ZydisDecoderDecodeFull(&decoder, code + instruction_offset, - codesz - instruction_offset, &instr, operands))) { - instruction_offset += instr.length; - if (instr.mnemonic == ZYDIS_MNEMONIC_CALL || - instr.mnemonic == ZYDIS_MNEMONIC_JMP) { - // Unexpected call/jmp indicating end of stub code - return 0; - } - if (!(instr.mnemonic == ZYDIS_MNEMONIC_LEA || - instr.mnemonic == ZYDIS_MNEMONIC_MOV) || - operands[0].type != ZYDIS_OPERAND_TYPE_REGISTER || - (operands[0].reg.value != target_register64 && - operands[0].reg.value != target_register32)) { - // Only "LEA/MOV target_reg, ..." 
meaningful - continue; + struct reg_state regs[32] = {}; + while (ZYAN_SUCCESS( + ZydisDecoderDecodeFull( + &decoder, code + instruction_offset, code_sz - instruction_offset, &instr, operands))) { + #if defined(DECODE_AMD_DEBUG) + ZydisDisassembledInstruction dbgi = {}; + if (ZYAN_SUCCESS( + ZydisDisassembleIntel( + ZYDIS_MACHINE_MODE_LONG_64, + code_address + instruction_offset, + code + instruction_offset, + code_sz - instruction_offset, + &dbgi))) { + printf("%-12p %s\n", (void *)(code_address + instruction_offset), dbgi.text); + fflush(stdout); } - if (operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) { - // MOV target_reg, immediate - return operands[1].imm.value.u; + #endif + instruction_offset += instr.length; + regs[reg_index(ZYDIS_REGISTER_RIP)].value = code_address + instruction_offset; + if (instr.mnemonic == ZYDIS_MNEMONIC_CALL || instr.mnemonic == ZYDIS_MNEMONIC_JMP) { + if (regs[reg_index(target_register64)].loaded_from) { + return regs[reg_index(target_register64)].loaded_from; + } + return regs[reg_index(target_register64)].value; } - if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && - operands[1].mem.disp.has_displacement) { - if (operands[1].mem.base == ZYDIS_REGISTER_RIP) { - // MOV/LEA target_reg, [RIP + XXXX] - return rip_base + instruction_offset + operands[1].mem.disp.value; - } else if (memory_base) { - // MOV/LEA target_reg, [REG + XXXX] - return memory_base + operands[1].mem.disp.value; + if ( + (instr.mnemonic == ZYDIS_MNEMONIC_LEA || instr.mnemonic == ZYDIS_MNEMONIC_MOV) && + operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER) { + + ZyanU64 v = 0; + ZyanU64 loaded_from = 0; + if (operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) { + v = operands[1].imm.value.u; + } + if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && operands[1].mem.disp.has_displacement) { + ZyanU64 at = regs[reg_index(operands[1].mem.base)].value + operands[1].mem.disp.value; + if (instr.mnemonic == ZYDIS_MNEMONIC_MOV) { + // todo: do not assume that we're reading a 
ptr to memory_base + v = memory_base; + loaded_from = at; + } + if (instr.mnemonic == ZYDIS_MNEMONIC_LEA) { + v = at; + } } - continue; + if (operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER) { + v = regs[reg_index(operands[1].reg.value)].value; + } + #if defined(DECODE_AMD_DEBUG) + printf(" | regs[%d] = %lx\n", reg_index(operands[0].reg.value), v); + #endif + regs[reg_index(operands[0].reg.value)].value = v; + regs[reg_index(operands[0].reg.value)].loaded_from = loaded_from; + } + if (instr.mnemonic == ZYDIS_MNEMONIC_ADD && instr.operand_count == 3 && operands[0].type == + ZYDIS_OPERAND_TYPE_REGISTER && operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY) { + // todo: do not assume that we're reading a ptr to memory_base + ZyanU64 v = regs[reg_index(operands[0].reg.value)].value + memory_base; + regs[reg_index(operands[0].reg.value)].value = v; + regs[reg_index(operands[0].reg.value)].loaded_from = 0; + #if defined(DECODE_AMD_DEBUG) + printf(" | regs[%d] = %lx\n", reg_index(operands[0].reg.value), v); + #endif } } - return 0; -} +} \ No newline at end of file diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go index 55080f246..4ba5d7cac 100644 --- a/interpreter/python/decode_amd64.go +++ b/interpreter/python/decode_amd64.go @@ -17,14 +17,15 @@ import ( // #include "../../support/ebpf/types.h" import "C" -func decodeStubArgumentWrapperX64(code []byte, argNumber uint8, symbolValue, +func decodeStubArgumentWrapperX64(code []byte, symbolValue, addrBase libpf.SymbolValue) libpf.SymbolValue { + if len(code) == 0 { + return 0 + } return libpf.SymbolValue(C.decode_stub_argument( - (*C.uint8_t)(unsafe.Pointer(&code[0])), C.size_t(len(code)), - C.uint8_t(argNumber), C.uint64_t(symbolValue), C.uint64_t(addrBase))) + (*C.uint8_t)(unsafe.Pointer(&code[0])), C.size_t(len(code)), C.uint64_t(symbolValue), C.uint64_t(addrBase))) } -func decodeStubArgumentWrapper(code []byte, argNumber uint8, symbolValue, - addrBase libpf.SymbolValue) libpf.SymbolValue { - 
return decodeStubArgumentWrapperX64(code, argNumber, symbolValue, addrBase) +func decodeStubArgumentWrapper(code []byte, symbolValue, addrBase libpf.SymbolValue) libpf.SymbolValue { + return decodeStubArgumentWrapperX64(code, symbolValue, addrBase) } diff --git a/interpreter/python/decode_amd64.h b/interpreter/python/decode_amd64.h index ffa9e737a..dc3991b8a 100644 --- a/interpreter/python/decode_amd64.h +++ b/interpreter/python/decode_amd64.h @@ -8,6 +8,6 @@ #include -uint64_t decode_stub_argument(const uint8_t* code, size_t codesz, uint8_t argument_no, uint64_t rip_base, uint64_t memory_base); +uint64_t decode_stub_argument(const uint8_t* code, size_t code_sz, uint64_t code_address, uint64_t memory_base); #endif diff --git a/interpreter/python/decode_arm64.go b/interpreter/python/decode_arm64.go index a2461af11..749312f66 100644 --- a/interpreter/python/decode_arm64.go +++ b/interpreter/python/decode_arm64.go @@ -9,7 +9,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/libpf" ) -func decodeStubArgumentWrapper(code []byte, argNumber uint8, symbolValue, +func decodeStubArgumentWrapper(code []byte, symbolValue, addrBase libpf.SymbolValue) libpf.SymbolValue { - return decodeStubArgumentWrapperARM64(code, argNumber, symbolValue, addrBase) + return decodeStubArgumentWrapperARM64(code, 0, symbolValue, addrBase) } diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index 96ae9dabb..5239726e9 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -42,3 +42,105 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0, 0, 0) assert.Equal(t, libpf.SymbolValue(604), val, "PyGILState_GetThisThreadState test") } + +func TestAmd64(t *testing.T) { + testdata := []struct { + name string + code []byte + mem uint64 + rip uint64 + expected uint64 + }{ + { + name: "cpython 3.12 ", + code: []byte{ + + 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 + 0x53, // push rbx + 0x48, 0x8D, 0x1D, 0x1C, 0x42, 0x37, // lea rbx, __TMC_END__.autoTSSkey + 
0x00, // + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0xE8, 0x8C, 0x83, 0x01, 0x00, // call PyThread_tss_is_created + 0x85, 0xC0, // test eax, eax + 0x74, 0x10, // jz short loc_2F1928 + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0x5B, // pop rbx + }, + mem: 0, // not unused + rip: 0x2F1900, + expected: 0x665B28, + }, + { + name: "cpython 3.10 PyGILState_GetThisThreadState", + code: []byte{ + 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 + 0x48, 0x8B, 0x05, 0xB5, 0x2D, // mov rax, cs:runtime << mem + 0x18, 0x00, // + 0x48, 0x83, 0xB8, 0x40, 0x02, // cmp qword ptr [rax+240h], 0 + 0x00, 0x00, 0x00, // + 0x74, 0x13, // jz short loc_209F98 + 0x48, 0x8D, 0xB8, 0x48, 0x02, // lea rdi, [rax+248h] ; key + 0x00, 0x00, // + 0xE9, 0xFF, 0xF9, 0xE5, 0xFF, // jmp _PyThread_tss_get + }, + mem: 0x3C1680, + rip: 0x209F70, + expected: 0x3C18C8, + }, + { + name: "cpython 3.11.2 PyGILState_GetThisThreadState google/cloud-sdk:502.0.0-slim", + code: []byte{ + 0x48, 0x83, 0x3D, 0x00, 0x47, // cmp cs:qword_A5C968, 0 + 0x56, 0x00, 0x00, // + 0x0F, 0x84, 0xEF, 0xC1, 0xF2, // jz loc_42445D + 0xFF, // + 0x8B, 0x3D, 0x00, 0x47, 0x56, // mov edi, cs:dword_A5C974 + 0x00, // + 0xE9, 0x77, 0x84, 0xF2, 0xFF, // jmp _pthread_getspecific + }, + mem: 0, // not used + rip: 0x4F8260, + expected: 0xA5C974, + }, + { + name: "gcloud-sdk 515.0.0 3.12 bundled", + code: []byte{ + 0x53, // push rbx + 0xBB, 0x08, 0x06, 0x00, 0x00, // mov ebx, 608h + 0x48, 0x03, 0x1D, 0xBB, 0x10, 0x1A, 0x01, // add rbx, cs:_PyRuntime_ptr << mem + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0xE8, 0x6B, 0x81, 0xE5, 0xFF, // call _PyThread_tss_is_created + 0x85, 0xC0, // test eax, eax + 0x74, 0x09, // jz short loc_3C89E2 + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0x5B, // pop rbx + 0xE9, 0x3E, 0xA3, 0xE5, 0xFF, // jmp _PyThread_tss_get + }, + mem: 0x16905C0, + rip: 0x3C89C0, + expected: 0x16905C0 + 0x608, + }, + { + name: "gcloud-sdk 502 3.11 bundled", + code: []byte{ + 0x48, 0x8B, 0x05, 0x61, 0x9D, 0x12, 0x01, // mov rax, cs:_PyRuntime_ptr + 0x48, 0x83, 0xB8, 0x48, 
0x02, 0x00, 0x00, 0x00, // cmp qword ptr [rax+248h], 0 + 0x74, 0x11, // jz short loc_377D42 + 0xBF, 0x50, 0x02, 0x00, 0x00, // mov edi, 250h + 0x48, 0x03, 0x3D, 0x4B, 0x9D, 0x12, 0x01, // add rdi, cs:_PyRuntime_ptr << mem + 0xE9, 0x5E, 0x6E, 0xE9, 0xFF, // jmp _PyThread_tss_get + }, + mem: 0x15D36F0, + rip: 0x77D20, + expected: 0x15D36F0 + 0x250, + }, + } + + for _, td := range testdata { + t.Run(td.name, func(t *testing.T) { + val := decodeStubArgumentWrapperX64( + td.code, libpf.SymbolValue(td.rip), libpf.SymbolValue(td.mem)) + assert.Equal(t, libpf.SymbolValue(td.expected), val) + }) + } +} diff --git a/interpreter/python/python.go b/interpreter/python/python.go index c73813ead..1c3f7cfc9 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -652,8 +652,7 @@ func (d *pythonData) readIntrospectionData(ef *pfelf.File, symbol libpf.SymbolNa // decodeStub will resolve a given symbol, extract the code for it, and analyze // the code to resolve specified argument parameter to the first jump/call. 
-func decodeStub(ef *pfelf.File, addrBase libpf.SymbolValue, symbolName libpf.SymbolName, - argNumber uint8) libpf.SymbolValue { +func decodeStub(ef *pfelf.File, addrBase libpf.SymbolValue, symbolName libpf.SymbolName) libpf.SymbolValue { symbolValue, err := ef.LookupSymbolAddress(symbolName) if err != nil { return libpf.SymbolValueInvalid @@ -664,7 +663,7 @@ func decodeStub(ef *pfelf.File, addrBase libpf.SymbolValue, symbolName libpf.Sym return libpf.SymbolValueInvalid } - value := decodeStubArgumentWrapper(code, argNumber, symbolValue, addrBase) + value := decodeStubArgumentWrapper(code, symbolValue, addrBase) // Sanity check the value range and alignment if value%4 != 0 { @@ -733,7 +732,7 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr } // Calls first: PyThread_tss_get(autoTSSKey) - autoTLSKey = decodeStub(ef, pyruntimeAddr, "PyGILState_GetThisThreadState", 0) + autoTLSKey = decodeStub(ef, pyruntimeAddr, "PyGILState_GetThisThreadState") if autoTLSKey == libpf.SymbolValueInvalid { return nil, errors.New("unable to resolve autoTLSKey") } From 6a8e9fcb84527ccc35d5c831c9167fffed4df7c2 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Thu, 20 Mar 2025 14:37:15 +0700 Subject: [PATCH 02/26] lint --- interpreter/python/decode_amd64.c | 47 +++++----- interpreter/python/decode_amd64.go | 19 +++- interpreter/python/decode_amd64.h | 3 +- interpreter/python/decode_amd64_test.go | 118 ++++++++++++++++++++++++ interpreter/python/decode_test.go | 102 -------------------- interpreter/python/python.go | 16 ++-- 6 files changed, 165 insertions(+), 140 deletions(-) create mode 100644 interpreter/python/decode_amd64_test.go diff --git a/interpreter/python/decode_amd64.c b/interpreter/python/decode_amd64.c index 684dcf9e0..c6235b092 100644 --- a/interpreter/python/decode_amd64.c +++ b/interpreter/python/decode_amd64.c @@ -9,7 +9,7 @@ // #define DECODE_AMD_DEBUG #if defined(DECODE_AMD_DEBUG) -#include + #include #endif static int 
reg_index(ZydisRegister reg) @@ -55,10 +55,7 @@ struct reg_state { // todo rewrite in go // todo add coredump tests uint64_t decode_stub_argument( - const uint8_t *code, - size_t code_sz, - uint64_t code_address, - uint64_t memory_base) + const uint8_t *code, size_t code_sz, uint64_t code_address, uint64_t memory_base) { ZydisDecoder decoder; ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); @@ -71,22 +68,20 @@ uint64_t decode_stub_argument( ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; ZyanUSize instruction_offset = 0; struct reg_state regs[32] = {}; - while (ZYAN_SUCCESS( - ZydisDecoderDecodeFull( - &decoder, code + instruction_offset, code_sz - instruction_offset, &instr, operands))) { - #if defined(DECODE_AMD_DEBUG) + while (ZYAN_SUCCESS(ZydisDecoderDecodeFull( + &decoder, code + instruction_offset, code_sz - instruction_offset, &instr, operands))) { +#if defined(DECODE_AMD_DEBUG) ZydisDisassembledInstruction dbgi = {}; - if (ZYAN_SUCCESS( - ZydisDisassembleIntel( - ZYDIS_MACHINE_MODE_LONG_64, - code_address + instruction_offset, - code + instruction_offset, - code_sz - instruction_offset, - &dbgi))) { + if (ZYAN_SUCCESS(ZydisDisassembleIntel( + ZYDIS_MACHINE_MODE_LONG_64, + code_address + instruction_offset, + code + instruction_offset, + code_sz - instruction_offset, + &dbgi))) { printf("%-12p %s\n", (void *)(code_address + instruction_offset), dbgi.text); fflush(stdout); } - #endif +#endif instruction_offset += instr.length; regs[reg_index(ZYDIS_REGISTER_RIP)].value = code_address + instruction_offset; if (instr.mnemonic == ZYDIS_MNEMONIC_CALL || instr.mnemonic == ZYDIS_MNEMONIC_JMP) { @@ -107,7 +102,6 @@ uint64_t decode_stub_argument( if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && operands[1].mem.disp.has_displacement) { ZyanU64 at = regs[reg_index(operands[1].mem.base)].value + operands[1].mem.disp.value; if (instr.mnemonic == ZYDIS_MNEMONIC_MOV) { - // todo: do not assume that we're reading a ptr to memory_base v 
= memory_base; loaded_from = at; } @@ -118,21 +112,22 @@ uint64_t decode_stub_argument( if (operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER) { v = regs[reg_index(operands[1].reg.value)].value; } - #if defined(DECODE_AMD_DEBUG) +#if defined(DECODE_AMD_DEBUG) printf(" | regs[%d] = %lx\n", reg_index(operands[0].reg.value), v); - #endif +#endif regs[reg_index(operands[0].reg.value)].value = v; regs[reg_index(operands[0].reg.value)].loaded_from = loaded_from; } - if (instr.mnemonic == ZYDIS_MNEMONIC_ADD && instr.operand_count == 3 && operands[0].type == - ZYDIS_OPERAND_TYPE_REGISTER && operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY) { - // todo: do not assume that we're reading a ptr to memory_base + if ( + instr.mnemonic == ZYDIS_MNEMONIC_ADD && instr.operand_count == 3 && + operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && + operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY) { ZyanU64 v = regs[reg_index(operands[0].reg.value)].value + memory_base; - regs[reg_index(operands[0].reg.value)].value = v; + regs[reg_index(operands[0].reg.value)].value = v; regs[reg_index(operands[0].reg.value)].loaded_from = 0; - #if defined(DECODE_AMD_DEBUG) +#if defined(DECODE_AMD_DEBUG) printf(" | regs[%d] = %lx\n", reg_index(operands[0].reg.value), v); - #endif +#endif } } return 0; diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go index 4ba5d7cac..e7c754601 100644 --- a/interpreter/python/decode_amd64.go +++ b/interpreter/python/decode_amd64.go @@ -17,15 +17,24 @@ import ( // #include "../../support/ebpf/types.h" import "C" -func decodeStubArgumentWrapperX64(code []byte, symbolValue, - addrBase libpf.SymbolValue) libpf.SymbolValue { +func decodeStubArgumentWrapperX64( + code []byte, + codeAddress, + memoryBase libpf.SymbolValue, +) libpf.SymbolValue { if len(code) == 0 { return 0 } return libpf.SymbolValue(C.decode_stub_argument( - (*C.uint8_t)(unsafe.Pointer(&code[0])), C.size_t(len(code)), C.uint64_t(symbolValue), C.uint64_t(addrBase))) + 
(*C.uint8_t)(unsafe.Pointer(&code[0])), + C.size_t(len(code)), + C.uint64_t(codeAddress), + C.uint64_t(memoryBase)), + ) } -func decodeStubArgumentWrapper(code []byte, symbolValue, addrBase libpf.SymbolValue) libpf.SymbolValue { - return decodeStubArgumentWrapperX64(code, symbolValue, addrBase) +func decodeStubArgumentWrapper(code []byte, + codeAddress, memoryBase libpf.SymbolValue, +) libpf.SymbolValue { + return decodeStubArgumentWrapperX64(code, codeAddress, memoryBase) } diff --git a/interpreter/python/decode_amd64.h b/interpreter/python/decode_amd64.h index dc3991b8a..6bd3e0f7a 100644 --- a/interpreter/python/decode_amd64.h +++ b/interpreter/python/decode_amd64.h @@ -8,6 +8,7 @@ #include -uint64_t decode_stub_argument(const uint8_t* code, size_t code_sz, uint64_t code_address, uint64_t memory_base); +uint64_t decode_stub_argument( + const uint8_t *code, size_t code_sz, uint64_t code_address, uint64_t memory_base); #endif diff --git a/interpreter/python/decode_amd64_test.go b/interpreter/python/decode_amd64_test.go new file mode 100644 index 000000000..14c06ea7e --- /dev/null +++ b/interpreter/python/decode_amd64_test.go @@ -0,0 +1,118 @@ +//go:build amd64 + +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package python + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/ebpf-profiler/libpf" +) + +func TestAmd64DecodeStub(t *testing.T) { + testdata := []struct { + name string + code []byte + mem uint64 + rip uint64 + expected uint64 + }{ + { + name: "cpython 3.12 ", + code: []byte{ + + 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 + 0x53, // push rbx + 0x48, 0x8D, 0x1D, 0x1C, 0x42, 0x37, // lea rbx, __TMC_END__.autoTSSkey + 0x00, // + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0xE8, 0x8C, 0x83, 0x01, 0x00, // call PyThread_tss_is_created + 0x85, 0xC0, // test eax, eax + 0x74, 0x10, // jz short loc_2F1928 + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0x5B, // pop rbx + }, + mem: 0, // not unused + rip: 0x2F1900, + 
expected: 0x665B28, + }, + { + name: "cpython 3.10 PyGILState_GetThisThreadState", + code: []byte{ + 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 + 0x48, 0x8B, 0x05, 0xB5, 0x2D, // mov rax, cs:runtime << mem + 0x18, 0x00, // + 0x48, 0x83, 0xB8, 0x40, 0x02, // cmp qword ptr [rax+240h], 0 + 0x00, 0x00, 0x00, // + 0x74, 0x13, // jz short loc_209F98 + 0x48, 0x8D, 0xB8, 0x48, 0x02, // lea rdi, [rax+248h] ; key + 0x00, 0x00, // + 0xE9, 0xFF, 0xF9, 0xE5, 0xFF, // jmp _PyThread_tss_get + }, + mem: 0x3C1680, + rip: 0x209F70, + expected: 0x3C18C8, + }, + { + name: "cpython 3.11.2 PyGILState_GetThisThreadState google/cloud-sdk:502.0.0-slim", + code: []byte{ + 0x48, 0x83, 0x3D, 0x00, 0x47, // cmp cs:qword_A5C968, 0 + 0x56, 0x00, 0x00, // + 0x0F, 0x84, 0xEF, 0xC1, 0xF2, // jz loc_42445D + 0xFF, // + 0x8B, 0x3D, 0x00, 0x47, 0x56, // mov edi, cs:dword_A5C974 + 0x00, // + 0xE9, 0x77, 0x84, 0xF2, 0xFF, // jmp _pthread_getspecific + }, + mem: 0, // not used + rip: 0x4F8260, + expected: 0xA5C974, + }, + { + name: "gcloud-sdk 515.0.0 3.12 bundled", + code: []byte{ + 0x53, // push rbx + 0xBB, 0x08, 0x06, 0x00, 0x00, // mov ebx, 608h + 0x48, 0x03, 0x1D, 0xBB, 0x10, 0x1A, 0x01, // add rbx, cs:_PyRuntime_ptr << mem + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0xE8, 0x6B, 0x81, 0xE5, 0xFF, // call _PyThread_tss_is_created + 0x85, 0xC0, // test eax, eax + 0x74, 0x09, // jz short loc_3C89E2 + 0x48, 0x89, 0xDF, // mov rdi, rbx + 0x5B, // pop rbx + 0xE9, 0x3E, 0xA3, 0xE5, 0xFF, // jmp _PyThread_tss_get + }, + mem: 0x16905C0, + rip: 0x3C89C0, + expected: 0x16905C0 + 0x608, + }, + { + name: "gcloud-sdk 502 3.11 bundled", + code: []byte{ + 0x48, 0x8B, 0x05, 0x61, 0x9D, 0x12, 0x01, // mov rax, cs:_PyRuntime_ptr + 0x48, 0x83, 0xB8, 0x48, 0x02, 0x00, 0x00, 0x00, // cmp qword ptr [rax+248h], 0 + 0x74, 0x11, // jz short loc_377D42 + 0xBF, 0x50, 0x02, 0x00, 0x00, // mov edi, 250h + 0x48, 0x03, 0x3D, 0x4B, 0x9D, 0x12, 0x01, // add rdi, cs:_PyRuntime_ptr << mem + 0xE9, 0x5E, 0x6E, 0xE9, 0xFF, // jmp _PyThread_tss_get + 
}, + mem: 0x15D36F0, + rip: 0x77D20, + expected: 0x15D36F0 + 0x250, + }, + } + + for _, td := range testdata { + t.Run(td.name, func(t *testing.T) { + val := decodeStubArgumentWrapperX64( + td.code, + libpf.SymbolValue(td.rip), + libpf.SymbolValue(td.mem), + ) + assert.Equal(t, libpf.SymbolValue(td.expected), val) + }) + } +} diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index 5239726e9..96ae9dabb 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -42,105 +42,3 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0, 0, 0) assert.Equal(t, libpf.SymbolValue(604), val, "PyGILState_GetThisThreadState test") } - -func TestAmd64(t *testing.T) { - testdata := []struct { - name string - code []byte - mem uint64 - rip uint64 - expected uint64 - }{ - { - name: "cpython 3.12 ", - code: []byte{ - - 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 - 0x53, // push rbx - 0x48, 0x8D, 0x1D, 0x1C, 0x42, 0x37, // lea rbx, __TMC_END__.autoTSSkey - 0x00, // - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0xE8, 0x8C, 0x83, 0x01, 0x00, // call PyThread_tss_is_created - 0x85, 0xC0, // test eax, eax - 0x74, 0x10, // jz short loc_2F1928 - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0x5B, // pop rbx - }, - mem: 0, // not unused - rip: 0x2F1900, - expected: 0x665B28, - }, - { - name: "cpython 3.10 PyGILState_GetThisThreadState", - code: []byte{ - 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 - 0x48, 0x8B, 0x05, 0xB5, 0x2D, // mov rax, cs:runtime << mem - 0x18, 0x00, // - 0x48, 0x83, 0xB8, 0x40, 0x02, // cmp qword ptr [rax+240h], 0 - 0x00, 0x00, 0x00, // - 0x74, 0x13, // jz short loc_209F98 - 0x48, 0x8D, 0xB8, 0x48, 0x02, // lea rdi, [rax+248h] ; key - 0x00, 0x00, // - 0xE9, 0xFF, 0xF9, 0xE5, 0xFF, // jmp _PyThread_tss_get - }, - mem: 0x3C1680, - rip: 0x209F70, - expected: 0x3C18C8, - }, - { - name: "cpython 3.11.2 PyGILState_GetThisThreadState google/cloud-sdk:502.0.0-slim", - code: []byte{ - 0x48, 0x83, 0x3D, 0x00, 0x47, // cmp cs:qword_A5C968, 0 - 0x56, 0x00, 0x00, 
// - 0x0F, 0x84, 0xEF, 0xC1, 0xF2, // jz loc_42445D - 0xFF, // - 0x8B, 0x3D, 0x00, 0x47, 0x56, // mov edi, cs:dword_A5C974 - 0x00, // - 0xE9, 0x77, 0x84, 0xF2, 0xFF, // jmp _pthread_getspecific - }, - mem: 0, // not used - rip: 0x4F8260, - expected: 0xA5C974, - }, - { - name: "gcloud-sdk 515.0.0 3.12 bundled", - code: []byte{ - 0x53, // push rbx - 0xBB, 0x08, 0x06, 0x00, 0x00, // mov ebx, 608h - 0x48, 0x03, 0x1D, 0xBB, 0x10, 0x1A, 0x01, // add rbx, cs:_PyRuntime_ptr << mem - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0xE8, 0x6B, 0x81, 0xE5, 0xFF, // call _PyThread_tss_is_created - 0x85, 0xC0, // test eax, eax - 0x74, 0x09, // jz short loc_3C89E2 - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0x5B, // pop rbx - 0xE9, 0x3E, 0xA3, 0xE5, 0xFF, // jmp _PyThread_tss_get - }, - mem: 0x16905C0, - rip: 0x3C89C0, - expected: 0x16905C0 + 0x608, - }, - { - name: "gcloud-sdk 502 3.11 bundled", - code: []byte{ - 0x48, 0x8B, 0x05, 0x61, 0x9D, 0x12, 0x01, // mov rax, cs:_PyRuntime_ptr - 0x48, 0x83, 0xB8, 0x48, 0x02, 0x00, 0x00, 0x00, // cmp qword ptr [rax+248h], 0 - 0x74, 0x11, // jz short loc_377D42 - 0xBF, 0x50, 0x02, 0x00, 0x00, // mov edi, 250h - 0x48, 0x03, 0x3D, 0x4B, 0x9D, 0x12, 0x01, // add rdi, cs:_PyRuntime_ptr << mem - 0xE9, 0x5E, 0x6E, 0xE9, 0xFF, // jmp _PyThread_tss_get - }, - mem: 0x15D36F0, - rip: 0x77D20, - expected: 0x15D36F0 + 0x250, - }, - } - - for _, td := range testdata { - t.Run(td.name, func(t *testing.T) { - val := decodeStubArgumentWrapperX64( - td.code, libpf.SymbolValue(td.rip), libpf.SymbolValue(td.mem)) - assert.Equal(t, libpf.SymbolValue(td.expected), val) - }) - } -} diff --git a/interpreter/python/python.go b/interpreter/python/python.go index 1c3f7cfc9..0c74dc84b 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -652,29 +652,33 @@ func (d *pythonData) readIntrospectionData(ef *pfelf.File, symbol libpf.SymbolNa // decodeStub will resolve a given symbol, extract the code for it, and analyze // the code to resolve specified argument 
parameter to the first jump/call. -func decodeStub(ef *pfelf.File, addrBase libpf.SymbolValue, symbolName libpf.SymbolName) libpf.SymbolValue { - symbolValue, err := ef.LookupSymbolAddress(symbolName) +func decodeStub( + ef *pfelf.File, + memoryBase libpf.SymbolValue, + symbolName libpf.SymbolName, +) libpf.SymbolValue { + codeAddress, err := ef.LookupSymbolAddress(symbolName) if err != nil { return libpf.SymbolValueInvalid } code := make([]byte, 64) - if _, err := ef.ReadVirtualMemory(code, int64(symbolValue)); err != nil { + if _, err := ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { return libpf.SymbolValueInvalid } - value := decodeStubArgumentWrapper(code, symbolValue, addrBase) + value := decodeStubArgumentWrapper(code, codeAddress, memoryBase) // Sanity check the value range and alignment if value%4 != 0 { return libpf.SymbolValueInvalid } // If base symbol (_PyRuntime) is not provided, accept any found value. - if addrBase == 0 && value != 0 { + if memoryBase == 0 && value != 0 { return value } // Check that the found value is within reasonable distance from the given symbol. 
- if value > addrBase && value < addrBase+4096 { + if value > memoryBase && value < memoryBase+4096 { return value } return libpf.SymbolValueInvalid From aa5ef11acfbc05208e43aa2bc001ef12c880b62c Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Thu, 20 Mar 2025 23:53:53 +0700 Subject: [PATCH 03/26] more tests, debug dump code on failure --- interpreter/python/decode_amd64_test.go | 190 ++++++++++++++++-------- interpreter/python/python.go | 6 + 2 files changed, 131 insertions(+), 65 deletions(-) diff --git a/interpreter/python/decode_amd64_test.go b/interpreter/python/decode_amd64_test.go index 14c06ea7e..eedcd3c16 100644 --- a/interpreter/python/decode_amd64_test.go +++ b/interpreter/python/decode_amd64_test.go @@ -16,92 +16,152 @@ func TestAmd64DecodeStub(t *testing.T) { testdata := []struct { name string code []byte - mem uint64 rip uint64 expected uint64 }{ { - name: "cpython 3.12 ", + name: "3.10.16 gcc12 enable-optimizations disable-shared", code: []byte{ - - 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 - 0x53, // push rbx - 0x48, 0x8D, 0x1D, 0x1C, 0x42, 0x37, // lea rbx, __TMC_END__.autoTSSkey - 0x00, // - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0xE8, 0x8C, 0x83, 0x01, 0x00, // call PyThread_tss_is_created - 0x85, 0xC0, // test eax, eax - 0x74, 0x10, // jz short loc_2F1928 - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0x5B, // pop rbx + 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 + 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> + 0x00, // 1bbbab: + 0x74, 0x0b, // 1bbbac: je 1bbbb9 + 0x8b, 0x3d, 0x78, 0x90, 0x1e, 0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> + 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 + }, + rip: 0x1bbba0, + expected: 0x3a4c2c, + }, + { + name: "3.10.16 gcc12 disable-optimizations disable-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 172e50: endbr64 + 0x48, 0x83, 0x3d, 0x04, 0xef, 0x24, 0x00, // 172e54: cmp QWORD PTR [rip+0x24ef04],0x0 # 3c1d60 
<_PyRuntime+0x240> + 0x00, // 172e5b: + 0x74, 0x12, // 172e5c: je 172e70 + 0x48, 0x8d, 0x3d, 0x03, 0xef, 0x24, 0x00, // 172e5e: lea rdi,[rip+0x24ef03] # 3c1d68 <_PyRuntime+0x248> + 0xe9, 0x86, 0x1e, 0x01, 0x00, // 172e65: jmp 184cf0 + }, + rip: 0x172e50, + expected: 0x3c1d68, + }, + { + name: "3.10.16 clang16 disable-optimizations enabled-shared", + code: []byte{ + 0x48, 0x8b, 0x05, 0x99, 0x70, 0x16, 0x00, // 1adc90: mov rax,QWORD PTR [rip+0x167099] # 314d30 <_PyRuntime@@Base-0x33668> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1adc97: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 1adc9e: + 0x74, 0x11, // 1adc9f: je 1adcb2 + 0xbf, 0x48, 0x02, 0x00, 0x00, // 1adca1: mov edi,0x248 + 0x48, 0x03, 0x3d, 0x83, 0x70, 0x16, 0x00, // 1adca6: add rdi,QWORD PTR [rip+0x167083] # 314d30 <_PyRuntime@@Base-0x33668> + 0xe9, 0x2e, 0x41, 0xeb, 0xff, // 1adcad: jmp 61de0 + }, + rip: 0x1adc90, + expected: 0x248, + }, + { + name: "3.12.8 gcc12 disable-optimizations enabled-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 2e25d0: endbr64 + 0x48, 0x8b, 0x05, 0x25, 0x27, 0x27, 0x00, // 2e25d4: mov rax,QWORD PTR [rip+0x272725] # 554d00 <_PyRuntime@@Base-0x1004e0> + 0x53, // 2e25db: push rbx + 0x48, 0x8d, 0x98, 0x08, 0x06, 0x00, 0x00, // 2e25dc: lea rbx,[rax+0x608] + 0x48, 0x89, 0xdf, // 2e25e3: mov rdi,rbx + 0xe8, 0x95, 0x78, 0xe2, 0xff, // 2e25e6: call 109e80 + }, + rip: 0x2e25d0, + expected: 0x608, + }, + { + name: "3.10.16 clang18 enable-optimizations enabled-shared", + code: []byte{ + 0x48, 0x8b, 0x05, 0xd9, 0x80, 0x31, 0x00, // cac50: mov rax,QWORD PTR [rip+0x3180d9] # 3e2d30 <_PyRuntime@@Base-0x32c28> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // cac57: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // cac5e: + 0x74, 0x0b, // cac5f: je cac6c + 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // cac61: mov edi,DWORD PTR [rax+0x24c] + 0xe9, 0x24, 0x55, 0xf9, 0xff, // cac67: jmp 60190 + }, + rip: 0xcac50, + expected: 0x24c, + }, + { + name: "3.10.16 clang18 enable-optimizations disable-shared", + code: 
[]byte{ + 0x48, 0x83, 0x3d, 0x98, 0xc5, 0x36, 0x00, // 92000: cmp QWORD PTR [rip+0x36c598],0x0 # 3fe5a0 <_PyRuntime+0x240> + 0x00, // 92007: + 0x74, 0x0b, // 92008: je 92015 + 0x8b, 0x3d, 0x9c, 0xc5, 0x36, 0x00, // 9200a: mov edi,DWORD PTR [rip+0x36c59c] # 3fe5ac <_PyRuntime+0x24c> + 0xe9, 0x4b, 0x70, 0xfc, 0xff, // 92010: jmp 59060 + }, + rip: 0x92000, + expected: 0x3fe5ac, + }, + { + name: "3.10.16 clang16 disable-optimizations disable-shared", + code: []byte{ + 0x48, 0x8d, 0x05, 0x69, 0x19, 0x21, 0x00, // 129bc0: lea rax,[rip+0x211969] # 33b530 <_PyRuntime> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 129bc7: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 129bce: + 0x74, 0x11, // 129bcf: je 129be2 + 0xbf, 0x48, 0x02, 0x00, 0x00, // 129bd1: mov edi,0x248 + 0x48, 0x03, 0x3d, 0x53, 0x03, 0x1e, 0x00, // 129bd6: add rdi,QWORD PTR [rip+0x1e0353] # 309f30 <_DYNAMIC+0x328> + 0xe9, 0x8e, 0xec, 0x00, 0x00, // 129bdd: jmp 138870 }, - mem: 0, // not unused - rip: 0x2F1900, - expected: 0x665B28, + rip: 0x129bc0, + expected: 0x248, }, { - name: "cpython 3.10 PyGILState_GetThisThreadState", + name: "3.12.8 clang16 disable-optimizations disable-shared", code: []byte{ - 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 - 0x48, 0x8B, 0x05, 0xB5, 0x2D, // mov rax, cs:runtime << mem - 0x18, 0x00, // - 0x48, 0x83, 0xB8, 0x40, 0x02, // cmp qword ptr [rax+240h], 0 - 0x00, 0x00, 0x00, // - 0x74, 0x13, // jz short loc_209F98 - 0x48, 0x8D, 0xB8, 0x48, 0x02, // lea rdi, [rax+248h] ; key - 0x00, 0x00, // - 0xE9, 0xFF, 0xF9, 0xE5, 0xFF, // jmp _PyThread_tss_get + 0x53, // 2a20d0: push rbx + 0xbb, 0x08, 0x06, 0x00, 0x00, // 2a20d1: mov ebx,0x608 + 0x48, 0x03, 0x1d, 0x0b, 0x1e, 0x25, 0x00, // 2a20d6: add rbx,QWORD PTR [rip+0x251e0b] # 4f3ee8 <_DYNAMIC+0x368> + 0x48, 0x89, 0xdf, // 2a20dd: mov rdi,rbx + 0xe8, 0x7b, 0x41, 0x01, 0x00, // 2a20e0: call 2b6260 }, - mem: 0x3C1680, - rip: 0x209F70, - expected: 0x3C18C8, + rip: 0x2a20d0, + expected: 0x608, }, { - name: "cpython 3.11.2 PyGILState_GetThisThreadState 
google/cloud-sdk:502.0.0-slim", + name: "3.10.16 clang16 disable-optimizations enabled-shared", code: []byte{ - 0x48, 0x83, 0x3D, 0x00, 0x47, // cmp cs:qword_A5C968, 0 - 0x56, 0x00, 0x00, // - 0x0F, 0x84, 0xEF, 0xC1, 0xF2, // jz loc_42445D - 0xFF, // - 0x8B, 0x3D, 0x00, 0x47, 0x56, // mov edi, cs:dword_A5C974 - 0x00, // - 0xE9, 0x77, 0x84, 0xF2, 0xFF, // jmp _pthread_getspecific + 0xf3, 0x0f, 0x1e, 0xfa, // 2079c0: endbr64 + 0x48, 0x8b, 0x05, 0x65, 0x03, 0x18, 0x00, // 2079c4: mov rax,QWORD PTR [rip+0x180365] # 387d30 <_PyRuntime@@Base-0x34950> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 2079cb: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 2079d2: + 0x74, 0x13, // 2079d3: je 2079e8 + 0x48, 0x8d, 0xb8, 0x48, 0x02, 0x00, 0x00, // 2079d5: lea rdi,[rax+0x248] + 0xe9, 0x8f, 0x1f, 0xe6, 0xff, // 2079dc: jmp 69970 }, - mem: 0, // not used - rip: 0x4F8260, - expected: 0xA5C974, + rip: 0x2079c0, + expected: 0x248, }, { - name: "gcloud-sdk 515.0.0 3.12 bundled", + name: "3.12.8 gcc12 disable-optimizations disable-shared", code: []byte{ - 0x53, // push rbx - 0xBB, 0x08, 0x06, 0x00, 0x00, // mov ebx, 608h - 0x48, 0x03, 0x1D, 0xBB, 0x10, 0x1A, 0x01, // add rbx, cs:_PyRuntime_ptr << mem - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0xE8, 0x6B, 0x81, 0xE5, 0xFF, // call _PyThread_tss_is_created - 0x85, 0xC0, // test eax, eax - 0x74, 0x09, // jz short loc_3C89E2 - 0x48, 0x89, 0xDF, // mov rdi, rbx - 0x5B, // pop rbx - 0xE9, 0x3E, 0xA3, 0xE5, 0xFF, // jmp _PyThread_tss_get + 0xf3, 0x0f, 0x1e, 0xfa, // 2eb960: endbr64 + 0x53, // 2eb964: push rbx + 0x48, 0x8d, 0x1d, 0xbc, 0x21, 0x37, 0x00, // 2eb965: lea rbx,[rip+0x3721bc] # 65db28 <_PyRuntime+0x608> + 0x48, 0x89, 0xdf, // 2eb96c: mov rdi,rbx + 0xe8, 0x0c, 0x7f, 0x01, 0x00, // 2eb96f: call 303880 }, - mem: 0x16905C0, - rip: 0x3C89C0, - expected: 0x16905C0 + 0x608, + rip: 0x2eb960, + expected: 0x65db28, }, { - name: "gcloud-sdk 502 3.11 bundled", + name: "3.10.16 gcc12 enable-optimizations enabled-shared", code: []byte{ - 0x48, 0x8B, 0x05, 
0x61, 0x9D, 0x12, 0x01, // mov rax, cs:_PyRuntime_ptr - 0x48, 0x83, 0xB8, 0x48, 0x02, 0x00, 0x00, 0x00, // cmp qword ptr [rax+248h], 0 - 0x74, 0x11, // jz short loc_377D42 - 0xBF, 0x50, 0x02, 0x00, 0x00, // mov edi, 250h - 0x48, 0x03, 0x3D, 0x4B, 0x9D, 0x12, 0x01, // add rdi, cs:_PyRuntime_ptr << mem - 0xE9, 0x5E, 0x6E, 0xE9, 0xFF, // jmp _PyThread_tss_get + 0xf3, 0x0f, 0x1e, 0xfa, // 1c03c0: endbr64 + 0x48, 0x8b, 0x05, 0x5d, 0x69, 0x1b, 0x00, // 1c03c4: mov rax,QWORD PTR [rip+0x1b695d] # 376d28 <_PyRuntime@@Base-0x32758> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1c03cb: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 1c03d2: + 0x74, 0x0b, // 1c03d3: je 1c03e0 + 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // 1c03d5: mov edi,DWORD PTR [rax+0x24c] + 0xe9, 0x10, 0xb4, 0xe9, 0xff, // 1c03db: jmp 5b7f0 }, - mem: 0x15D36F0, - rip: 0x77D20, - expected: 0x15D36F0 + 0x250, + rip: 0x1c03c0, + expected: 0x24c, }, } @@ -110,7 +170,7 @@ func TestAmd64DecodeStub(t *testing.T) { val := decodeStubArgumentWrapperX64( td.code, libpf.SymbolValue(td.rip), - libpf.SymbolValue(td.mem), + libpf.SymbolValue(0), // NULL pointer as mem ) assert.Equal(t, libpf.SymbolValue(td.expected), val) }) diff --git a/interpreter/python/python.go b/interpreter/python/python.go index 0c74dc84b..d0fa89538 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -6,6 +6,7 @@ package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( "bytes" "debug/elf" + "encoding/hex" "errors" "fmt" "hash/fnv" @@ -666,11 +667,15 @@ func decodeStub( if _, err := ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { return libpf.SymbolValueInvalid } + dumpCode := func() { + log.Debugf("python stub code: %s", hex.Dump(code)) + } value := decodeStubArgumentWrapper(code, codeAddress, memoryBase) // Sanity check the value range and alignment if value%4 != 0 { + dumpCode() return libpf.SymbolValueInvalid } // If base symbol (_PyRuntime) is not provided, accept any found value. 
@@ -681,6 +686,7 @@ func decodeStub( if value > memoryBase && value < memoryBase+4096 { return value } + dumpCode() return libpf.SymbolValueInvalid } From b0d9b46a83b9cfe80a83288e64f047629bcbe6eb Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 21 Mar 2025 11:18:40 +0700 Subject: [PATCH 04/26] lint --- interpreter/python/decode_amd64_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/interpreter/python/decode_amd64_test.go b/interpreter/python/decode_amd64_test.go index eedcd3c16..9b267b7dc 100644 --- a/interpreter/python/decode_amd64_test.go +++ b/interpreter/python/decode_amd64_test.go @@ -3,6 +3,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +//nolint:lll package python import ( From d1bf67ce228e52a5ce1defa72edfa66ff4aafb19 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 21 Mar 2025 10:54:50 +0700 Subject: [PATCH 05/26] rewrite in go --- interpreter/python/decode_amd64.c | 134 ------------------- interpreter/python/decode_amd64.go | 164 +++++++++++++++++++++--- interpreter/python/decode_amd64.h | 14 -- interpreter/python/decode_amd64_test.go | 39 +++++- 4 files changed, 175 insertions(+), 176 deletions(-) delete mode 100644 interpreter/python/decode_amd64.c delete mode 100644 interpreter/python/decode_amd64.h diff --git a/interpreter/python/decode_amd64.c b/interpreter/python/decode_amd64.c deleted file mode 100644 index c6235b092..000000000 --- a/interpreter/python/decode_amd64.c +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build amd64 - -#include "../../zydis/Zydis.h" -#include "decode_amd64.h" - -// #define DECODE_AMD_DEBUG - -#if defined(DECODE_AMD_DEBUG) - #include -#endif - -static int reg_index(ZydisRegister reg) -{ - switch (reg) { - case ZYDIS_REGISTER_RAX: - case ZYDIS_REGISTER_EAX: return 1; - case ZYDIS_REGISTER_RBX: - case ZYDIS_REGISTER_EBX: return 2; - case ZYDIS_REGISTER_RCX: - case ZYDIS_REGISTER_ECX: return 
3; - case ZYDIS_REGISTER_RDX: - case ZYDIS_REGISTER_EDX: return 4; - case ZYDIS_REGISTER_RDI: - case ZYDIS_REGISTER_EDI: return 5; - case ZYDIS_REGISTER_RSI: - case ZYDIS_REGISTER_ESI: return 6; - case ZYDIS_REGISTER_RBP: - case ZYDIS_REGISTER_EBP: return 7; - case ZYDIS_REGISTER_RSP: - case ZYDIS_REGISTER_ESP: return 8; - case ZYDIS_REGISTER_RIP: return 9; - default: return 0; - } -} - -struct reg_state { - ZyanU64 loaded_from; - ZyanU64 value; -}; - -// decode_stub_argument() will decode instructions from given code blob until an assignment -// for the given argument register is found. The value loaded is then determined from the -// opcode. A call/jump instruction will terminate the finding as we are finding the argument -// to first function call (or tail call). -// Currently the following addressing schemes for the assignment are supported: -// 1) Loading virtual address with immediate value. This happens for non-PIC globals. -// 2) Loading RIP-relative virtual address. Happens for PIC/PIE globals. -// 3) Loading via pointer + displacement. Happens when the main state is given as argument, -// and the value is loaded from it. In this case 'memory_base' should be the address of -// the global state variable. -// todo update comment -// todo rewrite in go -// todo add coredump tests -uint64_t decode_stub_argument( - const uint8_t *code, size_t code_sz, uint64_t code_address, uint64_t memory_base) -{ - ZydisDecoder decoder; - ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); - - // Argument number to x86_64 calling convention register mapping. 
- ZydisRegister target_register64 = ZYDIS_REGISTER_RDI; - - // Iterate instructions - ZydisDecodedInstruction instr; - ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - ZyanUSize instruction_offset = 0; - struct reg_state regs[32] = {}; - while (ZYAN_SUCCESS(ZydisDecoderDecodeFull( - &decoder, code + instruction_offset, code_sz - instruction_offset, &instr, operands))) { -#if defined(DECODE_AMD_DEBUG) - ZydisDisassembledInstruction dbgi = {}; - if (ZYAN_SUCCESS(ZydisDisassembleIntel( - ZYDIS_MACHINE_MODE_LONG_64, - code_address + instruction_offset, - code + instruction_offset, - code_sz - instruction_offset, - &dbgi))) { - printf("%-12p %s\n", (void *)(code_address + instruction_offset), dbgi.text); - fflush(stdout); - } -#endif - instruction_offset += instr.length; - regs[reg_index(ZYDIS_REGISTER_RIP)].value = code_address + instruction_offset; - if (instr.mnemonic == ZYDIS_MNEMONIC_CALL || instr.mnemonic == ZYDIS_MNEMONIC_JMP) { - if (regs[reg_index(target_register64)].loaded_from) { - return regs[reg_index(target_register64)].loaded_from; - } - return regs[reg_index(target_register64)].value; - } - if ( - (instr.mnemonic == ZYDIS_MNEMONIC_LEA || instr.mnemonic == ZYDIS_MNEMONIC_MOV) && - operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER) { - - ZyanU64 v = 0; - ZyanU64 loaded_from = 0; - if (operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) { - v = operands[1].imm.value.u; - } - if (operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && operands[1].mem.disp.has_displacement) { - ZyanU64 at = regs[reg_index(operands[1].mem.base)].value + operands[1].mem.disp.value; - if (instr.mnemonic == ZYDIS_MNEMONIC_MOV) { - v = memory_base; - loaded_from = at; - } - if (instr.mnemonic == ZYDIS_MNEMONIC_LEA) { - v = at; - } - } - if (operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER) { - v = regs[reg_index(operands[1].reg.value)].value; - } -#if defined(DECODE_AMD_DEBUG) - printf(" | regs[%d] = %lx\n", reg_index(operands[0].reg.value), v); -#endif - 
regs[reg_index(operands[0].reg.value)].value = v; - regs[reg_index(operands[0].reg.value)].loaded_from = loaded_from; - } - if ( - instr.mnemonic == ZYDIS_MNEMONIC_ADD && instr.operand_count == 3 && - operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY) { - ZyanU64 v = regs[reg_index(operands[0].reg.value)].value + memory_base; - regs[reg_index(operands[0].reg.value)].value = v; - regs[reg_index(operands[0].reg.value)].loaded_from = 0; -#if defined(DECODE_AMD_DEBUG) - printf(" | regs[%d] = %lx\n", reg_index(operands[0].reg.value), v); -#endif - } - } - return 0; -} \ No newline at end of file diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go index e7c754601..eb3e3e91e 100644 --- a/interpreter/python/decode_amd64.go +++ b/interpreter/python/decode_amd64.go @@ -1,40 +1,162 @@ -//go:build amd64 - // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( - "unsafe" + "fmt" "go.opentelemetry.io/ebpf-profiler/libpf" - _ "go.opentelemetry.io/ebpf-profiler/zydis" // links Zydis + "golang.org/x/arch/x86/x86asm" ) -// #cgo CFLAGS: -g -Wall -// #include "decode_amd64.h" -// #include "../../support/ebpf/types.h" -import "C" +const debugDecodeAMD64 = false + +func regIndex(reg x86asm.Reg) int { + switch reg { + case x86asm.RAX, x86asm.EAX: + return 1 + case x86asm.RBX, x86asm.EBX: + return 2 + case x86asm.RCX, x86asm.ECX: + return 3 + case x86asm.RDX, x86asm.EDX: + return 4 + case x86asm.RDI, x86asm.EDI: + return 5 + case x86asm.RSI, x86asm.ESI: + return 6 + case x86asm.RBP, x86asm.EBP: + return 7 + case x86asm.RSP, x86asm.ESP: + return 8 + case x86asm.RIP: + return 9 + default: + return 0 + } +} + +type regState struct { + loadedFrom uint64 + value uint64 +} + +func decodeStubArgumentAMD64(code []byte, codeAddress uint64, memoryBase uint64) uint64 { + targetRegister := x86asm.RDI + + 
instructionOffset := 0 + regs := [32]regState{} + + for instructionOffset < len(code) { + rem := code[instructionOffset:] + if len(rem) >= 4 && + code[instructionOffset] == 0xf3 && + code[instructionOffset+1] == 0x0f && + code[instructionOffset+2] == 0x1e && + code[instructionOffset+3] == 0xfa { + if debugDecodeAMD64 { + fmt.Printf("0x%x: endbr64 (special case)\n", codeAddress+uint64(instructionOffset)) + } + instructionOffset += 4 + continue + } + + inst, err := x86asm.Decode(rem, 64) + if err != nil { + if debugDecodeAMD64 { + fmt.Printf("Error decoding instruction at offset %d: %v\n", instructionOffset, err) + } + break + } + + if debugDecodeAMD64 { + fmt.Printf("0x%x: %s\n", codeAddress+uint64(instructionOffset), inst.String()) + } + + instructionOffset += inst.Len + regs[regIndex(x86asm.RIP)].value = codeAddress + uint64(instructionOffset) + + if inst.Op == x86asm.CALL || inst.Op == x86asm.JMP { + targetRegIdx := regIndex(targetRegister) + if regs[targetRegIdx].loadedFrom != 0 { + return regs[targetRegIdx].loadedFrom + } + return regs[targetRegIdx].value + } -func decodeStubArgumentWrapperX64( + if (inst.Op == x86asm.LEA || inst.Op == x86asm.MOV) && inst.Args[0] != nil { + if reg, ok := inst.Args[0].(x86asm.Reg); ok { + regIdx := regIndex(reg) + var value uint64 + var loadedFrom uint64 + + switch src := inst.Args[1].(type) { + case x86asm.Imm: + value = uint64(src) + case x86asm.Mem: + baseReg := src.Base + baseAddr := regs[regIndex(baseReg)].value + displacement := uint64(src.Disp) + + if baseReg == x86asm.RIP { + baseAddr = codeAddress + uint64(instructionOffset) + } + + if inst.Op == x86asm.MOV { + value = memoryBase + loadedFrom = baseAddr + displacement + } else if inst.Op == x86asm.LEA { + value = baseAddr + displacement + } + + if src.Index != 0 { + indexValue := regs[regIndex(src.Index)].value + value += indexValue * uint64(src.Scale) + if debugDecodeAMD64 { + fmt.Printf(" Adding scaled index: index=%s (0x%x) * scale=%d = 0x%x\n", + src.Index, 
indexValue, src.Scale, indexValue*uint64(src.Scale)) + fmt.Printf(" Updated value: 0x%x\n", value) + } + } + + case x86asm.Reg: + value = regs[regIndex(src)].value + } + + if debugDecodeAMD64 { + fmt.Printf(" Setting register %s: value=0x%x, loaded from=0x%x\n", reg, value, loadedFrom) + } + + regs[regIdx].value = value + regs[regIdx].loadedFrom = loadedFrom + } + } + + if inst.Op == x86asm.ADD && inst.Args[0] != nil && inst.Args[1] != nil { + if reg, ok := inst.Args[0].(x86asm.Reg); ok { + if _, ok := inst.Args[1].(x86asm.Mem); ok { + regIdx := regIndex(reg) + oldValue := regs[regIdx].value + value := oldValue + memoryBase + regs[regIdx].value = value + regs[regIdx].loadedFrom = 0 + } + } + } + } + return 0 +} + +func decodeStubArgumentWrapper( code []byte, - codeAddress, + codeAddress libpf.SymbolValue, memoryBase libpf.SymbolValue, ) libpf.SymbolValue { if len(code) == 0 { return 0 } - return libpf.SymbolValue(C.decode_stub_argument( - (*C.uint8_t)(unsafe.Pointer(&code[0])), - C.size_t(len(code)), - C.uint64_t(codeAddress), - C.uint64_t(memoryBase)), + return libpf.SymbolValue( + decodeStubArgumentAMD64(code, uint64(codeAddress), uint64(memoryBase)), ) } - -func decodeStubArgumentWrapper(code []byte, - codeAddress, memoryBase libpf.SymbolValue, -) libpf.SymbolValue { - return decodeStubArgumentWrapperX64(code, codeAddress, memoryBase) -} diff --git a/interpreter/python/decode_amd64.h b/interpreter/python/decode_amd64.h deleted file mode 100644 index 6bd3e0f7a..000000000 --- a/interpreter/python/decode_amd64.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build amd64 - -#ifndef __PYTHON_DECODE_X86_64__ -#define __PYTHON_DECODE_X86_64__ - -#include - -uint64_t decode_stub_argument( - const uint8_t *code, size_t code_sz, uint64_t code_address, uint64_t memory_base); - -#endif diff --git a/interpreter/python/decode_amd64_test.go b/interpreter/python/decode_amd64_test.go index 
9b267b7dc..c1c6adc01 100644 --- a/interpreter/python/decode_amd64_test.go +++ b/interpreter/python/decode_amd64_test.go @@ -1,5 +1,3 @@ -//go:build amd64 - // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 @@ -10,9 +8,30 @@ import ( "testing" "github.com/stretchr/testify/assert" - "go.opentelemetry.io/ebpf-profiler/libpf" ) +func BenchmarkDecodeAmd64(b *testing.B) { + for i := 0; i < b.N; i++ { + code := []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 + 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> + 0x00, // 1bbbab: + 0x74, 0x0b, // 1bbbac: je 1bbbb9 + 0x8b, 0x3d, 0x78, 0x90, 0x1e, 0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> + 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 + } + rip := uint64(0x1bbba0) + val := decodeStubArgumentAMD64( + code, + rip, + 0, + ) + if val != 0x3a4c2c { + b.Fail() + } + } +} + func TestAmd64DecodeStub(t *testing.T) { testdata := []struct { name string @@ -168,12 +187,18 @@ func TestAmd64DecodeStub(t *testing.T) { for _, td := range testdata { t.Run(td.name, func(t *testing.T) { - val := decodeStubArgumentWrapperX64( + val := decodeStubArgumentAMD64( td.code, - libpf.SymbolValue(td.rip), - libpf.SymbolValue(0), // NULL pointer as mem + td.rip, + 0, // NULL pointer as mem ) - assert.Equal(t, libpf.SymbolValue(td.expected), val) + assert.Equal(t, td.expected, val) }) } } + +func FuzzDecodeAmd(f *testing.F) { + f.Fuzz(func(t *testing.T, code []byte, rip uint64) { + decodeStubArgumentAMD64(code, rip, 0) + }) +} From 3eff3a8c955473c14763192614a51f86c531707c Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 4 Apr 2025 20:50:03 +0700 Subject: [PATCH 06/26] lint --- interpreter/python/decode_amd64.go | 5 +++-- interpreter/python/decode_amd64_test.go | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go index 
eb3e3e91e..824f7acd4 100644 --- a/interpreter/python/decode_amd64.go +++ b/interpreter/python/decode_amd64.go @@ -42,7 +42,7 @@ type regState struct { value uint64 } -func decodeStubArgumentAMD64(code []byte, codeAddress uint64, memoryBase uint64) uint64 { +func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 { targetRegister := x86asm.RDI instructionOffset := 0 @@ -125,7 +125,8 @@ func decodeStubArgumentAMD64(code []byte, codeAddress uint64, memoryBase uint64) } if debugDecodeAMD64 { - fmt.Printf(" Setting register %s: value=0x%x, loaded from=0x%x\n", reg, value, loadedFrom) + fmt.Printf(" Setting register %s: value=0x%x, loaded from=0x%x\n", + reg, value, loadedFrom) } regs[regIdx].value = value diff --git a/interpreter/python/decode_amd64_test.go b/interpreter/python/decode_amd64_test.go index c1c6adc01..54a9f10a4 100644 --- a/interpreter/python/decode_amd64_test.go +++ b/interpreter/python/decode_amd64_test.go @@ -198,7 +198,7 @@ func TestAmd64DecodeStub(t *testing.T) { } func FuzzDecodeAmd(f *testing.F) { - f.Fuzz(func(t *testing.T, code []byte, rip uint64) { + f.Fuzz(func(_ *testing.T, code []byte, rip uint64) { decodeStubArgumentAMD64(code, rip, 0) }) } From 93e569ea8cc73dca2504d45a98494b7ac70f2df6 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Fri, 4 Apr 2025 20:55:27 +0700 Subject: [PATCH 07/26] legal --- LICENSES/golang.org/x/arch/{arm64/arm64asm => }/LICENSE | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename LICENSES/golang.org/x/arch/{arm64/arm64asm => }/LICENSE (100%) diff --git a/LICENSES/golang.org/x/arch/arm64/arm64asm/LICENSE b/LICENSES/golang.org/x/arch/LICENSE similarity index 100% rename from LICENSES/golang.org/x/arch/arm64/arm64asm/LICENSE rename to LICENSES/golang.org/x/arch/LICENSE From b4e7b576a7fd01391a2d0111428e4535bc14d28f Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 16:07:14 +0700 Subject: [PATCH 08/26] remove debug printings --- interpreter/python/decode_amd64.go | 26 
+------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go index 824f7acd4..91a03e666 100644 --- a/interpreter/python/decode_amd64.go +++ b/interpreter/python/decode_amd64.go @@ -4,14 +4,10 @@ package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( - "fmt" - "go.opentelemetry.io/ebpf-profiler/libpf" "golang.org/x/arch/x86/x86asm" ) -const debugDecodeAMD64 = false - func regIndex(reg x86asm.Reg) int { switch reg { case x86asm.RAX, x86asm.EAX: @@ -55,25 +51,15 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 code[instructionOffset+1] == 0x0f && code[instructionOffset+2] == 0x1e && code[instructionOffset+3] == 0xfa { - if debugDecodeAMD64 { - fmt.Printf("0x%x: endbr64 (special case)\n", codeAddress+uint64(instructionOffset)) - } instructionOffset += 4 continue } inst, err := x86asm.Decode(rem, 64) - if err != nil { - if debugDecodeAMD64 { - fmt.Printf("Error decoding instruction at offset %d: %v\n", instructionOffset, err) - } + if err != nil { // todo return the error break } - if debugDecodeAMD64 { - fmt.Printf("0x%x: %s\n", codeAddress+uint64(instructionOffset), inst.String()) - } - instructionOffset += inst.Len regs[regIndex(x86asm.RIP)].value = codeAddress + uint64(instructionOffset) @@ -113,22 +99,12 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 if src.Index != 0 { indexValue := regs[regIndex(src.Index)].value value += indexValue * uint64(src.Scale) - if debugDecodeAMD64 { - fmt.Printf(" Adding scaled index: index=%s (0x%x) * scale=%d = 0x%x\n", - src.Index, indexValue, src.Scale, indexValue*uint64(src.Scale)) - fmt.Printf(" Updated value: 0x%x\n", value) - } } case x86asm.Reg: value = regs[regIndex(src)].value } - if debugDecodeAMD64 { - fmt.Printf(" Setting register %s: value=0x%x, loaded from=0x%x\n", - reg, value, loadedFrom) - } - regs[regIdx].value = 
value regs[regIdx].loadedFrom = loadedFrom } From f42edc8ea24cba6fd6cc04848142157024e11b16 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 16:15:32 +0700 Subject: [PATCH 09/26] decodeStub: return error, include hexdump of the code into the error --- interpreter/python/python.go | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/interpreter/python/python.go b/interpreter/python/python.go index c25d47b01..063ee65a4 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -660,37 +660,35 @@ func decodeStub( ef *pfelf.File, memoryBase libpf.SymbolValue, symbolName libpf.SymbolName, -) libpf.SymbolValue { +) (libpf.SymbolValue, error) { codeAddress, err := ef.LookupSymbolAddress(symbolName) if err != nil { - return libpf.SymbolValueInvalid + return libpf.SymbolValueInvalid, fmt.Errorf("lookup %s failed: %w", + symbolName, err) } code := make([]byte, 64) if _, err := ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { - return libpf.SymbolValueInvalid + return libpf.SymbolValueInvalid, fmt.Errorf("reading %s %x code failed: %w", + symbolName, codeAddress, err) } - dumpCode := func() { - log.Debugf("python stub code: %s", hex.Dump(code)) - } - value := decodeStubArgumentWrapper(code, codeAddress, memoryBase) // Sanity check the value range and alignment if value%4 != 0 { - dumpCode() - return libpf.SymbolValueInvalid + return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s %x %s failed (%x)", + symbolName, codeAddress, hex.Dump(code), value) } // If base symbol (_PyRuntime) is not provided, accept any found value. if memoryBase == 0 && value != 0 { - return value + return value, nil } // Check that the found value is within reasonable distance from the given symbol. 
if value > memoryBase && value < memoryBase+4096 { - return value + return value, nil } - dumpCode() - return libpf.SymbolValueInvalid + return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s %x %s failed (%x)", + symbolName, codeAddress, hex.Dump(code), value) } func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) { @@ -745,9 +743,9 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr } // Calls first: PyThread_tss_get(autoTSSKey) - autoTLSKey = decodeStub(ef, pyruntimeAddr, "PyGILState_GetThisThreadState") + autoTLSKey, err = decodeStub(ef, pyruntimeAddr, "PyGILState_GetThisThreadState") if autoTLSKey == libpf.SymbolValueInvalid { - return nil, errors.New("unable to resolve autoTLSKey") + return nil, fmt.Errorf("unable to resolve autoTLSKey %w", err) } if version >= pythonVer(3, 7) && autoTLSKey%8 == 0 { // On Python 3.7+, the call is to PyThread_tss_get, but can get optimized to From 893a170b899a9ec0cd6d990a25ef9636d615a8b6 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 16:19:51 +0700 Subject: [PATCH 10/26] merge tests into decode_tet.go --- interpreter/python/decode_amd64_test.go | 204 ------------------------ interpreter/python/decode_test.go | 193 ++++++++++++++++++++++ 2 files changed, 193 insertions(+), 204 deletions(-) delete mode 100644 interpreter/python/decode_amd64_test.go diff --git a/interpreter/python/decode_amd64_test.go b/interpreter/python/decode_amd64_test.go deleted file mode 100644 index 54a9f10a4..000000000 --- a/interpreter/python/decode_amd64_test.go +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//nolint:lll -package python - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func BenchmarkDecodeAmd64(b *testing.B) { - for i := 0; i < b.N; i++ { - code := []byte{ - 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 - 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 
1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> - 0x00, // 1bbbab: - 0x74, 0x0b, // 1bbbac: je 1bbbb9 - 0x8b, 0x3d, 0x78, 0x90, 0x1e, 0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> - 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 - } - rip := uint64(0x1bbba0) - val := decodeStubArgumentAMD64( - code, - rip, - 0, - ) - if val != 0x3a4c2c { - b.Fail() - } - } -} - -func TestAmd64DecodeStub(t *testing.T) { - testdata := []struct { - name string - code []byte - rip uint64 - expected uint64 - }{ - { - name: "3.10.16 gcc12 enable-optimizations disable-shared", - code: []byte{ - 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 - 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> - 0x00, // 1bbbab: - 0x74, 0x0b, // 1bbbac: je 1bbbb9 - 0x8b, 0x3d, 0x78, 0x90, 0x1e, 0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> - 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 - }, - rip: 0x1bbba0, - expected: 0x3a4c2c, - }, - { - name: "3.10.16 gcc12 disable-optimizations disable-shared", - code: []byte{ - 0xf3, 0x0f, 0x1e, 0xfa, // 172e50: endbr64 - 0x48, 0x83, 0x3d, 0x04, 0xef, 0x24, 0x00, // 172e54: cmp QWORD PTR [rip+0x24ef04],0x0 # 3c1d60 <_PyRuntime+0x240> - 0x00, // 172e5b: - 0x74, 0x12, // 172e5c: je 172e70 - 0x48, 0x8d, 0x3d, 0x03, 0xef, 0x24, 0x00, // 172e5e: lea rdi,[rip+0x24ef03] # 3c1d68 <_PyRuntime+0x248> - 0xe9, 0x86, 0x1e, 0x01, 0x00, // 172e65: jmp 184cf0 - }, - rip: 0x172e50, - expected: 0x3c1d68, - }, - { - name: "3.10.16 clang16 disable-optimizations enabled-shared", - code: []byte{ - 0x48, 0x8b, 0x05, 0x99, 0x70, 0x16, 0x00, // 1adc90: mov rax,QWORD PTR [rip+0x167099] # 314d30 <_PyRuntime@@Base-0x33668> - 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1adc97: cmp QWORD PTR [rax+0x240],0x0 - 0x00, // 1adc9e: - 0x74, 0x11, // 1adc9f: je 1adcb2 - 0xbf, 0x48, 0x02, 0x00, 0x00, // 1adca1: mov edi,0x248 - 0x48, 0x03, 0x3d, 0x83, 
0x70, 0x16, 0x00, // 1adca6: add rdi,QWORD PTR [rip+0x167083] # 314d30 <_PyRuntime@@Base-0x33668> - 0xe9, 0x2e, 0x41, 0xeb, 0xff, // 1adcad: jmp 61de0 - }, - rip: 0x1adc90, - expected: 0x248, - }, - { - name: "3.12.8 gcc12 disable-optimizations enabled-shared", - code: []byte{ - 0xf3, 0x0f, 0x1e, 0xfa, // 2e25d0: endbr64 - 0x48, 0x8b, 0x05, 0x25, 0x27, 0x27, 0x00, // 2e25d4: mov rax,QWORD PTR [rip+0x272725] # 554d00 <_PyRuntime@@Base-0x1004e0> - 0x53, // 2e25db: push rbx - 0x48, 0x8d, 0x98, 0x08, 0x06, 0x00, 0x00, // 2e25dc: lea rbx,[rax+0x608] - 0x48, 0x89, 0xdf, // 2e25e3: mov rdi,rbx - 0xe8, 0x95, 0x78, 0xe2, 0xff, // 2e25e6: call 109e80 - }, - rip: 0x2e25d0, - expected: 0x608, - }, - { - name: "3.10.16 clang18 enable-optimizations enabled-shared", - code: []byte{ - 0x48, 0x8b, 0x05, 0xd9, 0x80, 0x31, 0x00, // cac50: mov rax,QWORD PTR [rip+0x3180d9] # 3e2d30 <_PyRuntime@@Base-0x32c28> - 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // cac57: cmp QWORD PTR [rax+0x240],0x0 - 0x00, // cac5e: - 0x74, 0x0b, // cac5f: je cac6c - 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // cac61: mov edi,DWORD PTR [rax+0x24c] - 0xe9, 0x24, 0x55, 0xf9, 0xff, // cac67: jmp 60190 - }, - rip: 0xcac50, - expected: 0x24c, - }, - { - name: "3.10.16 clang18 enable-optimizations disable-shared", - code: []byte{ - 0x48, 0x83, 0x3d, 0x98, 0xc5, 0x36, 0x00, // 92000: cmp QWORD PTR [rip+0x36c598],0x0 # 3fe5a0 <_PyRuntime+0x240> - 0x00, // 92007: - 0x74, 0x0b, // 92008: je 92015 - 0x8b, 0x3d, 0x9c, 0xc5, 0x36, 0x00, // 9200a: mov edi,DWORD PTR [rip+0x36c59c] # 3fe5ac <_PyRuntime+0x24c> - 0xe9, 0x4b, 0x70, 0xfc, 0xff, // 92010: jmp 59060 - }, - rip: 0x92000, - expected: 0x3fe5ac, - }, - { - name: "3.10.16 clang16 disable-optimizations disable-shared", - code: []byte{ - 0x48, 0x8d, 0x05, 0x69, 0x19, 0x21, 0x00, // 129bc0: lea rax,[rip+0x211969] # 33b530 <_PyRuntime> - 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 129bc7: cmp QWORD PTR [rax+0x240],0x0 - 0x00, // 129bce: - 0x74, 0x11, // 129bcf: je 129be2 - 0xbf, 
0x48, 0x02, 0x00, 0x00, // 129bd1: mov edi,0x248 - 0x48, 0x03, 0x3d, 0x53, 0x03, 0x1e, 0x00, // 129bd6: add rdi,QWORD PTR [rip+0x1e0353] # 309f30 <_DYNAMIC+0x328> - 0xe9, 0x8e, 0xec, 0x00, 0x00, // 129bdd: jmp 138870 - }, - rip: 0x129bc0, - expected: 0x248, - }, - { - name: "3.12.8 clang16 disable-optimizations disable-shared", - code: []byte{ - 0x53, // 2a20d0: push rbx - 0xbb, 0x08, 0x06, 0x00, 0x00, // 2a20d1: mov ebx,0x608 - 0x48, 0x03, 0x1d, 0x0b, 0x1e, 0x25, 0x00, // 2a20d6: add rbx,QWORD PTR [rip+0x251e0b] # 4f3ee8 <_DYNAMIC+0x368> - 0x48, 0x89, 0xdf, // 2a20dd: mov rdi,rbx - 0xe8, 0x7b, 0x41, 0x01, 0x00, // 2a20e0: call 2b6260 - }, - rip: 0x2a20d0, - expected: 0x608, - }, - { - name: "3.10.16 clang16 disable-optimizations enabled-shared", - code: []byte{ - 0xf3, 0x0f, 0x1e, 0xfa, // 2079c0: endbr64 - 0x48, 0x8b, 0x05, 0x65, 0x03, 0x18, 0x00, // 2079c4: mov rax,QWORD PTR [rip+0x180365] # 387d30 <_PyRuntime@@Base-0x34950> - 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 2079cb: cmp QWORD PTR [rax+0x240],0x0 - 0x00, // 2079d2: - 0x74, 0x13, // 2079d3: je 2079e8 - 0x48, 0x8d, 0xb8, 0x48, 0x02, 0x00, 0x00, // 2079d5: lea rdi,[rax+0x248] - 0xe9, 0x8f, 0x1f, 0xe6, 0xff, // 2079dc: jmp 69970 - }, - rip: 0x2079c0, - expected: 0x248, - }, - { - name: "3.12.8 gcc12 disable-optimizations disable-shared", - code: []byte{ - 0xf3, 0x0f, 0x1e, 0xfa, // 2eb960: endbr64 - 0x53, // 2eb964: push rbx - 0x48, 0x8d, 0x1d, 0xbc, 0x21, 0x37, 0x00, // 2eb965: lea rbx,[rip+0x3721bc] # 65db28 <_PyRuntime+0x608> - 0x48, 0x89, 0xdf, // 2eb96c: mov rdi,rbx - 0xe8, 0x0c, 0x7f, 0x01, 0x00, // 2eb96f: call 303880 - }, - rip: 0x2eb960, - expected: 0x65db28, - }, - { - name: "3.10.16 gcc12 enable-optimizations enabled-shared", - code: []byte{ - 0xf3, 0x0f, 0x1e, 0xfa, // 1c03c0: endbr64 - 0x48, 0x8b, 0x05, 0x5d, 0x69, 0x1b, 0x00, // 1c03c4: mov rax,QWORD PTR [rip+0x1b695d] # 376d28 <_PyRuntime@@Base-0x32758> - 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1c03cb: cmp QWORD PTR [rax+0x240],0x0 - 
0x00, // 1c03d2: - 0x74, 0x0b, // 1c03d3: je 1c03e0 - 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // 1c03d5: mov edi,DWORD PTR [rax+0x24c] - 0xe9, 0x10, 0xb4, 0xe9, 0xff, // 1c03db: jmp 5b7f0 - }, - rip: 0x1c03c0, - expected: 0x24c, - }, - } - - for _, td := range testdata { - t.Run(td.name, func(t *testing.T) { - val := decodeStubArgumentAMD64( - td.code, - td.rip, - 0, // NULL pointer as mem - ) - assert.Equal(t, td.expected, val) - }) - } -} - -func FuzzDecodeAmd(f *testing.F) { - f.Fuzz(func(_ *testing.T, code []byte, rip uint64) { - decodeStubArgumentAMD64(code, rip, 0) - }) -} diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index 96ae9dabb..511b2b0ab 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -42,3 +42,196 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0, 0, 0) assert.Equal(t, libpf.SymbolValue(604), val, "PyGILState_GetThisThreadState test") } + +func BenchmarkDecodeAmd64(b *testing.B) { + for i := 0; i < b.N; i++ { + code := []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 + 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> + 0x00, // 1bbbab: + 0x74, 0x0b, // 1bbbac: je 1bbbb9 + 0x8b, 0x3d, 0x78, 0x90, 0x1e, 0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> + 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 + } + rip := uint64(0x1bbba0) + val := decodeStubArgumentAMD64( + code, + rip, + 0, + ) + if val != 0x3a4c2c { + b.Fail() + } + } +} + +func TestAmd64DecodeStub(t *testing.T) { + testdata := []struct { + name string + code []byte + rip uint64 + expected uint64 + }{ + { + name: "3.10.16 gcc12 enable-optimizations disable-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 1bbba0: endbr64 + 0x48, 0x83, 0x3d, 0x74, 0x90, 0x1e, 0x00, // 1bbba4: cmp QWORD PTR [rip+0x1e9074],0x0 # 3a4c20 <_PyRuntime+0x240> + 0x00, // 1bbbab: + 0x74, 0x0b, // 1bbbac: je 1bbbb9 + 0x8b, 0x3d, 0x78, 0x90, 0x1e, 
0x00, // 1bbbae: mov edi,DWORD PTR [rip+0x1e9078] # 3a4c2c <_PyRuntime+0x24c> + 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 + }, + rip: 0x1bbba0, + expected: 0x3a4c2c, + }, + { + name: "3.10.16 gcc12 disable-optimizations disable-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 172e50: endbr64 + 0x48, 0x83, 0x3d, 0x04, 0xef, 0x24, 0x00, // 172e54: cmp QWORD PTR [rip+0x24ef04],0x0 # 3c1d60 <_PyRuntime+0x240> + 0x00, // 172e5b: + 0x74, 0x12, // 172e5c: je 172e70 + 0x48, 0x8d, 0x3d, 0x03, 0xef, 0x24, 0x00, // 172e5e: lea rdi,[rip+0x24ef03] # 3c1d68 <_PyRuntime+0x248> + 0xe9, 0x86, 0x1e, 0x01, 0x00, // 172e65: jmp 184cf0 + }, + rip: 0x172e50, + expected: 0x3c1d68, + }, + { + name: "3.10.16 clang16 disable-optimizations enabled-shared", + code: []byte{ + 0x48, 0x8b, 0x05, 0x99, 0x70, 0x16, 0x00, // 1adc90: mov rax,QWORD PTR [rip+0x167099] # 314d30 <_PyRuntime@@Base-0x33668> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1adc97: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 1adc9e: + 0x74, 0x11, // 1adc9f: je 1adcb2 + 0xbf, 0x48, 0x02, 0x00, 0x00, // 1adca1: mov edi,0x248 + 0x48, 0x03, 0x3d, 0x83, 0x70, 0x16, 0x00, // 1adca6: add rdi,QWORD PTR [rip+0x167083] # 314d30 <_PyRuntime@@Base-0x33668> + 0xe9, 0x2e, 0x41, 0xeb, 0xff, // 1adcad: jmp 61de0 + }, + rip: 0x1adc90, + expected: 0x248, + }, + { + name: "3.12.8 gcc12 disable-optimizations enabled-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 2e25d0: endbr64 + 0x48, 0x8b, 0x05, 0x25, 0x27, 0x27, 0x00, // 2e25d4: mov rax,QWORD PTR [rip+0x272725] # 554d00 <_PyRuntime@@Base-0x1004e0> + 0x53, // 2e25db: push rbx + 0x48, 0x8d, 0x98, 0x08, 0x06, 0x00, 0x00, // 2e25dc: lea rbx,[rax+0x608] + 0x48, 0x89, 0xdf, // 2e25e3: mov rdi,rbx + 0xe8, 0x95, 0x78, 0xe2, 0xff, // 2e25e6: call 109e80 + }, + rip: 0x2e25d0, + expected: 0x608, + }, + { + name: "3.10.16 clang18 enable-optimizations enabled-shared", + code: []byte{ + 0x48, 0x8b, 0x05, 0xd9, 0x80, 0x31, 0x00, // cac50: mov rax,QWORD PTR [rip+0x3180d9] # 3e2d30 
<_PyRuntime@@Base-0x32c28> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // cac57: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // cac5e: + 0x74, 0x0b, // cac5f: je cac6c + 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // cac61: mov edi,DWORD PTR [rax+0x24c] + 0xe9, 0x24, 0x55, 0xf9, 0xff, // cac67: jmp 60190 + }, + rip: 0xcac50, + expected: 0x24c, + }, + { + name: "3.10.16 clang18 enable-optimizations disable-shared", + code: []byte{ + 0x48, 0x83, 0x3d, 0x98, 0xc5, 0x36, 0x00, // 92000: cmp QWORD PTR [rip+0x36c598],0x0 # 3fe5a0 <_PyRuntime+0x240> + 0x00, // 92007: + 0x74, 0x0b, // 92008: je 92015 + 0x8b, 0x3d, 0x9c, 0xc5, 0x36, 0x00, // 9200a: mov edi,DWORD PTR [rip+0x36c59c] # 3fe5ac <_PyRuntime+0x24c> + 0xe9, 0x4b, 0x70, 0xfc, 0xff, // 92010: jmp 59060 + }, + rip: 0x92000, + expected: 0x3fe5ac, + }, + { + name: "3.10.16 clang16 disable-optimizations disable-shared", + code: []byte{ + 0x48, 0x8d, 0x05, 0x69, 0x19, 0x21, 0x00, // 129bc0: lea rax,[rip+0x211969] # 33b530 <_PyRuntime> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 129bc7: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 129bce: + 0x74, 0x11, // 129bcf: je 129be2 + 0xbf, 0x48, 0x02, 0x00, 0x00, // 129bd1: mov edi,0x248 + 0x48, 0x03, 0x3d, 0x53, 0x03, 0x1e, 0x00, // 129bd6: add rdi,QWORD PTR [rip+0x1e0353] # 309f30 <_DYNAMIC+0x328> + 0xe9, 0x8e, 0xec, 0x00, 0x00, // 129bdd: jmp 138870 + }, + rip: 0x129bc0, + expected: 0x248, + }, + { + name: "3.12.8 clang16 disable-optimizations disable-shared", + code: []byte{ + 0x53, // 2a20d0: push rbx + 0xbb, 0x08, 0x06, 0x00, 0x00, // 2a20d1: mov ebx,0x608 + 0x48, 0x03, 0x1d, 0x0b, 0x1e, 0x25, 0x00, // 2a20d6: add rbx,QWORD PTR [rip+0x251e0b] # 4f3ee8 <_DYNAMIC+0x368> + 0x48, 0x89, 0xdf, // 2a20dd: mov rdi,rbx + 0xe8, 0x7b, 0x41, 0x01, 0x00, // 2a20e0: call 2b6260 + }, + rip: 0x2a20d0, + expected: 0x608, + }, + { + name: "3.10.16 clang16 disable-optimizations enabled-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 2079c0: endbr64 + 0x48, 0x8b, 0x05, 0x65, 0x03, 0x18, 0x00, // 2079c4: 
mov rax,QWORD PTR [rip+0x180365] # 387d30 <_PyRuntime@@Base-0x34950> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 2079cb: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 2079d2: + 0x74, 0x13, // 2079d3: je 2079e8 + 0x48, 0x8d, 0xb8, 0x48, 0x02, 0x00, 0x00, // 2079d5: lea rdi,[rax+0x248] + 0xe9, 0x8f, 0x1f, 0xe6, 0xff, // 2079dc: jmp 69970 + }, + rip: 0x2079c0, + expected: 0x248, + }, + { + name: "3.12.8 gcc12 disable-optimizations disable-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 2eb960: endbr64 + 0x53, // 2eb964: push rbx + 0x48, 0x8d, 0x1d, 0xbc, 0x21, 0x37, 0x00, // 2eb965: lea rbx,[rip+0x3721bc] # 65db28 <_PyRuntime+0x608> + 0x48, 0x89, 0xdf, // 2eb96c: mov rdi,rbx + 0xe8, 0x0c, 0x7f, 0x01, 0x00, // 2eb96f: call 303880 + }, + rip: 0x2eb960, + expected: 0x65db28, + }, + { + name: "3.10.16 gcc12 enable-optimizations enabled-shared", + code: []byte{ + 0xf3, 0x0f, 0x1e, 0xfa, // 1c03c0: endbr64 + 0x48, 0x8b, 0x05, 0x5d, 0x69, 0x1b, 0x00, // 1c03c4: mov rax,QWORD PTR [rip+0x1b695d] # 376d28 <_PyRuntime@@Base-0x32758> + 0x48, 0x83, 0xb8, 0x40, 0x02, 0x00, 0x00, // 1c03cb: cmp QWORD PTR [rax+0x240],0x0 + 0x00, // 1c03d2: + 0x74, 0x0b, // 1c03d3: je 1c03e0 + 0x8b, 0xb8, 0x4c, 0x02, 0x00, 0x00, // 1c03d5: mov edi,DWORD PTR [rax+0x24c] + 0xe9, 0x10, 0xb4, 0xe9, 0xff, // 1c03db: jmp 5b7f0 + }, + rip: 0x1c03c0, + expected: 0x24c, + }, + } + + for _, td := range testdata { + t.Run(td.name, func(t *testing.T) { + val := decodeStubArgumentAMD64( + td.code, + td.rip, + 0, // NULL pointer as mem + ) + assert.Equal(t, td.expected, val) + }) + } +} + +func FuzzDecodeAmd(f *testing.F) { + f.Fuzz(func(_ *testing.T, code []byte, rip uint64) { + decodeStubArgumentAMD64(code, rip, 0) + }) +} From 2bd6bdd7c01fa43560107262ee2ffb5a43f979aa Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 16:47:17 +0700 Subject: [PATCH 11/26] extract regs state into a new amd package --- asm/amd/regs_state.go | 50 +++++++++++++++++++++ interpreter/python/decode_amd64.go | 70 
+++++++----------------------- 2 files changed, 66 insertions(+), 54 deletions(-) create mode 100644 asm/amd/regs_state.go diff --git a/asm/amd/regs_state.go b/asm/amd/regs_state.go new file mode 100644 index 000000000..22f404fdc --- /dev/null +++ b/asm/amd/regs_state.go @@ -0,0 +1,50 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" + +import "golang.org/x/arch/x86/x86asm" + +// regIndex returns index into RegsState.regs +func regIndex(reg x86asm.Reg) int { + switch reg { + case x86asm.RAX, x86asm.EAX: + return 1 + case x86asm.RBX, x86asm.EBX: + return 2 + case x86asm.RCX, x86asm.ECX: + return 3 + case x86asm.RDX, x86asm.EDX: + return 4 + case x86asm.RDI, x86asm.EDI: + return 5 + case x86asm.RSI, x86asm.ESI: + return 6 + case x86asm.RBP, x86asm.EBP: + return 7 + case x86asm.RSP, x86asm.ESP: + return 8 + case x86asm.RIP: + return 9 + default: + return 0 + } +} + +type RegsState struct { + regs [10]regState +} + +func (r *RegsState) Set(reg x86asm.Reg, value, loadedFrom uint64) { + r.regs[regIndex(reg)].Value = value + r.regs[regIndex(reg)].LoadedFrom = loadedFrom +} + +func (r *RegsState) Get(reg x86asm.Reg) (value, loadedFrom uint64) { + return r.regs[regIndex(reg)].Value, r.regs[regIndex(reg)].LoadedFrom +} + +type regState struct { + LoadedFrom uint64 + Value uint64 +} diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go index 91a03e666..486524c72 100644 --- a/interpreter/python/decode_amd64.go +++ b/interpreter/python/decode_amd64.go @@ -4,45 +4,16 @@ package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( + "go.opentelemetry.io/ebpf-profiler/asm/amd" "go.opentelemetry.io/ebpf-profiler/libpf" "golang.org/x/arch/x86/x86asm" ) -func regIndex(reg x86asm.Reg) int { - switch reg { - case x86asm.RAX, x86asm.EAX: - return 1 - case x86asm.RBX, x86asm.EBX: - return 2 - case x86asm.RCX, x86asm.ECX: - return 
3 - case x86asm.RDX, x86asm.EDX: - return 4 - case x86asm.RDI, x86asm.EDI: - return 5 - case x86asm.RSI, x86asm.ESI: - return 6 - case x86asm.RBP, x86asm.EBP: - return 7 - case x86asm.RSP, x86asm.ESP: - return 8 - case x86asm.RIP: - return 9 - default: - return 0 - } -} - -type regState struct { - loadedFrom uint64 - value uint64 -} - func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 { targetRegister := x86asm.RDI instructionOffset := 0 - regs := [32]regState{} + regs := amd.RegsState{} for instructionOffset < len(code) { rem := code[instructionOffset:] @@ -61,19 +32,18 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 } instructionOffset += inst.Len - regs[regIndex(x86asm.RIP)].value = codeAddress + uint64(instructionOffset) + regs.Set(x86asm.RIP, codeAddress+uint64(instructionOffset), 0) if inst.Op == x86asm.CALL || inst.Op == x86asm.JMP { - targetRegIdx := regIndex(targetRegister) - if regs[targetRegIdx].loadedFrom != 0 { - return regs[targetRegIdx].loadedFrom + value, loadedFrom := regs.Get(targetRegister) + if loadedFrom != 0 { + return loadedFrom } - return regs[targetRegIdx].value + return value } if (inst.Op == x86asm.LEA || inst.Op == x86asm.MOV) && inst.Args[0] != nil { if reg, ok := inst.Args[0].(x86asm.Reg); ok { - regIdx := regIndex(reg) var value uint64 var loadedFrom uint64 @@ -81,14 +51,9 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 case x86asm.Imm: value = uint64(src) case x86asm.Mem: - baseReg := src.Base - baseAddr := regs[regIndex(baseReg)].value + baseAddr, _ := regs.Get(src.Base) displacement := uint64(src.Disp) - if baseReg == x86asm.RIP { - baseAddr = codeAddress + uint64(instructionOffset) - } - if inst.Op == x86asm.MOV { value = memoryBase loadedFrom = baseAddr + displacement @@ -96,28 +61,25 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 value = baseAddr + displacement } - if src.Index != 0 { - 
indexValue := regs[regIndex(src.Index)].value + if src.Index != 0 { // todo this is dead code according to test coverage, need a test or remove this + indexValue, _ := regs.Get(src.Index) value += indexValue * uint64(src.Scale) } case x86asm.Reg: - value = regs[regIndex(src)].value + value, _ = regs.Get(src) } - regs[regIdx].value = value - regs[regIdx].loadedFrom = loadedFrom + regs.Set(reg, value, loadedFrom) } } if inst.Op == x86asm.ADD && inst.Args[0] != nil && inst.Args[1] != nil { - if reg, ok := inst.Args[0].(x86asm.Reg); ok { - if _, ok := inst.Args[1].(x86asm.Mem); ok { - regIdx := regIndex(reg) - oldValue := regs[regIdx].value + if reg, ok0 := inst.Args[0].(x86asm.Reg); ok0 { + if _, ok1 := inst.Args[1].(x86asm.Mem); ok1 { + oldValue, _ := regs.Get(reg) value := oldValue + memoryBase - regs[regIdx].value = value - regs[regIdx].loadedFrom = 0 + regs.Set(reg, value, 0) } } } From d0c1b8c97edb7551ac1d4376f5fe34ef3bc50bbd Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 16:55:57 +0700 Subject: [PATCH 12/26] extract endbr64 to amd package --- asm/amd/insn.go | 12 ++++++++++++ asm/amd/insn_test.go | 15 +++++++++++++++ interpreter/python/decode_amd64.go | 12 ++++-------- 3 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 asm/amd/insn.go create mode 100644 asm/amd/insn_test.go diff --git a/asm/amd/insn.go b/asm/amd/insn.go new file mode 100644 index 000000000..21da8ef63 --- /dev/null +++ b/asm/amd/insn.go @@ -0,0 +1,12 @@ +package amd + +func IsEndbr64(code []byte) (bool, int) { + if len(code) >= 4 && + code[0] == 0xf3 && + code[1] == 0x0f && + code[2] == 0x1e && + code[3] == 0xfa { + return true, 4 + } + return false, 0 +} diff --git a/asm/amd/insn_test.go b/asm/amd/insn_test.go new file mode 100644 index 000000000..f36afd745 --- /dev/null +++ b/asm/amd/insn_test.go @@ -0,0 +1,15 @@ +package amd + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestEndBr64(t *testing.T) { + res, n := 
IsEndbr64([]byte{0xF3, 0x0F, 0x1E, 0xFA}) + assert.True(t, res) + assert.Equal(t, 4, n) + + res, _ = IsEndbr64([]byte{}) + assert.False(t, res) +} diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go index 486524c72..b38d2c1ca 100644 --- a/interpreter/python/decode_amd64.go +++ b/interpreter/python/decode_amd64.go @@ -17,18 +17,14 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 for instructionOffset < len(code) { rem := code[instructionOffset:] - if len(rem) >= 4 && - code[instructionOffset] == 0xf3 && - code[instructionOffset+1] == 0x0f && - code[instructionOffset+2] == 0x1e && - code[instructionOffset+3] == 0xfa { - instructionOffset += 4 + if endbr64, insnLen := amd.IsEndbr64(rem); endbr64 { + instructionOffset += insnLen continue } inst, err := x86asm.Decode(rem, 64) if err != nil { // todo return the error - break + break // todo cover this } instructionOffset += inst.Len @@ -84,7 +80,7 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 } } } - return 0 + return 0 // todo cover this } func decodeStubArgumentWrapper( From 9406305ff9776a1b51ebe3386c3beb6cc079206e Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 16:57:00 +0700 Subject: [PATCH 13/26] fmt --- asm/amd/insn.go | 5 ++++- asm/amd/insn_test.go | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/asm/amd/insn.go b/asm/amd/insn.go index 21da8ef63..e54533043 100644 --- a/asm/amd/insn.go +++ b/asm/amd/insn.go @@ -1,4 +1,7 @@ -package amd +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" func IsEndbr64(code []byte) (bool, int) { if len(code) >= 4 && diff --git a/asm/amd/insn_test.go b/asm/amd/insn_test.go index f36afd745..43994dfa9 100644 --- a/asm/amd/insn_test.go +++ b/asm/amd/insn_test.go @@ -1,3 +1,6 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: 
Apache-2.0 + package amd import ( From 507bfd5cbf93a329570d53602e72f5bf98732ae5 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 17:10:18 +0700 Subject: [PATCH 14/26] get rid of platform specific decode files --- interpreter/python/decode.go | 97 ++++++++++++++++++++++++++++++ interpreter/python/decode_amd64.go | 97 ------------------------------ interpreter/python/decode_arm64.go | 15 ----- interpreter/python/decode_test.go | 6 +- interpreter/python/python.go | 12 ++-- 5 files changed, 106 insertions(+), 121 deletions(-) delete mode 100644 interpreter/python/decode_amd64.go delete mode 100644 interpreter/python/decode_arm64.go diff --git a/interpreter/python/decode.go b/interpreter/python/decode.go index 9412aa594..a2cbd8711 100644 --- a/interpreter/python/decode.go +++ b/interpreter/python/decode.go @@ -4,8 +4,13 @@ package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( + "errors" + "fmt" ah "go.opentelemetry.io/ebpf-profiler/armhelpers" + "go.opentelemetry.io/ebpf-profiler/asm/amd" aa "golang.org/x/arch/arm64/arm64asm" + "golang.org/x/arch/x86/x86asm" + "runtime" "go.opentelemetry.io/ebpf-profiler/libpf" ) @@ -100,3 +105,95 @@ func decodeStubArgumentWrapperARM64(code []byte, argNumber uint8, _, return libpf.SymbolValueInvalid } + +func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) (libpf.SymbolValue, error) { + targetRegister := x86asm.RDI + + instructionOffset := 0 + regs := amd.RegsState{} + + for instructionOffset < len(code) { + rem := code[instructionOffset:] + if endbr64, insnLen := amd.IsEndbr64(rem); endbr64 { + instructionOffset += insnLen + continue + } + + inst, err := x86asm.Decode(rem, 64) + if err != nil { // todo return the error + return 0, fmt.Errorf("insn @ 0x%x failed: %w", + instructionOffset, err) + } + + instructionOffset += inst.Len + regs.Set(x86asm.RIP, codeAddress+uint64(instructionOffset), 0) + + if inst.Op == x86asm.CALL || inst.Op == x86asm.JMP { + value, 
loadedFrom := regs.Get(targetRegister) + if loadedFrom != 0 { + return libpf.SymbolValue(loadedFrom), nil + } + return libpf.SymbolValue(value), nil + } + + if (inst.Op == x86asm.LEA || inst.Op == x86asm.MOV) && inst.Args[0] != nil { + if reg, ok := inst.Args[0].(x86asm.Reg); ok { + var value uint64 + var loadedFrom uint64 + + switch src := inst.Args[1].(type) { + case x86asm.Imm: + value = uint64(src) + case x86asm.Mem: + baseAddr, _ := regs.Get(src.Base) + displacement := uint64(src.Disp) + + if inst.Op == x86asm.MOV { + value = memoryBase + loadedFrom = baseAddr + displacement + } else if inst.Op == x86asm.LEA { + value = baseAddr + displacement + } + + if src.Index != 0 { // todo this is dead code according to test coverage, need a test or remove this + indexValue, _ := regs.Get(src.Index) + value += indexValue * uint64(src.Scale) + } + + case x86asm.Reg: + value, _ = regs.Get(src) + } + + regs.Set(reg, value, loadedFrom) + } + } + + if inst.Op == x86asm.ADD && inst.Args[0] != nil && inst.Args[1] != nil { + if reg, ok0 := inst.Args[0].(x86asm.Reg); ok0 { + if _, ok1 := inst.Args[1].(x86asm.Mem); ok1 { + oldValue, _ := regs.Get(reg) + value := oldValue + memoryBase + regs.Set(reg, value, 0) + } + } + } + } + return 0, errors.New("no call/jump instructions found") // todo cover this +} + +func decodeStubArgumentWrapper( + code []byte, + codeAddress libpf.SymbolValue, + memoryBase libpf.SymbolValue, +) (libpf.SymbolValue, error) { + if len(code) == 0 { + return libpf.SymbolValueInvalid, errors.New("empty code") + } + if runtime.GOARCH == "arm64" { + return decodeStubArgumentWrapperARM64(code, 0, codeAddress, memoryBase), nil + } + if runtime.GOARCH == "amd64" { + return decodeStubArgumentAMD64(code, uint64(codeAddress), uint64(memoryBase)) + } + return libpf.SymbolValueInvalid, fmt.Errorf("unsupported arch %s", runtime.GOARCH) +} diff --git a/interpreter/python/decode_amd64.go b/interpreter/python/decode_amd64.go deleted file mode 100644 index b38d2c1ca..000000000 
--- a/interpreter/python/decode_amd64.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" - -import ( - "go.opentelemetry.io/ebpf-profiler/asm/amd" - "go.opentelemetry.io/ebpf-profiler/libpf" - "golang.org/x/arch/x86/x86asm" -) - -func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) uint64 { - targetRegister := x86asm.RDI - - instructionOffset := 0 - regs := amd.RegsState{} - - for instructionOffset < len(code) { - rem := code[instructionOffset:] - if endbr64, insnLen := amd.IsEndbr64(rem); endbr64 { - instructionOffset += insnLen - continue - } - - inst, err := x86asm.Decode(rem, 64) - if err != nil { // todo return the error - break // todo cover this - } - - instructionOffset += inst.Len - regs.Set(x86asm.RIP, codeAddress+uint64(instructionOffset), 0) - - if inst.Op == x86asm.CALL || inst.Op == x86asm.JMP { - value, loadedFrom := regs.Get(targetRegister) - if loadedFrom != 0 { - return loadedFrom - } - return value - } - - if (inst.Op == x86asm.LEA || inst.Op == x86asm.MOV) && inst.Args[0] != nil { - if reg, ok := inst.Args[0].(x86asm.Reg); ok { - var value uint64 - var loadedFrom uint64 - - switch src := inst.Args[1].(type) { - case x86asm.Imm: - value = uint64(src) - case x86asm.Mem: - baseAddr, _ := regs.Get(src.Base) - displacement := uint64(src.Disp) - - if inst.Op == x86asm.MOV { - value = memoryBase - loadedFrom = baseAddr + displacement - } else if inst.Op == x86asm.LEA { - value = baseAddr + displacement - } - - if src.Index != 0 { // todo this is dead code according to test coverage, need a test or remove this - indexValue, _ := regs.Get(src.Index) - value += indexValue * uint64(src.Scale) - } - - case x86asm.Reg: - value, _ = regs.Get(src) - } - - regs.Set(reg, value, loadedFrom) - } - } - - if inst.Op == x86asm.ADD && inst.Args[0] != nil && inst.Args[1] != nil { - if reg, ok0 := 
inst.Args[0].(x86asm.Reg); ok0 { - if _, ok1 := inst.Args[1].(x86asm.Mem); ok1 { - oldValue, _ := regs.Get(reg) - value := oldValue + memoryBase - regs.Set(reg, value, 0) - } - } - } - } - return 0 // todo cover this -} - -func decodeStubArgumentWrapper( - code []byte, - codeAddress libpf.SymbolValue, - memoryBase libpf.SymbolValue, -) libpf.SymbolValue { - if len(code) == 0 { - return 0 - } - return libpf.SymbolValue( - decodeStubArgumentAMD64(code, uint64(codeAddress), uint64(memoryBase)), - ) -} diff --git a/interpreter/python/decode_arm64.go b/interpreter/python/decode_arm64.go deleted file mode 100644 index 749312f66..000000000 --- a/interpreter/python/decode_arm64.go +++ /dev/null @@ -1,15 +0,0 @@ -//go:build arm64 - -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" - -import ( - "go.opentelemetry.io/ebpf-profiler/libpf" -) - -func decodeStubArgumentWrapper(code []byte, symbolValue, - addrBase libpf.SymbolValue) libpf.SymbolValue { - return decodeStubArgumentWrapperARM64(code, 0, symbolValue, addrBase) -} diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index 511b2b0ab..21867d351 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -54,7 +54,7 @@ func BenchmarkDecodeAmd64(b *testing.B) { 0xe9, 0xe7, 0xea, 0xe9, 0xff, // 1bbbb4: jmp 5a6a0 } rip := uint64(0x1bbba0) - val := decodeStubArgumentAMD64( + val, _ := decodeStubArgumentAMD64( code, rip, 0, @@ -220,12 +220,12 @@ func TestAmd64DecodeStub(t *testing.T) { for _, td := range testdata { t.Run(td.name, func(t *testing.T) { - val := decodeStubArgumentAMD64( + val, _ := decodeStubArgumentAMD64( td.code, td.rip, 0, // NULL pointer as mem ) - assert.Equal(t, td.expected, val) + assert.Equal(t, td.expected, uint64(val)) }) } } diff --git a/interpreter/python/python.go b/interpreter/python/python.go index 063ee65a4..bfa04a9cb 100644 
--- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -669,15 +669,15 @@ func decodeStub( code := make([]byte, 64) if _, err := ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { - return libpf.SymbolValueInvalid, fmt.Errorf("reading %s %x code failed: %w", + return libpf.SymbolValueInvalid, fmt.Errorf("reading %s 0x%x code failed: %w", symbolName, codeAddress, err) } - value := decodeStubArgumentWrapper(code, codeAddress, memoryBase) + value, err := decodeStubArgumentWrapper(code, codeAddress, memoryBase) // Sanity check the value range and alignment - if value%4 != 0 { - return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s %x %s failed (%x)", - symbolName, codeAddress, hex.Dump(code), value) + if err != nil || value%4 != 0 { + return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s 0x%x %s failed (0x%x, %w)", + symbolName, codeAddress, hex.Dump(code), value, err) } // If base symbol (_PyRuntime) is not provided, accept any found value. if memoryBase == 0 && value != 0 { @@ -687,7 +687,7 @@ func decodeStub( if value > memoryBase && value < memoryBase+4096 { return value, nil } - return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s %x %s failed (%x)", + return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s 0x%x %s failed (0x%x)", symbolName, codeAddress, hex.Dump(code), value) } From f568cda505670aed44706b9a50167b73d01bf5f6 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 17:27:31 +0700 Subject: [PATCH 15/26] lint --- asm/amd/insn.go | 2 +- asm/amd/insn_test.go | 3 +- interpreter/python/decode.go | 21 +++++++------ interpreter/python/decode_test.go | 50 +++++++++++++++++++++++++------ interpreter/python/python.go | 2 +- 5 files changed, 55 insertions(+), 23 deletions(-) diff --git a/asm/amd/insn.go b/asm/amd/insn.go index e54533043..11fef77a8 100644 --- a/asm/amd/insn.go +++ b/asm/amd/insn.go @@ -3,7 +3,7 @@ package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" -func 
IsEndbr64(code []byte) (bool, int) { +func IsEndbr64(code []byte) (isEndbr bool, size int) { if len(code) >= 4 && code[0] == 0xf3 && code[1] == 0x0f && diff --git a/asm/amd/insn_test.go b/asm/amd/insn_test.go index 43994dfa9..f7cacd00a 100644 --- a/asm/amd/insn_test.go +++ b/asm/amd/insn_test.go @@ -4,8 +4,9 @@ package amd import ( - "github.com/stretchr/testify/assert" "testing" + + "github.com/stretchr/testify/assert" ) func TestEndBr64(t *testing.T) { diff --git a/interpreter/python/decode.go b/interpreter/python/decode.go index a2cbd8711..d76736324 100644 --- a/interpreter/python/decode.go +++ b/interpreter/python/decode.go @@ -6,19 +6,20 @@ package python // import "go.opentelemetry.io/ebpf-profiler/interpreter/python" import ( "errors" "fmt" + "runtime" + ah "go.opentelemetry.io/ebpf-profiler/armhelpers" "go.opentelemetry.io/ebpf-profiler/asm/amd" + "go.opentelemetry.io/ebpf-profiler/libpf" aa "golang.org/x/arch/arm64/arm64asm" "golang.org/x/arch/x86/x86asm" - "runtime" - - "go.opentelemetry.io/ebpf-profiler/libpf" ) // decodeStubArgumentWrapperARM64 disassembles arm64 code and decodes the assumed value // of requested argument. -func decodeStubArgumentWrapperARM64(code []byte, argNumber uint8, _, +func decodeStubArgumentWrapperARM64(code []byte, addrBase libpf.SymbolValue) libpf.SymbolValue { + const argNumber uint8 = 0 // The concept is to track the latest load offset for all X0..X30 registers. // These registers are used as the function arguments. 
Once the first branch // instruction (function call/tail jump) is found, the state of the requested @@ -106,7 +107,8 @@ func decodeStubArgumentWrapperARM64(code []byte, argNumber uint8, _, return libpf.SymbolValueInvalid } -func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) (libpf.SymbolValue, error) { +func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) ( + libpf.SymbolValue, error) { targetRegister := x86asm.RDI instructionOffset := 0 @@ -121,7 +123,7 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) (libpf inst, err := x86asm.Decode(rem, 64) if err != nil { // todo return the error - return 0, fmt.Errorf("insn @ 0x%x failed: %w", + return 0, fmt.Errorf("failed to decode instruction at 0x%x : %w", instructionOffset, err) } @@ -155,7 +157,7 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) (libpf value = baseAddr + displacement } - if src.Index != 0 { // todo this is dead code according to test coverage, need a test or remove this + if src.Index != 0 { // todo cover this indexValue, _ := regs.Get(src.Index) value += indexValue * uint64(src.Scale) } @@ -186,11 +188,8 @@ func decodeStubArgumentWrapper( codeAddress libpf.SymbolValue, memoryBase libpf.SymbolValue, ) (libpf.SymbolValue, error) { - if len(code) == 0 { - return libpf.SymbolValueInvalid, errors.New("empty code") - } if runtime.GOARCH == "arm64" { - return decodeStubArgumentWrapperARM64(code, 0, codeAddress, memoryBase), nil + return decodeStubArgumentWrapperARM64(code, memoryBase), nil } if runtime.GOARCH == "amd64" { return decodeStubArgumentAMD64(code, uint64(codeAddress), uint64(memoryBase)) diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index 21867d351..e5a65e743 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -1,12 +1,14 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +//nolint:lll package 
python import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.opentelemetry.io/ebpf-profiler/libpf" ) @@ -16,7 +18,7 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0x40, 0x0a, 0x00, 0x90, 0x01, 0xd4, 0x43, 0xf9, 0x22, 0x60, 0x17, 0x91, 0x40, 0x00, 0x40, 0xf9, 0xa2, 0xff, 0xff, 0x17}, - 0, 0, 0) + 0) assert.Equal(t, libpf.SymbolValue(1496), val, "PyEval_ReleaseLock stub test") val = decodeStubArgumentWrapperARM64( @@ -24,7 +26,7 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0x80, 0x12, 0x00, 0xb0, 0x02, 0xd4, 0x43, 0xf9, 0x41, 0xf4, 0x42, 0xf9, 0x61, 0x00, 0x00, 0xb4, 0x40, 0xc0, 0x17, 0x91, 0xad, 0xe4, 0xfe, 0x17}, - 0, 0, 0) + 0) assert.Equal(t, libpf.SymbolValue(1520), val, "PyGILState_GetThisThreadState test") // Python 3.10.12 on ARM64 Nix @@ -39,7 +41,7 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0x00, 0x00, 0x80, 0xd2, // mov x0, #0x0 0xc0, 0x03, 0x5f, 0xd6, // ret }, - 0, 0, 0) + 0) assert.Equal(t, libpf.SymbolValue(604), val, "PyGILState_GetThisThreadState test") } @@ -67,10 +69,11 @@ func BenchmarkDecodeAmd64(b *testing.B) { func TestAmd64DecodeStub(t *testing.T) { testdata := []struct { - name string - code []byte - rip uint64 - expected uint64 + name string + code []byte + rip uint64 + expected uint64 + expectedError string }{ { name: "3.10.16 gcc12 enable-optimizations disable-shared", @@ -216,15 +219,41 @@ func TestAmd64DecodeStub(t *testing.T) { rip: 0x1c03c0, expected: 0x24c, }, + { + name: "empty code", + code: nil, + expectedError: "no call/jump instructions found", + }, + { + name: "no call/jump instructions found", + code: []byte{ + 0x48, 0xC7, 0xC7, 0xEF, 0xEF, 0xEF, 0x00, // mov rdi, 0xefefef + }, + expectedError: "no call/jump instructions found", + }, + { + name: "bad instruction", + code: []byte{ + 0x48, 0xC7, 0xC7, 0xEF, 0xEF, 0xEF, 0x00, // mov rdi, 0xefefef + 0xea, // :shrug: + }, + expectedError: "failed to decode instruction at 0x7", + }, } for _, td := range testdata { t.Run(td.name, func(t 
*testing.T) { - val, _ := decodeStubArgumentAMD64( + val, err := decodeStubArgumentAMD64( td.code, td.rip, 0, // NULL pointer as mem ) + if td.expectedError != "" { + require.Error(t, err) + require.Contains(t, err.Error(), td.expectedError) + } else { + require.NoError(t, err) + } assert.Equal(t, td.expected, uint64(val)) }) } @@ -232,6 +261,9 @@ func TestAmd64DecodeStub(t *testing.T) { func FuzzDecodeAmd(f *testing.F) { f.Fuzz(func(_ *testing.T, code []byte, rip uint64) { - decodeStubArgumentAMD64(code, rip, 0) + _, err := decodeStubArgumentAMD64(code, rip, 0) + if err != nil { + return + } }) } diff --git a/interpreter/python/python.go b/interpreter/python/python.go index bfa04a9cb..fc4efc0b4 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -668,7 +668,7 @@ func decodeStub( } code := make([]byte, 64) - if _, err := ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { + if _, err = ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { return libpf.SymbolValueInvalid, fmt.Errorf("reading %s 0x%x code failed: %w", symbolName, codeAddress, err) } From 576fdf4d46904f300549eb40f60c9fef46e24573 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 18:01:19 +0700 Subject: [PATCH 16/26] handle index-scale insns --- interpreter/python/decode.go | 19 +++++++++++-------- interpreter/python/decode_test.go | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/interpreter/python/decode.go b/interpreter/python/decode.go index d76736324..3c524fb7e 100644 --- a/interpreter/python/decode.go +++ b/interpreter/python/decode.go @@ -122,11 +122,11 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) ( } inst, err := x86asm.Decode(rem, 64) - if err != nil { // todo return the error + if err != nil { return 0, fmt.Errorf("failed to decode instruction at 0x%x : %w", instructionOffset, err) } - + fmt.Printf("insn %s\n", inst.String()) instructionOffset += inst.Len 
regs.Set(x86asm.RIP, codeAddress+uint64(instructionOffset), 0) @@ -153,13 +153,16 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) ( if inst.Op == x86asm.MOV { value = memoryBase loadedFrom = baseAddr + displacement + if src.Index != 0 { + indexValue, _ := regs.Get(src.Index) + loadedFrom += indexValue * uint64(src.Scale) + } } else if inst.Op == x86asm.LEA { value = baseAddr + displacement - } - - if src.Index != 0 { // todo cover this - indexValue, _ := regs.Get(src.Index) - value += indexValue * uint64(src.Scale) + if src.Index != 0 { + indexValue, _ := regs.Get(src.Index) + value += indexValue * uint64(src.Scale) + } } case x86asm.Reg: @@ -180,7 +183,7 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) ( } } } - return 0, errors.New("no call/jump instructions found") // todo cover this + return 0, errors.New("no call/jump instructions found") } func decodeStubArgumentWrapper( diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index e5a65e743..adc162fa2 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -239,6 +239,26 @@ func TestAmd64DecodeStub(t *testing.T) { }, expectedError: "failed to decode instruction at 0x7", }, + { + name: "synthetic mov scale index", + code: []byte{ + 0x48, 0xC7, 0xC0, 0xCA, 0xCA, 0x00, 0x00, // mov rax, 0xcaca + 0xBB, 0x00, 0x00, 0x00, 0x5E, // mov ebx, 0x5e000000 + 0x67, 0x48, 0x8B, 0x7C, 0x43, 0x05, // mov rdi, qword ptr [ebx + eax*2 + 5] + 0xEB, 0x00, // jmp 0x14 + }, + expected: 0xCACA*2 + 0x5E000000 + 5, + }, + { + name: "synthetic lea scale index", + code: []byte{ + 0x48, 0xC7, 0xC0, 0xFE, 0xCA, 0x00, 0x00, // mov rax, 0xcafe + 0xBB, 0x00, 0x00, 0x00, 0x6E, // mov ebx, 0x6e000000 + 0x67, 0x48, 0x8D, 0x7C, 0x43, 0x07, // lea rdi, [ebx + eax*2 + 7] + 0xE8, 0xFB, 0xFF, 0xFF, 0xFF, // call 0x12 + }, + expected: 0xCAFE*2 + 0x6E000000 + 7, + }, } for _, td := range testdata { From 
06daab695a43a2e2b59b06415aaf7f3e0de029ec Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 18:03:34 +0700 Subject: [PATCH 17/26] rm printf --- interpreter/python/decode.go | 1 - 1 file changed, 1 deletion(-) diff --git a/interpreter/python/decode.go b/interpreter/python/decode.go index 3c524fb7e..8684dabee 100644 --- a/interpreter/python/decode.go +++ b/interpreter/python/decode.go @@ -126,7 +126,6 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) ( return 0, fmt.Errorf("failed to decode instruction at 0x%x : %w", instructionOffset, err) } - fmt.Printf("insn %s\n", inst.String()) instructionOffset += inst.Len regs.Set(x86asm.RIP, codeAddress+uint64(instructionOffset), 0) From 6def09d8d72c2eac99fcb8380eddd6ca5b97bd9e Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 18:24:41 +0700 Subject: [PATCH 18/26] add coredump tests --- ..._sdk_502.0.0_slim_3.11.9_clang_18.1.8.json | 76 +++++++++++++++++++ .../amd64/pyenv_3.12.9_gcc_13.3.0.json | 66 ++++++++++++++++ .../amd64/pyenv_3.13.2_clang_21.0.0.json | 62 +++++++++++++++ 3 files changed, 204 insertions(+) create mode 100644 tools/coredump/testdata/amd64/gcloud_sdk_502.0.0_slim_3.11.9_clang_18.1.8.json create mode 100644 tools/coredump/testdata/amd64/pyenv_3.12.9_gcc_13.3.0.json create mode 100644 tools/coredump/testdata/amd64/pyenv_3.13.2_clang_21.0.0.json diff --git a/tools/coredump/testdata/amd64/gcloud_sdk_502.0.0_slim_3.11.9_clang_18.1.8.json b/tools/coredump/testdata/amd64/gcloud_sdk_502.0.0_slim_3.11.9_clang_18.1.8.json new file mode 100644 index 000000000..1cf798807 --- /dev/null +++ b/tools/coredump/testdata/amd64/gcloud_sdk_502.0.0_slim_3.11.9_clang_18.1.8.json @@ -0,0 +1,76 @@ +{ + "coredump-ref": "1c875546fb8c3b22bb7ed7b86c3a7f4da9a68a3ec93bb4732f0bff6ee793d23a", + "threads": [ + { + "lwp": 11, + "frames": [ + "fib+1 in /mnt/trash/fib.py:2", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + 
"fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "fib+3 in /mnt/trash/fib.py:4", + "+6 in /mnt/trash/fib.py:7", + "libpython3.11.so.1.0+0x32b580", + "libpython3.11.so.1.0+0x3259e3", + "libpython3.11.so.1.0+0x37a8f9", + "libpython3.11.so.1.0+0x378e32", + "libpython3.11.so.1.0+0x378972", + "libpython3.11.so.1.0+0x397899", + "libpython3.11.so.1.0+0x397102", + "libpython3.11.so.1.0+0x39739e", + "libpython3.11.so.1.0+0x3973fb", + "libc.so.6+0x27249", + "libc.so.6+0x27304", + "python3+0x1088" + ] + } + ], + "modules": [ + { + "ref": "11ce00a6490d5e4ef941e1f51faaddf40c088a1376f028cbc001985b779397ce", + "local-path": "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/lib/libpython3.11.so.1.0" + }, + { + "ref": "df8e371a04bcf4ea2d455277ecc9cd47fc9b4c58ed27a7f4e6c8343122a4d270", + "local-path": "/usr/lib/x86_64-linux-gnu/libpthread.so.0" + }, + { + "ref": "067650d84b8f554cedf0b9ff26137bdd10cd03d4bbcdba1029a543c59d1798e5", + "local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6" + }, + { + "ref": "fe279657c804dcec88728eeb27187f983f6e5dc0c89575c4bd01aa6a8147b3a1", + "local-path": "/usr/lib/x86_64-linux-gnu/libutil.so.1" + }, + { + "ref": "2a7334caf9516a482110c769e92985b30e9d7d9d96a4227b93d04cce8af0701e", + "local-path": "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/bin/python3" + }, + { + "ref": "6445c275f2477ebf619b1e4ec6fe5a0e460b9745e360ef9b671cb5a2f9f362ae", + "local-path": "/usr/lib/x86_64-linux-gnu/librt.so.1" + }, + { + "ref": "1d25fd63234b59e4c581564c7a6d8f5c6cf36eee757e3d26f4b0808dd36a4896", + "local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6" + }, + { + "ref": "582f2d3d4edab86d601c54b37f04bd18fa2cda28be30e9f8c87df73c1c581354", + "local-path": 
"/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + }, + { + "ref": "d71263682766154c159a63504fec543e3ea64a932e5f30d5f50758fab0405fa2", + "local-path": "/usr/lib/x86_64-linux-gnu/libdl.so.2" + } + ] +} diff --git a/tools/coredump/testdata/amd64/pyenv_3.12.9_gcc_13.3.0.json b/tools/coredump/testdata/amd64/pyenv_3.12.9_gcc_13.3.0.json new file mode 100644 index 000000000..7d7f152fd --- /dev/null +++ b/tools/coredump/testdata/amd64/pyenv_3.12.9_gcc_13.3.0.json @@ -0,0 +1,66 @@ +{ + "coredump-ref": "b21ceabeabae6be900f44f56bb52dd89065c71903603938135fb19b128575c6b", + "threads": [ + { + "lwp": 144534, + "frames": [ + "fib+1 in /home/korniltsev/trash/fib.py:2", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "+6 in /home/korniltsev/trash/fib.py:7", + "+0 in :1", + "libpython3.12.so.1.0+0x11243a", + "libpython3.12.so.1.0+0x28e7de", + "libpython3.12.so.1.0+0x2e9dc5", + "libpython3.12.so.1.0+0x2e9ed8", + "libpython3.12.so.1.0+0x2ecdbf", + "libpython3.12.so.1.0+0x2ed39f", + "libpython3.12.so.1.0+0x314d53", + "libpython3.12.so.1.0+0x315199", + "libpython3.12.so.1.0+0x31535d", + "libc.so.6+0x2a1c9", + "libc.so.6+0x2a28a", + "python3.12+0x1094" + ] + } + ], + 
"modules": [ + { + "ref": "474c778ae8a8baf4d26717c9e1011846268d7f0a3767f73b30a31d124a65d169", + "local-path": "/home/korniltsev/.pyenv/versions/3.12.9/bin/python3.12" + }, + { + "ref": "1a2eb220c22ae7ba8aaf8b243e57dbc25542f8c9c269ed6100c7ad5aea7c3ada", + "local-path": "/home/korniltsev/.pyenv/versions/3.12.9/lib/libpython3.12.so.1.0" + }, + { + "ref": "e7a914a33fd4f6d25057b8d48c7c5f3d55ab870ec4ee27693d6c5f3a532e6226", + "local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6" + }, + { + "ref": "c09178edd7fbc58aa8415f4bbe54dd76c5ff6c6398ba3e56e5a4743fd7e9adfc", + "local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6" + }, + { + "ref": "6c5e1b4528b704dc7081aa45b5037bda4ea9cad78ca562b4fb6b0dbdbfc7e7e7", + "local-path": "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + } + ] +} diff --git a/tools/coredump/testdata/amd64/pyenv_3.13.2_clang_21.0.0.json b/tools/coredump/testdata/amd64/pyenv_3.13.2_clang_21.0.0.json new file mode 100644 index 000000000..c7bb497be --- /dev/null +++ b/tools/coredump/testdata/amd64/pyenv_3.13.2_clang_21.0.0.json @@ -0,0 +1,62 @@ +{ + "coredump-ref": "dc8dd740e0456edc70077e8e453facfae0775a8bf50387f05e9f4e948c3ae700", + "threads": [ + { + "lwp": 167127, + "frames": [ + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "fib+3 in /home/korniltsev/trash/fib.py:4", + "+6 in /home/korniltsev/trash/fib.py:7", + "ffffffffffffffffffffffffffffffff+0x0", + 
"libpython3.13.so.1.0+0x1f9740", + "libpython3.13.so.1.0+0x1f450c", + "libpython3.13.so.1.0+0x260fdc", + "libpython3.13.so.1.0+0x260cd7", + "libpython3.13.so.1.0+0x25e751", + "libpython3.13.so.1.0+0x25e262", + "libpython3.13.so.1.0+0x283a63", + "libpython3.13.so.1.0+0x2832a2", + "libpython3.13.so.1.0+0x283589", + "libpython3.13.so.1.0+0x2835dc", + "libc.so.6+0x2a1c9", + "libc.so.6+0x2a28a", + "python3.13+0x1074" + ] + } + ], + "modules": [ + { + "ref": "b76cc07b46f4a2f32a16f3a4df617353d454f7890ebd92f49a96f8f7410613f4", + "local-path": "/home/korniltsev/.pyenv/versions/3.13.2/bin/python3.13" + }, + { + "ref": "e7a914a33fd4f6d25057b8d48c7c5f3d55ab870ec4ee27693d6c5f3a532e6226", + "local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6" + }, + { + "ref": "c09178edd7fbc58aa8415f4bbe54dd76c5ff6c6398ba3e56e5a4743fd7e9adfc", + "local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6" + }, + { + "ref": "67997ac257675599247dc0445f4d2705f67e203678fb9920162bc2cd7f9d0009", + "local-path": "/home/korniltsev/.pyenv/versions/3.13.2/lib/libpython3.13.so.1.0" + }, + { + "ref": "6c5e1b4528b704dc7081aa45b5037bda4ea9cad78ca562b4fb6b0dbdbfc7e7e7", + "local-path": "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + } + ] +} From 36bb7a0f26d9c2b0939ed3f7ae4986acdd1ce57b Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Sun, 6 Apr 2025 18:55:32 +0700 Subject: [PATCH 19/26] update modulestore to include meaningful errors --- tools/coredump/modulestore/store.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/coredump/modulestore/store.go b/tools/coredump/modulestore/store.go index 35ac81ddf..77484eaa2 100644 --- a/tools/coredump/modulestore/store.go +++ b/tools/coredump/modulestore/store.go @@ -426,19 +426,23 @@ func (store *Store) ensurePresentLocally(id ID) (string, error) { return localPath, nil } - // Download the file to a temporary location to prevent half-complete modules on crashes.
- file, err := os.CreateTemp(store.localCachePath, localTempPrefix) - if err != nil { - return "", fmt.Errorf("failed to create local file: %w", err) - } - defer file.Close() - moduleKey := makeS3Key(id) resp, err := http.Get(store.publicReadURL + moduleKey) if err != nil { return "", fmt.Errorf("failed to request file: %w", err) } defer resp.Body.Close() + if resp.StatusCode != 200 { + errorResponse, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("store returned %d %s", resp.StatusCode, errorResponse) + } + + // Download the file to a temporary location to prevent half-complete modules on crashes. + file, err := os.CreateTemp(store.localCachePath, localTempPrefix) + if err != nil { + return "", fmt.Errorf("failed to create local file: %w", err) + } + defer file.Close() if _, err = io.Copy(file, resp.Body); err != nil { return "", fmt.Errorf("failed to receive file: %w", err) } From 303cd1adad1d7ebe39783ccb118aebd0896713f6 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Mon, 7 Apr 2025 15:04:12 +0700 Subject: [PATCH 20/26] lint --- tools/coredump/modulestore/store.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/coredump/modulestore/store.go b/tools/coredump/modulestore/store.go index 77484eaa2..b47dc6f6e 100644 --- a/tools/coredump/modulestore/store.go +++ b/tools/coredump/modulestore/store.go @@ -432,7 +432,7 @@ func (store *Store) ensurePresentLocally(id ID) (string, error) { return "", fmt.Errorf("failed to request file: %w", err) } defer resp.Body.Close() - if resp.StatusCode != 200 { + if resp.StatusCode != http.StatusOK { errorResponse, _ := io.ReadAll(resp.Body) return "", fmt.Errorf("store returned %d %s", resp.StatusCode, errorResponse) } From ab12dbd58507091248178b95210c460d6172b920 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Mon, 7 Apr 2025 20:40:07 +0700 Subject: [PATCH 21/26] Apply suggestions from code review Co-authored-by: Florian Lehner --- interpreter/python/python.go | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/interpreter/python/python.go b/interpreter/python/python.go index fc4efc0b4..0020615f6 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -663,20 +663,20 @@ func decodeStub( ) (libpf.SymbolValue, error) { codeAddress, err := ef.LookupSymbolAddress(symbolName) if err != nil { - return libpf.SymbolValueInvalid, fmt.Errorf("lookup %s failed: %w", + return libpf.SymbolValueInvalid, fmt.Errorf("lookup %s failed: %v", symbolName, err) } code := make([]byte, 64) if _, err = ef.ReadVirtualMemory(code, int64(codeAddress)); err != nil { - return libpf.SymbolValueInvalid, fmt.Errorf("reading %s 0x%x code failed: %w", + return libpf.SymbolValueInvalid, fmt.Errorf("reading %s 0x%x code failed: %v", symbolName, codeAddress, err) } value, err := decodeStubArgumentWrapper(code, codeAddress, memoryBase) // Sanity check the value range and alignment if err != nil || value%4 != 0 { - return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s 0x%x %s failed (0x%x, %w)", + return libpf.SymbolValueInvalid, fmt.Errorf("decode stub %s 0x%x %s failed (0x%x): %v", symbolName, codeAddress, hex.Dump(code), value, err) } // If base symbol (_PyRuntime) is not provided, accept any found value. 
@@ -745,7 +745,7 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr // Calls first: PyThread_tss_get(autoTSSKey) autoTLSKey, err = decodeStub(ef, pyruntimeAddr, "PyGILState_GetThisThreadState") if autoTLSKey == libpf.SymbolValueInvalid { - return nil, fmt.Errorf("unable to resolve autoTLSKey %w", err) + return nil, fmt.Errorf("unable to resolve autoTLSKey %v", err) } if version >= pythonVer(3, 7) && autoTLSKey%8 == 0 { // On Python 3.7+, the call is to PyThread_tss_get, but can get optimized to From 796a61718adbd3568efbf58765ad1eab27518321 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Mon, 7 Apr 2025 20:45:06 +0700 Subject: [PATCH 22/26] revert modulestore changes --- tools/coredump/modulestore/store.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tools/coredump/modulestore/store.go b/tools/coredump/modulestore/store.go index b47dc6f6e..35ac81ddf 100644 --- a/tools/coredump/modulestore/store.go +++ b/tools/coredump/modulestore/store.go @@ -426,23 +426,19 @@ func (store *Store) ensurePresentLocally(id ID) (string, error) { return localPath, nil } - moduleKey := makeS3Key(id) - resp, err := http.Get(store.publicReadURL + moduleKey) - if err != nil { - return "", fmt.Errorf("failed to request file: %w", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - errorResponse, _ := io.ReadAll(resp.Body) - return "", fmt.Errorf("store returned %d %s", resp.StatusCode, errorResponse) - } - // Download the file to a temporary location to prevent half-complete modules on crashes. 
file, err := os.CreateTemp(store.localCachePath, localTempPrefix) if err != nil { return "", fmt.Errorf("failed to create local file: %w", err) } defer file.Close() + + moduleKey := makeS3Key(id) + resp, err := http.Get(store.publicReadURL + moduleKey) + if err != nil { + return "", fmt.Errorf("failed to request file: %w", err) + } + defer resp.Body.Close() if _, err = io.Copy(file, resp.Body); err != nil { return "", fmt.Errorf("failed to receive file: %w", err) } From 4372fc30cc1fb13aeb761526e1d37bb5081ae6dd Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Mon, 7 Apr 2025 20:47:54 +0700 Subject: [PATCH 23/26] add endbr64 comment --- asm/amd/insn.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/asm/amd/insn.go b/asm/amd/insn.go index 11fef77a8..eb8abbaae 100644 --- a/asm/amd/insn.go +++ b/asm/amd/insn.go @@ -3,6 +3,9 @@ package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" +// IsEndbr64 returns true if the first 4 bytes of the code is endbr64 instruction +// https://www.felixcloutier.com/x86/endbr64 +// The second returned argument is the size of the instruction which is always 4 func IsEndbr64(code []byte) (isEndbr bool, size int) { if len(code) >= 4 && code[0] == 0xf3 && From 6573aef12f480337ff9f067efd086a70a3b570c3 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Mon, 7 Apr 2025 20:56:51 +0700 Subject: [PATCH 24/26] add extra test for lea edi (32bit) --- interpreter/python/decode_test.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index adc162fa2..1033e5c90 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -259,6 +259,16 @@ func TestAmd64DecodeStub(t *testing.T) { }, expected: 0xCAFE*2 + 0x6E000000 + 7, }, + { + name: "synthetic lea edi, ...
scale index", + code: []byte{ + 0xB8, 0xEF, 0x00, 0x00, 0x00, // mov eax, 0xef + 0xBB, 0x2A, 0x00, 0x00, 0x00, // mov ebx, 0x2a + 0x67, 0x8D, 0x7C, 0x43, 0x07, // lea edi, [ebx + eax*2 + 7] + 0xEB, 0xEF, // jmp 0 + }, + expected: 0xEF*2 + 0x2a + 7, + }, } for _, td := range testdata { From 7a1593b1e85da9d0a8a8af14a728658543cfaf00 Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Mon, 14 Apr 2025 11:35:52 +0700 Subject: [PATCH 25/26] review fixes --- asm/amd/insn.go | 22 ++++++++++++---------- asm/amd/insn_test.go | 4 ++-- asm/amd/regs_state.go | 18 +++++++++++++++++- interpreter/python/decode.go | 17 +++++++++-------- interpreter/python/decode_test.go | 6 +++--- 5 files changed, 43 insertions(+), 24 deletions(-) diff --git a/asm/amd/insn.go b/asm/amd/insn.go index eb8abbaae..784b1d931 100644 --- a/asm/amd/insn.go +++ b/asm/amd/insn.go @@ -2,17 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" +import "bytes" -// IsEndbr64 returns true if the first 4 bytes of the code is endbr64 instruction // https://www.felixcloutier.com/x86/endbr64 -// The second returned argument is the size of the instruction which is always 4 -func IsEndbr64(code []byte) (isEndbr bool, size int) { - if len(code) >= 4 && - code[0] == 0xf3 && - code[1] == 0x0f && - code[2] == 0x1e && - code[3] == 0xfa { - return true, 4 +var opcodeEndBr64 = []byte{0xf3, 0x0f, 0x1e, 0xfa} + +// DecodeSkippable decodes an instruction that we don't care much about and are going to skip, +// as golang.org/x/arch/x86/x86asm fails to decode it. +// The second returned argument is the size of the decoded instruction to skip. 
+func DecodeSkippable(code []byte) (ok bool, size int) { + switch { + case bytes.HasPrefix(code, opcodeEndBr64): + return true, len(opcodeEndBr64) + default: + return false, 0 } - return false, 0 } diff --git a/asm/amd/insn_test.go b/asm/amd/insn_test.go index f7cacd00a..3dd3708f4 100644 --- a/asm/amd/insn_test.go +++ b/asm/amd/insn_test.go @@ -10,10 +10,10 @@ import ( ) func TestEndBr64(t *testing.T) { - res, n := IsEndbr64([]byte{0xF3, 0x0F, 0x1E, 0xFA}) + res, n := DecodeSkippable([]byte{0xF3, 0x0F, 0x1E, 0xFA}) assert.True(t, res) assert.Equal(t, 4, n) - res, _ = IsEndbr64([]byte{}) + res, _ = DecodeSkippable([]byte{}) assert.False(t, res) } diff --git a/asm/amd/regs_state.go b/asm/amd/regs_state.go index 22f404fdc..cddefd31a 100644 --- a/asm/amd/regs_state.go +++ b/asm/amd/regs_state.go @@ -26,13 +26,29 @@ func regIndex(reg x86asm.Reg) int { return 8 case x86asm.RIP: return 9 + case x86asm.R8: + return 10 + case x86asm.R9: + return 11 + case x86asm.R10: + return 12 + case x86asm.R11: + return 13 + case x86asm.R12: + return 14 + case x86asm.R13: + return 15 + case x86asm.R14: + return 16 + case x86asm.R15: + return 17 default: return 0 } } type RegsState struct { - regs [10]regState + regs [18]regState } func (r *RegsState) Set(reg x86asm.Reg, value, loadedFrom uint64) { diff --git a/interpreter/python/decode.go b/interpreter/python/decode.go index 8684dabee..820ab0ffc 100644 --- a/interpreter/python/decode.go +++ b/interpreter/python/decode.go @@ -15,9 +15,9 @@ import ( "golang.org/x/arch/x86/x86asm" ) -// decodeStubArgumentWrapperARM64 disassembles arm64 code and decodes the assumed value +// decodeStubArgumentARM64 disassembles arm64 code and decodes the assumed value // of requested argument. -func decodeStubArgumentWrapperARM64(code []byte, +func decodeStubArgumentARM64(code []byte, addrBase libpf.SymbolValue) libpf.SymbolValue { const argNumber uint8 = 0 // The concept is to track the latest load offset for all X0..X30 registers. 
@@ -116,7 +116,7 @@ func decodeStubArgumentAMD64(code []byte, codeAddress, memoryBase uint64) ( for instructionOffset < len(code) { rem := code[instructionOffset:] - if endbr64, insnLen := amd.IsEndbr64(rem); endbr64 { + if ok, insnLen := amd.DecodeSkippable(rem); ok { instructionOffset += insnLen continue } @@ -190,11 +190,12 @@ func decodeStubArgumentWrapper( codeAddress libpf.SymbolValue, memoryBase libpf.SymbolValue, ) (libpf.SymbolValue, error) { - if runtime.GOARCH == "arm64" { - return decodeStubArgumentWrapperARM64(code, memoryBase), nil - } - if runtime.GOARCH == "amd64" { + switch runtime.GOARCH { + case "arm64": + return decodeStubArgumentARM64(code, memoryBase), nil + case "amd64": return decodeStubArgumentAMD64(code, uint64(codeAddress), uint64(memoryBase)) + default: + return libpf.SymbolValueInvalid, fmt.Errorf("unsupported arch %s", runtime.GOARCH) } - return libpf.SymbolValueInvalid, fmt.Errorf("unsupported arch %s", runtime.GOARCH) } diff --git a/interpreter/python/decode_test.go b/interpreter/python/decode_test.go index 1033e5c90..e2cec192d 100644 --- a/interpreter/python/decode_test.go +++ b/interpreter/python/decode_test.go @@ -13,7 +13,7 @@ import ( ) func TestAnalyzeArm64Stubs(t *testing.T) { - val := decodeStubArgumentWrapperARM64( + val := decodeStubArgumentARM64( []byte{ 0x40, 0x0a, 0x00, 0x90, 0x01, 0xd4, 0x43, 0xf9, 0x22, 0x60, 0x17, 0x91, 0x40, 0x00, 0x40, 0xf9, @@ -21,7 +21,7 @@ func TestAnalyzeArm64Stubs(t *testing.T) { 0) assert.Equal(t, libpf.SymbolValue(1496), val, "PyEval_ReleaseLock stub test") - val = decodeStubArgumentWrapperARM64( + val = decodeStubArgumentARM64( []byte{ 0x80, 0x12, 0x00, 0xb0, 0x02, 0xd4, 0x43, 0xf9, 0x41, 0xf4, 0x42, 0xf9, 0x61, 0x00, 0x00, 0xb4, @@ -30,7 +30,7 @@ func TestAnalyzeArm64Stubs(t *testing.T) { assert.Equal(t, libpf.SymbolValue(1520), val, "PyGILState_GetThisThreadState test") // Python 3.10.12 on ARM64 Nix - val = decodeStubArgumentWrapperARM64( + val = decodeStubArgumentARM64( []byte{ 0x40, 
0x1a, 0x00, 0xd0, // adrp x0, 0xffffa0eff000 0x00, 0xa0, 0x46, 0xf9, // ldr x0, [x0, #3392] From 6c6f8cafc7f3131409cf8d346abd1d91bdae95ea Mon Sep 17 00:00:00 2001 From: Tolya Korniltsev Date: Mon, 14 Apr 2025 12:39:48 +0700 Subject: [PATCH 26/26] add r8l-r15l regs --- asm/amd/regs_state.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/asm/amd/regs_state.go b/asm/amd/regs_state.go index cddefd31a..92b36ee39 100644 --- a/asm/amd/regs_state.go +++ b/asm/amd/regs_state.go @@ -22,25 +22,25 @@ func regIndex(reg x86asm.Reg) int { return 6 case x86asm.RBP, x86asm.EBP: return 7 - case x86asm.RSP, x86asm.ESP: + case x86asm.R8, x86asm.R8L: return 8 - case x86asm.RIP: + case x86asm.R9, x86asm.R9L: return 9 - case x86asm.R8: + case x86asm.R10, x86asm.R10L: return 10 - case x86asm.R9: + case x86asm.R11, x86asm.R11L: return 11 - case x86asm.R10: + case x86asm.R12, x86asm.R12L: return 12 - case x86asm.R11: + case x86asm.R13, x86asm.R13L: return 13 - case x86asm.R12: + case x86asm.R14, x86asm.R14L: return 14 - case x86asm.R13: + case x86asm.R15, x86asm.R15L: return 15 - case x86asm.R14: + case x86asm.RSP, x86asm.ESP: return 16 - case x86asm.R15: + case x86asm.RIP: return 17 default: return 0