diff --git a/armhelpers/arm_helpers.go b/armhelpers/arm_helpers.go index 8065a07c6..41b7d10ac 100644 --- a/armhelpers/arm_helpers.go +++ b/armhelpers/arm_helpers.go @@ -29,7 +29,7 @@ func Xreg2num(arg interface{}) (int, bool) { if !ok { return 0, false } - ndx = aa.Reg(n) + ndx = n default: return 0, false } @@ -46,7 +46,9 @@ func Xreg2num(arg interface{}) (int, bool) { // DecodeRegister converts the result of calling Reg.String() // into the initial register's value. -func DecodeRegister(reg string) (uint64, bool) { +func DecodeRegister(reg string) (aa.Reg, bool) { + const maxRegister = uint64(aa.V31) + // This function is essentially just the inverse // of https://cs.opensource.google/go/x/arch/+/fc48f9fe:arm64/arm64asm/inst.go;l=335 length := len(reg) @@ -65,7 +67,10 @@ func DecodeRegister(reg string) (uint64, bool) { if err != nil { return 0, false } - return val, true + if val > maxRegister { + return 0, false + } + return aa.Reg(val), true } // Otherwise, we want to strip out the @@ -98,16 +103,20 @@ func DecodeRegister(reg string) (uint64, bool) { return 0, false } - return val + regOffset, true + res := val + regOffset + if res > maxRegister { + return 0, false + } + return aa.Reg(res), true } // DecodeImmediate converts an arm64asm Arg of immediate type to it's value. -func DecodeImmediate(arg aa.Arg) (uint64, bool) { +func DecodeImmediate(arg aa.Arg) (int64, bool) { switch val := arg.(type) { case aa.Imm: - return uint64(val.Imm), true + return int64(val.Imm), true case aa.PCRel: - return uint64(val), true + return int64(val), true case aa.MemImmediate: // The MemImmediate layout changes quite // a bit depending on its mode. @@ -139,13 +148,13 @@ func DecodeImmediate(arg aa.Arg) (uint64, bool) { // Note that the second %s here is the print // format from a register. Annoyingly this isn't a // register type, so we have to unwind it manually - val, err := DecodeRegister(fields[1]) - if !err { + reg, ok := DecodeRegister(fields[1]) + if !ok { return 0, false } // The Go disassembler always adds X0 here. // See https://cs.opensource.google/go/x/arch/+/fc48f9fe:arm64/arm64asm/inst.go;l=526 - return val - uint64(aa.X0), true + return int64(reg - aa.X0), true } // Otherwise all of the strings end with a ], so we just parse @@ -156,12 +165,12 @@ func DecodeImmediate(arg aa.Arg) (uint64, bool) { if err != nil { return 0, false } - return uint64(out), true + return out, true case aa.ImmShift: // Sadly, ImmShift{} does not have public fields. // https://github.com/golang/go/issues/51517 - var imm uint64 + var imm int64 n, err := fmt.Sscanf(val.String(), "#%v", &imm) if err != nil || n != 1 { return 0, false diff --git a/interpreter/hotspot/stubs.go b/interpreter/hotspot/stubs.go index 1f0e86b2b..0d687a4d5 100644 --- a/interpreter/hotspot/stubs.go +++ b/interpreter/hotspot/stubs.go @@ -207,7 +207,7 @@ Outer: continue } - spOffs += int64(imm) + spOffs += imm } case aa.SUB: for _, arg := range insn.Args[:2] { @@ -220,7 +220,7 @@ Outer: continue } - spOffs -= int64(imm) + spOffs -= imm } } diff --git a/interpreter/php/decode_aarch64.go b/interpreter/php/decode_aarch64.go index 1b6674b80..6ec7dea25 100644 --- a/interpreter/php/decode_aarch64.go +++ b/interpreter/php/decode_aarch64.go @@ -44,6 +44,9 @@ func retrieveZendVMKindARM(code []byte) (uint, error) { if !ok { break } + if val > ZEND_VM_KIND_HYBRID { + return 0, fmt.Errorf("zend_vm_kind has an invalid value %d", val) + } return uint(val), nil } } @@ -117,7 +120,7 @@ func retrieveJITBufferPtrARM(code []byte, addrBase libpf.SymbolValue) ( // // We also assume that the first BL we encounter is the one we care about. // This is because the first call inside zend_jit_protect is a call to mprotect. - var regOffset [32]uint64 + var regOffset [32]int64 bufRetVal := libpf.SymbolValueInvalid sizeRetVal := libpf.SymbolValueInvalid @@ -163,7 +166,7 @@ func retrieveJITBufferPtrARM(code []byte, addrBase libpf.SymbolValue) ( // The instruction specifies that this value needs to // shifted about before being added to the PC. - pc := uint64(addrBase) + uint64(offs) + pc := int64(addrBase) + int64(offs) regOffset[dest] = ((pc + a2) >> 12) << 12 case aa.LDR: m, ok := inst.Args[1].(aa.MemImmediate) diff --git a/interpreter/python/arm64_decode.go b/interpreter/python/arm64_decode.go index 798486182..f1577edcf 100644 --- a/interpreter/python/arm64_decode.go +++ b/interpreter/python/arm64_decode.go @@ -47,7 +47,7 @@ func decodeStubArgumentARM64(code []byte, // B .pthread_getspecific // Storage for load offsets for each Xn register - var regOffset [32]uint64 + var regOffset [32]int64 retValue := libpf.SymbolValueInvalid for offs := 0; offs < len(code); offs += 4 { @@ -65,7 +65,7 @@ func decodeStubArgumentARM64(code []byte, continue } - instOffset := uint64(0) + instOffset := int64(0) instRetval := libpf.SymbolValueInvalid switch inst.Op { case aa.ADD: diff --git a/tpbase/assembly_decode.go b/tpbase/assembly_decode_aarch64.go similarity index 93% rename from tpbase/assembly_decode.go rename to tpbase/assembly_decode_aarch64.go index b3641db42..41d8906c7 100644 --- a/tpbase/assembly_decode.go +++ b/tpbase/assembly_decode_aarch64.go @@ -13,13 +13,13 @@ import ( func arm64GetAnalyzers() []Analyzer { return []Analyzer{ - {"tls_set", AnalyzeTLSSetARM64}, + {"tls_set", analyzeTLSSetARM}, } } -// AnalyzeTLSSet looks at the assembly of the `tls_set` function in the +// analyzeTLSSetARM looks at the assembly of the `tls_set` function in the // kernel in order to compute the offset of `tp_value` into `task_struct`. -func AnalyzeTLSSetARM64(code []byte) (uint32, error) { +func analyzeTLSSetARM(code []byte) (uint32, error) { // This tries to extract offset of thread.uw.tp_value relative to // struct task_struct. The code analyzed comes from: // linux/arch/arm64/kernel/ptrace.c: tls_set(struct task_struct *target, ...) { diff --git a/tpbase/libc.go b/tpbase/libc.go index fa057a401..072780ae3 100644 --- a/tpbase/libc.go +++ b/tpbase/libc.go @@ -8,11 +8,7 @@ import ( "fmt" "regexp" - ah "go.opentelemetry.io/ebpf-profiler/armhelpers" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" - "go.opentelemetry.io/ebpf-profiler/stringutil" - - aa "golang.org/x/arch/arm64/arm64asm" ) // TSDInfo contains information to access C-library's Thread Specific Data from eBPF @@ -110,260 +106,3 @@ func ExtractTSDInfo(ef *pfelf.File) (*TSDInfo, error) { } return &info, nil } - -const ( - Unspec int = iota - TSDBase - TSDElementBase - TSDIndex - TSDValue - TSDConstant -) - -type regState struct { - status int - offset int - multiplier int - indirect bool -} - -func ExtractTSDInfoARM64(code []byte) (TSDInfo, error) { - // This tries to extract offsetof(struct pthread, tsd). - // The analyzed code is pthread_getspecific, and should work on glibc and musl. - // See test cases for example assembly. The strategy is to find "MRS xx, tpidr_el0" - // instruction as loading something relative to "struct pthread". It is - // then tracked against first argument to find the exact offset and multiplier - // to address the TSD array. - - // Start tracking of X0 - var regs [32]regState - - regs[0].status = TSDIndex - regs[0].multiplier = 1 - resetReg := int(-1) - - for offs := 0; offs < len(code); offs += 4 { - if resetReg >= 0 { - // Reset register state if something unsupported happens on it - regs[resetReg] = regState{status: Unspec} - } - - inst, err := aa.Decode(code[offs:]) - if err != nil { - continue - } - if inst.Op == aa.RET { - break - } - - destReg, ok := ah.Xreg2num(inst.Args[0]) - if !ok { - continue - } - - resetReg = destReg - switch inst.Op { - case aa.MOV: - switch val := inst.Args[1].(type) { - case aa.Imm64: - regs[destReg] = regState{ - status: TSDConstant, - offset: int(val.Imm), - multiplier: 1, - } - case aa.Imm: - regs[destReg] = regState{ - status: TSDConstant, - offset: int(val.Imm), - multiplier: 1, - } - default: - // Track register moves - srcReg, ok := ah.Xreg2num(inst.Args[1]) - if !ok { - continue - } - regs[destReg] = regs[srcReg] - } - case aa.MRS: - // MRS X1, S3_3_C13_C0_2 - if inst.Args[1].String() == "S3_3_C13_C0_2" { - regs[destReg] = regState{ - status: TSDBase, - multiplier: 1, - } - } - case aa.LDUR: - // LDUR X1, [X1,#-88] - m, ok := inst.Args[1].(aa.MemImmediate) - if !ok { - continue - } - srcReg, ok := ah.Xreg2num(m.Base) - if !ok { - continue - } - if regs[srcReg].status == TSDBase { - imm, ok := ah.DecodeImmediate(m) - if !ok { - continue - } - regs[destReg] = regState{ - status: TSDBase, - offset: regs[srcReg].offset + int(imm), - multiplier: regs[srcReg].multiplier, - indirect: true, - } - } else { - continue - } - case aa.LDR: - switch m := inst.Args[1].(type) { - case aa.MemExtend: - // LDR X0, [X1,W0,UXTW #3] - srcReg, ok := ah.Xreg2num(m.Base) - if !ok { - continue - } - srcIndex, ok := ah.Xreg2num(m.Index) - if !ok { - continue - } - if regs[srcReg].status == TSDBase && regs[srcIndex].status == TSDIndex { - regs[destReg] = regState{ - status: TSDValue, - offset: regs[srcReg].offset + (regs[srcIndex].offset << m.Amount), - multiplier: regs[srcReg].multiplier << m.Amount, - indirect: regs[srcReg].indirect, - } - } else { - continue - } - case aa.MemImmediate: - // ldr x0, [x2, #8] - srcReg, ok := ah.Xreg2num(m.Base) - if !ok { - continue - } - if regs[srcReg].status == TSDElementBase { - i, ok := ah.DecodeImmediate(m) - if !ok { - continue - } - regs[destReg] = regState{ - status: TSDValue, - offset: regs[srcReg].offset + int(i), - multiplier: regs[srcReg].multiplier, - indirect: regs[srcReg].indirect, - } - } else { - continue - } - } - case aa.UBFIZ: - // UBFIZ X0, X1, #4, #32 - srcReg, ok := ah.Xreg2num(inst.Args[1]) - if !ok { - continue - } - if regs[srcReg].status == TSDIndex { - i, ok := inst.Args[2].(aa.Imm) - if !ok { - continue - } - regs[destReg] = regState{ - status: TSDIndex, - offset: regs[srcReg].offset << i.Imm, - multiplier: regs[srcReg].multiplier << i.Imm, - } - } - case aa.ADD: - srcReg, ok := ah.Xreg2num(inst.Args[1]) - if !ok { - continue - } - switch a2 := inst.Args[2].(type) { - case aa.ImmShift: - i, ok := ah.DecodeImmediate(a2) - if !ok { - continue - } - regs[destReg] = regs[srcReg] - regs[destReg].offset += int(i) - case aa.RegExtshiftAmount: - regStr := inst.Args[2].String() - shift := int(0) - var fields [2]string - if stringutil.SplitN(regStr, ",", fields[:]) == 2 { - regStr = fields[0] - n, err := fmt.Sscanf(fields[1], " LSL #%v", &shift) - if n != 1 || err != nil { - n, err := fmt.Sscanf(fields[1], " UXTW #%v", &shift) - if n != 1 || err != nil { - continue - } - } - } - reg, ok := ah.DecodeRegister(regStr) - if !ok { - continue - } - srcReg2, ok := ah.Xreg2num(aa.Reg(reg)) - if !ok { - continue - } - if regs[srcReg].status == TSDBase && regs[srcReg2].status == TSDIndex { - regs[destReg] = regState{ - status: TSDElementBase, - offset: regs[srcReg].offset + regs[srcReg2].offset<= 0 { + // Reset register state if something unsupported happens on it + regs[resetReg] = regState{status: Unspec} + } + + inst, err := aa.Decode(code[offs:]) + if err != nil { + continue + } + if inst.Op == aa.RET { + break + } + + destReg, ok := ah.Xreg2num(inst.Args[0]) + if !ok { + continue + } + + resetReg = destReg + switch inst.Op { + case aa.MOV: + switch val := inst.Args[1].(type) { + case aa.Imm64: + regs[destReg] = regState{ + status: TSDConstant, + offset: int64(val.Imm), + multiplier: 1, + } + case aa.Imm: + regs[destReg] = regState{ + status: TSDConstant, + offset: int64(val.Imm), + multiplier: 1, + } + default: + // Track register moves + srcReg, ok := ah.Xreg2num(inst.Args[1]) + if !ok { + continue + } + regs[destReg] = regs[srcReg] + } + case aa.MRS: + // MRS X1, S3_3_C13_C0_2 + if inst.Args[1].String() == "S3_3_C13_C0_2" { + regs[destReg] = regState{ + status: TSDBase, + multiplier: 1, + } + } + case aa.LDUR: + // LDUR X1, [X1,#-88] + m, ok := inst.Args[1].(aa.MemImmediate) + if !ok { + continue + } + srcReg, ok := ah.Xreg2num(m.Base) + if !ok { + continue + } + if regs[srcReg].status == TSDBase { + imm, ok := ah.DecodeImmediate(m) + if !ok { + continue + } + regs[destReg] = regState{ + status: TSDBase, + offset: regs[srcReg].offset + imm, + multiplier: regs[srcReg].multiplier, + indirect: true, + } + } else { + continue + } + case aa.LDR: + switch m := inst.Args[1].(type) { + case aa.MemExtend: + // LDR X0, [X1,W0,UXTW #3] + srcReg, ok := ah.Xreg2num(m.Base) + if !ok { + continue + } + srcIndex, ok := ah.Xreg2num(m.Index) + if !ok { + continue + } + if regs[srcReg].status == TSDBase && regs[srcIndex].status == TSDIndex { + regs[destReg] = regState{ + status: TSDValue, + offset: regs[srcReg].offset + (regs[srcIndex].offset << m.Amount), + multiplier: regs[srcReg].multiplier << m.Amount, + indirect: regs[srcReg].indirect, + } + } else { + continue + } + case aa.MemImmediate: + // ldr x0, [x2, #8] + srcReg, ok := ah.Xreg2num(m.Base) + if !ok { + continue + } + if regs[srcReg].status == TSDElementBase { + i, ok := ah.DecodeImmediate(m) + if !ok { + continue + } + regs[destReg] = regState{ + status: TSDValue, + offset: regs[srcReg].offset + i, + multiplier: regs[srcReg].multiplier, + indirect: regs[srcReg].indirect, + } + } else { + continue + } + } + case aa.UBFIZ: + // UBFIZ X0, X1, #4, #32 + srcReg, ok := ah.Xreg2num(inst.Args[1]) + if !ok { + continue + } + if regs[srcReg].status == TSDIndex { + i, ok := inst.Args[2].(aa.Imm) + if !ok { + continue + } + regs[destReg] = regState{ + status: TSDIndex, + offset: regs[srcReg].offset << i.Imm, + multiplier: regs[srcReg].multiplier << i.Imm, + } + } + case aa.ADD: + srcReg, ok := ah.Xreg2num(inst.Args[1]) + if !ok { + continue + } + switch a2 := inst.Args[2].(type) { + case aa.ImmShift: + i, ok := ah.DecodeImmediate(a2) + if !ok { + continue + } + regs[destReg] = regs[srcReg] + regs[destReg].offset += i + case aa.RegExtshiftAmount: + regStr := inst.Args[2].String() + shift := int(0) + var fields [2]string + if stringutil.SplitN(regStr, ",", fields[:]) == 2 { + regStr = fields[0] + n, err := fmt.Sscanf(fields[1], " LSL #%v", &shift) + if n != 1 || err != nil { + n, err := fmt.Sscanf(fields[1], " UXTW #%v", &shift) + if n != 1 || err != nil { + continue + } + } + } + reg, ok := ah.DecodeRegister(regStr) + if !ok { + continue + } + srcReg2, ok := ah.Xreg2num(reg) + if !ok { + continue + } + if regs[srcReg].status == TSDBase && regs[srcReg2].status == TSDIndex { + regs[destReg] = regState{ + status: TSDElementBase, + offset: regs[srcReg].offset + regs[srcReg2].offset<