Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions interpreter/perl/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,21 @@ func hashCOPKey(k copKey) uint32 {

func (i *perlInstance) UpdateLibcInfo(ebpf interpreter.EbpfHandler, pid libpf.PID,
libcInfo libc.LibcInfo) error {
// Perl requires TSDInfo to access thread state. If stateInTSD is true,
// we need valid TSDInfo to proceed. If it's false, we can proceed without it.
// Since UpdateLibcInfo may be called multiple times as LibcInfo is collected
// from multiple DSOs, we should only insert proc data when we have what we need.
d := i.d
if d.stateInTSD && !libcInfo.HasTSDInfo() {
// We need TSDInfo but don't have it yet, wait for another call
return nil
}

// If we've already inserted proc info, don't do it again
if i.procInfoInserted {
return nil
}

stateInTSD := uint8(0)
if d.stateInTSD {
stateInTSD = 1
Expand Down
14 changes: 14 additions & 0 deletions interpreter/python/python.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,20 @@ func (p *pythonInstance) GetAndResetMetrics() ([]metrics.Metric, error) {
func (p *pythonInstance) UpdateLibcInfo(ebpf interpreter.EbpfHandler, pid libpf.PID,
libcInfo libc.LibcInfo) error {
d := p.d

// If we don't have a static TLS offset (Python < 3.13 or extraction failed),
// we need TSDInfo to access thread state via pthread_getspecific.
// Since UpdateLibcInfo may be called multiple times as LibcInfo is collected
// from multiple DSOs, wait until we have TSDInfo before inserting proc data.
if d.staticTLSOffset == 0 && !libcInfo.HasTSDInfo() {
Copy link
Copy Markdown
Contributor Author

@dalehamel dalehamel Feb 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@florianl after rereading things, I realized we do still need a check here for the case that:

  • Python is older than 3.13
  • The libc info is in multiple DSOs, and the first call to UpdateLibcInfo only has DTV info (HasTSDInfo is false)

To avoid causing a regression in this edge case, we need to check that we are not using staticTLSOffset, and that the call to update the libc info actually has the TSD info we want to insert.

I have simplified the check and updated the comments, I don't think this is redundant anymore.

This is because this PR also handles the case that the DTV info and TSD info could be coming from separate libc DSOs (see the discussion thread with Timo)

return nil
}

// Prevent duplicate inserts
if p.procInfoInserted {
return nil
}

vm := &d.vmStructs

cdata := support.PyProcInfo{
Expand Down
111 changes: 90 additions & 21 deletions libc/libc.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,73 @@ import (
)

// TSDInfo is an alias for support.TSDInfo, the introspection data for
// pthread thread specific data.
type TSDInfo = support.TSDInfo
// DTVInfo is an alias for support.DTVInfo, the DTV (Dynamic Thread Vector)
// introspection data used for accessing TLS variables.
type DTVInfo = support.DTVInfo

// LibcInfo contains introspection information extracted from the C-library
type LibcInfo struct {
Comment thread
fabled marked this conversation as resolved.
// TSDInfo is the TSDInfo extracted for this C-library
TSDInfo TSDInfo
// DTVInfo contains DTV (Dynamic Thread Vector) introspection data for accessing
// TLS variables when TLS descriptors are not available
DTVInfo DTVInfo
}

// IsEqual reports whether l and other carry identical TSDInfo and DTVInfo
// values. Both fields are compared with plain struct equality.
func (l LibcInfo) IsEqual(other LibcInfo) bool {
	if l.TSDInfo != other.TSDInfo {
		return false
	}
	return l.DTVInfo == other.DTVInfo
}

// Merge fills in missing fields of the receiver with the corresponding values
// from other. Fields already holding valid data in the receiver are not
// overwritten.
//
// "Missing" is decided with the same validity predicates used by consumers
// (HasTSDInfo / HasDTVInfo, i.e. a non-zero Multiplier) rather than by
// comparing against the zero struct. An extraction that found nothing useful
// can still leave other fields set (for example a non-zero Indirect with a
// zero Multiplier); such a value must not prevent valid data collected from
// another DSO from being merged in.
func (l *LibcInfo) Merge(other LibcInfo) {
	// If other has valid TSDInfo and this instance does not, take it.
	if !l.HasTSDInfo() && other.HasTSDInfo() {
		l.TSDInfo = other.TSDInfo
	}

	// If other has valid DTVInfo and this instance does not, take it.
	if !l.HasDTVInfo() && other.HasDTVInfo() {
		l.DTVInfo = other.DTVInfo
	}
}

// HasTSDInfo reports whether the TSD part of this LibcInfo is usable.
// A zero TSDInfo.Multiplier marks the TSD information as absent.
func (l LibcInfo) HasTSDInfo() bool {
	multiplier := l.TSDInfo.Multiplier
	return multiplier != 0
}

// HasDTVInfo reports whether the DTV part of this LibcInfo is usable.
// A zero DTVInfo.Multiplier marks the DTV information as absent.
func (l LibcInfo) HasDTVInfo() bool {
	multiplier := l.DTVInfo.Multiplier
	return multiplier != 0
}

// libcRegex matches DSO paths whose file name looks like a C-library
// component: the musl or glibc dynamic loader, libc itself, or libpthread.
var libcRegex = regexp.MustCompile(`.*/(ld-musl|ld-linux|libc|libpthread)([-.].*)?\.so`)

// IsPotentialLibcDSO reports whether filename names a DSO that may contain
// libc code, judged purely by matching the path against libcRegex.
func IsPotentialLibcDSO(filename string) bool {
	matched := libcRegex.MatchString(filename)
	return matched
}

// ExtractLibcInfo extracts the C-library introspection data from the given
// ELF file by running the TSD extraction followed by the DTV extraction.
//
// If either extraction step returns an error, that error is returned together
// with a nil *LibcInfo; callers must check the error before using the result.
// A nil error does not imply that both parts are present: use HasTSDInfo and
// HasDTVInfo on the result to find out which data is actually available.
func ExtractLibcInfo(ef *pfelf.File) (*LibcInfo, error) {
	tsdinfo, err := extractTSDInfo(ef)
	if err != nil {
		return nil, err
	}

	dtvinfo, err := extractDTVInfo(ef)
	if err != nil {
		// Same contract as the TSD failure path above: no result on error.
		return nil, err
	}

	return &LibcInfo{
		TSDInfo: tsdinfo,
		DTVInfo: dtvinfo,
	}, nil
}

// This code analyzes the C-library provided POSIX defined function which is used
Expand Down Expand Up @@ -65,27 +127,6 @@ type LibcInfo struct {
//
// Reading the value is basically "return self->specific_1stblock[key].data;"

// libcRegex matches DSO paths whose file name looks like a C-library
// component: the musl dynamic loader, libc itself, or libpthread.
var libcRegex = regexp.MustCompile(`.*/(ld-musl|libc|libpthread)([-.].*)?\.so`)

// IsPotentialTSDDSO reports whether filename names a DSO that may contain
// pthread code, judged purely by matching the path against libcRegex.
func IsPotentialTSDDSO(filename string) bool {
	matched := libcRegex.MatchString(filename)
	return matched
}

// ExtractLibcInfo extracts the C-library introspection data from the given
// ELF file. It returns a nil *LibcInfo together with the error when the TSD
// extraction fails.
func ExtractLibcInfo(ef *pfelf.File) (*LibcInfo, error) {
	var (
		info LibcInfo
		err  error
	)
	if info.TSDInfo, err = extractTSDInfo(ef); err != nil {
		return nil, err
	}
	return &info, nil
}

// extractTSDInfo extracts the introspection data for pthread thread specific data.
func extractTSDInfo(ef *pfelf.File) (TSDInfo, error) {
_, code, err := ef.SymbolData("__pthread_getspecific", 2048)
Expand Down Expand Up @@ -113,3 +154,31 @@ func extractTSDInfo(ef *pfelf.File) (TSDInfo, error) {
}
return info, nil
}

// extractDTVInfo extracts the introspection data for the DTV to access TLS vars
func extractDTVInfo(ef *pfelf.File) (DTVInfo, error) {
var info DTVInfo
_, code, err := ef.SymbolData("__tls_get_addr", 2048)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of the test coredumps don't actually have this symbol, so it is necessary to not error out if the symbol isn't present. That is why I added the logging package, and we now just log the error if the symbol is missing.

We return an empty DTVInfo struct, it is up to users of DTV info to check that it is valid before using it. This can easily be done by verifying that "EntryWidth" is not 0.

In the cases where we DO have the symbol, but fail to extract info from it, we legitimately error out.

Copy link
Copy Markdown
Contributor

@fabled fabled Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems this is defined in the ld-linux-x86-64.so.2 in (some versions of) glibc. So it means that the libc information may need to be collected from two DSOs in case of glibc.

You should add this to the regexp pattern in IsPotentialTSDDSO. Perhaps rename that to IsLibcDSO?

This also means that ProcessManager.assignLibcInfo should be updated to merge the information from these two different DSOs. Probably add a helper libc.MergeLibcInfo or add a struct method for LibcInfo.Merge?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the ability to check for equality and merge, and the values are "accumulated" when we call assignLibcInfo.

Also added unit tests for the associated LibcInfo.IsEqual and LibcInfo.Merge, and to verify the accumulation behaviour in assignLibcInfo.

if err != nil {
// If the symbol is not exported, this is not a critical error.
// Callers can check HasDTVInfo() to determine if DTV data is available.
return info, nil
}

if len(code) < 8 {
return info, fmt.Errorf("__tls_get_addr function size is %d", len(code))
}

switch ef.Machine {
case elf.EM_AARCH64:
info, err = extractDTVInfoARM(code)
case elf.EM_X86_64:
info, err = extractDTVInfoX86(code)
default:
return info, fmt.Errorf("unsupported arch %s", ef.Machine.String())
}
if err != nil {
return info, fmt.Errorf("failed to extract DTV data: %s", err)
}
return info, nil
}
143 changes: 143 additions & 0 deletions libc/libc_aarch64.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,146 @@ func extractTSDInfoARM(code []byte) (TSDInfo, error) {
Indirect: indirect,
}, nil
}

func extractDTVInfoARM(code []byte) (DTVInfo, error) {
// Track register states similar to extractTSDInfoARM
Comment on lines +272 to +273
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm. we really should implement the arm.NewInterpreter similar to the amd.NewInterpreter that creates similar expressions to match to avoid all this state tracking duplication. but i suppose that's outside the scope of this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I found myself wishing for that several times while working on this, but as it stands this is the status quo for arm. The amd API is quite nice. Something for future work - I can cut an issue for that?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be happy to work on the arm counterpart if no one objects.

var regs [32]regState

dtvOffset := int16(0)
entryWidth := uint32(0)
resetReg := int(-1)

// Scan entire function
for offs := 0; offs < len(code); offs += 4 {
if offs+4 > len(code) {
break
}

if resetReg >= 0 {
// Reset register state if something unsupported happens on it
regs[resetReg] = regState{status: Unspec}
}

inst, err := aa.Decode(code[offs:])
if err != nil {
continue
}
if inst.Op == aa.RET {
break
}

destReg, ok := arm.Xreg2num(inst.Args[0])
if !ok {
continue
}

resetReg = destReg
switch inst.Op {
case aa.MOV:
// Track register moves
srcReg, ok := arm.Xreg2num(inst.Args[1])
if !ok {
continue
}
regs[destReg] = regs[srcReg]

case aa.MRS:
// MRS X1, S3_3_C13_C0_2 (tpidr_el0)
if inst.Args[1].String() == "S3_3_C13_C0_2" {
regs[destReg] = regState{
status: TSDBase, // Reuse TSDBase to mean thread pointer
multiplier: 1,
}
}

case aa.LDUR:
// LDUR X1, [X1,#-8]
m, ok := inst.Args[1].(aa.MemImmediate)
if !ok {
continue
}
srcReg, ok := arm.Xreg2num(m.Base)
if !ok {
continue
}
if regs[srcReg].status == TSDBase {
imm, ok := arm.DecodeImmediate(m)
if !ok {
continue
}
// This is loading the DTV pointer from thread pointer
dtvOffset = int16(imm & 0xFFFF)
regs[destReg] = regState{
status: TSDElementBase, // DTV pointer
offset: imm,
multiplier: 1,
}
} else {
continue
}

case aa.LDR:
if len(inst.Args) < 2 {
continue
}
switch m := inst.Args[1].(type) {
case aa.MemImmediate:
// ldr x1, [x1, #0] or ldr x1, [x1]
srcReg, ok := arm.Xreg2num(m.Base)
if !ok {
continue
}
if regs[srcReg].status == TSDBase {
// Loading DTV pointer from thread pointer
imm, ok := arm.DecodeImmediate(m)
if !ok {
imm = 0
}
dtvOffset = int16(imm & 0xFFFF)
regs[destReg] = regState{
status: TSDElementBase, // DTV pointer
offset: imm,
multiplier: 1,
}
} else {
continue
}

case aa.MemExtend:
// ldr x1, [x1, x2, lsl #3]
srcReg, ok := arm.Xreg2num(m.Base)
if !ok {
continue
}
if regs[srcReg].status == TSDElementBase {
// This is indexing into the DTV array
if m.Amount > 0 {
entryWidth = uint32(1 << m.Amount)
}
}
}

case aa.LSL:
// lsl x3, x3, #4
if len(inst.Args) >= 3 {
if imm, ok := inst.Args[2].(aa.Imm); ok {
entryWidth = uint32(1 << imm.Imm)
}
}

case aa.CMP, aa.CBZ, aa.CMN:
// Opcode with no affect on first argument.
// Noop to exit switch without default continue.

default:
continue
}
resetReg = -1
}

return DTVInfo{
Offset: dtvOffset,
Multiplier: uint8(entryWidth),
Indirect: 1,
}, nil
}
Loading