Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion metrics/ids.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions metrics/metrics.json
Original file line number Diff line number Diff line change
Expand Up @@ -2139,5 +2139,19 @@
"name": "DotnetPEInfoErrCacheHit",
"field": "agent.dotnet.pe_info_err_cache.hits",
"id": 292
},
{
"description": "Number of Go custom labels dropped because the label name was empty or not valid UTF-8",
"type": "counter",
"name": "GoLabelsDroppedInvalidName",
"field": "agent.golabels.dropped.invalid_name",
"id": 293
},
{
"description": "Number of Go custom labels dropped because the label value was not valid UTF-8",
"type": "counter",
"name": "GoLabelsDroppedInvalidValue",
"field": "agent.golabels.dropped.invalid_value",
"id": 294
}
]
10 changes: 10 additions & 0 deletions support/ebpf/go_labels.ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ get_go_custom_labels_from_slice(PerCPURecord *record, void *labels_slice_ptr)
if (i >= labels_slice.len)
break;
CustomLabel *lbl = &out->labels[i];
// The PerCPURecord is reused across traces. Zero the label slot before
// populating it: bpf_probe_read_user() only writes the bytes it reads, so
// a key/value shorter than one previously written to this slot would
// otherwise inherit the old trailing bytes. Userspace reconstructs the
// string by scanning for the first NUL, so leftover bytes would corrupt
// the label keys and values we emit.
__builtin_memset(lbl, 0, sizeof(*lbl));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

memset unrolls to 64 1 byte writes, not the end of the world but instead I think we should just write a null terminator since we don't store the length and the Go side treats it as a C-string. This is why we read at most LEN-1.

u8 klen = MIN(record->labels[i * 2].len, CUSTOM_LABEL_MAX_KEY_LEN - 1);
if (bpf_probe_read_user(lbl->key, klen, record->labels[i * 2].str)) {
DEBUG_PRINT(
Expand Down Expand Up @@ -118,6 +125,9 @@ get_go_custom_labels_from_map(PerCPURecord *record, void *labels_map_ptr_ptr, Go
char *vstr = map_value->values[i].str;
unsigned vlen = map_value->values[i].len;
if (tophash != 0 && kstr != NULL) {
// Zero the slot before use; see get_go_custom_labels_from_slice for why
// this is required (the PerCPURecord is reused across traces).
__builtin_memset(lbl, 0, sizeof(*lbl));
if (bpf_probe_read_user(lbl->key, MIN(klen, CUSTOM_LABEL_MAX_KEY_LEN - 1), kstr)) {
DEBUG_PRINT("cl: failed to read key for custom label (%lx)", (unsigned long)kstr);
return false;
Expand Down
65 changes: 65 additions & 0 deletions tracer/string_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package tracer

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestGoString(t *testing.T) {
tests := map[string]struct {
input []byte
wantValue string
wantOK bool
}{
"plain ascii": {
input: []byte("tenant\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"),
wantValue: "tenant",
wantOK: true,
},
"valid multi-byte utf8": {
input: append([]byte("héllo"), 0),
wantValue: "héllo",
wantOK: true,
},
"empty buffer": {
input: make([]byte, 16),
wantValue: "",
wantOK: true,
},
"no nul terminator uses whole buffer": {
input: []byte("exactlysixteenb!"),
wantValue: "exactlysixteenb!",
wantOK: true,
},
"stale bytes after nul are discarded": {
// Models a short key written into a per-CPU slot that previously
// held a longer key. Everything past the first NUL must be
// dropped; otherwise this produces a garbage label name.
input: []byte("tier\x00equest-trace"),
wantValue: "tier",
wantOK: true,
},
"invalid utf8 lone continuation byte": {
input: []byte{'b', 'a', 'd', 0x80, 0x00},
wantOK: false,
},
"invalid utf8 truncated multi-byte rune": {
// A 3-byte rune cut after its first byte, as fixed-width
// truncation in the eBPF extractor can produce.
input: []byte{'x', 0xE2, 0x00},
wantOK: false,
},
}

for name, tc := range tests {
t.Run(name, func(t *testing.T) {
got, ok := goString(tc.input)
require.Equal(t, tc.wantOK, ok)
require.Equal(t, tc.wantValue, got.String())
})
}
}
60 changes: 54 additions & 6 deletions tracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ import (
"slices"
"strings"
"sync"
"sync/atomic"
"time"
"unicode/utf8"
"unsafe"

cebpf "github.com/cilium/ebpf"
Expand Down Expand Up @@ -132,6 +134,14 @@ type Tracer struct {
// probabilisticThreshold holds the threshold for probabilistic profiling.
probabilisticThreshold uint

// goLabelsDroppedInvalidName counts Go custom labels dropped since the last
// metrics report because their name was empty or not valid UTF-8.
goLabelsDroppedInvalidName atomic.Int64

// goLabelsDroppedInvalidValue counts Go custom labels dropped since the last
// metrics report because their value was not valid UTF-8.
goLabelsDroppedInvalidValue atomic.Int64

// done is closed when the tracer encounters an unrecoverable error.
// Use Done() to obtain a read-only channel for use in select statements.
done chan libpf.Void
Expand Down Expand Up @@ -213,13 +223,22 @@ type progLoaderHelper struct {
noTailCallTarget bool
}

// Convert a C-string to Go string.
func goString(cstr []byte) libpf.String {
// goString converts a fixed-size NUL-terminated buffer into an interned string,
// ignoring everything from the first NUL byte onward. It returns ok=false when
// the content is not valid UTF-8, so callers handling untrusted data (custom
// label keys/values, thread comm) can reject it rather than emit garbage. This
// catches a multi-byte rune split by fixed-width truncation in eBPF, and guards
// against any future extraction bug.
func goString(cstr []byte) (libpf.String, bool) {
index := bytes.IndexByte(cstr, byte(0))
if index < 0 {
index = len(cstr)
}
return libpf.Intern(pfunsafe.ToString(cstr[:index]))
b := cstr[:index]
if !utf8.Valid(b) {
return libpf.NullString, false
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This throws away all the valid bits of the string in the case of a mid-rune truncation, feels like a bad idea, wouldn't it be better to skip the utf8 validation and let downstream consumers deal with it?

}
return libpf.Intern(pfunsafe.ToString(b)), true
}

// schedProcessFreeHookName returns the name of the tracepoint hook to use.
Expand Down Expand Up @@ -1030,8 +1049,11 @@ func (t *Tracer) loadBpfTrace(raw []byte) (*libpf.EbpfTrace, error) {
pid := libpf.PID(ptr.Pid)
procMeta := t.processManager.MetaForPID(pid)
trace := t.tracePool.Get().(*libpf.EbpfTrace)
// comm is best-effort: if the kernel handed us non-UTF-8 bytes, drop it
// rather than emit an invalid string downstream.
comm, _ := goString(ptr.Comm[:])
Copy link
Copy Markdown
Contributor

@gnurizen gnurizen May 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing utf8 validation on comm feels wrong (like wasted energy) but its probably not a big deal.

*trace = libpf.EbpfTrace{
Comm: goString(ptr.Comm[:]),
Comm: comm,
ExecutablePath: procMeta.Executable,
ContainerID: procMeta.ContainerID,
ProcessName: procMeta.Name,
Expand All @@ -1058,8 +1080,18 @@ func (t *Tracer) loadBpfTrace(raw []byte) (*libpf.EbpfTrace, error) {
trace.CustomLabels = make(map[libpf.String]libpf.String, int(ptr.Custom_labels.Len))
for i := 0; i < int(ptr.Custom_labels.Len); i++ {
lbl := ptr.Custom_labels.Labels[i]
key := goString(lbl.Key[:])
val := goString(lbl.Val[:])
key, ok := goString(lbl.Key[:])
if !ok || key == libpf.NullString {
t.goLabelsDroppedInvalidName.Add(1)
log.Debugf("Dropping Go custom label with empty or invalid UTF-8 name")
continue
}
val, ok := goString(lbl.Val[:])
if !ok {
t.goLabelsDroppedInvalidValue.Add(1)
log.Debugf("Dropping Go custom label %s with invalid UTF-8 value", key)
continue
}
trace.CustomLabels[key] = val
}
}
Expand All @@ -1080,6 +1112,21 @@ func (t *Tracer) loadBpfTrace(raw []byte) (*libpf.EbpfTrace, error) {
return trace, nil
}

// goLabelsMetricCollector reports and resets the counters of Go custom labels
// dropped due to an invalid name or value since the previous call.
func (t *Tracer) goLabelsMetricCollector() []metrics.Metric {
return []metrics.Metric{
{
ID: metrics.IDGoLabelsDroppedInvalidName,
Value: metrics.MetricValue(t.goLabelsDroppedInvalidName.Swap(0)),
},
{
ID: metrics.IDGoLabelsDroppedInvalidValue,
Value: metrics.MetricValue(t.goLabelsDroppedInvalidValue.Swap(0)),
},
}
}

// StartMapMonitors starts goroutines for collecting metrics and monitoring eBPF
// maps for tracepoints, new traces, trace count updates and unknown PCs.
func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *libpf.EbpfTrace) error {
Expand Down Expand Up @@ -1133,6 +1180,7 @@ func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *libp
metrics.AddSlice(eventMetricCollector())
metrics.AddSlice(traceEventMetricCollector())
metrics.AddSlice(t.eBPFMetricsCollector(translateIDs, previousMetricValue))
metrics.AddSlice(t.goLabelsMetricCollector())
})

return nil
Expand Down
Loading