diff --git a/metrics/ids.go b/metrics/ids.go index 23fc6e0ec..c02a3dcc6 100644 --- a/metrics/ids.go +++ b/metrics/ids.go @@ -665,6 +665,12 @@ const ( // Number of cache hits for the dotnet PE open/parse error LRU IDDotnetPEInfoErrCacheHit = 292 + // Number of Go custom labels dropped because the label name was empty or not valid UTF-8 + IDGoLabelsDroppedInvalidName = 293 + + // Number of Go custom labels dropped because the label value was not valid UTF-8 + IDGoLabelsDroppedInvalidValue = 294 + // max number of ID values, keep this as *last entry* - IDMax = 293 + IDMax = 295 ) diff --git a/metrics/metrics.json b/metrics/metrics.json index 0d977c90d..97dc5c4bf 100644 --- a/metrics/metrics.json +++ b/metrics/metrics.json @@ -2139,5 +2139,19 @@ "name": "DotnetPEInfoErrCacheHit", "field": "agent.dotnet.pe_info_err_cache.hits", "id": 292 + }, + { + "description": "Number of Go custom labels dropped because the label name was empty or not valid UTF-8", + "type": "counter", + "name": "GoLabelsDroppedInvalidName", + "field": "agent.golabels.dropped.invalid_name", + "id": 293 + }, + { + "description": "Number of Go custom labels dropped because the label value was not valid UTF-8", + "type": "counter", + "name": "GoLabelsDroppedInvalidValue", + "field": "agent.golabels.dropped.invalid_value", + "id": 294 } ] diff --git a/support/ebpf/go_labels.ebpf.c b/support/ebpf/go_labels.ebpf.c index 352f3094b..479168e20 100644 --- a/support/ebpf/go_labels.ebpf.c +++ b/support/ebpf/go_labels.ebpf.c @@ -38,6 +38,13 @@ get_go_custom_labels_from_slice(PerCPURecord *record, void *labels_slice_ptr) if (i >= labels_slice.len) break; CustomLabel *lbl = &out->labels[i]; + // The PerCPURecord is reused across traces. Zero the label slot before + // populating it: bpf_probe_read_user() only writes the bytes it reads, so + // a key/value shorter than one previously written to this slot would + // otherwise inherit the old trailing bytes. Userspace reconstructs the + // string by scanning for the first NUL, so leftover bytes would corrupt + // the label keys and values we emit. + __builtin_memset(lbl, 0, sizeof(*lbl)); u8 klen = MIN(record->labels[i * 2].len, CUSTOM_LABEL_MAX_KEY_LEN - 1); if (bpf_probe_read_user(lbl->key, klen, record->labels[i * 2].str)) { DEBUG_PRINT( @@ -118,6 +125,9 @@ get_go_custom_labels_from_map(PerCPURecord *record, void *labels_map_ptr_ptr, Go char *vstr = map_value->values[i].str; unsigned vlen = map_value->values[i].len; if (tophash != 0 && kstr != NULL) { + // Zero the slot before use; see get_go_custom_labels_from_slice for why + // this is required (the PerCPURecord is reused across traces). + __builtin_memset(lbl, 0, sizeof(*lbl)); if (bpf_probe_read_user(lbl->key, MIN(klen, CUSTOM_LABEL_MAX_KEY_LEN - 1), kstr)) { DEBUG_PRINT("cl: failed to read key for custom label (%lx)", (unsigned long)kstr); return false; diff --git a/tracer/string_test.go b/tracer/string_test.go new file mode 100644 index 000000000..5ddb89035 --- /dev/null +++ b/tracer/string_test.go @@ -0,0 +1,65 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package tracer + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestGoString(t *testing.T) { + tests := map[string]struct { + input []byte + wantValue string + wantOK bool + }{ + "plain ascii": { + input: []byte("tenant\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"), + wantValue: "tenant", + wantOK: true, + }, + "valid multi-byte utf8": { + input: append([]byte("héllo"), 0), + wantValue: "héllo", + wantOK: true, + }, + "empty buffer": { + input: make([]byte, 16), + wantValue: "", + wantOK: true, + }, + "no nul terminator uses whole buffer": { + input: []byte("exactlysixteenb!"), + wantValue: "exactlysixteenb!", + wantOK: true, + }, + "stale bytes after nul are discarded": { + // Models a short key written into a per-CPU slot that previously + // held a longer key. Everything past the first NUL must be + // dropped; otherwise this produces a garbage label name. + input: []byte("tier\x00equest-trace"), + wantValue: "tier", + wantOK: true, + }, + "invalid utf8 lone continuation byte": { + input: []byte{'b', 'a', 'd', 0x80, 0x00}, + wantOK: false, + }, + "invalid utf8 truncated multi-byte rune": { + // A 3-byte rune cut after its first byte, as fixed-width + // truncation in the eBPF extractor can produce. + input: []byte{'x', 0xE2, 0x00}, + wantOK: false, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + got, ok := goString(tc.input) + require.Equal(t, tc.wantOK, ok) + require.Equal(t, tc.wantValue, got.String()) + }) + } +} diff --git a/tracer/tracer.go b/tracer/tracer.go index 159e790ab..29788fe50 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -18,7 +18,9 @@ import ( "slices" "strings" "sync" + "sync/atomic" "time" + "unicode/utf8" "unsafe" cebpf "github.com/cilium/ebpf" @@ -132,6 +134,14 @@ type Tracer struct { // probabilisticThreshold holds the threshold for probabilistic profiling. probabilisticThreshold uint + // goLabelsDroppedInvalidName counts Go custom labels dropped since the last + // metrics report because their name was empty or not valid UTF-8. + goLabelsDroppedInvalidName atomic.Int64 + + // goLabelsDroppedInvalidValue counts Go custom labels dropped since the last + // metrics report because their value was not valid UTF-8. + goLabelsDroppedInvalidValue atomic.Int64 + // done is closed when the tracer encounters an unrecoverable error. // Use Done() to obtain a read-only channel for use in select statements. done chan libpf.Void @@ -213,13 +223,22 @@ type progLoaderHelper struct { noTailCallTarget bool } -// Convert a C-string to Go string. -func goString(cstr []byte) libpf.String { +// goString converts a fixed-size NUL-terminated buffer into an interned string, +// ignoring everything from the first NUL byte onward. It returns ok=false when +// the content is not valid UTF-8, so callers handling untrusted data (custom +// label keys/values, thread comm) can reject it rather than emit garbage. This +// catches a multi-byte rune split by fixed-width truncation in eBPF, and guards +// against any future extraction bug. +func goString(cstr []byte) (libpf.String, bool) { index := bytes.IndexByte(cstr, byte(0)) if index < 0 { index = len(cstr) } - return libpf.Intern(pfunsafe.ToString(cstr[:index])) + b := cstr[:index] + if !utf8.Valid(b) { + return libpf.NullString, false + } + return libpf.Intern(pfunsafe.ToString(b)), true } // schedProcessFreeHookName returns the name of the tracepoint hook to use. @@ -1030,8 +1049,11 @@ func (t *Tracer) loadBpfTrace(raw []byte) (*libpf.EbpfTrace, error) { pid := libpf.PID(ptr.Pid) procMeta := t.processManager.MetaForPID(pid) trace := t.tracePool.Get().(*libpf.EbpfTrace) + // comm is best-effort: if the kernel handed us non-UTF-8 bytes, drop it + // rather than emit an invalid string downstream. + comm, _ := goString(ptr.Comm[:]) *trace = libpf.EbpfTrace{ - Comm: goString(ptr.Comm[:]), + Comm: comm, ExecutablePath: procMeta.Executable, ContainerID: procMeta.ContainerID, ProcessName: procMeta.Name, @@ -1058,8 +1080,18 @@ func (t *Tracer) loadBpfTrace(raw []byte) (*libpf.EbpfTrace, error) { trace.CustomLabels = make(map[libpf.String]libpf.String, int(ptr.Custom_labels.Len)) for i := 0; i < int(ptr.Custom_labels.Len); i++ { lbl := ptr.Custom_labels.Labels[i] - key := goString(lbl.Key[:]) - val := goString(lbl.Val[:]) + key, ok := goString(lbl.Key[:]) + if !ok || key == libpf.NullString { + t.goLabelsDroppedInvalidName.Add(1) + log.Debugf("Dropping Go custom label with empty or invalid UTF-8 name") + continue + } + val, ok := goString(lbl.Val[:]) + if !ok { + t.goLabelsDroppedInvalidValue.Add(1) + log.Debugf("Dropping Go custom label %s with invalid UTF-8 value", key) + continue + } trace.CustomLabels[key] = val } } @@ -1080,6 +1112,21 @@ func (t *Tracer) loadBpfTrace(raw []byte) (*libpf.EbpfTrace, error) { return trace, nil } +// goLabelsMetricCollector reports and resets the counters of Go custom labels +// dropped due to an invalid name or value since the previous call. +func (t *Tracer) goLabelsMetricCollector() []metrics.Metric { + return []metrics.Metric{ + { + ID: metrics.IDGoLabelsDroppedInvalidName, + Value: metrics.MetricValue(t.goLabelsDroppedInvalidName.Swap(0)), + }, + { + ID: metrics.IDGoLabelsDroppedInvalidValue, + Value: metrics.MetricValue(t.goLabelsDroppedInvalidValue.Swap(0)), + }, + } +} + // StartMapMonitors starts goroutines for collecting metrics and monitoring eBPF // maps for tracepoints, new traces, trace count updates and unknown PCs. func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *libpf.EbpfTrace) error { @@ -1133,6 +1180,7 @@ func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *libp metrics.AddSlice(eventMetricCollector()) metrics.AddSlice(traceEventMetricCollector()) metrics.AddSlice(t.eBPFMetricsCollector(translateIDs, previousMetricValue)) + metrics.AddSlice(t.goLabelsMetricCollector()) }) return nil