diff --git a/metrics/ids.go b/metrics/ids.go index 23fc6e0ec..c02a3dcc6 100644 --- a/metrics/ids.go +++ b/metrics/ids.go @@ -665,6 +665,12 @@ const ( // Number of cache hits for the dotnet PE open/parse error LRU IDDotnetPEInfoErrCacheHit = 292 + // Number of Go custom labels dropped because the label name was empty or not valid UTF-8 + IDGoLabelsDroppedInvalidName = 293 + + // Number of Go custom labels dropped because the label value was not valid UTF-8 + IDGoLabelsDroppedInvalidValue = 294 + // max number of ID values, keep this as *last entry* - IDMax = 293 + IDMax = 295 ) diff --git a/metrics/metrics.json b/metrics/metrics.json index 0d977c90d..97dc5c4bf 100644 --- a/metrics/metrics.json +++ b/metrics/metrics.json @@ -2139,5 +2139,19 @@ "name": "DotnetPEInfoErrCacheHit", "field": "agent.dotnet.pe_info_err_cache.hits", "id": 292 + }, + { + "description": "Number of Go custom labels dropped because the label name was empty or not valid UTF-8", + "type": "counter", + "name": "GoLabelsDroppedInvalidName", + "field": "agent.golabels.dropped.invalid_name", + "id": 293 + }, + { + "description": "Number of Go custom labels dropped because the label value was not valid UTF-8", + "type": "counter", + "name": "GoLabelsDroppedInvalidValue", + "field": "agent.golabels.dropped.invalid_value", + "id": 294 } ] diff --git a/support/ebpf/go_labels.ebpf.c b/support/ebpf/go_labels.ebpf.c index 352f3094b..62a8bd09a 100644 --- a/support/ebpf/go_labels.ebpf.c +++ b/support/ebpf/go_labels.ebpf.c @@ -38,12 +38,15 @@ get_go_custom_labels_from_slice(PerCPURecord *record, void *labels_slice_ptr) if (i >= labels_slice.len) break; CustomLabel *lbl = &out->labels[i]; - u8 klen = MIN(record->labels[i * 2].len, CUSTOM_LABEL_MAX_KEY_LEN - 1); + + u8 klen = MIN(record->labels[i * 2].len, CUSTOM_LABEL_MAX_KEY_LEN - 1); if (bpf_probe_read_user(lbl->key, klen, record->labels[i * 2].str)) { DEBUG_PRINT( "cl: failed to read key for custom label (%lx)", (unsigned long)record->labels[i * 2].str); return false; } + lbl->key[klen] = 0; + u8 vlen = MIN(record->labels[i * 2 + 1].len, CUSTOM_LABEL_MAX_VAL_LEN - 1); if (bpf_probe_read_user(lbl->val, vlen, record->labels[i * 2 + 1].str)) { DEBUG_PRINT( @@ -51,6 +54,7 @@ get_go_custom_labels_from_slice(PerCPURecord *record, void *labels_slice_ptr) (unsigned long)record->labels[i * 2 + 1].str); return false; } + lbl->val[vlen] = 0; } out->len = num_to_read; @@ -114,18 +118,21 @@ get_go_custom_labels_from_map(PerCPURecord *record, void *labels_map_ptr_ptr, Go CustomLabel *lbl = &out->labels[out->len]; char tophash = map_value->tophash[i]; char *kstr = map_value->keys[i].str; - unsigned klen = map_value->keys[i].len; - char *vstr = map_value->values[i].str; - unsigned vlen = map_value->values[i].len; if (tophash != 0 && kstr != NULL) { - if (bpf_probe_read_user(lbl->key, MIN(klen, CUSTOM_LABEL_MAX_KEY_LEN - 1), kstr)) { + unsigned klen = MIN(map_value->keys[i].len, CUSTOM_LABEL_MAX_KEY_LEN - 1); + if (bpf_probe_read_user(lbl->key, klen, kstr)) { DEBUG_PRINT("cl: failed to read key for custom label (%lx)", (unsigned long)kstr); return false; } - if (bpf_probe_read_user(lbl->val, MIN(vlen, CUSTOM_LABEL_MAX_VAL_LEN - 1), vstr)) { + lbl->key[klen] = 0; + + char *vstr = map_value->values[i].str; + unsigned vlen = MIN(map_value->values[i].len, CUSTOM_LABEL_MAX_VAL_LEN - 1); + if (bpf_probe_read_user(lbl->val, vlen, vstr)) { DEBUG_PRINT("cl: failed to read value for custom label"); return false; } + lbl->val[vlen] = 0; out->len++; } } diff --git a/support/ebpf/tracer.ebpf.amd64 b/support/ebpf/tracer.ebpf.amd64 index 5bd6e6473..8234d4fa8 100644 Binary files a/support/ebpf/tracer.ebpf.amd64 and b/support/ebpf/tracer.ebpf.amd64 differ diff --git a/support/ebpf/tracer.ebpf.arm64 b/support/ebpf/tracer.ebpf.arm64 index 65283a6eb..41bb49ea9 100644 Binary files a/support/ebpf/tracer.ebpf.arm64 and b/support/ebpf/tracer.ebpf.arm64 differ diff --git a/tracer/labels.go b/tracer/labels.go new file mode 100644 index 000000000..40e03935a --- /dev/null +++ b/tracer/labels.go @@ -0,0 +1,88 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package tracer // import "go.opentelemetry.io/ebpf-profiler/tracer" + +import ( + "bytes" + "sync/atomic" + "unicode/utf8" + + "go.opentelemetry.io/ebpf-profiler/metrics" +) + +// customLabelValidator validates custom label keys and values extracted from +// eBPF and tracks how many are dropped due to invalid UTF-8. The zero value is +// ready to use. Methods take pointer receivers (atomic ops require an +// addressable counter), so embed as a value field on a struct held by pointer. +type customLabelValidator struct { + droppedInvalidName atomic.Int64 + droppedInvalidValue atomic.Int64 +} + +// cstring returns the prefix of buf up to (but not including) the first NUL +// byte, or all of buf if no NUL is present. Suitable for fixed-size buffers +// populated from eBPF. +func cstring(buf []byte) []byte { + if i := bytes.IndexByte(buf, 0); i >= 0 { + return buf[:i] + } + return buf +} + +// validateKey enforces strict UTF-8 validity on a custom label key. Any invalid +// byte (or an empty key) returns ok=false and bumps the drop counter, signaling +// the caller to drop the label. A corrupted key would silently group unrelated +// samples under a garbage name, so strictness is intentional here. The returned +// slice aliases buf; copy or intern before the buffer is reused. +func (v *customLabelValidator) validateKey(buf []byte) ([]byte, bool) { + b := cstring(buf) + if len(b) == 0 || !utf8.Valid(b) { + v.droppedInvalidName.Add(1) + return nil, false + } + return b, true +} + +// validateValue is lenient on a custom label value: fixed-width eBPF buffers +// can clip a multi-byte rune in half, so on invalid trailing bytes we salvage +// the longest valid UTF-8 prefix rather than discard the whole label. ok=false +// (and bumping the drop counter) fires only when the salvage is empty, i.e. +// the input was non-empty garbage rather than mid-rune truncation. The returned +// slice aliases buf; copy or intern before the buffer is reused. +func (v *customLabelValidator) validateValue(buf []byte) ([]byte, bool) { + b := cstring(buf) + pos := len(b) + if !utf8.Valid(b) { + // Walk forward; stop at the first invalid byte. This recovers the entire + // valid prefix of a mid-rune truncation in one pass. + pos = 0 + for pos < len(b) { + r, size := utf8.DecodeRune(b[pos:]) + if r == utf8.RuneError && size == 1 { + break + } + pos += size + } + if pos == 0 { + v.droppedInvalidValue.Add(1) + return nil, false + } + } + return b[:pos], true +} + +// getAndResetMetrics reports and resets the counters of custom labels dropped +// due to an invalid name or value since the previous call. +func (v *customLabelValidator) getAndResetMetrics() []metrics.Metric { + return []metrics.Metric{ + { + ID: metrics.IDGoLabelsDroppedInvalidName, + Value: metrics.MetricValue(v.droppedInvalidName.Swap(0)), + }, + { + ID: metrics.IDGoLabelsDroppedInvalidValue, + Value: metrics.MetricValue(v.droppedInvalidValue.Swap(0)), + }, + } +} diff --git a/tracer/labels_test.go b/tracer/labels_test.go new file mode 100644 index 000000000..c2a0c5bda --- /dev/null +++ b/tracer/labels_test.go @@ -0,0 +1,156 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package tracer + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/ebpf-profiler/metrics" +) + +func TestCustomLabelValidatorValidateKey(t *testing.T) { + tests := map[string]struct { + input []byte + wantValue string + wantOK bool + wantDropped int64 + }{ + "plain ascii": { + input: []byte("tenant\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"), + wantValue: "tenant", + wantOK: true, + }, + "valid multi-byte utf8": { + input: append([]byte("héllo"), 0), + wantValue: "héllo", + wantOK: true, + }, + "empty buffer drops": { + // An empty key cannot be grouped against, so reject. + input: make([]byte, 16), + wantOK: false, + wantDropped: 1, + }, + "stale bytes after nul are discarded": { + input: []byte("tier\x00equest-trace"), + wantValue: "tier", + wantOK: true, + }, + "mid-rune truncation drops the whole key": { + // Keys are strict: a salvageable value-style prefix is not enough, + // since dropping the trailing byte would silently change which key + // samples are grouped under. + input: []byte{'o', 'k', 0xE2, 0x00}, + wantOK: false, + wantDropped: 1, + }, + "trailing lone continuation byte drops": { + input: []byte{'a', 'b', 'c', 0x80, 0x00}, + wantOK: false, + wantDropped: 1, + }, + "all-invalid bytes drop": { + input: []byte{0x80, 0x80, 0x00}, + wantOK: false, + wantDropped: 1, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + var v customLabelValidator + got, ok := v.validateKey(tc.input) + require.Equal(t, tc.wantOK, ok) + require.Equal(t, tc.wantValue, string(got)) + require.Equal(t, tc.wantDropped, v.droppedInvalidName.Load()) + }) + } +} + +func TestCustomLabelValidatorValidateValue(t *testing.T) { + tests := map[string]struct { + input []byte + wantValue string + wantOK bool + wantDropped int64 + }{ + "plain ascii": { + input: []byte("tenant\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"), + wantValue: "tenant", + wantOK: true, + }, + "valid multi-byte utf8": { + input: append([]byte("héllo"), 0), + wantValue: "héllo", + wantOK: true, + }, + "empty buffer is valid": { + input: make([]byte, 16), + wantValue: "", + wantOK: true, + }, + "stale bytes after nul are discarded": { + input: []byte("tier\x00equest-trace"), + wantValue: "tier", + wantOK: true, + }, + "mid-rune truncation salvages valid prefix": { + // 3-byte rune (0xE2 0x98 0x83 = U+2603) cut after the first byte. + // The valid "ok" prefix must be preserved. + input: []byte{'o', 'k', 0xE2, 0x00}, + wantValue: "ok", + wantOK: true, + }, + "trailing lone continuation byte salvages valid prefix": { + input: []byte{'a', 'b', 'c', 0x80, 0x00}, + wantValue: "abc", + wantOK: true, + }, + "all-invalid bytes drop": { + input: []byte{0x80, 0x80, 0x00}, + wantOK: false, + wantDropped: 1, + }, + "single invalid byte drops": { + input: []byte{0xC0, 0x00}, + wantOK: false, + wantDropped: 1, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + var v customLabelValidator + got, ok := v.validateValue(tc.input) + require.Equal(t, tc.wantOK, ok) + require.Equal(t, tc.wantValue, string(got)) + require.Equal(t, tc.wantDropped, v.droppedInvalidValue.Load()) + }) + } +} + +func TestCustomLabelValidatorGetAndResetMetrics(t *testing.T) { + var v customLabelValidator + + // Trigger two name drops and one value drop. + v.validateKey([]byte{0xC0, 0}) + v.validateKey([]byte{0}) + v.validateValue([]byte{0xC0, 0}) + + m := v.getAndResetMetrics() + byID := map[metrics.MetricID]metrics.MetricValue{} + for _, x := range m { + byID[x.ID] = x.Value + } + require.Equal(t, metrics.MetricValue(2), byID[metrics.IDGoLabelsDroppedInvalidName]) + require.Equal(t, metrics.MetricValue(1), byID[metrics.IDGoLabelsDroppedInvalidValue]) + + // Second call returns zeros — counters reset. + m = v.getAndResetMetrics() + for _, x := range m { + require.Equal(t, metrics.MetricValue(0), x.Value, x.ID) + } +} diff --git a/tracer/string_test.go b/tracer/string_test.go new file mode 100644 index 000000000..4f043663c --- /dev/null +++ b/tracer/string_test.go @@ -0,0 +1,53 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package tracer + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestGoString(t *testing.T) { + tests := map[string]struct { + input []byte + wantValue string + }{ + "plain ascii": { + input: []byte("tenant\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"), + wantValue: "tenant", + }, + "valid multi-byte utf8": { + input: append([]byte("héllo"), 0), + wantValue: "héllo", + }, + "empty buffer": { + input: make([]byte, 16), + wantValue: "", + }, + "no nul terminator uses whole buffer": { + input: []byte("exactlysixteenb!"), + wantValue: "exactlysixteenb!", + }, + "stale bytes after nul are discarded": { + // Models a short string written into a per-CPU slot that previously + // held a longer one. Everything past the first NUL must be dropped. + input: []byte("tier\x00equest-trace"), + wantValue: "tier", + }, + "invalid utf8 is passed through unvalidated": { + // goString is used for comm, which is kernel-supplied and trusted + // as-is; validation happens only for label strings. + input: []byte{'b', 'a', 'd', 0x80, 0x00}, + wantValue: "bad\x80", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + got := goString(tc.input) + require.Equal(t, tc.wantValue, got.String()) + }) + } +} diff --git a/tracer/tracer.go b/tracer/tracer.go index 57a446a52..8deec02b9 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -6,7 +6,6 @@ package tracer // import "go.opentelemetry.io/ebpf-profiler/tracer" import ( "bufio" - "bytes" "context" "errors" "fmt" @@ -132,6 +131,10 @@ type Tracer struct { // probabilisticThreshold holds the threshold for probabilistic profiling. probabilisticThreshold uint + // customLabels validates custom label keys/values pulled from eBPF and + // tracks how many were dropped due to invalid UTF-8. + customLabels customLabelValidator + // done is closed when the tracer encounters an unrecoverable error. // Use Done() to obtain a read-only channel for use in select statements. done chan libpf.Void @@ -213,13 +216,10 @@ type progLoaderHelper struct { noTailCallTarget bool } -// Convert a C-string to Go string. -func goString(cstr []byte) libpf.String { - index := bytes.IndexByte(cstr, byte(0)) - if index < 0 { - index = len(cstr) - } - return libpf.Intern(pfunsafe.ToString(cstr[:index])) +// goString converts a fixed-size NUL-terminated buffer into an interned string, +// ignoring everything from the first NUL byte onward. +func goString(buf []byte) libpf.String { + return libpf.Intern(pfunsafe.ToString(cstring(buf))) } // schedProcessFreeHookName returns the name of the tracepoint hook to use. @@ -1058,9 +1058,18 @@ func (t *Tracer) loadBpfTrace(raw []byte) (*libpf.EbpfTrace, error) { trace.CustomLabels = make(map[libpf.String]libpf.String, int(ptr.Custom_labels.Len)) for i := 0; i < int(ptr.Custom_labels.Len); i++ { lbl := ptr.Custom_labels.Labels[i] - key := goString(lbl.Key[:]) - val := goString(lbl.Val[:]) - trace.CustomLabels[key] = val + keyBytes, ok := t.customLabels.validateKey(lbl.Key[:]) + if !ok { + log.Debugf("Dropping Go custom label with empty or invalid UTF-8 name") + continue + } + key := libpf.Intern(pfunsafe.ToString(keyBytes)) + valBytes, ok := t.customLabels.validateValue(lbl.Val[:]) + if !ok { + log.Debugf("Dropping Go custom label %s with invalid UTF-8 value", key) + continue + } + trace.CustomLabels[key] = libpf.Intern(pfunsafe.ToString(valBytes)) } } @@ -1133,6 +1142,7 @@ func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *libp metrics.AddSlice(eventMetricCollector()) metrics.AddSlice(traceEventMetricCollector()) metrics.AddSlice(t.eBPFMetricsCollector(translateIDs, previousMetricValue)) + metrics.AddSlice(t.customLabels.getAndResetMetrics()) }) return nil