From 6f8149a32d019eb2f581225141ab02ecc41f91ee Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Mon, 11 Aug 2025 20:43:54 +0000 Subject: [PATCH 01/15] tyler's PoC for attribute hashing --- CHANGELOG.md | 2 + attribute/hash.go | 92 ++++++++++++++++++ attribute/hash_test.go | 150 +++++++++++++++++++++++++++++ attribute/internal/fnv/fnv.go | 76 +++++++++++++++ attribute/internal/fnv/fnv_test.go | 98 +++++++++++++++++++ attribute/set.go | 94 +++++++++--------- 6 files changed, 468 insertions(+), 44 deletions(-) create mode 100644 attribute/hash.go create mode 100644 attribute/hash_test.go create mode 100644 attribute/internal/fnv/fnv.go create mode 100644 attribute/internal/fnv/fnv_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index c4002d5b166..f46e42765ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,7 @@ The next release will require at least [Go 1.24]. - The `go.opentelemetry.io/otel/semconv/v1.37.0` package. The package contains semantic conventions from the `v1.37.0` version of the OpenTelemetry Semantic Conventions. See the [migration documentation](./semconv/v1.37.0/MIGRATION.md) for information on how to upgrade from `go.opentelemetry.io/otel/semconv/v1.36.0.`(#7254) +- Greatly reduce the cost of recording metrics in `go.opentelemetry.io/otel/sdk/metric` using hashing for map keys. (#7175) ### Changed @@ -101,6 +102,7 @@ The next release will require at least [Go 1.24]. - Fix `go.opentelemetry.io/otel/exporters/prometheus` to not append a suffix if it's already present in metric name. (#7088) - Fix the `go.opentelemetry.io/otel/exporters/stdout/stdouttrace` self-observability component type and name. (#7195) - Fix partial export count metric in `go.opentelemetry.io/otel/exporters/stdout/stdouttrace`. (#7199) +- Clarify the documentation about equivalence guarantees for the `Set` and `Distinct` types in `go.opentelemetry.io/otel/attribute`. (#7175) ### Deprecated diff --git a/attribute/hash.go b/attribute/hash.go new file mode 100644 index 00000000000..742cb28326c --- /dev/null +++ b/attribute/hash.go @@ -0,0 +1,92 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package attribute // import "go.opentelemetry.io/otel/attribute" + +import ( + "fmt" + "reflect" + + "go.opentelemetry.io/otel/attribute/internal/fnv" +) + +// Type identifiers. These identifiers are hashed before the value of the +// corresponding type. This is done to distinguish values that are hashed with +// the same value representation (e.g. `int64(1)` and `true`, []int64{0} and +// int64(0)). +// +// These are all 8 byte length strings converted to a uint64 representation. A +// uint64 is used instead of the string directly as an optimization, it avoids +// the for loop in [fnv] which adds minor overhead. +const ( + boolID uint64 = 7953749933313450591 // "_boolean" (little endian) + int64ID uint64 = 7592915492740740150 // "64_bit_i" (little endian) + float64ID uint64 = 7376742710626956342 // "64_bit_f" (little endian) + stringID uint64 = 6874584755375207263 // "_string_" (little endian) + boolSliceID uint64 = 6875993255270243167 // "_[]bool_" (little endian) + int64SliceID uint64 = 3762322556277578591 // "_[]int64" (little endian) + float64SliceID uint64 = 7308324551835016539 // "[]double" (little endian) + stringSliceID uint64 = 7453010373645655387 // "[]string" (little endian) +) + +// hashKVs returns a new FNV-1a hash of kvs. +func hashKVs(kvs []KeyValue) fnv.Hash { + h := fnv.New() + for _, kv := range kvs { + h = hashKV(h, kv) + } + return h +} + +// hashKV returns the FNV-1a hash of kv with h as the base. +func hashKV(h fnv.Hash, kv KeyValue) fnv.Hash { + h = h.String(string(kv.Key)) + + switch kv.Value.Type() { + case BOOL: + h = h.Uint64(boolID) + h = h.Uint64(kv.Value.numeric) + case INT64: + h = h.Uint64(int64ID) + h = h.Uint64(kv.Value.numeric) + case FLOAT64: + h = h.Uint64(float64ID) + // Assumes numeric stored with math.Float64bits. + h = h.Uint64(kv.Value.numeric) + case STRING: + h = h.Uint64(stringID) + h = h.String(kv.Value.stringly) + case BOOLSLICE: + h = h.Uint64(boolSliceID) + rv := reflect.ValueOf(kv.Value.slice) + for i := 0; i < rv.Len(); i++ { + h = h.Bool(rv.Index(i).Bool()) + } + case INT64SLICE: + h = h.Uint64(int64SliceID) + rv := reflect.ValueOf(kv.Value.slice) + for i := 0; i < rv.Len(); i++ { + h = h.Int64(rv.Index(i).Int()) + } + case FLOAT64SLICE: + h = h.Uint64(float64SliceID) + rv := reflect.ValueOf(kv.Value.slice) + for i := 0; i < rv.Len(); i++ { + h = h.Float64(rv.Index(i).Float()) + } + case STRINGSLICE: + h = h.Uint64(stringSliceID) + rv := reflect.ValueOf(kv.Value.slice) + for i := 0; i < rv.Len(); i++ { + h = h.String(rv.Index(i).String()) + } + case INVALID: + default: + // Logging is an alternative, but using the internal logger here + // causes an import cycle so it is not done. + v := kv.Value.AsInterface() + msg := fmt.Sprintf("unknown value type: %[1]v (%[1]T)", v) + panic(msg) + } + return h +} diff --git a/attribute/hash_test.go b/attribute/hash_test.go new file mode 100644 index 00000000000..da67614c0a9 --- /dev/null +++ b/attribute/hash_test.go @@ -0,0 +1,150 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package attribute // import "go.opentelemetry.io/otel/attribute" + +import ( + "fmt" + "strings" + "testing" + + "go.opentelemetry.io/otel/attribute/internal/fnv" +) + +// keyVals is all the KeyValue generators that are used for testing. This is +// not []KeyValue so different keys can be used with the test Values. +var keyVals = []func(string) KeyValue{ + func(k string) KeyValue { return Bool(k, true) }, + func(k string) KeyValue { return Bool(k, false) }, + func(k string) KeyValue { return BoolSlice(k, []bool{false, true}) }, + func(k string) KeyValue { return BoolSlice(k, []bool{true, true, false}) }, + func(k string) KeyValue { return Int(k, -1278) }, + func(k string) KeyValue { return Int(k, 0) }, // Should be different than false above. + func(k string) KeyValue { return IntSlice(k, []int{3, 23, 21, -8, 0}) }, + func(k string) KeyValue { return IntSlice(k, []int{1}) }, + func(k string) KeyValue { return Int64(k, 1) }, // Should be different from true and []int{1}. + func(k string) KeyValue { return Int64(k, 29369) }, + func(k string) KeyValue { return Int64Slice(k, []int64{3826, -38, -29, -1}) }, + func(k string) KeyValue { return Int64Slice(k, []int64{8, -328, 29, 0}) }, + func(k string) KeyValue { return Float64(k, -0.3812381) }, + func(k string) KeyValue { return Float64(k, 1e32) }, + func(k string) KeyValue { return Float64Slice(k, []float64{0.1, -3.8, -29., 0.3321}) }, + func(k string) KeyValue { return Float64Slice(k, []float64{-13e8, -32.8, 4., 1e28}) }, + func(k string) KeyValue { return String(k, "foo") }, + func(k string) KeyValue { return String(k, "bar") }, + func(k string) KeyValue { return StringSlice(k, []string{"foo", "bar", "baz"}) }, + func(k string) KeyValue { return StringSlice(k, []string{"[]i1"}) }, +} + +func TestHashKVsEquality(t *testing.T) { + type testcase struct { + hash fnv.Hash + kvs []KeyValue + } + + keys := []string{"k0", "k1"} + + // Test all combinations up to length 3. + n := len(keyVals) + result := make([]testcase, 0, 1+len(keys)*(n+(n*n)+(n*n*n))) + + result = append(result, testcase{hashKVs(nil), nil}) + + for _, key := range keys { + for i := 0; i < len(keyVals); i++ { + kvs := []KeyValue{keyVals[i](key)} + hash := hashKVs(kvs) + result = append(result, testcase{hash, kvs}) + + for j := 0; j < len(keyVals); j++ { + kvs := []KeyValue{ + keyVals[i](key), + keyVals[j](key), + } + hash := hashKVs(kvs) + result = append(result, testcase{hash, kvs}) + + for k := 0; k < len(keyVals); k++ { + kvs := []KeyValue{ + keyVals[i](key), + keyVals[j](key), + keyVals[k](key), + } + hash := hashKVs(kvs) + result = append(result, testcase{hash, kvs}) + } + } + } + } + + for i := 0; i < len(result); i++ { + hI, kvI := result[i].hash, result[i].kvs + for j := 0; j < len(result); j++ { + hJ, kvJ := result[j].hash, result[j].kvs + m := msg{i: i, j: j, hI: hI, hJ: hJ, kvI: kvI, kvJ: kvJ} + if i == j { + m.cmp = "==" + if hI != hJ { + t.Errorf("hashes not equal: %s", m) + } + } else { + m.cmp = "!=" + if hI == hJ { + // Do not use testify/assert here. It is slow. + t.Errorf("hashes equal: %s", m) + } + } + } + } +} + +type msg struct { + cmp string + i, j int + hI, hJ fnv.Hash + kvI, kvJ []KeyValue +} + +func (m msg) String() string { + return fmt.Sprintf( + "(%d: %d)%s %s (%d: %d)%s", + m.i, m.hI, m.slice(m.kvI), m.cmp, m.j, m.hJ, m.slice(m.kvJ), + ) +} + +func (m msg) slice(kvs []KeyValue) string { + if len(kvs) == 0 { + return "[]" + } + + var b strings.Builder + _, _ = b.WriteRune('[') + _, _ = b.WriteString(string(kvs[0].Key)) + _, _ = b.WriteRune(':') + _, _ = b.WriteString(kvs[0].Value.Emit()) + for _, kv := range kvs[1:] { + _, _ = b.WriteRune(',') + _, _ = b.WriteString(string(kv.Key)) + _, _ = b.WriteRune(':') + _, _ = b.WriteString(kv.Value.Emit()) + } + _, _ = b.WriteRune(']') + return b.String() +} + +func BenchmarkHashKVs(b *testing.B) { + attrs := make([]KeyValue, len(keyVals)) + for i := range keyVals { + attrs[i] = keyVals[i]("k") + } + + var h fnv.Hash + + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + h = hashKVs(attrs) + } + + _ = h +} diff --git a/attribute/internal/fnv/fnv.go b/attribute/internal/fnv/fnv.go new file mode 100644 index 00000000000..9557f63cef3 --- /dev/null +++ b/attribute/internal/fnv/fnv.go @@ -0,0 +1,76 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fnv provides an efficient and allocation free implementation of the +// FNV-1a, non-cryptographic hash functions created by Glenn Fowler, Landon +// Curt Noll, and Phong Vo. See +// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function. +// +// This implementation is provided as an alternative to "hash/fnv". The +// built-in implementation requires two allocations per Write for a string (one +// for the hash pointer and the other to convert a string to a []byte). This +// implementation is more efficientient and does not require any allocations. +package fnv // import "go.opentelemetry.io/otel/attribute/internal/fnv" + +import ( + "math" +) + +// Taken from "hash/fnv". Verified at: +// +// - https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17.html +// - http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-param +const ( + offset64 = 14695981039346656037 + prime64 = 1099511628211 +) + +// Hash is an FNV-1a hash with appropriate hashing functions for methods. +type Hash uint64 + +// New64 returns a new initialized 64-bit FNV-1a Hash. Its value is laid out in +// big-endian byte order. +func New() Hash { + return offset64 +} + +func (h Hash) Uint64(val uint64) Hash { + v := uint64(h) + v = (v ^ ((val >> 56) & 0xFF)) * prime64 + v = (v ^ ((val >> 48) & 0xFF)) * prime64 + v = (v ^ ((val >> 40) & 0xFF)) * prime64 + v = (v ^ ((val >> 32) & 0xFF)) * prime64 + v = (v ^ ((val >> 24) & 0xFF)) * prime64 + v = (v ^ ((val >> 16) & 0xFF)) * prime64 + v = (v ^ ((val >> 8) & 0xFF)) * prime64 + v = (v ^ ((val >> 0) & 0xFF)) * prime64 + return Hash(v) +} + +func (h Hash) Bool(val bool) Hash { // nolint:revive // val is not a flag. + if val { + return h.Uint64(1) + } + return h.Uint64(0) +} + +func (h Hash) Float64(val float64) Hash { + return h.Uint64(math.Float64bits(val)) +} + +func (h Hash) Int64(val int64) Hash { + return h.Uint64(uint64(val)) +} + +func (h Hash) String(val string) Hash { + v := uint64(h) + for _, c := range val { + v ^= uint64(c) + v *= prime64 + } + return Hash(v) +} diff --git a/attribute/internal/fnv/fnv_test.go b/attribute/internal/fnv/fnv_test.go new file mode 100644 index 00000000000..2f771336633 --- /dev/null +++ b/attribute/internal/fnv/fnv_test.go @@ -0,0 +1,98 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fnv + +import ( + "encoding/binary" + "hash/fnv" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestStringHashCorrectness(t *testing.T) { + input := []string{"", "a", "ab", "abc"} + + refH := fnv.New64a() + for _, in := range input { + h := New() + got := h.String(in) + + refH.Reset() + n, err := refH.Write([]byte(in)) + require.NoError(t, err) + require.Equalf(t, len(in), n, "wrote only %d out of %d bytes", n, len(in)) + want := refH.Sum64() + + assert.Equal(t, want, uint64(got), in) + } +} + +func TestUint64HashCorrectness(t *testing.T) { + input := []uint64{0, 10, 312984238623, 1024} + + buf := make([]byte, 8) + refH := fnv.New64a() + for _, in := range input { + h := New() + got := h.Uint64(in) + + refH.Reset() + binary.BigEndian.PutUint64(buf, in) + n, err := refH.Write(buf[:]) + require.NoError(t, err) + require.Equalf(t, 8, n, "wrote only %d out of 8 bytes", n) + want := refH.Sum64() + + assert.Equal(t, want, uint64(got), in) + } +} + +func TestIntegrity(t *testing.T) { + data := []byte{'1', '2', 3, 4, 5, 6, 7, 8, 9, 10} + h0 := New() + want := h0.String(string(data)) + + h1 := New() + got := h1.String(string(data[:2])) + num := binary.BigEndian.Uint64(data[2:]) + got = got.Uint64(num) + + assert.Equal(t, want, got) +} + +var result Hash + +func BenchmarkStringKB(b *testing.B) { + b.SetBytes(1024) + data := make([]byte, 1024) + for i := range data { + data[i] = byte(i) + } + s := string(data) + h := New() + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + result = h.String(s) + } +} + +func BenchmarkUint64KB(b *testing.B) { + b.SetBytes(8) + i := uint64(192386739218721) + h := New() + + b.ReportAllocs() + b.ResetTimer() + for n := 0; n < b.N; n++ { + result = h.Uint64(i) + } +} diff --git a/attribute/set.go b/attribute/set.go index 764285a9097..c8cfd2e1fe1 100644 --- a/attribute/set.go +++ b/attribute/set.go @@ -9,6 +9,8 @@ import ( "reflect" "slices" "sort" + + "go.opentelemetry.io/otel/attribute/internal/fnv" ) type ( @@ -26,7 +28,8 @@ type ( // instead of a Set directly. In addition to that type providing guarantees // on stable equivalence, it may also provide performance improvements. Set struct { - equivalent Distinct + hash fnv.Hash + data any } // Distinct is a unique identifier of a Set. @@ -35,7 +38,7 @@ type ( // return the same value across versions. For this reason, Distinct should // always be used as a map key instead of a Set. Distinct struct { - iface any + hash fnv.Hash } // Sortable implements sort.Interface, used for sorting KeyValue. @@ -46,6 +49,14 @@ type ( Sortable []KeyValue ) +// Compile time check these types remain comparable. +var ( + _ = isComparable(Set{}) + _ = isComparable(Distinct{}) +) + +func isComparable[T comparable](t T) T { return t } + var ( // keyValueType is used in computeDistinctReflect. keyValueType = reflect.TypeOf(KeyValue{}) @@ -56,15 +67,13 @@ var ( // // This is kept for backwards compatibility, but should not be used in new code. userDefinedEmptySet = &Set{ - equivalent: Distinct{ - iface: [0]KeyValue{}, - }, + hash: fnv.New(), + data: [0]KeyValue{}, } emptySet = Set{ - equivalent: Distinct{ - iface: [0]KeyValue{}, - }, + hash: fnv.New(), + data: [0]KeyValue{}, } ) @@ -79,30 +88,28 @@ func EmptySet() *Set { return userDefinedEmptySet } -// reflectValue abbreviates reflect.ValueOf(d). -func (d Distinct) reflectValue() reflect.Value { - return reflect.ValueOf(d.iface) -} - // Valid reports whether this value refers to a valid Set. -func (d Distinct) Valid() bool { - return d.iface != nil +func (d Distinct) Valid() bool { return d.hash != 0 } + +// reflectValue abbreviates reflect.ValueOf(d). +func (l Set) reflectValue() reflect.Value { + return reflect.ValueOf(l.data) } // Len returns the number of attributes in this set. func (l *Set) Len() int { - if l == nil || !l.equivalent.Valid() { + if l == nil || l.hash == 0 { return 0 } - return l.equivalent.reflectValue().Len() + return l.reflectValue().Len() } // Get returns the KeyValue at ordered position idx in this set. func (l *Set) Get(idx int) (KeyValue, bool) { - if l == nil || !l.equivalent.Valid() { + if l == nil || l.hash == 0 { return KeyValue{}, false } - value := l.equivalent.reflectValue() + value := l.reflectValue() if idx >= 0 && idx < value.Len() { // Note: The Go compiler successfully avoids an allocation for @@ -115,10 +122,10 @@ func (l *Set) Get(idx int) (KeyValue, bool) { // Value returns the value of a specified key in this set. func (l *Set) Value(k Key) (Value, bool) { - if l == nil || !l.equivalent.Valid() { + if l == nil || l.hash == 0 { return Value{}, false } - rValue := l.equivalent.reflectValue() + rValue := l.reflectValue() vlen := rValue.Len() idx := sort.Search(vlen, func(idx int) bool { @@ -163,10 +170,10 @@ func (l *Set) ToSlice() []KeyValue { // attribute set with the same elements as this, where sets are made unique by // choosing the last value in the input for any given key. func (l *Set) Equivalent() Distinct { - if l == nil || !l.equivalent.Valid() { - return emptySet.equivalent + if l == nil || l.hash == 0 { + return Distinct{hash: emptySet.hash} } - return l.equivalent + return Distinct{hash: l.hash} } // Equals reports whether the argument set is equivalent to this set. @@ -241,10 +248,10 @@ func NewSetWithFiltered(kvs []KeyValue, filter Filter) (Set, []KeyValue) { if filter != nil { if div := filteredToFront(kvs, filter); div != 0 { - return Set{equivalent: computeDistinct(kvs[div:])}, kvs[:div] + return newSet(kvs[div:]), kvs[:div] } } - return Set{equivalent: computeDistinct(kvs)}, nil + return newSet(kvs), nil } // NewSetWithSortableFiltered returns a new Set. @@ -324,7 +331,7 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) { if first == 0 { // It is safe to assume len(slice) >= 1 given we found at least one // attribute above that needs to be filtered out. - return Set{equivalent: computeDistinct(slice[1:])}, slice[:1] + return newSet(slice[1:]), slice[:1] } // Move the filtered slice[first] to the front (preserving order). @@ -334,25 +341,24 @@ func (l *Set) Filter(re Filter) (Set, []KeyValue) { // Do not re-evaluate re(slice[first+1:]). div := filteredToFront(slice[1:first+1], re) + 1 - return Set{equivalent: computeDistinct(slice[div:])}, slice[:div] + return newSet(slice[div:]), slice[:div] } -// computeDistinct returns a Distinct using either the fixed- or -// reflect-oriented code path, depending on the size of the input. The input -// slice is assumed to already be sorted and de-duplicated. -func computeDistinct(kvs []KeyValue) Distinct { - iface := computeDistinctFixed(kvs) - if iface == nil { - iface = computeDistinctReflect(kvs) +// newSet returns a new set based on the sorted and uniqued kvs. +func newSet(kvs []KeyValue) Set { + s := Set{ + hash: hashKVs(kvs), + data: computeDataFixed(kvs), } - return Distinct{ - iface: iface, + if s.data == nil { + s.data = computeDataReflect(kvs) } + return s } -// computeDistinctFixed computes a Distinct for small slices. It returns nil -// if the input is too large for this code path. -func computeDistinctFixed(kvs []KeyValue) any { +// computeDataFixed computes a Set data for small slices. It returns nil if the +// input is too large for this code path. +func computeDataFixed(kvs []KeyValue) any { switch len(kvs) { case 1: return [1]KeyValue(kvs) @@ -379,9 +385,9 @@ func computeDistinctFixed(kvs []KeyValue) any { } } -// computeDistinctReflect computes a Distinct using reflection, works for any -// size input. -func computeDistinctReflect(kvs []KeyValue) any { +// computeDataReflect computes a Set data using reflection, works for any size +// input. +func computeDataReflect(kvs []KeyValue) any { at := reflect.New(reflect.ArrayOf(len(kvs), keyValueType)).Elem() for i, keyValue := range kvs { *(at.Index(i).Addr().Interface().(*KeyValue)) = keyValue @@ -391,7 +397,7 @@ func computeDistinctReflect(kvs []KeyValue) any { // MarshalJSON returns the JSON encoding of the Set. func (l *Set) MarshalJSON() ([]byte, error) { - return json.Marshal(l.equivalent.iface) + return json.Marshal(l.data) } // MarshalLog is the marshaling function used by the logging system to represent this Set. From f36871da129d1f32ff5da2579fe2ecdbd6d159ce Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 12 Aug 2025 14:11:09 +0000 Subject: [PATCH 02/15] lint --- attribute/hash_test.go | 4 ++-- attribute/internal/fnv/fnv.go | 4 ++-- attribute/internal/fnv/fnv_test.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index da67614c0a9..d8987a705ab 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -108,11 +108,11 @@ type msg struct { func (m msg) String() string { return fmt.Sprintf( "(%d: %d)%s %s (%d: %d)%s", - m.i, m.hI, m.slice(m.kvI), m.cmp, m.j, m.hJ, m.slice(m.kvJ), + m.i, m.hI, slice(m.kvI), m.cmp, m.j, m.hJ, slice(m.kvJ), ) } -func (m msg) slice(kvs []KeyValue) string { +func slice(kvs []KeyValue) string { if len(kvs) == 0 { return "[]" } diff --git a/attribute/internal/fnv/fnv.go b/attribute/internal/fnv/fnv.go index 9557f63cef3..7c6cc75bcca 100644 --- a/attribute/internal/fnv/fnv.go +++ b/attribute/internal/fnv/fnv.go @@ -32,7 +32,7 @@ const ( // Hash is an FNV-1a hash with appropriate hashing functions for methods. type Hash uint64 -// New64 returns a new initialized 64-bit FNV-1a Hash. Its value is laid out in +// New returns a new initialized 64-bit FNV-1a Hash. Its value is laid out in // big-endian byte order. func New() Hash { return offset64 @@ -63,7 +63,7 @@ func (h Hash) Float64(val float64) Hash { } func (h Hash) Int64(val int64) Hash { - return h.Uint64(uint64(val)) + return h.Uint64(uint64(val)) // nolint:gosec // overflow doesn't matter since we are hashing. } func (h Hash) String(val string) Hash { diff --git a/attribute/internal/fnv/fnv_test.go b/attribute/internal/fnv/fnv_test.go index 2f771336633..7dab26983ec 100644 --- a/attribute/internal/fnv/fnv_test.go +++ b/attribute/internal/fnv/fnv_test.go @@ -45,7 +45,7 @@ func TestUint64HashCorrectness(t *testing.T) { refH.Reset() binary.BigEndian.PutUint64(buf, in) - n, err := refH.Write(buf[:]) + n, err := refH.Write(buf) require.NoError(t, err) require.Equalf(t, 8, n, "wrote only %d out of 8 bytes", n) want := refH.Sum64() From 8da27b296a2bc8bfacf6faa6ce9158ae4114ee14 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 19 Aug 2025 17:47:34 +0000 Subject: [PATCH 03/15] iterate on runes, not bytes --- attribute/internal/fnv/fnv.go | 4 ++-- attribute/internal/fnv/fnv_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/attribute/internal/fnv/fnv.go b/attribute/internal/fnv/fnv.go index 7c6cc75bcca..449b7d85323 100644 --- a/attribute/internal/fnv/fnv.go +++ b/attribute/internal/fnv/fnv.go @@ -68,8 +68,8 @@ func (h Hash) Int64(val int64) Hash { func (h Hash) String(val string) Hash { v := uint64(h) - for _, c := range val { - v ^= uint64(c) + for i := 0; i < len(val); i++ { + v ^= uint64(val[i]) v *= prime64 } return Hash(v) diff --git a/attribute/internal/fnv/fnv_test.go b/attribute/internal/fnv/fnv_test.go index 7dab26983ec..f76e085a1e9 100644 --- a/attribute/internal/fnv/fnv_test.go +++ b/attribute/internal/fnv/fnv_test.go @@ -17,7 +17,7 @@ import ( ) func TestStringHashCorrectness(t *testing.T) { - input := []string{"", "a", "ab", "abc"} + input := []string{"", "a", "ab", "abc", "δΈ–η•Œ"} refH := fnv.New64a() for _, in := range input { From 1ca3971682e8d5aa8b6107b0554ce8cd4f30183f Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 19 Aug 2025 17:50:08 +0000 Subject: [PATCH 04/15] simplify benchmark loops --- attribute/hash_test.go | 2 +- attribute/internal/fnv/fnv_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index d8987a705ab..a644d3168cb 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -142,7 +142,7 @@ func BenchmarkHashKVs(b *testing.B) { b.ResetTimer() b.ReportAllocs() - for n := 0; n < b.N; n++ { + for range b.N { h = hashKVs(attrs) } diff --git a/attribute/internal/fnv/fnv_test.go b/attribute/internal/fnv/fnv_test.go index f76e085a1e9..7525c997ba2 100644 --- a/attribute/internal/fnv/fnv_test.go +++ b/attribute/internal/fnv/fnv_test.go @@ -80,7 +80,7 @@ func BenchmarkStringKB(b *testing.B) { b.ReportAllocs() b.ResetTimer() - for i := 0; i < b.N; i++ { + for range b.N { result = h.String(s) } } @@ -92,7 +92,7 @@ func BenchmarkUint64KB(b *testing.B) { b.ReportAllocs() b.ResetTimer() - for n := 0; n < b.N; n++ { + for range b.N { result = h.Uint64(i) } } From 6d026701411bc9f463b47c70ad5410ba3c0345f2 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 19 Aug 2025 17:54:27 +0000 Subject: [PATCH 05/15] declare benchmark var globally --- attribute/hash_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index a644d3168cb..2b3103a5c1b 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -132,19 +132,17 @@ func slice(kvs []KeyValue) string { return b.String() } +var h fnv.Hash + func BenchmarkHashKVs(b *testing.B) { attrs := make([]KeyValue, len(keyVals)) for i := range keyVals { attrs[i] = keyVals[i]("k") } - var h fnv.Hash - b.ResetTimer() b.ReportAllocs() for range b.N { h = hashKVs(attrs) } - - _ = h } From 34995bfd795fb1871f7a2157df2c63693d2b0158 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Wed, 27 Aug 2025 17:54:44 +0000 Subject: [PATCH 06/15] add fuzz test --- Makefile | 3 ++- attribute/hash_test.go | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bc0f1f92d1f..44870248c32 100644 --- a/Makefile +++ b/Makefile @@ -146,11 +146,12 @@ build-tests/%: # Tests -TEST_TARGETS := test-default test-bench test-short test-verbose test-race test-concurrent-safe +TEST_TARGETS := test-default test-bench test-short test-verbose test-race test-concurrent-safe test-fuzz .PHONY: $(TEST_TARGETS) test test-default test-race: ARGS=-race test-bench: ARGS=-run=xxxxxMatchNothingxxxxx -test.benchtime=1ms -bench=. test-short: ARGS=-short +test-fuzz: ARGS=-fuzztime=10s -fuzz test-verbose: ARGS=-v -race test-concurrent-safe: ARGS=-run=ConcurrentSafe -count=100 -race test-concurrent-safe: TIMEOUT=120 diff --git a/attribute/hash_test.go b/attribute/hash_test.go index 2b3103a5c1b..117cab92f2c 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -8,6 +8,7 @@ import ( "strings" "testing" + "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute/internal/fnv" ) @@ -146,3 +147,16 @@ func BenchmarkHashKVs(b *testing.B) { h = hashKVs(attrs) } } + +func FuzzHashKVs(f *testing.F) { + f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, f float64, b bool) { + kvs := []KeyValue{ + String(k1, s), + Int(k2, i), + Int64(k3, i64), + Float64(k4, f), + Bool(k5, b), + } + require.Equal(t, hashKVs(kvs), hashKVs(kvs)) + }) +} From 8a13753d923efe407830f9aae342472961480c0c Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Wed, 27 Aug 2025 18:03:03 +0000 Subject: [PATCH 07/15] remove changelog entry --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f46e42765ed..6acf45fb96f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -102,7 +102,6 @@ The next release will require at least [Go 1.24]. - Fix `go.opentelemetry.io/otel/exporters/prometheus` to not append a suffix if it's already present in metric name. (#7088) - Fix the `go.opentelemetry.io/otel/exporters/stdout/stdouttrace` self-observability component type and name. (#7195) - Fix partial export count metric in `go.opentelemetry.io/otel/exporters/stdout/stdouttrace`. (#7199) -- Clarify the documentation about equivalence guarantees for the `Set` and `Distinct` types in `go.opentelemetry.io/otel/attribute`. (#7175) ### Deprecated From 1ca4a332bf21936d73a0569bb50e65184e1b1baa Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Thu, 28 Aug 2025 18:36:12 +0000 Subject: [PATCH 08/15] lint --- attribute/hash_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index 117cab92f2c..49ff3a461d6 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -8,7 +8,6 @@ import ( "strings" "testing" - "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute/internal/fnv" ) @@ -149,7 +148,7 @@ func BenchmarkHashKVs(b *testing.B) { } func FuzzHashKVs(f *testing.F) { - f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, f float64, b bool) { + f.Fuzz(func(_ *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, f float64, b bool) { kvs := []KeyValue{ String(k1, s), Int(k2, i), @@ -157,6 +156,6 @@ func FuzzHashKVs(f *testing.F) { Float64(k4, f), Bool(k5, b), } - require.Equal(t, hashKVs(kvs), hashKVs(kvs)) + h = hashKVs(kvs) }) } From fffec875286a8caf610d9872c4b51d394beaa3ef Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Thu, 28 Aug 2025 19:03:33 +0000 Subject: [PATCH 09/15] fix set equality --- attribute/set.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/attribute/set.go b/attribute/set.go index c8cfd2e1fe1..f6632a796be 100644 --- a/attribute/set.go +++ b/attribute/set.go @@ -178,7 +178,16 @@ func (l *Set) Equivalent() Distinct { // Equals reports whether the argument set is equivalent to this set. func (l *Set) Equals(o *Set) bool { - return l.Equivalent() == o.Equivalent() + if l.Equivalent() != o.Equivalent() { + return false + } + if l == nil || l.hash == 0 { + l = emptySet + } + if o == nil || o.hash == 0 { + o = emptySet + } + return l.data == o.data } // Encoded returns the encoded form of this set, according to encoder. From 7ac00138addfced7dcc94ecfd466b5c914ef729d Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Wed, 3 Sep 2025 14:21:11 +0000 Subject: [PATCH 10/15] document that Distinct now has theoretically possible collisions --- CHANGELOG.md | 1 + attribute/set.go | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acf45fb96f..afd18be8521 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -89,6 +89,7 @@ The next release will require at least [Go 1.24]. The package contains semantic conventions from the `v1.37.0` version of the OpenTelemetry Semantic Conventions. See the [migration documentation](./semconv/v1.37.0/MIGRATION.md) for information on how to upgrade from `go.opentelemetry.io/otel/semconv/v1.36.0.`(#7254) - Greatly reduce the cost of recording metrics in `go.opentelemetry.io/otel/sdk/metric` using hashing for map keys. (#7175) +- `Distinct` in `go.opentelemetry.io/otel/attribute` is no longer guaranteed to uniquely identify an attribute set. Collisions between `Distinct` values for different Sets are theoretically possible, but highly unlikely. (#7175) ### Changed diff --git a/attribute/set.go b/attribute/set.go index f6632a796be..b6578c3bc85 100644 --- a/attribute/set.go +++ b/attribute/set.go @@ -25,18 +25,17 @@ type ( // the Equals method to ensure stable equivalence checking. // // Users should also use the Distinct returned from Equivalent as a map key - // instead of a Set directly. In addition to that type providing guarantees - // on stable equivalence, it may also provide performance improvements. + // instead of a Set directly. Set has relatively poor performance when used + // as a map key compared to Distinct. Set struct { hash fnv.Hash data any } - // Distinct is a unique identifier of a Set. + // Distinct is an identifier of a Set which is very likely to be unique. // - // Distinct is designed to ensure equivalence stability: comparisons will - // return the same value across versions. For this reason, Distinct should - // always be used as a map key instead of a Set. + // Distinct should be used as a map key instead of a Set for to provide better + // performance for map operations. Distinct struct { hash fnv.Hash } @@ -165,8 +164,8 @@ func (l *Set) ToSlice() []KeyValue { return iter.ToSlice() } -// Equivalent returns a value that may be used as a map key. The Distinct type -// guarantees that the result will equal the equivalent. Distinct value of any +// Equivalent returns a value that may be used as a map key. Equal Distinct +// values are very likely to be equivalent attribute Sets. Distinct value of any // attribute set with the same elements as this, where sets are made unique by // choosing the last value in the input for any given key. func (l *Set) Equivalent() Distinct { From 04702b781fba6a0c31c461df7937edd5998c8ef2 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Wed, 3 Sep 2025 14:26:32 +0000 Subject: [PATCH 11/15] move changelog to unreleased --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afd18be8521..3a46d2fa49b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Add `WithInstrumentationAttributeSet` option to `go.opentelemetry.io/otel/log`, `go.opentelemetry.io/otel/metric`, and `go.opentelemetry.io/otel/trace` packages. This provides a concurrent-safe and performant alternative to `WithInstrumentationAttributes` by accepting a pre-constructed `attribute.Set`. (#7287) +- Greatly reduce the cost of recording metrics in `go.opentelemetry.io/otel/sdk/metric` using hashing for map keys. (#7175) ### Fixed @@ -29,6 +30,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/trace` synchronously de-duplicates the passed attributes instead of delegating it to the returned `TracerOption`. (#7266) - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/meter` synchronously de-duplicates the passed attributes instead of delegating it to the returned `MeterOption`. (#7266) - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/log` synchronously de-duplicates the passed attributes instead of delegating it to the returned `LoggerOption`. (#7266) +- `Distinct` in `go.opentelemetry.io/otel/attribute` is no longer guaranteed to uniquely identify an attribute set. Collisions between `Distinct` values for different Sets are possible with extremely high cardinality (billions of series per instrument), but are highly unlikely. (#7175) @@ -88,8 +90,6 @@ The next release will require at least [Go 1.24]. - The `go.opentelemetry.io/otel/semconv/v1.37.0` package. The package contains semantic conventions from the `v1.37.0` version of the OpenTelemetry Semantic Conventions. See the [migration documentation](./semconv/v1.37.0/MIGRATION.md) for information on how to upgrade from `go.opentelemetry.io/otel/semconv/v1.36.0.`(#7254) -- Greatly reduce the cost of recording metrics in `go.opentelemetry.io/otel/sdk/metric` using hashing for map keys. (#7175) -- `Distinct` in `go.opentelemetry.io/otel/attribute` is no longer guaranteed to uniquely identify an attribute set. Collisions between `Distinct` values for different Sets are theoretically possible, but highly unlikely. (#7175) ### Changed From 103045badd84b20590dcb94825d0888911586a4c Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 16 Sep 2025 12:16:50 -0400 Subject: [PATCH 12/15] Update attribute/hash_test.go Co-authored-by: Tyler Yahn --- attribute/hash_test.go | 171 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 162 insertions(+), 9 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index 49ff3a461d6..7db7bbfb83e 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -148,14 +148,167 @@ func BenchmarkHashKVs(b *testing.B) { } func FuzzHashKVs(f *testing.F) { - f.Fuzz(func(_ *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, f float64, b bool) { - kvs := []KeyValue{ - String(k1, s), - Int(k2, i), - Int64(k3, i64), - Float64(k4, f), - Bool(k5, b), - } - h = hashKVs(kvs) + // Add seed inputs to ensure coverage of edge cases. + f.Add("", "", "", "", "", "", 0, int64(0), 0.0, false, uint8(0)) + f.Add("key", "value", "🌍", "test", "bool", "float", -1, int64(-1), -1.0, true, uint8(1)) + f.Add("duplicate", "duplicate", "duplicate", "duplicate", "duplicate", "NaN", 0, int64(0), math.Inf(1), false, uint8(2)) + + f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, fVal float64, b bool, sliceType uint8) { + // Test variable number of attributes (0-10). + numAttrs := len(k1) % 11 // Use key length to determine number of attributes. + if numAttrs == 0 && len(k1) == 0 { + // Test empty set. + h := hashKVs(nil) + if h == 0 { + t.Error("hash of empty slice should not be zero") + } + return + } + + var kvs []KeyValue + + // Add basic types. + if numAttrs > 0 { + kvs = append(kvs, String(k1, s)) + } + if numAttrs > 1 { + kvs = append(kvs, Int(k2, i)) + } + if numAttrs > 2 { + kvs = append(kvs, Int64(k3, i64)) + } + if numAttrs > 3 { + kvs = append(kvs, Float64(k4, fVal)) + } + if numAttrs > 4 { + kvs = append(kvs, Bool(k5, b)) + } + + // Add slice types based on sliceType parameter + if numAttrs > 5 { + switch sliceType % 4 { + case 0: + // Test BoolSlice with variable length. + bools := make([]bool, len(s)%5) // 0-4 elements + for i := range bools { + bools[i] = (i+len(k1))%2 == 0 + } + kvs = append(kvs, BoolSlice("boolslice", bools)) + case 1: + // Test IntSlice with variable length. + ints := make([]int, len(s)%6) // 0-5 elements + for i := range ints { + ints[i] = i + len(k2) + } + kvs = append(kvs, IntSlice("intslice", ints)) + case 2: + // Test Int64Slice with variable length. + int64s := make([]int64, len(s)%4) // 0-3 elements + for i := range int64s { + int64s[i] = int64(i) + i64 + } + kvs = append(kvs, Int64Slice("int64slice", int64s)) + case 3: + // Test Float64Slice with variable length and special values. + float64s := make([]float64, len(s)%5) // 0-4 elements + for i := range float64s { + switch i % 4 { + case 0: + float64s[i] = fVal + case 1: + float64s[i] = math.Inf(1) // +Inf + case 2: + float64s[i] = math.Inf(-1) // -Inf + case 3: + float64s[i] = math.NaN() // NaN + } + } + kvs = append(kvs, Float64Slice("float64slice", float64s)) + } + } + + // Add StringSlice. + if numAttrs > 6 { + strings := make([]string, len(k1)%4) // 0-3 elements + for i := range strings { + strings[i] = fmt.Sprintf("%s_%d", s, i) + } + kvs = append(kvs, StringSlice("stringslice", strings)) + } + + // Test duplicate keys (should be handled by Set construction). + if numAttrs > 7 && len(k1) > 0 { + kvs = append(kvs, String(k1, "duplicate_key_value")) + } + + // Add more attributes with Unicode keys. + if numAttrs > 8 { + kvs = append(kvs, String("πŸ”‘", "unicode_key")) + } + if numAttrs > 9 { + kvs = append(kvs, String("empty", "")) + } + + // Sort to ensure consistent ordering (as Set would do). + slices.SortFunc(kvs, func(a, b KeyValue) int { + return cmp.Compare(string(a.Key), string(b.Key)) + }) + + // Remove duplicates (as Set will do). + if len(kvs) > 1 { + j := 0 + for i := 1; i < len(kvs); i++ { + if kvs[j].Key != kvs[i].Key { + j++ + kvs[j] = kvs[i] + } else { + // Keep the later value for duplicate keys. + kvs[j] = kvs[i] + } + } + kvs = kvs[:j+1] + } + + // Hash the key-value pairs. + h1 := hashKVs(kvs) + h2 := hashKVs(kvs) // Should be deterministic + + if h1 != h2 { + t.Errorf("hash is not deterministic: %d != %d for kvs=%v", h1, h2, kvs) + } + + if h1 == 0 && len(kvs) > 0 { + t.Errorf("hash should not be zero for non-empty input: kvs=%v", kvs) + } + + // Test that different inputs produce different hashes (most of the time). + // This is a probabilistic test - collisions are possible but rare. + if len(kvs) > 0 { + // Modify one value slightly. + modifiedKvs := make([]KeyValue, len(kvs)) + copy(modifiedKvs, kvs) + if len(modifiedKvs) > 0 { + switch modifiedKvs[0].Value.Type() { + case STRING: + modifiedKvs[0] = String(string(modifiedKvs[0].Key), modifiedKvs[0].Value.AsString()+"_modified") + case INT64: + modifiedKvs[0] = Int64(string(modifiedKvs[0].Key), modifiedKvs[0].Value.AsInt64()+1) + case BOOL: + modifiedKvs[0] = Bool(string(modifiedKvs[0].Key), !modifiedKvs[0].Value.AsBool()) + case FLOAT64: + val := modifiedKvs[0].Value.AsFloat64() + if !math.IsNaN(val) && !math.IsInf(val, 0) { + modifiedKvs[0] = Float64(string(modifiedKvs[0].Key), val+1.0) + } + } + + h3 := hashKVs(modifiedKvs) + // Note: We don't assert h1 != h3 because hash collisions are theoretically possible + // but we can log suspicious cases for manual review. + if h1 == h3 && !reflect.DeepEqual(kvs, modifiedKvs) { + t.Logf("Potential hash collision detected: original=%v, modified=%v, hash=%d", kvs, modifiedKvs, h1) + } + } + } }) } From 3add6465bcaa970970b610ed0e316ede642dc307 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 16 Sep 2025 12:52:52 -0400 Subject: [PATCH 13/15] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Robert PajΔ…k --- attribute/hash_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index 7db7bbfb83e..3a6c90c289f 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -142,8 +142,8 @@ func BenchmarkHashKVs(b *testing.B) { b.ResetTimer() b.ReportAllocs() - for range b.N { - h = hashKVs(attrs) + for b.Loop() { + hashKVs(attrs) } } From 5acd267fa95994cb0d32c932460eb04cdc414aa0 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 16 Sep 2025 16:59:12 +0000 Subject: [PATCH 14/15] fixes after rebase --- attribute/hash_test.go | 36 ++++++++++++++++++++---------------- attribute/set.go | 4 ++-- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index 3a6c90c289f..8f4cbae9d91 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -4,7 +4,11 @@ package attribute // import "go.opentelemetry.io/otel/attribute" import ( + "cmp" "fmt" + "math" + "reflect" + "slices" "strings" "testing" @@ -152,7 +156,7 @@ func FuzzHashKVs(f *testing.F) { f.Add("", "", "", "", "", "", 0, int64(0), 0.0, false, uint8(0)) f.Add("key", "value", "🌍", "test", "bool", "float", -1, int64(-1), -1.0, true, uint8(1)) f.Add("duplicate", "duplicate", "duplicate", "duplicate", "duplicate", "NaN", 0, int64(0), math.Inf(1), false, uint8(2)) - + f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, fVal float64, b bool, sliceType uint8) { // Test variable number of attributes (0-10). numAttrs := len(k1) % 11 // Use key length to determine number of attributes. @@ -164,9 +168,9 @@ func FuzzHashKVs(f *testing.F) { } return } - + var kvs []KeyValue - + // Add basic types. if numAttrs > 0 { kvs = append(kvs, String(k1, s)) @@ -183,7 +187,7 @@ func FuzzHashKVs(f *testing.F) { if numAttrs > 4 { kvs = append(kvs, Bool(k5, b)) } - + // Add slice types based on sliceType parameter if numAttrs > 5 { switch sliceType % 4 { @@ -196,7 +200,7 @@ func FuzzHashKVs(f *testing.F) { kvs = append(kvs, BoolSlice("boolslice", bools)) case 1: // Test IntSlice with variable length. - ints := make([]int, len(s)%6) // 0-5 elements + ints := make([]int, len(s)%6) // 0-5 elements for i := range ints { ints[i] = i + len(k2) } @@ -218,7 +222,7 @@ func FuzzHashKVs(f *testing.F) { case 1: float64s[i] = math.Inf(1) // +Inf case 2: - float64s[i] = math.Inf(-1) // -Inf + float64s[i] = math.Inf(-1) // -Inf case 3: float64s[i] = math.NaN() // NaN } @@ -226,7 +230,7 @@ func FuzzHashKVs(f *testing.F) { kvs = append(kvs, Float64Slice("float64slice", float64s)) } } - + // Add StringSlice. if numAttrs > 6 { strings := make([]string, len(k1)%4) // 0-3 elements @@ -235,12 +239,12 @@ func FuzzHashKVs(f *testing.F) { } kvs = append(kvs, StringSlice("stringslice", strings)) } - + // Test duplicate keys (should be handled by Set construction). if numAttrs > 7 && len(k1) > 0 { kvs = append(kvs, String(k1, "duplicate_key_value")) } - + // Add more attributes with Unicode keys. if numAttrs > 8 { kvs = append(kvs, String("πŸ”‘", "unicode_key")) @@ -248,12 +252,12 @@ func FuzzHashKVs(f *testing.F) { if numAttrs > 9 { kvs = append(kvs, String("empty", "")) } - + // Sort to ensure consistent ordering (as Set would do). slices.SortFunc(kvs, func(a, b KeyValue) int { return cmp.Compare(string(a.Key), string(b.Key)) }) - + // Remove duplicates (as Set will do). if len(kvs) > 1 { j := 0 @@ -268,19 +272,19 @@ func FuzzHashKVs(f *testing.F) { } kvs = kvs[:j+1] } - + // Hash the key-value pairs. h1 := hashKVs(kvs) h2 := hashKVs(kvs) // Should be deterministic - + if h1 != h2 { t.Errorf("hash is not deterministic: %d != %d for kvs=%v", h1, h2, kvs) } - + if h1 == 0 && len(kvs) > 0 { t.Errorf("hash should not be zero for non-empty input: kvs=%v", kvs) } - + // Test that different inputs produce different hashes (most of the time). // This is a probabilistic test - collisions are possible but rare. if len(kvs) > 0 { @@ -301,7 +305,7 @@ func FuzzHashKVs(f *testing.F) { modifiedKvs[0] = Float64(string(modifiedKvs[0].Key), val+1.0) } } - + h3 := hashKVs(modifiedKvs) // Note: We don't assert h1 != h3 because hash collisions are theoretically possible // but we can log suspicious cases for manual review. diff --git a/attribute/set.go b/attribute/set.go index b6578c3bc85..177cf2ee0c2 100644 --- a/attribute/set.go +++ b/attribute/set.go @@ -181,10 +181,10 @@ func (l *Set) Equals(o *Set) bool { return false } if l == nil || l.hash == 0 { - l = emptySet + l = &emptySet } if o == nil || o.hash == 0 { - o = emptySet + o = &emptySet } return l.data == o.data } From 4ed4e0d726e319d5cf81ac4d24b0ba921261759f Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Tue, 16 Sep 2025 17:03:40 +0000 Subject: [PATCH 15/15] lint --- attribute/hash_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/attribute/hash_test.go b/attribute/hash_test.go index 8f4cbae9d91..d49cdf636ad 100644 --- a/attribute/hash_test.go +++ b/attribute/hash_test.go @@ -136,8 +136,6 @@ func slice(kvs []KeyValue) string { return b.String() } -var h fnv.Hash - func BenchmarkHashKVs(b *testing.B) { attrs := make([]KeyValue, len(keyVals)) for i := range keyVals { @@ -155,12 +153,13 @@ func FuzzHashKVs(f *testing.F) { // Add seed inputs to ensure coverage of edge cases. f.Add("", "", "", "", "", "", 0, int64(0), 0.0, false, uint8(0)) f.Add("key", "value", "🌍", "test", "bool", "float", -1, int64(-1), -1.0, true, uint8(1)) - f.Add("duplicate", "duplicate", "duplicate", "duplicate", "duplicate", "NaN", 0, int64(0), math.Inf(1), false, uint8(2)) + f.Add("duplicate", "duplicate", "duplicate", "duplicate", "duplicate", "NaN", + 0, int64(0), math.Inf(1), false, uint8(2)) f.Fuzz(func(t *testing.T, k1, k2, k3, k4, k5, s string, i int, i64 int64, fVal float64, b bool, sliceType uint8) { // Test variable number of attributes (0-10). numAttrs := len(k1) % 11 // Use key length to determine number of attributes. - if numAttrs == 0 && len(k1) == 0 { + if numAttrs == 0 && k1 == "" { // Test empty set. h := hashKVs(nil) if h == 0 { @@ -241,7 +240,7 @@ func FuzzHashKVs(f *testing.F) { } // Test duplicate keys (should be handled by Set construction). - if numAttrs > 7 && len(k1) > 0 { + if numAttrs > 7 && k1 != "" { kvs = append(kvs, String(k1, "duplicate_key_value")) }