diff --git a/pdata/pprofile/dictionary_helpers.go b/pdata/pprofile/dictionary_helpers.go index cd3e6c3a59b..0a59933c168 100644 --- a/pdata/pprofile/dictionary_helpers.go +++ b/pdata/pprofile/dictionary_helpers.go @@ -8,104 +8,44 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" ) +// mapKeyValues returns the underlying KeyValue slice of a pcommon.Map. +func mapKeyValues(m pcommon.Map) []internal.KeyValue { + return *internal.GetMapOrig(internal.MapWrapper(m)) +} + // resolveProfilesReferences walks through all profiles data after unmarshaling // and resolves any string_value_ref and key_ref to their actual string values. // This ensures the pdata API works transparently with referenced strings. func resolveProfilesReferences(profiles Profiles) { dict := profiles.Dictionary() - // Quick check: if there are no resource profiles, nothing to do - if profiles.ResourceProfiles().Len() == 0 { - return - } - - // Check if resolution is needed by sampling first resource - rp := profiles.ResourceProfiles().At(0) - if !needsResolution(rp.Resource().Attributes()) { - // Check scope attributes too - if rp.ScopeProfiles().Len() == 0 { - return - } - sp := rp.ScopeProfiles().At(0) - if !needsResolution(sp.Scope().Attributes()) { - // Already resolved, skip - return - } - } - // Resolve references in resource attributes for i := 0; i < profiles.ResourceProfiles().Len(); i++ { rp := profiles.ResourceProfiles().At(i) - resolveMapReferences(dict, rp.Resource().Attributes()) + resolveKeyValueReferences(dict, mapKeyValues(rp.Resource().Attributes())) // Resolve references in scope attributes for j := 0; j < rp.ScopeProfiles().Len(); j++ { sp := rp.ScopeProfiles().At(j) - resolveMapReferences(dict, sp.Scope().Attributes()) + resolveKeyValueReferences(dict, mapKeyValues(sp.Scope().Attributes())) } } } -// needsResolution checks if a map has any refs that need resolution -func needsResolution(m pcommon.Map) bool { - if m.Len() == 0 { - return false - } - mapOrig := internal.GetMapOrig(internal.MapWrapper(m)) - for i := 0; i < len(*mapOrig); i++ { - kv := &(*mapOrig)[i] - // If KeyRef is set, needs resolution - if kv.KeyRef != 0 { - return true - } - // Check if any values need resolution - if anyValueNeedsResolution(&kv.Value) { - return true - } - } - return false -} - -// anyValueNeedsResolution checks if an AnyValue has refs that need resolution -func anyValueNeedsResolution(anyValue *internal.AnyValue) bool { - if ref, ok := anyValue.Value.(*internal.AnyValue_StringValueRef); ok && ref.StringValueRef != 0 { - return true - } else if kvList, ok := anyValue.Value.(*internal.AnyValue_KvlistValue); ok && kvList.KvlistValue != nil { - for i := 0; i < len(kvList.KvlistValue.Values); i++ { - kv := &kvList.KvlistValue.Values[i] - if kv.KeyRef != 0 { - return true - } - if anyValueNeedsResolution(&kv.Value) { - return true - } - } - } else if arrVal, ok := anyValue.Value.(*internal.AnyValue_ArrayValue); ok && arrVal.ArrayValue != nil { - for i := 0; i < len(arrVal.ArrayValue.Values); i++ { - if anyValueNeedsResolution(&arrVal.ArrayValue.Values[i]) { - return true - } - } - } - return false -} - -// resolveMapReferences resolves all string_value_ref and key_ref in a map -func resolveMapReferences(dict ProfilesDictionary, m pcommon.Map) { - mapOrig := internal.GetMapOrig(internal.MapWrapper(m)) - - for i := 0; i < len(*mapOrig); i++ { - kv := &(*mapOrig)[i] - +// resolveKeyValueReferences resolves key_ref and string_value_ref in a KeyValue slice +func resolveKeyValueReferences(dict ProfilesDictionary, kvs []internal.KeyValue) { + for i := range kvs { + kv := &kvs[i] // Resolve key_ref if set - if kv.KeyRef != 0 { + if kv.KeyRef >= 0 { idx := int(kv.KeyRef) - if idx >= 0 && idx < dict.StringTable().Len() { + if idx < dict.StringTable().Len() { kv.Key = dict.StringTable().At(idx) - // Keep ref set for potential re-marshaling + // N.b. keep KeyRef set to optimize re-marshaling. This is + // technically a violation of the proto spec, but acceptable + // for the in-memory pdata API since keys are immutable. } } - // Resolve string_value_ref if set resolveAnyValueReference(dict, &kv.Value) } @@ -127,16 +67,7 @@ func resolveAnyValueReference(dict ProfilesDictionary, anyValue *internal.AnyVal anyValue.Value = ov } } else if kvList, ok := anyValue.Value.(*internal.AnyValue_KvlistValue); ok && kvList.KvlistValue != nil { - for i := 0; i < len(kvList.KvlistValue.Values); i++ { - kv := &kvList.KvlistValue.Values[i] - if kv.KeyRef != 0 { - idx := int(kv.KeyRef) - if idx >= 0 && idx < dict.StringTable().Len() { - kv.Key = dict.StringTable().At(idx) - } - } - resolveAnyValueReference(dict, &kv.Value) - } + resolveKeyValueReferences(dict, kvList.KvlistValue.Values) } else if arrVal, ok := anyValue.Value.(*internal.AnyValue_ArrayValue); ok && arrVal.ArrayValue != nil { for i := 0; i < len(arrVal.ArrayValue.Values); i++ { resolveAnyValueReference(dict, &arrVal.ArrayValue.Values[i]) @@ -151,32 +82,16 @@ func convertProfilesToReferences(profiles Profiles) { dict := profiles.Dictionary() stringTable := dict.StringTable() - // Quick check: if there are no resource profiles, nothing to do - if profiles.ResourceProfiles().Len() == 0 { - return - } - - // Check if conversion is needed by sampling first resource - rp := profiles.ResourceProfiles().At(0) - if !needsConversion(rp.Resource().Attributes()) { - // Check scope attributes too - if rp.ScopeProfiles().Len() == 0 { - return - } - sp := rp.ScopeProfiles().At(0) - if !needsConversion(sp.Scope().Attributes()) { - // Already converted, skip - return - } - } - // Map for quick string lookups - only allocate if needed - stringIndex := make(map[string]int32, stringTable.Len()) - for i := 0; i < stringTable.Len(); i++ { - stringIndex[stringTable.At(i)] = int32(i) - } - + var stringIndex map[string]int32 getStringIndex := func(s string) int32 { + if stringIndex == nil { + stringIndex = make(map[string]int32, stringTable.Len()) + for i := 0; i < stringTable.Len(); i++ { + stringIndex[stringTable.At(i)] = int32(i) + } + } + if idx, ok := stringIndex[s]; ok { return idx } @@ -189,70 +104,25 @@ func convertProfilesToReferences(profiles Profiles) { // Convert strings in resource attributes for i := 0; i < profiles.ResourceProfiles().Len(); i++ { rp := profiles.ResourceProfiles().At(i) - convertMapToReferences(getStringIndex, rp.Resource().Attributes()) + convertKeyValueToReferences(getStringIndex, mapKeyValues(rp.Resource().Attributes())) // Convert strings in scope attributes for j := 0; j < rp.ScopeProfiles().Len(); j++ { sp := rp.ScopeProfiles().At(j) - convertMapToReferences(getStringIndex, sp.Scope().Attributes()) + convertKeyValueToReferences(getStringIndex, mapKeyValues(sp.Scope().Attributes())) } } } -// needsConversion checks if a map has any string values that need conversion to refs -func needsConversion(m pcommon.Map) bool { - if m.Len() == 0 { - return false - } - mapOrig := internal.GetMapOrig(internal.MapWrapper(m)) - for i := 0; i < len(*mapOrig); i++ { - kv := &(*mapOrig)[i] - // If KeyRef is not set but Key is, needs conversion - if kv.Key != "" && kv.KeyRef == 0 { - return true - } - // Check if any string values need conversion - if anyValueNeedsConversion(&kv.Value) { - return true - } - } - return false -} - -// anyValueNeedsConversion checks if an AnyValue has string values that need conversion -func anyValueNeedsConversion(anyValue *internal.AnyValue) bool { - if strVal, ok := anyValue.Value.(*internal.AnyValue_StringValue); ok && strVal.StringValue != "" { - return true - } else if kvList, ok := anyValue.Value.(*internal.AnyValue_KvlistValue); ok && kvList.KvlistValue != nil { - for i := 0; i < len(kvList.KvlistValue.Values); i++ { - kv := &kvList.KvlistValue.Values[i] - if kv.Key != "" && kv.KeyRef == 0 { - return true - } - if anyValueNeedsConversion(&kv.Value) { - return true - } - } - } else if arrVal, ok := anyValue.Value.(*internal.AnyValue_ArrayValue); ok && arrVal.ArrayValue != nil { - for i := 0; i < len(arrVal.ArrayValue.Values); i++ { - if anyValueNeedsConversion(&arrVal.ArrayValue.Values[i]) { - return true - } - } - } - return false -} - -// convertMapToReferences converts string keys and values to references -func convertMapToReferences(getStringIndex func(string) int32, m pcommon.Map) { - mapOrig := internal.GetMapOrig(internal.MapWrapper(m)) - - for i := 0; i < len(*mapOrig); i++ { - kv := &(*mapOrig)[i] +// convertKeyValueToReferences converts string keys and values to references in a KeyValue slice +func convertKeyValueToReferences(getStringIndex func(string) int32, kvs []internal.KeyValue) { + for i := range kvs { + kv := &kvs[i] // Convert key to reference if kv.Key != "" && kv.KeyRef == 0 { kv.KeyRef = getStringIndex(kv.Key) + kv.Key = "" } // Convert string values to references @@ -279,14 +149,7 @@ func convertAnyValueToReference(getStringIndex func(string) int32, anyValue *int ov.StringValueRef = idx anyValue.Value = ov } else if kvList, ok := anyValue.Value.(*internal.AnyValue_KvlistValue); ok && kvList.KvlistValue != nil { - // Recursively convert nested key-value lists - for i := 0; i < len(kvList.KvlistValue.Values); i++ { - kv := &kvList.KvlistValue.Values[i] - if kv.Key != "" && kv.KeyRef == 0 { - kv.KeyRef = getStringIndex(kv.Key) - } - convertAnyValueToReference(getStringIndex, &kv.Value) - } + convertKeyValueToReferences(getStringIndex, kvList.KvlistValue.Values) } else if arrVal, ok := anyValue.Value.(*internal.AnyValue_ArrayValue); ok && arrVal.ArrayValue != nil { // Recursively convert arrays for i := 0; i < len(arrVal.ArrayValue.Values); i++ { diff --git a/pdata/pprofile/dictionary_helpers_test.go b/pdata/pprofile/dictionary_helpers_test.go index b09a7235ba8..2e3cbfcd670 100644 --- a/pdata/pprofile/dictionary_helpers_test.go +++ b/pdata/pprofile/dictionary_helpers_test.go @@ -386,7 +386,7 @@ func TestConvertMapToReferencesEmptyKey(t *testing.T) { return 1 } - convertMapToReferences(getStringIndex, attrs) + convertKeyValueToReferences(getStringIndex, mapKeyValues(attrs)) // Empty key should not have KeyRef set kv := &(*mapOrig)[0] @@ -414,7 +414,7 @@ func TestConvertMapToReferencesExistingKeyRef(t *testing.T) { return 99 } - convertMapToReferences(getStringIndex, attrs) + convertKeyValueToReferences(getStringIndex, mapKeyValues(attrs)) // KeyRef should remain unchanged kv := &(*mapOrig)[0] @@ -441,609 +441,93 @@ func TestResolveAnyValueReferenceNonStringTypes(t *testing.T) { assert.Equal(t, int64(42), intVal.IntValue) } -func TestConvertAnyValueToReferenceNonStringTypes(t *testing.T) { - getStringIndex := func(_ string) int32 { - return 0 - } - - // Test with bool value (should not be affected) - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_BoolValue{ - BoolValue: true, - }, - } - - convertAnyValueToReference(getStringIndex, anyVal) - - // Should remain as BoolValue - boolVal, ok := anyVal.Value.(*internal.AnyValue_BoolValue) - assert.True(t, ok) - assert.True(t, boolVal.BoolValue) -} - -func TestNeedsResolution(t *testing.T) { - t.Run("empty map", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - assert.False(t, needsResolution(attrs)) - }) - - t.Run("map with KeyRef set", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - - mapOrig := internal.GetMapOrig(internal.MapWrapper(attrs)) - *mapOrig = append(*mapOrig, internal.KeyValue{ - Key: "test-key", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "value", - }, - }, - }) - - assert.True(t, needsResolution(attrs)) - }) - - t.Run("map with StringValueRef in value", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - - mapOrig := internal.GetMapOrig(internal.MapWrapper(attrs)) - *mapOrig = append(*mapOrig, internal.KeyValue{ - Key: "test-key", - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }) - - assert.True(t, needsResolution(attrs)) - }) - - t.Run("map with no refs", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - attrs.PutStr("key", "value") - - assert.False(t, needsResolution(attrs)) - }) - - t.Run("map with nested KvList with KeyRef", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - - mapOrig := internal.GetMapOrig(internal.MapWrapper(attrs)) - *mapOrig = append(*mapOrig, internal.KeyValue{ - Key: "test-key", - Value: internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "nested-key", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "nested-value", - }, - }, - }, - }, - }, - }, - }, - }) - - assert.True(t, needsResolution(attrs)) - }) - - t.Run("map with nested array with StringValueRef", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - - mapOrig := internal.GetMapOrig(internal.MapWrapper(attrs)) - *mapOrig = append(*mapOrig, internal.KeyValue{ - Key: "test-key", - Value: internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: &internal.ArrayValue{ - Values: []internal.AnyValue{ - { - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }, - }, - }, - }, - }) - - assert.True(t, needsResolution(attrs)) - }) -} - -func TestAnyValueNeedsResolution(t *testing.T) { - t.Run("StringValueRef with non-zero ref", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - } - assert.True(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("StringValueRef with zero ref", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 0, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) +func TestConvertMapToReferencesClearsKey(t *testing.T) { + profiles := NewProfiles() + rp := profiles.ResourceProfiles().AppendEmpty() + attrs := rp.Resource().Attributes() - t.Run("StringValue no ref", func(t *testing.T) { - anyVal := &internal.AnyValue{ + mapOrig := internal.GetMapOrig(internal.MapWrapper(attrs)) + *mapOrig = append(*mapOrig, internal.KeyValue{ + Key: "my-key", + Value: internal.AnyValue{ Value: &internal.AnyValue_StringValue{ - StringValue: "test", - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("IntValue", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_IntValue{ - IntValue: 42, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("BoolValue", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_BoolValue{ - BoolValue: true, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("DoubleValue", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_DoubleValue{ - DoubleValue: 3.14, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("BytesValue", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_BytesValue{ - BytesValue: []byte{1, 2, 3}, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("KvList with KeyRef", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "value", - }, - }, - }, - }, - }, - }, - } - assert.True(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("KvList with StringValueRef in nested value", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }, - }, - }, - }, - } - assert.True(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("KvList with no refs", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "value", - }, - }, - }, - }, - }, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("KvList nil", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: nil, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("Array with StringValueRef", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: &internal.ArrayValue{ - Values: []internal.AnyValue{ - { - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }, - }, - }, - } - assert.True(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("Array with no refs", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: &internal.ArrayValue{ - Values: []internal.AnyValue{ - { - Value: &internal.AnyValue_StringValue{ - StringValue: "test", - }, - }, - }, - }, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("Array nil", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: nil, - }, - } - assert.False(t, anyValueNeedsResolution(anyVal)) - }) - - t.Run("nested array within kvlist", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - Value: internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: &internal.ArrayValue{ - Values: []internal.AnyValue{ - { - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }, - }, - }, - }, - }, - }, - }, + StringValue: "my-value", }, - } - assert.True(t, anyValueNeedsResolution(anyVal)) + }, }) - t.Run("deeply nested kvlist", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "level1", - Value: internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "level2", - KeyRef: 5, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "value", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, + getStringIndex := func(s string) int32 { + if s == "my-key" { + return 1 } - assert.True(t, anyValueNeedsResolution(anyVal)) - }) -} - -func TestNeedsConversion(t *testing.T) { - t.Run("empty map", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - assert.False(t, needsConversion(attrs)) - }) - - t.Run("map with key but no KeyRef", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - attrs.PutStr("key", "value") - - assert.True(t, needsConversion(attrs)) - }) - - t.Run("map with KeyRef already set", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() - - mapOrig := internal.GetMapOrig(internal.MapWrapper(attrs)) - *mapOrig = append(*mapOrig, internal.KeyValue{ - Key: "test-key", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }) - - assert.False(t, needsConversion(attrs)) - }) + return 2 + } - t.Run("map with StringValue needs conversion", func(t *testing.T) { - profiles := NewProfiles() - rp := profiles.ResourceProfiles().AppendEmpty() - attrs := rp.Resource().Attributes() + convertKeyValueToReferences(getStringIndex, mapKeyValues(attrs)) - mapOrig := internal.GetMapOrig(internal.MapWrapper(attrs)) - *mapOrig = append(*mapOrig, internal.KeyValue{ - Key: "test-key", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "needs-conversion", - }, - }, - }) - - assert.True(t, needsConversion(attrs)) - }) + kv := &(*mapOrig)[0] + // key_ref should be set + assert.Equal(t, int32(1), kv.KeyRef) + // key MUST NOT be set when key_ref is used (per proto spec) + assert.Equal(t, "", kv.Key, "Key must be cleared when KeyRef is set") } -func TestAnyValueNeedsConversion(t *testing.T) { - t.Run("StringValue with non-empty string", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "test", - }, - } - assert.True(t, anyValueNeedsConversion(anyVal)) - }) - - t.Run("StringValue with empty string", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "", - }, - } - assert.False(t, anyValueNeedsConversion(anyVal)) - }) - - t.Run("StringValueRef already converted", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - } - assert.False(t, anyValueNeedsConversion(anyVal)) - }) - - t.Run("IntValue", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_IntValue{ - IntValue: 42, - }, +func TestConvertAnyValueToReferenceNestedKvListClearsKey(t *testing.T) { + stringIndex := make(map[string]int32) + counter := int32(1) + getStringIndex := func(s string) int32 { + if idx, ok := stringIndex[s]; ok { + return idx } - assert.False(t, anyValueNeedsConversion(anyVal)) - }) + idx := counter + counter++ + stringIndex[s] = idx + return idx + } - t.Run("KvList with key but no KeyRef", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - KeyRef: 0, - Value: internal.AnyValue{ - Value: &internal.AnyValue_IntValue{ - IntValue: 1, - }, - }, - }, + kvList := &internal.KeyValueList{ + Values: []internal.KeyValue{ + { + Key: "nested-key", + Value: internal.AnyValue{ + Value: &internal.AnyValue_StringValue{ + StringValue: "nested-value", }, }, }, - } - assert.True(t, anyValueNeedsConversion(anyVal)) - }) + }, + } - t.Run("KvList with StringValue in nested value", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValue{ - StringValue: "needs-conversion", - }, - }, - }, - }, - }, - }, - } - assert.True(t, anyValueNeedsConversion(anyVal)) - }) + anyVal := &internal.AnyValue{ + Value: &internal.AnyValue_KvlistValue{ + KvlistValue: kvList, + }, + } - t.Run("KvList already converted", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }, - }, - }, - }, - } - assert.False(t, anyValueNeedsConversion(anyVal)) - }) + convertAnyValueToReference(getStringIndex, anyVal) - t.Run("KvList nil", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: nil, - }, - } - assert.False(t, anyValueNeedsConversion(anyVal)) - }) + // key_ref should be set + assert.NotEqual(t, int32(0), kvList.Values[0].KeyRef) + // key MUST NOT be set when key_ref is used (per proto spec) + assert.Equal(t, "", kvList.Values[0].Key, "Key must be cleared when KeyRef is set in nested kvlist") +} - t.Run("Array with StringValue", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: &internal.ArrayValue{ - Values: []internal.AnyValue{ - { - Value: &internal.AnyValue_StringValue{ - StringValue: "needs-conversion", - }, - }, - }, - }, - }, - } - assert.True(t, anyValueNeedsConversion(anyVal)) - }) +func TestConvertAnyValueToReferenceNonStringTypes(t *testing.T) { + getStringIndex := func(_ string) int32 { + return 0 + } - t.Run("Array already converted", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: &internal.ArrayValue{ - Values: []internal.AnyValue{ - { - Value: &internal.AnyValue_StringValueRef{ - StringValueRef: 1, - }, - }, - }, - }, - }, - } - assert.False(t, anyValueNeedsConversion(anyVal)) - }) + // Test with bool value (should not be affected) + anyVal := &internal.AnyValue{ + Value: &internal.AnyValue_BoolValue{ + BoolValue: true, + }, + } - t.Run("Array nil", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: nil, - }, - } - assert.False(t, anyValueNeedsConversion(anyVal)) - }) + convertAnyValueToReference(getStringIndex, anyVal) - t.Run("nested structures needing conversion", func(t *testing.T) { - anyVal := &internal.AnyValue{ - Value: &internal.AnyValue_KvlistValue{ - KvlistValue: &internal.KeyValueList{ - Values: []internal.KeyValue{ - { - Key: "test", - KeyRef: 1, - Value: internal.AnyValue{ - Value: &internal.AnyValue_ArrayValue{ - ArrayValue: &internal.ArrayValue{ - Values: []internal.AnyValue{ - { - Value: &internal.AnyValue_StringValue{ - StringValue: "deep-value", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - } - assert.True(t, anyValueNeedsConversion(anyVal)) - }) + // Should remain as BoolValue + boolVal, ok := anyVal.Value.(*internal.AnyValue_BoolValue) + assert.True(t, ok) + assert.True(t, boolVal.BoolValue) } diff --git a/pdata/pprofile/json.go b/pdata/pprofile/json.go index 8b3fa3fac85..b691b81abc7 100644 --- a/pdata/pprofile/json.go +++ b/pdata/pprofile/json.go @@ -15,6 +15,9 @@ type JSONMarshaler struct{} // MarshalProfiles to the OTLP/JSON format. func (*JSONMarshaler) MarshalProfiles(pd Profiles) ([]byte, error) { + // Convert strings to references for efficient transmission + convertProfilesToReferences(pd) + dest := json.BorrowStream(nil) defer json.ReturnStream(dest) pd.getOrig().MarshalJSON(dest) @@ -37,5 +40,10 @@ func (*JSONUnmarshaler) UnmarshalProfiles(buf []byte) (Profiles, error) { return Profiles{}, iter.Error() } otlp.MigrateProfiles(pd.getOrig().ResourceProfiles) + + // Resolve all string_value_ref and key_ref to their actual strings + // so the pdata API works transparently + resolveProfilesReferences(pd) + return pd, nil } diff --git a/pdata/pprofile/json_references_test.go b/pdata/pprofile/json_references_test.go new file mode 100644 index 00000000000..191bbf0429b --- /dev/null +++ b/pdata/pprofile/json_references_test.go @@ -0,0 +1,87 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package pprofile + +import ( + stdjson "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// newProfilesWithAttributes creates a Profiles with resource and scope +// attributes for testing reference conversion. +func newProfilesWithAttributes() Profiles { + profiles := NewProfiles() + profiles.Dictionary().StringTable().Append("") // index 0 + + rp := profiles.ResourceProfiles().AppendEmpty() + rp.Resource().Attributes().PutStr("service.name", "test-service") + rp.Resource().Attributes().PutStr("host.name", "test-host") + + sp := rp.ScopeProfiles().AppendEmpty() + sp.Scope().Attributes().PutStr("scope.attr", "scope-value") + + return profiles +} + +func TestJSONMarshalConvertsToReferences(t *testing.T) { + marshaler := JSONMarshaler{} + jsonBytes, err := marshaler.MarshalProfiles(newProfilesWithAttributes()) + require.NoError(t, err) + + // Parse the JSON output to verify references were used + var parsed map[string]any + require.NoError(t, stdjson.Unmarshal(jsonBytes, &parsed)) + + // The dictionary's stringTable should contain the attribute keys and values + dictionary, ok := parsed["dictionary"].(map[string]any) + require.True(t, ok, "JSON output should contain a dictionary object") + stringTable, ok := dictionary["stringTable"].([]any) + require.True(t, ok, "dictionary should contain a stringTable array") + + tableStrs := make([]string, len(stringTable)) + for i, v := range stringTable { + tableStrs[i], _ = v.(string) + } + assert.Contains(t, tableStrs, "service.name") + assert.Contains(t, tableStrs, "test-service") + assert.Contains(t, tableStrs, "host.name") + assert.Contains(t, tableStrs, "test-host") + assert.Contains(t, tableStrs, "scope.attr") + assert.Contains(t, tableStrs, "scope-value") +} + +func TestJSONUnmarshalResolvesReferences(t *testing.T) { + profiles := newProfilesWithAttributes() + + // Manually convert to references before marshaling, so the JSON output + // contains key_ref/string_value_ref regardless of whether the JSON + // marshaler itself calls convertProfilesToReferences. + convertProfilesToReferences(profiles) + + marshaler := JSONMarshaler{} + jsonBytes, err := marshaler.MarshalProfiles(profiles) + require.NoError(t, err) + + // Unmarshal and verify references were resolved + unmarshaler := JSONUnmarshaler{} + restored, err := unmarshaler.UnmarshalProfiles(jsonBytes) + require.NoError(t, err) + + rp := restored.ResourceProfiles().At(0) + serviceNameVal, ok := rp.Resource().Attributes().Get("service.name") + assert.True(t, ok, "service.name attribute should be accessible after JSON unmarshal") + assert.Equal(t, "test-service", serviceNameVal.Str()) + + hostNameVal, ok := rp.Resource().Attributes().Get("host.name") + assert.True(t, ok, "host.name attribute should be accessible after JSON unmarshal") + assert.Equal(t, "test-host", hostNameVal.Str()) + + sp := rp.ScopeProfiles().At(0) + scopeAttrVal, ok := sp.Scope().Attributes().Get("scope.attr") + assert.True(t, ok, "scope.attr should be accessible after JSON unmarshal") + assert.Equal(t, "scope-value", scopeAttrVal.Str()) +} diff --git a/pdata/pprofile/pb_test.go b/pdata/pprofile/pb_test.go index d0c79a965dd..7464db127b8 100644 --- a/pdata/pprofile/pb_test.go +++ b/pdata/pprofile/pb_test.go @@ -50,15 +50,13 @@ func TestProfilesProtoWireCompatibility(t *testing.T) { wire3, err := marshaler.MarshalProfiles(td2) require.NoError(t, err) - // Verify that wire1 and wire3 are compatible by unmarshaling both and checking the data + // Verify full round-trip fidelity: unmarshal both wire1 and wire3 into goproto + // messages and compare them semantically. This ensures all data (attributes, + // dictionary, profiles, etc.) survives the round-trip through both libraries. var check1, check2 gootlpprofiles.ProfilesData require.NoError(t, goproto.Unmarshal(wire1, &check1)) require.NoError(t, goproto.Unmarshal(wire3, &check2)) - - // Both should unmarshal successfully, proving wire compatibility - assert.NotNil(t, check1.ResourceProfiles) - assert.NotNil(t, check2.ResourceProfiles) - assert.Len(t, check1.ResourceProfiles, len(check2.ResourceProfiles)) + assert.True(t, goproto.Equal(&check1, &check2), "round-trip through goproto did not preserve profile data") } func TestProtoProfilesUnmarshalerError(t *testing.T) { @@ -283,20 +281,55 @@ func BenchmarkMarshalProfiles(b *testing.B) { for _, tc := range testCases { b.Run(tc.name, func(b *testing.B) { - // Generate profile data once - profiles := generateProfiles(b, tc.resourceCount, tc.scopeCount, tc.profileCount, tc.sampleCount) - marshaler := &ProtoMarshaler{} - b.ResetTimer() - b.ReportAllocs() - for i := 0; i < b.N; i++ { + // with_refs: simulate the normal ingest path where data was + // received on the wire (refs present), then unmarshaled (refs + // resolved but KeyRef kept), and is now being re-marshaled + // without any attribute modifications. + b.Run("with_refs", func(b *testing.B) { + profiles := generateProfiles(b, tc.resourceCount, tc.scopeCount, tc.profileCount, tc.sampleCount) + unmarshaler := &ProtoUnmarshaler{} buf, err := marshaler.MarshalProfiles(profiles) if err != nil { b.Fatalf("failed to marshal: %v", err) } - _ = buf - } + profiles, err = unmarshaler.UnmarshalProfiles(buf) + if err != nil { + b.Fatalf("failed to unmarshal: %v", err) + } + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + buf, err := marshaler.MarshalProfiles(profiles) + if err != nil { + b.Fatalf("failed to marshal: %v", err) + } + _ = buf + } + }) + + // without_refs: each iteration gets a fresh copy with no refs, + // simulating data that was constructed or had attributes modified. + b.Run("without_refs", func(b *testing.B) { + copies := make([]Profiles, b.N) + for i := range copies { + copies[i] = generateProfiles(b, tc.resourceCount, tc.scopeCount, tc.profileCount, tc.sampleCount) + } + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + buf, err := marshaler.MarshalProfiles(copies[i]) + if err != nil { + b.Fatalf("failed to marshal: %v", err) + } + _ = buf + } + }) }) } }