From 19aa542206ccd85d4aaf6ff0dfaa99be583c7c60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Geisend=C3=B6rfer?= Date: Thu, 9 Jan 2025 11:34:00 +0100 Subject: [PATCH 1/5] Simplify profile stack trace representation - Introduce a first-class Stack message type and lookup table. - Replace location index range based stack trace encoding on Sample with a single stack_index reference. - Remove the location_indices lookup table. The primary motivation is laying the groundwork for [timestamp based profiling][timestamp proposal] where the same stack trace needs to be referenced much more frequently compared to aggregation based on low cardinality attributes. Timestamp based profiling is also expected to be used with the upcoming [Off-CPU profiling][off-cpu pr] feature in the eBPF profiler. Off-CPU stack traces have a different distribution compared to CPU samples. In particular stack traces are much more repetitive because they only occur at call sites such as syscalls. For the same reason it is also uncommon to see a stack trace that is a root-prefix of a previously observed stack trace. We might need to revisit the [previous benchmarks][benchmarks] to confirm these claims. The secondary motivation is simplicity. Arguably the proposed change here will make it easier to write exporters, processors as well as receivers. It seems like we had rough consensus around this change in previous SIG meetings, and it seems like a good incremental step to make progress on the timestamp proposal. 
[timestamp proposal]: https://github.com/open-telemetry/opentelemetry-proto/pull/594 [off-cpu pr]: https://github.com/open-telemetry/opentelemetry-ebpf-profiler/pull/196 [benchmarks]: https://docs.google.com/spreadsheets/d/1Q-6MlegV8xLYdz5WD5iPxQU2tsfodX1-CDV1WeGzyQ0/edit?gid=2069300294#gid=2069300294 Modified-by: Christos Kalkanis --- .../profiles/v1development/profiles.proto | 65 ++++++++++--------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/opentelemetry/proto/profiles/v1development/profiles.proto b/opentelemetry/proto/profiles/v1development/profiles.proto index 8dcb9cfc..28a5833f 100644 --- a/opentelemetry/proto/profiles/v1development/profiles.proto +++ b/opentelemetry/proto/profiles/v1development/profiles.proto @@ -102,7 +102,7 @@ message ProfilesDictionary { // into that address range referenced by locations via Location.mapping_index. repeated Mapping mapping_table = 1; - // Locations referenced by samples via Profile.location_indices. + // Locations referenced by samples via Stack.location_indices. repeated Location location_table = 2; // Functions referenced by locations via Line.function_index. @@ -134,6 +134,9 @@ message ProfilesDictionary { // The restrictions take origin from the OpenTelemetry specification: // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.47.0/specification/common/README.md#attribute. repeated KeyValueAndUnit attribute_table = 6; + + // Stacks referenced by samples via Sample.stack_index. + repeated Stack stack_table = 7; } // ProfilesData represents the profiles data that can be stored in persistent storage, @@ -214,18 +217,19 @@ message ScopeProfiles { // information present to determine the original sampled values. // // - The profile is represented as a set of samples, where each sample -// references a sequence of locations, and where each location belongs +// references a stack trace which is a list of locations, each belonging // to a mapping. 
-// - There is a N->1 relationship from sample.location_id entries to -// locations. For every sample.location_id entry there must be a +// - There is a N->1 relationship from Stack.location_indices entries to +// locations. For every Stack.location_indices entry there must be a // unique Location with that index. // - There is an optional N->1 relationship from locations to // mappings. For every nonzero Location.mapping_id there must be a // unique Mapping with that index. -// Represents a complete profile, including sample types, samples, -// mappings to binaries, locations, functions, string table, and additional metadata. -// It modifies and annotates pprof Profile with OpenTelemetry specific fields. +// Represents a complete profile, including sample types, samples, mappings to +// binaries, stacks, locations, functions, string table, and additional +// metadata. It modifies and annotates pprof Profile with OpenTelemetry +// specific fields. // // Note that whilst fields in this message retain the name and field id from pprof in most cases // for ease of understanding data migration, it is not intended that pprof:Profile and @@ -240,27 +244,24 @@ message Profile { // The set of samples recorded in this profile. repeated Sample sample = 2; - // References to locations in ProfilesDictionary.location_table. - repeated int32 location_indices = 3; - - // The following fields 4-14 are informational, do not affect + // The following fields 3-12 are informational, do not affect // interpretation of results. // Time of collection (UTC) represented as nanoseconds past the epoch. - fixed64 time_unix_nano = 4; + fixed64 time_unix_nano = 3; // Duration of the profile, if a duration makes sense. - uint64 duration_nano = 5; + uint64 duration_nano = 4; // The kind of events between sampled occurrences. // e.g [ "cpu","cycles" ] or [ "heap","bytes" ] - ValueType period_type = 6; + ValueType period_type = 5; // The number of events between sampled occurrences. 
- int64 period = 7; + int64 period = 6; // Free-form text associated with the profile. The text is displayed as is // to the user by the tools that read profiles (e.g. by pprof). This field // should not be used to store any machine-readable information, it is only // for human-friendly content. The profile must stay functional if this field // is cleaned. - repeated int32 comment_strindices = 8; // Indices into ProfilesDictionary.string_table. + repeated int32 comment_strindices = 7; // Indices into ProfilesDictionary.string_table. // A globally unique identifier for a profile. The ID is a 16-byte array. An ID with // all zeroes is considered invalid. It may be used for deduplication and signal @@ -268,15 +269,15 @@ message Profile { // in this field as not equal, even if they represented the same object at an earlier // time. // This field is optional; an ID may be assigned to an ID-less profile in a later step. - bytes profile_id = 9; + bytes profile_id = 8; // dropped_attributes_count is the number of attributes that were discarded. Attributes // can be discarded because their keys are too long or because there are too many // attributes. If this value is 0, then no attributes were dropped. - uint32 dropped_attributes_count = 10; + uint32 dropped_attributes_count = 9; // Specifies format of the original payload. Common values are defined in semantic conventions. [required if original_payload is present] - string original_payload_format = 11; + string original_payload_format = 10; // Original payload can be stored in this field. This can be useful for users who want to get the original payload. // Formats such as JFR are highly extensible and can contain more information than what is defined in this spec. @@ -284,10 +285,10 @@ message Profile { // If the original payload is in pprof format, it SHOULD not be included in this field. 
// The field is optional, however if it is present then equivalent converted data should be populated in other fields // of this message as far as is practicable. - bytes original_payload = 12; + bytes original_payload = 11; // References to attributes in attribute_table. [optional] - repeated int32 attribute_indices = 13; + repeated int32 attribute_indices = 12; } // A pointer from a profile Sample to a trace Span. @@ -400,23 +401,20 @@ message ValueType { // values: [2, 2, 3, 3] // timestamps_unix_nano: [1, 2, 3, 4] message Sample { - // locations_start_index along with locations_length refers to to a slice of locations in Profile.location_indices. - int32 locations_start_index = 1; - // locations_length along with locations_start_index refers to a slice of locations in Profile.location_indices. - // Supersedes location_index. - int32 locations_length = 2; + // Reference to stack in ProfilesDictionary.stack_table. + int32 stack_index = 1; // The type and unit of each value is defined by Profile.sample_type. - repeated int64 values = 3; + repeated int64 values = 2; // References to attributes in ProfilesDictionary.attribute_table. [optional] - repeated int32 attribute_indices = 4; + repeated int32 attribute_indices = 3; // Reference to link in ProfilesDictionary.link_table. [optional] // It can be unset / set to 0 if no link exists, as link_table[0] is always a 'null' default value. - int32 link_index = 5; + int32 link_index = 4; // Timestamps associated with Sample represented in nanoseconds. These // timestamps should fall within the Profile's time range. - repeated fixed64 timestamps_unix_nano = 6; + repeated fixed64 timestamps_unix_nano = 4; } // Describes the mapping of a binary in memory, including its address range, @@ -436,6 +434,13 @@ message Mapping { repeated int32 attribute_indices = 5; } +// A Stack represents a stack trace as a list of locations. The first location +// is the leaf frame. 
+message Stack { + // References to locations in ProfilesDictionary.location_table. + repeated int32 location_indices = 1; +} + // Describes function and line table debug information. message Location { // Reference to mapping in ProfilesDictionary.mapping_table. From 23e00d8b3c38df7ffeffa1e7d19cd6341cb1741a Mon Sep 17 00:00:00 2001 From: Christos Kalkanis Date: Fri, 22 Aug 2025 07:54:28 -0400 Subject: [PATCH 2/5] Update ASCII diagram for new Stack message --- .../profiles/v1development/profiles.proto | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/opentelemetry/proto/profiles/v1development/profiles.proto b/opentelemetry/proto/profiles/v1development/profiles.proto index 28a5833f..20157875 100644 --- a/opentelemetry/proto/profiles/v1development/profiles.proto +++ b/opentelemetry/proto/profiles/v1development/profiles.proto @@ -70,12 +70,18 @@ option go_package = "go.opentelemetry.io/proto/otlp/profiles/v1development"; // ┌──────────────────┐ 1-n ┌─────────────────┐ ┌──────────┐ // │ Sample │ ──────▷ │ KeyValueAndUnit │ │ Link │ // └──────────────────┘ └─────────────────┘ └──────────┘ -// │ 1-n △ △ -// │ 1-n ┌─────────────────┘ │ 1-n -// ▽ │ │ -// ┌──────────────────┐ n-1 ┌──────────────┐ -// │ Location │ ──────▷ │ Mapping │ -// └──────────────────┘ └──────────────┘ +// │ △ △ +// │ 1-1 │ │ 1-n +// ▽ │ │ +// ┌──────────────────┐ │ │ +// │ Stack │ │ │ +// └──────────────────┘ │ │ +// │ 1-n │ │ +// │ 1-n ┌────────────────┘ │ +// ▽ │ │ +// ┌──────────────────┐ n-1 ┌─────────────┐ +// │ Location │ ──────▷ │ Mapping │ +// └──────────────────┘ └─────────────┘ // │ // │ 1-n // ▼ From e147f2afc076f5e717fe9fec89a4d68a7500ab8a Mon Sep 17 00:00:00 2001 From: Christos Kalkanis Date: Fri, 22 Aug 2025 08:01:30 -0400 Subject: [PATCH 3/5] Move leaf frame explanation to field inside Stack message --- opentelemetry/proto/profiles/v1development/profiles.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/opentelemetry/proto/profiles/v1development/profiles.proto b/opentelemetry/proto/profiles/v1development/profiles.proto index 20157875..222b2963 100644 --- a/opentelemetry/proto/profiles/v1development/profiles.proto +++ b/opentelemetry/proto/profiles/v1development/profiles.proto @@ -440,10 +440,10 @@ message Mapping { repeated int32 attribute_indices = 5; } -// A Stack represents a stack trace as a list of locations. The first location -// is the leaf frame. +// A Stack represents a stack trace as a list of locations. message Stack { // References to locations in ProfilesDictionary.location_table. + // The first location is the leaf frame. repeated int32 location_indices = 1; } From 11ee1291aba41f24204f1addfe5ec50f1a8c755a Mon Sep 17 00:00:00 2001 From: Christos Kalkanis Date: Tue, 26 Aug 2025 13:55:26 -0400 Subject: [PATCH 4/5] Update field number --- opentelemetry/proto/profiles/v1development/profiles.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry/proto/profiles/v1development/profiles.proto b/opentelemetry/proto/profiles/v1development/profiles.proto index 222b2963..642dc81d 100644 --- a/opentelemetry/proto/profiles/v1development/profiles.proto +++ b/opentelemetry/proto/profiles/v1development/profiles.proto @@ -420,7 +420,7 @@ message Sample { // Timestamps associated with Sample represented in nanoseconds. These // timestamps should fall within the Profile's time range. 
- repeated fixed64 timestamps_unix_nano = 4; + repeated fixed64 timestamps_unix_nano = 5; } // Describes the mapping of a binary in memory, including its address range, From a48c975892377c54ec3f07ad5815a7172d96aa02 Mon Sep 17 00:00:00 2001 From: Christos Kalkanis Date: Wed, 27 Aug 2025 03:10:26 -0400 Subject: [PATCH 5/5] Update ASCII diagram (Sample->Stack n-1 instead of 1-1) --- opentelemetry/proto/profiles/v1development/profiles.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry/proto/profiles/v1development/profiles.proto b/opentelemetry/proto/profiles/v1development/profiles.proto index 642dc81d..dc101770 100644 --- a/opentelemetry/proto/profiles/v1development/profiles.proto +++ b/opentelemetry/proto/profiles/v1development/profiles.proto @@ -71,7 +71,7 @@ option go_package = "go.opentelemetry.io/proto/otlp/profiles/v1development"; // │ Sample │ ──────▷ │ KeyValueAndUnit │ │ Link │ // └──────────────────┘ └─────────────────┘ └──────────┘ // │ △ △ -// │ 1-1 │ │ 1-n +// │ n-1 │ │ 1-n // ▽ │ │ // ┌──────────────────┐ │ │ // │ Stack │ │ │