Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not trim span descriptions. #1983

Merged
merged 4 commits into from
Apr 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,12 +320,7 @@ def _prepare_event(
# Postprocess the event here so that annotated types do
# generally not surface in before_send
if event is not None:
event = serialize(
event,
smart_transaction_trimming=self.options["_experiments"].get(
"smart_transaction_trimming"
),
)
event = serialize(event)

before_send = self.options["before_send"]
if before_send is not None and event.get("type") != "transaction":
Expand Down
3 changes: 1 addition & 2 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
{
"max_spans": Optional[int],
"record_sql_params": Optional[bool],
"smart_transaction_trimming": Optional[bool],
# TODO: Remvoe these 2 profiling related experiments
# TODO: Remove these 2 profiling related experiments
"profiles_sample_rate": Optional[float],
"profiler_mode": Optional[ProfilerMode],
},
Expand Down
129 changes: 15 additions & 114 deletions sentry_sdk/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,9 @@
capture_internal_exception,
disable_capture_event,
format_timestamp,
json_dumps,
safe_repr,
strip_string,
)

import sentry_sdk.utils

from sentry_sdk._compat import (
text_type,
PY2,
Expand All @@ -23,12 +19,9 @@
iteritems,
binary_sequence_types,
)

from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
from datetime import timedelta

from types import TracebackType

from typing import Any
Expand All @@ -37,7 +30,6 @@
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union

Expand Down Expand Up @@ -120,12 +112,11 @@ def __exit__(
self._ids.pop(id(self._objs.pop()), None)


def serialize(event, smart_transaction_trimming=False, **kwargs):
# type: (Event, bool, **Any) -> Event
def serialize(event, **kwargs):
# type: (Event, **Any) -> Event
memo = Memo()
path = [] # type: List[Segment]
meta_stack = [] # type: List[Dict[str, Any]]
span_description_bytes = [] # type: List[int]

def _annotate(**meta):
# type: (**Any) -> None
Expand Down Expand Up @@ -365,113 +356,23 @@ def _serialize_node_impl(
if not isinstance(obj, string_types):
obj = safe_repr(obj)

# Allow span descriptions to be longer than other strings.
#
# For database auto-instrumented spans, the description contains
# potentially long SQL queries that are most useful when not truncated.
# Because arbitrarily large events may be discarded by the server as a
# protection mechanism, we dynamically limit the description length
# later in _truncate_span_descriptions.
if (
smart_transaction_trimming
and len(path) == 3
and path[0] == "spans"
and path[-1] == "description"
):
sl0thentr0py marked this conversation as resolved.
Show resolved Hide resolved
span_description_bytes.append(len(obj))
is_span_description = (
len(path) == 3 and path[0] == "spans" and path[-1] == "description"
)
if is_span_description:
return obj
return _flatten_annotated(strip_string(obj))

def _truncate_span_descriptions(serialized_event, event, excess_bytes):
# type: (Event, Event, int) -> None
"""
Modifies serialized_event in-place trying to remove excess_bytes from
span descriptions. The original event is used read-only to access the
span timestamps (represented as RFC3399-formatted strings in
serialized_event).

It uses heuristics to prioritize preserving the description of spans
that might be the most interesting ones in terms of understanding and
optimizing performance.
"""
# When truncating a description, preserve a small prefix.
min_length = 10

def shortest_duration_longest_description_first(args):
# type: (Tuple[int, Span]) -> Tuple[timedelta, int]
i, serialized_span = args
span = event["spans"][i]
now = datetime.utcnow()
start = span.get("start_timestamp") or now
end = span.get("timestamp") or now
duration = end - start
description = serialized_span.get("description") or ""
return (duration, -len(description))

# Note: for simplicity we sort spans by exact duration and description
# length. If ever needed, we could have a more involved heuristic, e.g.
# replacing exact durations with "buckets" and/or looking at other span
# properties.
path.append("spans")
for i, span in sorted(
enumerate(serialized_event.get("spans") or []),
key=shortest_duration_longest_description_first,
):
description = span.get("description") or ""
if len(description) <= min_length:
continue
excess_bytes -= len(description) - min_length
path.extend([i, "description"])
# Note: the last time we call strip_string we could preserve a few
# more bytes up to a total length of MAX_EVENT_BYTES. Since that's
# not strictly required, we leave it out for now for simplicity.
span["description"] = _flatten_annotated(
strip_string(description, max_length=min_length)
)
del path[-2:]
del meta_stack[len(path) + 1 :]

if excess_bytes <= 0:
break
path.pop()
del meta_stack[len(path) + 1 :]
return _flatten_annotated(strip_string(obj))

#
# Start of serialize() function
#
disable_capture_event.set(True)
try:
rv = _serialize_node(event, **kwargs)
if meta_stack and isinstance(rv, dict):
rv["_meta"] = meta_stack[0]

sum_span_description_bytes = sum(span_description_bytes)
if smart_transaction_trimming and sum_span_description_bytes > 0:
sl0thentr0py marked this conversation as resolved.
Show resolved Hide resolved
span_count = len(event.get("spans") or [])
# This is an upper bound of how many bytes all descriptions would
# consume if the usual string truncation in _serialize_node_impl
# would have taken place, not accounting for the metadata attached
# as event["_meta"].
descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH

# If by not truncating descriptions we ended up with more bytes than
# per the usual string truncation, check if the event is too large
# and we need to truncate some descriptions.
#
# This is guarded with an if statement to avoid JSON-encoding the
# event unnecessarily.
if sum_span_description_bytes > descriptions_budget_bytes:
original_bytes = len(json_dumps(rv))
excess_bytes = original_bytes - MAX_EVENT_BYTES
if excess_bytes > 0:
# Event is too large, will likely be discarded by the
# server. Trim it down before sending.
_truncate_span_descriptions(rv, event, excess_bytes)

# Span descriptions truncated, set or reset _meta.
#
# We run the same code earlier because we want to account
# for _meta when calculating original_bytes, the number of
# bytes in the JSON-encoded event.
if meta_stack and isinstance(rv, dict):
rv["_meta"] = meta_stack[0]
return rv
serialized_event = _serialize_node(event, **kwargs)
if meta_stack and isinstance(serialized_event, dict):
serialized_event["_meta"] = meta_stack[0]

return serialized_event
finally:
disable_capture_event.set(False)
40 changes: 14 additions & 26 deletions tests/integrations/sqlalchemy/test_sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

from sentry_sdk import capture_message, start_transaction, configure_scope
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH
from sentry_sdk.serializer import MAX_EVENT_BYTES
from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH


def test_orm_queries(sentry_init, capture_events):
Expand Down Expand Up @@ -143,7 +143,6 @@ def test_long_sql_query_preserved(sentry_init, capture_events):
sentry_init(
traces_sample_rate=1,
integrations=[SqlalchemyIntegration()],
_experiments={"smart_transaction_trimming": True},
)
events = capture_events()

Expand All @@ -158,11 +157,10 @@ def test_long_sql_query_preserved(sentry_init, capture_events):
assert description.endswith("SELECT 98 UNION SELECT 99")


def test_too_large_event_truncated(sentry_init, capture_events):
def test_large_event_not_truncated(sentry_init, capture_events):
sentry_init(
traces_sample_rate=1,
integrations=[SqlalchemyIntegration()],
_experiments={"smart_transaction_trimming": True},
)
events = capture_events()

Expand All @@ -178,36 +176,26 @@ def processor(event, hint):
engine = create_engine("sqlite:///:memory:")
with start_transaction(name="test"):
with engine.connect() as con:
for _ in range(2000):
for _ in range(1500):
con.execute(" UNION ".join("SELECT {}".format(i) for i in range(100)))

(event,) = events

# Because of attached metadata in the "_meta" key, we may send out a little
# bit more than MAX_EVENT_BYTES.
max_bytes = 1.2 * MAX_EVENT_BYTES
assert len(json_dumps(event)) < max_bytes
assert len(json_dumps(event)) > MAX_EVENT_BYTES

# Some spans are discarded.
assert len(event["spans"]) == 1000

for i, span in enumerate(event["spans"]):
description = span["description"]

assert description.startswith("SELECT ")
if str(i) in event["_meta"]["spans"]:
# Description must have been truncated
assert len(description) == 10
assert description.endswith("...")
else:
# Description was not truncated, check for original length
assert len(description) == 1583
assert description.endswith("SELECT 98 UNION SELECT 99")

# Smoke check the meta info for one of the spans.
assert next(iter(event["_meta"]["spans"].values())) == {
"description": {"": {"len": 1583, "rem": [["!limit", "x", 7, 10]]}}
}
# Span descriptions are not truncated.
description = event["spans"][0]["description"]
assert len(description) == 1583
assert description.startswith("SELECT 0")
assert description.endswith("SELECT 98 UNION SELECT 99")

description = event["spans"][999]["description"]
assert len(description) == 1583
assert description.startswith("SELECT 0")
assert description.endswith("SELECT 98 UNION SELECT 99")

# Smoke check that truncation of other fields has not changed.
assert len(event["message"]) == MAX_STRING_LENGTH
Expand Down