Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(grouping): Clean up message normalization #53479

Merged
merged 13 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/sentry/grouping/strategies/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
# strategy to disable itself. Recursion is detected by the outer
# strategy.
"is_recursion": False,
# This turns on the automatic message trimming by the message
# strategy.
"trim_message": False,
# This turns on the automatic message trimming and parameter substitution
# by the message strategy.
"normalize_message": False,
# newstyle: enables the legacy function logic. This is only used
# by the newstyle:2019-04-05 strategy. Once this is no longer used
# this can go away entirely.
Expand Down Expand Up @@ -66,7 +66,7 @@
"use_package_fallback": False,
# Remove platform differences in native frames
"native_fuzzing": False,
# Ignore exception types for native if they are platform specific error
# Ignore exception types for native if they are platform-specific error
# codes. Normally SDKs are supposed to disable error-type grouping with
# the `synthetic` flag in the event, but a lot of error types we can
# also detect on the backend.
Expand Down Expand Up @@ -105,7 +105,7 @@ def register_strategy_config(id, **kwargs):
* Some known weaknesses with regards to grouping of native frames
""",
initial_context={
"trim_message": False,
"normalize_message": False,
},
enhancements_base="legacy:2019-03-12",
)
Expand Down Expand Up @@ -135,7 +135,7 @@ def register_strategy_config(id, **kwargs):
"javascript_fuzzing": True,
"contextline_platforms": ("javascript", "node", "python", "php", "ruby"),
"with_context_line_file_origin_bug": True,
"trim_message": True,
"normalize_message": True,
"with_exception_value_fallback": True,
},
enhancements_base="common:2019-03-23",
Expand Down Expand Up @@ -230,7 +230,7 @@ def register_strategy_config(id, **kwargs):
hidden=True,
initial_context={
"legacy_function_logic": False,
"trim_message": True,
"normalize_message": True,
"with_exception_value_fallback": True,
},
)
48 changes: 31 additions & 17 deletions src/sentry/grouping/strategies/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from sentry.interfaces.message import Message
from sentry.utils import metrics

_irrelevant_re = re.compile(
_parameterization_regex = re.compile(
# The `(?x)` tells the regex compiler to ignore comments and unescaped whitespace,
# so we can use newlines and indentation for better legibility.
r"""(?x)
(?P<email>
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*
Expand Down Expand Up @@ -96,47 +98,59 @@
\b\d+\b
) |
(?P<quoted_str>
# The `=` here guarantees we'll only match the value half of key-value pairs,
# rather than all quoted strings
='([\w\s]+)'
)
"""
)


def trim_message_for_grouping(string: str) -> str:
"""Replace values from a group's message to hide P.I.I. and improve grouping when no
stacktrace available.
def normalize_message_for_grouping(message: str) -> str:
"""Replace values from a group's message with placeholders (to hide P.I.I. and
improve grouping when no stacktrace is available) and trim to at most 2 lines.
"""
s = "\n".join(islice((x for x in string.splitlines() if x.strip()), 2)).strip()
if s != string:
s += "..."
trimmed = "\n".join(
# If there are multiple lines, grab the first two non-empty ones.
islice(
(x for x in message.splitlines() if x.strip()),
2,
)
)
if trimmed != message:
trimmed += "..."

def _handle_match(match: Match[str]) -> str:
# e.g. hex, 0x40000015
# Find the first (should be only) non-None match entry, and sub in the placeholder. For
# example, given the groupdict item `('hex', '0x40000015')`, this returns '<hex>' as a
# replacement for the original value in the string.
for key, value in match.groupdict().items():
if value is not None:
# key can be one of the keys from _irrelevant_re, thus, not a large cardinality
# tracking the key helps distinguish what kinds of replacements are happening
# `key` can only be one of the keys from `_parameterization_regex`, thus, not a large
# cardinality. Tracking the key helps distinguish what kinds of replacements are happening.
metrics.incr("grouping.value_trimmed_from_message", tags={"key": key})
# For quoted_str we want to preserver the = symbol
# For `quoted_str` we want to preserve the `=` symbol, which we include in
# the match in order not to replace random quoted strings in contexts other
# than key-value pairs
return f"=<{key}>" if key == "quoted_str" else f"<{key}>"
return ""

return _irrelevant_re.sub(_handle_match, s)
return _parameterization_regex.sub(_handle_match, trimmed)


@strategy(ids=["message:v1"], interface=Message, score=0)
@produces_variants(["default"])
def message_v1(
interface: Message, event: Event, context: GroupingContext, **meta: Any
) -> ReturnedVariants:
if context["trim_message"]:
message_in = interface.message or interface.formatted or ""
message_trimmed = trim_message_for_grouping(message_in)
hint = "stripped common values" if message_in != message_trimmed else None
if context["normalize_message"]:
raw = interface.message or interface.formatted or ""
normalized = normalize_message_for_grouping(raw)
hint = "stripped event-specific values" if raw != normalized else None
return {
context["variant"]: GroupingComponent(
id="message",
values=[message_trimmed],
values=[normalized],
hint=hint,
)
}
Expand Down
14 changes: 7 additions & 7 deletions src/sentry/grouping/strategies/newstyle.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
strategy,
)
from sentry.grouping.strategies.hierarchical import get_stacktrace_hierarchy
from sentry.grouping.strategies.message import trim_message_for_grouping
from sentry.grouping.strategies.message import normalize_message_for_grouping
from sentry.grouping.strategies.utils import has_url_origin, remove_non_stacktrace_variants
from sentry.grouping.utils import hash_from_values
from sentry.interfaces.exception import Exception as ChainedException
Expand Down Expand Up @@ -642,12 +642,12 @@ def single_exception(
id="value",
)

value_in = interface.value
if value_in is not None:
value_trimmed = trim_message_for_grouping(value_in)
hint = "stripped common values" if value_in != value_trimmed else None
if value_trimmed:
value_component.update(values=[value_trimmed], hint=hint)
raw = interface.value
if raw is not None:
normalized = normalize_message_for_grouping(raw)
hint = "stripped event-specific values" if raw != normalized else None
if normalized:
value_component.update(values=[normalized], hint=hint)

if stacktrace_component.contributes and value_component.contributes:
value_component.update(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ app:
exception*
type*
"MyApp.SuchWowException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
"MyApp.AmazingException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ app:
exception*
type*
"MyApp.CoolException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
"MyApp.BeansException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"Error"
value* (stripped common values)
value* (stripped event-specific values)
"Loading chunk <int> failed.\n(timeout: <url>"
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"Error"
value* (stripped common values)
value* (stripped event-specific values)
"<date>: <email> logged in (error <int>) time spent <float> --- correlation id <uuid>, checksum <sha1> (md5 <md5>); payload timestamp <date> (submitted from <ip> via <ip> via <ip>) at offset <hex>"
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ default:
hash: "e02854e73673e108fe54768e0f19317b"
component:
default*
message* (stripped common values)
message* (stripped event-specific values)
"Error key1=<quoted_str> key2=<int> key3=False key4=<date> other_date=datetime.datetime(<int>, <int>, <int>, <int>, <int>, tzinfo=d..."
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ app:
"getChildAt"
type*
"ApplicationNotResponding"
value* (stripped common values)
value* (stripped event-specific values)
"Application Not Responding for at least <int> ms."
threads (ignored because this variant does not have a contributing stacktrace, but the system variant does)
stacktrace
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ app:
exception*
type*
"MyApp.Exception"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ app:
exception*
type*
"MyApp.SuchWowException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
"MyApp.AmazingException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ app:
exception*
type*
"MyApp.CoolException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
"MyApp.BeansException"
value* (stripped common values)
value* (stripped event-specific values)
"Test <int>"
exception*
type*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ app:
"ImageLoaderMachOCompressed::rebase"
type (ignored because exception is synthetic)
"EXC_BAD_ACCESS / 0x00000032"
value* (stripped common values)
value* (stripped event-specific values)
"Fatal Error: EXC_BAD_ACCESS / <hex>"
--------------------------------------------------------------------------
system:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ app:
"__pthread_kill"
type (ignored because exception is synthetic)
"0x00000000 / 0x00000000"
value* (stripped common values)
value* (stripped event-specific values)
"Fatal Error: <hex> / <hex>"
--------------------------------------------------------------------------
system:
Expand Down
Loading
Loading