Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(related_issues): API to find groups caused by the same root error #66992

Merged
merged 21 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -619,9 +619,11 @@ disable_error_code = [
# beginning: stronger typing
[[tool.mypy.overrides]]
module = [
"sentry.api.endpoints.issues.*",
"sentry.buffer.base",
"sentry.buffer.redis",
"sentry.eventstore.reprocessing.redis",
"sentry.issues.related.*",
"sentry.utils.redis",
"sentry.utils.redis_metrics",
"sentry.utils.locking.backends.redis",
Expand All @@ -636,6 +638,7 @@ module = [
"sentry.relay.config.metric_extraction",
"sentry.snuba.metrics.extraction",
"sentry.tasks.reprocessing2",
"tests.sentry.api.endpoints.issues.*",
"tests.sentry.tasks.test_on_demand_metrics",
"tests.sentry.relay.config.test_metric_extraction",
]
Expand Down
19 changes: 19 additions & 0 deletions src/sentry/api/endpoints/issues/related_issues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from rest_framework.request import Request
from rest_framework.response import Response

from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import region_silo_endpoint
from sentry.api.bases.group import GroupEndpoint
from sentry.issues.related import find_related_issues
from sentry.models.group import Group


@region_silo_endpoint
class RelatedIssuesEndpoint(GroupEndpoint):
owner = ApiOwner.ISSUES
publish_status = {"GET": ApiPublishStatus.EXPERIMENTAL}

def get(self, _: Request, group: Group) -> Response:
related_issues = find_related_issues(group)
return Response({key: [g.id for g in groups] for key, groups in related_issues.items()})
15 changes: 15 additions & 0 deletions src/sentry/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sentry.api.endpoints.group_similar_issues_embeddings import (
GroupSimilarIssuesEmbeddingsEndpoint,
)
from sentry.api.endpoints.issues.related_issues import RelatedIssuesEndpoint
from sentry.api.endpoints.org_auth_token_details import OrgAuthTokenDetailsEndpoint
from sentry.api.endpoints.org_auth_tokens import OrgAuthTokensEndpoint
from sentry.api.endpoints.organization_events_root_cause_analysis import (
Expand Down Expand Up @@ -635,6 +636,8 @@
# to the organization (and queryable via short ID)


# NOTE: Start adding to ISSUES_URLS instead of here because (?:issues|groups)
# cannot be reversed and we prefer to always use issues instead of groups
def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
return [
re_path(
Expand Down Expand Up @@ -800,6 +803,14 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
),
]

ISSUES_URLS = [
re_path(
r"^(?P<issue_id>[^\/]+)/related-issues/$",
RelatedIssuesEndpoint.as_view(),
name="sentry-api-0-issues-related-issues",
),
]

RELOCATION_URLS = [
re_path(
r"^$",
Expand Down Expand Up @@ -2971,6 +2982,10 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
r"^(?:issues|groups)/",
include(create_group_urls("sentry-api-0")),
),
re_path(
r"^issues/",
include(ISSUES_URLS),
),
# Organizations
re_path(
r"^organizations/",
Expand Down
9 changes: 9 additions & 0 deletions src/sentry/issues/related/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Summary

Issues in Sentry are created based on unique fingerprints based on the information from an event (e.g. the stack trace or the message). The Related Issues feature associates different issues based on the heuristics we will describe. This satisfies the desire of many customers to act on various issues together.

In the future, we will be implementing super groups ([read Armin's RFC on supergroups](https://github.com/getsentry/rfcs/pull/29)).

## Same root cause

In many cases, a bug or an environmental failure will create many Sentry issues which can be merged.
19 changes: 19 additions & 0 deletions src/sentry/issues/related/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""This module exports a function to find related issues. It groups them by type."""

from sentry.models.group import Group

from .same_root_cause import same_root_cause_analysis

__all__ = ["find_related_issues"]

RELATED_ISSUES_ALGORITHMS = {
"same_root_cause": same_root_cause_analysis,
}


def find_related_issues(group: Group) -> dict[str, list[Group]]:
related_issues = {}
for key, func in RELATED_ISSUES_ALGORITHMS.items():
related_issues[key] = func(group)

return related_issues or {}
29 changes: 29 additions & 0 deletions src/sentry/issues/related/same_root_cause.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Module to evaluate if groups have the same root cause
#
# The first case this module handles is environmental failures.
#
# Refer to README in module for more details.
from typing import Any

from sentry.models.group import Group


def same_root_cause_analysis(group: Group) -> list[Group]:
"""Analyze and create a group set if the group was caused by the same root cause."""
# XXX: This function is not optimal since we can't query the data field which is a GzippedDictField
project_groups = Group.objects.filter(project=group.project_id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unbounded, you should use RangeQuerySetWrapper here and also limit the number of groups returned

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would also probably be helpful to just fetch the fields you need rather than the entire Group

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Thanks, Dan!

same_error_type_groups = [g for g in project_groups if compare_groups(g, group)]
return same_error_type_groups or []


def compare_groups(groupA: Group, groupB: Group) -> bool:
return match_criteria(_extract_values(groupA), _extract_values(groupB))


def match_criteria(a: dict[str, str | None], b: dict[str, str | None]) -> bool:
# XXX: In future iterations we will be able to use similar titles rather than an exact match
return a["type"] == b["type"] and a["title"] == b["title"]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is very simplistic but it works in a lot of cases.



def _extract_values(group: Group) -> dict[str, Any]:
return {"title": group.title, "type": group.data.get("metadata", {}).get("type")}
12 changes: 9 additions & 3 deletions src/sentry/testutils/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,8 +674,10 @@ class APITestCase(BaseTestCase, BaseAPITestCase):
Extend APITestCase to inherit access to `client`, an object with methods
that simulate API calls to Sentry, and the helper `get_response`, which
combines and simplifies a lot of tedious parts of making API calls in tests.
When creating API tests, use a new class per endpoint-method pair. The class
must set the string `endpoint`.
When creating API tests, use a new class per endpoint-method pair.

The class must set the string `endpoint`.
If your endpoint requires kwargs implement the `reverse_url` method.
"""

# We need Django to flush all databases.
Expand All @@ -701,7 +703,11 @@ def get_response(self, *args, **params):
* raw_data: (Optional) Sometimes we want to precompute the JSON body.
:returns Response object
"""
url = reverse(self.endpoint, args=args)
url = (
self.reverse_url()
if hasattr(self, "reverse_url")
else reverse(self.endpoint, args=args)
)
# In some cases we want to pass querystring params to put/post, handle this here.
if "qs_params" in params:
query_string = urlencode(params.pop("qs_params"), doseq=True)
Expand Down
48 changes: 48 additions & 0 deletions tests/sentry/api/endpoints/issues/test_related_issues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from typing import Any

from django.urls import reverse

from sentry.testutils.cases import APITestCase
from sentry.testutils.silo import region_silo_test


@region_silo_test
class RelatedIssuesTest(APITestCase):
endpoint = "sentry-api-0-issues-related-issues"

def setUp(self) -> None:
super().setUp()
self.login_as(user=self.user)
self.organization = self.create_organization(owner=self.user)
self.error_type = "ApiTimeoutError"
self.error_value = "Timed out attempting to reach host: api.github.com"
# You need to set this value in your test before calling the API
self.group_id = None

def reverse_url(self) -> str:
return reverse(self.endpoint, kwargs={"issue_id": self.group_id})

def _data(self, type: str, value: str) -> dict[str, Any]:
return {"type": "error", "metadata": {"type": type, "value": value}}

def test_same_root_related_issues(self) -> None:
# This is the group we're going to query about
group = self.create_group(data=self._data(self.error_type, self.error_value))
self.group_id = group.id
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will allow reverse_url to point to this specific group.


groups_data = [
self._data("ApiError", self.error_value),
self._data(self.error_type, "Unreacheable host: api.github.com"),
self._data(self.error_type, ""),
# Only this group will be related
self._data(self.error_type, self.error_value),
]
# XXX: See if we can get this code to be closer to how save_event generates groups
for datum in groups_data:
self.create_group(data=datum)

response = self.get_success_response()
# The UI will then make normal calls to get issues-stats
# For instance, this URL
# https://us.sentry.io/api/0/organizations/sentry/issues-stats/?groups=4741828952&groups=4489703641&statsPeriod=24h
assert response.json() == {"same_root_cause": [1, 5]}
Loading