Merged
25 changes: 25 additions & 0 deletions src/sentry/tasks/autofix.py
@@ -60,3 +60,28 @@ def generate_issue_summary_only(group_id: int) -> None:
get_issue_summary(
group=group, source=SeerAutomationSource.POST_PROCESS, should_run_automation=False
)
# TODO: Generate fixability score here and check for it in run_automation for triage signals V0
# Currently, fixability is only generated after 10 events, when run_automation is called


@instrumented_task(
name="sentry.tasks.autofix.run_automation_for_group",
namespace=ingest_errors_tasks,
processing_deadline_duration=35,
retry=Retry(times=1),
)
def run_automation_for_group(group_id: int) -> None:
"""
Run automation directly for a group (assumes summary and fixability already exist).
Used for triage signals flow when event count >= 10 and summary exists.
"""
from sentry.seer.autofix.issue_summary import _run_automation

group = Group.objects.get(id=group_id)
event = group.get_latest_event()

if not event:
logger.warning("run_automation_for_group.no_event_found", extra={"group_id": group_id})
return

_run_automation(group=group, user=None, event=event, source=SeerAutomationSource.POST_PROCESS)
85 changes: 70 additions & 15 deletions src/sentry/tasks/post_process.py
@@ -1595,33 +1595,88 @@ def check_if_flags_sent(job: PostProcessJob) -> None:


def kick_off_seer_automation(job: PostProcessJob) -> None:
from sentry.seer.autofix.issue_summary import get_issue_summary_lock_key
from sentry.seer.autofix.issue_summary import (
get_issue_summary_cache_key,
get_issue_summary_lock_key,
)
from sentry.seer.autofix.utils import (
is_issue_eligible_for_seer_automation,
is_seer_scanner_rate_limited,
)
from sentry.tasks.autofix import start_seer_automation
from sentry.tasks.autofix import (
generate_issue_summary_only,
run_automation_for_group,
start_seer_automation,
)

event = job["event"]
group = event.group

# Only run on issues with no existing scan - TODO: Update condition for triage signals V0
if group.seer_fixability_score is not None:
return
# Default behaviour
if not features.has("projects:triage-signals-v0", group.project):
# Only run on issues with no existing scan
if group.seer_fixability_score is not None:
return

if is_issue_eligible_for_seer_automation(group) is False:
return
if is_issue_eligible_for_seer_automation(group) is False:
return

# Don't run if there's already a task in progress for this issue
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=1, name=lock_name)
if lock.locked():
return
# Don't run if there's already a task in progress for this issue
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=1, name=lock_name)
if lock.locked():
return

if is_seer_scanner_rate_limited(group.project, group.organization):
return
if is_seer_scanner_rate_limited(group.project, group.organization):
return

start_seer_automation.delay(group.id)
else:
# Triage signals V0 behaviour

# If event count < 10, only generate summary (no automation)
if group.times_seen < 10:
# Check if summary exists in cache
cache_key = get_issue_summary_cache_key(group.id)
if cache.get(cache_key) is not None:
return

# Check if we're already generating the summary
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=5, name=lock_name)
if lock.locked():
return

# Generate summary (no automation)
if is_issue_eligible_for_seer_automation(group):
if not is_seer_scanner_rate_limited(group.project, group.organization):
generate_issue_summary_only.delay(group.id)
else:
# Event count >= 10: run automation
# Check seer_autofix_last_triggered first (long-term check to avoid re-running)
if group.seer_autofix_last_triggered is not None:
return

# Early returns for eligibility checks (cheap checks first)
if not is_issue_eligible_for_seer_automation(group):
return
if is_seer_scanner_rate_limited(group.project, group.organization):
return

# Now acquire a longer lock to avoid race conditions when starting the automation
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=30, name=lock_name)
if lock.locked():
return

start_seer_automation.delay(group.id)
# Check if summary exists in cache
cache_key = get_issue_summary_cache_key(group.id)
if cache.get(cache_key) is not None:
# Summary exists, run automation directly
run_automation_for_group.delay(group.id)
Contributor

in this case, what guards against re-triggering fixability scoring on every event which is outside the 5 minute dispatch window?

since this will happen unconditionally:

run_automation_for_group -> run_automation -> _generate_fixability_score -> calls seer API

the fixability endpoint will return the fixability score if it already exists for the issue, but it will need to hit the issue summary DB for that. seems expensive to do for every new event when we can check group.seer_fixability_score in sentry
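
For illustration, a minimal sketch of the guard being suggested here — the wrapper name is hypothetical; group.seer_fixability_score and run_automation_for_group come from the diff above:

from sentry.tasks.autofix import run_automation_for_group

def maybe_run_automation(group) -> None:
    # Hypothetical wrapper, not the actual implementation: skip the Seer
    # round-trip when Sentry already has a fixability score for this group.
    if group.seer_fixability_score is not None:
        return
    run_automation_for_group.delay(group.id)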

Contributor Author

The 5 min cache add check (short term check) + seer_autofix_last_triggered (long term check) ensures that run_automation_for_group is never triggered more than once -> fixability is never triggered more than once. Lmk if that makes sense.
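
A rough illustration of the two guards described above (the cache key name and TTL are assumptions, not the exact implementation):

from django.core.cache import cache

def should_dispatch_automation(group) -> bool:
    # Long-term guard: automation has already been triggered for this group.
    if group.seer_autofix_last_triggered is not None:
        return False
    # Short-term guard: cache.add is atomic, so only the first event inside the
    # ~5-minute window wins; later events see the key and skip dispatching.
    dedup_key = f"seer-automation-dispatch:{group.id}"  # hypothetical key name
    return cache.add(dedup_key, True, timeout=300)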

Contributor (@kddubey, Nov 20, 2025)

that sounds good, thanks for clarifying (see comment below)

Contributor (@kddubey, Nov 20, 2025)

wait but seer_autofix_last_triggered is only set if the issue is deemed fixable-enough

Contributor Author

But _is_issue_fixable is useless once triage signals is launched. Once triage signals is launched, AutofixAutomationTuningSettings is either ALWAYS or OFF.
I guess I should add an extra check here to check if it's not set to OFF right?

Contributor Author

Made the change.
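
The follow-up commit isn't shown in this diff, but a sketch of the extra guard discussed above could look like the following (the option key and import path are assumptions):

from sentry.seer.autofix.constants import AutofixAutomationTuningSettings  # assumed import path

def automation_not_disabled(project) -> bool:
    # Hypothetical check: skip automation entirely when the project's tuning
    # setting is OFF, since _is_issue_fixable no longer gates it under triage signals.
    setting = project.get_option("sentry:autofix_automation_tuning")  # assumed option key
    return setting != AutofixAutomationTuningSettings.OFF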

else:
# No summary yet, generate summary + run automation in one go
start_seer_automation.delay(group.id)


GROUP_CATEGORY_POST_PROCESS_PIPELINE = {
183 changes: 183 additions & 0 deletions tests/sentry/tasks/test_post_process.py
@@ -3032,6 +3032,186 @@ def test_kick_off_seer_automation_with_hide_ai_features_enabled(
mock_start_seer_automation.assert_not_called()


class TriageSignalsV0TestMixin(BasePostProgressGroupMixin):
"""Tests for the triage signals V0 flow behind the projects:triage-signals-v0 feature flag."""

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement_for_scanner",
return_value=True,
)
@patch("sentry.tasks.autofix.generate_issue_summary_only.delay")
@with_feature({"organizations:gen-ai-features": True, "projects:triage-signals-v0": True})
def test_triage_signals_event_count_less_than_10_no_cache(
self, mock_generate_summary_only, mock_get_seer_org_acknowledgement
):
"""Test that with event count < 10 and no cached summary, we generate summary only (no automation)."""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)

# Ensure event count < 10
group = event.group
assert group.times_seen < 10

self.call_post_process_group(
is_new=True,
is_regression=False,
is_new_group_environment=True,
event=event,
)

# Should call generate_issue_summary_only (not start_seer_automation)
mock_generate_summary_only.assert_called_once_with(group.id)

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement_for_scanner",
return_value=True,
)
@patch("sentry.tasks.autofix.generate_issue_summary_only.delay")
@with_feature({"organizations:gen-ai-features": True, "projects:triage-signals-v0": True})
def test_triage_signals_event_count_less_than_10_with_cache(
self, mock_generate_summary_only, mock_get_seer_org_acknowledgement
):
"""Test that with event count < 10 and cached summary exists, we do nothing."""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)

# Cache a summary for this group
from sentry.seer.autofix.issue_summary import get_issue_summary_cache_key

group = event.group
cache_key = get_issue_summary_cache_key(group.id)
cache.set(cache_key, {"summary": "test summary"}, 3600)

self.call_post_process_group(
is_new=True,
is_regression=False,
is_new_group_environment=True,
event=event,
)

# Should not call anything since summary exists
mock_generate_summary_only.assert_not_called()

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement_for_scanner",
return_value=True,
)
@patch("sentry.tasks.autofix.run_automation_for_group.delay")
@with_feature({"organizations:gen-ai-features": True, "projects:triage-signals-v0": True})
def test_triage_signals_event_count_gte_10_with_cache(
self, mock_run_automation, mock_get_seer_org_acknowledgement
):
"""Test that with event count >= 10 and cached summary exists, we run automation directly."""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)

# Update group times_seen to simulate >= 10 events
group = event.group
group.times_seen = 10
group.save()
# Also update the event's cached group reference
event.group.times_seen = 10

# Cache a summary for this group
from sentry.seer.autofix.issue_summary import get_issue_summary_cache_key

cache_key = get_issue_summary_cache_key(group.id)
cache.set(cache_key, {"summary": "test summary"}, 3600)

self.call_post_process_group(
is_new=False,
is_regression=False,
is_new_group_environment=False,
event=event,
)

# Should call run_automation_for_group since summary exists
mock_run_automation.assert_called_once_with(group.id)

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement_for_scanner",
return_value=True,
)
@patch("sentry.tasks.autofix.start_seer_automation.delay")
@with_feature({"organizations:gen-ai-features": True, "projects:triage-signals-v0": True})
def test_triage_signals_event_count_gte_10_no_cache(
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
):
"""Test that with event count >= 10 and no cached summary, we generate summary + run automation."""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)

# Update group times_seen to simulate >= 10 events
group = event.group
group.times_seen = 10
group.save()
# Also update the event's cached group reference
event.group.times_seen = 10

self.call_post_process_group(
is_new=False,
is_regression=False,
is_new_group_environment=False,
event=event,
)

# Should call start_seer_automation to generate summary + run automation
mock_start_seer_automation.assert_called_once_with(group.id)

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement_for_scanner",
return_value=True,
)
@patch("sentry.tasks.autofix.run_automation_for_group.delay")
@with_feature({"organizations:gen-ai-features": True, "projects:triage-signals-v0": True})
def test_triage_signals_event_count_gte_10_skips_with_seer_last_triggered(
self, mock_run_automation, mock_get_seer_org_acknowledgement
):
"""Test that with event count >= 10 and seer_autofix_last_triggered set, we skip automation."""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)

# Update group times_seen and seer_autofix_last_triggered
group = event.group
group.times_seen = 10
group.seer_autofix_last_triggered = timezone.now()
group.save()
# Also update the event's cached group reference
event.group.times_seen = 10
event.group.seer_autofix_last_triggered = group.seer_autofix_last_triggered

# Cache a summary for this group
from sentry.seer.autofix.issue_summary import get_issue_summary_cache_key

cache_key = get_issue_summary_cache_key(group.id)
cache.set(cache_key, {"summary": "test summary"}, 3600)

self.call_post_process_group(
is_new=False,
is_regression=False,
is_new_group_environment=False,
event=event,
)

# Should not call automation since seer_autofix_last_triggered is set
mock_run_automation.assert_not_called()


class SeerAutomationHelperFunctionsTestMixin(BasePostProgressGroupMixin):
"""Unit tests for is_issue_eligible_for_seer_automation."""

@@ -3105,6 +3285,7 @@ class PostProcessGroupErrorTest(
InboxTestMixin,
ResourceChangeBoundsTestMixin,
KickOffSeerAutomationTestMixin,
TriageSignalsV0TestMixin,
SeerAutomationHelperFunctionsTestMixin,
RuleProcessorTestMixin,
ServiceHooksTestMixin,
@@ -3194,6 +3375,7 @@ class PostProcessGroupPerformanceTest(
SnoozeTestSkipSnoozeMixin,
PerformanceIssueTestCase,
KickOffSeerAutomationTestMixin,
TriageSignalsV0TestMixin,
):
def create_event(self, data, project_id, assert_no_errors=True):
fingerprint = data["fingerprint"][0] if data.get("fingerprint") else "some_group"
@@ -3313,6 +3495,7 @@ class PostProcessGroupGenericTest(
RuleProcessorTestMixin,
SnoozeTestMixin,
KickOffSeerAutomationTestMixin,
TriageSignalsV0TestMixin,
):
def create_event(self, data, project_id, assert_no_errors=True):
data["type"] = "generic"