Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/sentry/seer/autofix/issue_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def _is_issue_fixable(group: Group, fixability_score: float) -> bool:
return False


def _run_automation(
def run_automation(
group: Group,
user: User | RpcUser | AnonymousUser,
event: GroupEvent,
Expand Down Expand Up @@ -403,7 +403,7 @@ def _generate_summary(

if should_run_automation:
try:
_run_automation(group, user, event, source)
run_automation(group, user, event, source)
except Exception:
logger.exception(
"Error auto-triggering autofix from issue summary", extra={"group_id": group.id}
Expand Down
29 changes: 29 additions & 0 deletions src/sentry/tasks/autofix.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,32 @@ def generate_issue_summary_only(group_id: int) -> None:
get_issue_summary(
group=group, source=SeerAutomationSource.POST_PROCESS, should_run_automation=False
)
# TODO: Generate the fixability score here and check for it in run_automation for triage signals V0.
# Currently fixability is only generated once the event count reaches 10, when run_automation is called.


@instrumented_task(
    name="sentry.tasks.autofix.run_automation_for_group",
    namespace=ingest_errors_tasks,
    processing_deadline_duration=35,
    retry=Retry(times=1),
)
def run_automation_for_group(group_id: int) -> None:
    """
    Trigger Seer automation for a group without regenerating its summary.

    Part of the triage signals flow: meant for groups whose event count is
    at least 10 and for which a summary (and fixability) is assumed to exist
    already. The group's latest event is handed to ``run_automation``; when
    the group has no event, a warning is logged and nothing else happens.
    """
    # Function-scope imports kept as in the original
    # (presumably to avoid an import cycle -- TODO confirm).
    from django.contrib.auth.models import AnonymousUser

    from sentry.seer.autofix.issue_summary import run_automation

    target_group = Group.objects.get(id=group_id)
    latest_event = target_group.get_latest_event()

    if latest_event:
        # Runs with an anonymous user and is tagged as post-process sourced.
        run_automation(
            group=target_group,
            user=AnonymousUser(),
            event=latest_event,
            source=SeerAutomationSource.POST_PROCESS,
        )
    else:
        logger.warning("run_automation_for_group.no_event_found", extra={"group_id": group_id})
87 changes: 72 additions & 15 deletions src/sentry/tasks/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1595,33 +1595,90 @@ def check_if_flags_sent(job: PostProcessJob) -> None:


def kick_off_seer_automation(job: PostProcessJob) -> None:
from sentry.seer.autofix.issue_summary import get_issue_summary_lock_key
from sentry.seer.autofix.issue_summary import (
get_issue_summary_cache_key,
get_issue_summary_lock_key,
)
from sentry.seer.autofix.utils import (
is_issue_eligible_for_seer_automation,
is_seer_scanner_rate_limited,
)
from sentry.tasks.autofix import start_seer_automation
from sentry.tasks.autofix import (
generate_issue_summary_only,
run_automation_for_group,
start_seer_automation,
)

event = job["event"]
group = event.group

# Only run on issues with no existing scan - TODO: Update condition for triage signals V0
if group.seer_fixability_score is not None:
return
# Default behaviour
if not features.has("projects:triage-signals-v0", group.project):
# Only run on issues with no existing scan
if group.seer_fixability_score is not None:
return

if is_issue_eligible_for_seer_automation(group) is False:
return
if is_issue_eligible_for_seer_automation(group) is False:
return

# Don't run if there's already a task in progress for this issue
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=1, name=lock_name)
if lock.locked():
return
# Don't run if there's already a task in progress for this issue
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=1, name=lock_name)
if lock.locked():
return

if is_seer_scanner_rate_limited(group.project, group.organization):
return
if is_seer_scanner_rate_limited(group.project, group.organization):
return

start_seer_automation.delay(group.id)
else:
# Triage signals V0 behaviour

# If event count < 10, only generate summary (no automation)
if group.times_seen < 10:
# Check if summary exists in cache
cache_key = get_issue_summary_cache_key(group.id)
if cache.get(cache_key) is not None:
return

# Check if we're already generating the summary
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=5, name=lock_name)
if lock.locked():
return

# Generate summary (no automation)
if is_issue_eligible_for_seer_automation(group):
if not is_seer_scanner_rate_limited(group.project, group.organization):
generate_issue_summary_only.delay(group.id)
else:
# Event count >= 10: run automation
# Check seer_last_triggered first (long-term check to avoid re-running)
if group.seer_autofix_last_triggered is not None:
return

# Early returns for eligibility checks (cheap checks first)
if not is_issue_eligible_for_seer_automation(group):
return
if is_seer_scanner_rate_limited(group.project, group.organization):
return

start_seer_automation.delay(group.id)
# Check if we're already processing automation for this group
automation_dispatch_cache_key = f"seer-automation-dispatched:{group.id}"
if cache.get(automation_dispatch_cache_key) is not None:
return # Another process already dispatched automation

# Set cache with 5 minute TTL to prevent duplicate dispatches
cache.set(automation_dispatch_cache_key, True, timeout=300)

# Check if summary exists in cache
cache_key = get_issue_summary_cache_key(group.id)
if cache.get(cache_key) is not None:
# Summary exists, run automation directly
run_automation_for_group.delay(group.id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, what guards against re-triggering fixability scoring on every event that arrives outside the 5-minute dispatch window?

since this will happen unconditionally:

run_automation_for_group -> run_automation -> _generate_fixability_score -> calls seer API

The fixability endpoint will return the fixability score if it already exists for the issue, but it will need to hit the issue summary DB for that. That seems expensive to do for every new event when we can check group.seer_fixability_score in Sentry.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 5-minute cache-add check (short-term check) plus the seer_autofix_last_triggered check (long-term check) ensure that run_automation_for_group is never triggered more than once, so fixability scoring is never triggered more than once. Let me know if that makes sense.

Copy link
Contributor

@kddubey kddubey Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that sounds good, thanks for clarifying (see comment below)

Copy link
Contributor

@kddubey kddubey Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, but seer_autofix_last_triggered is only set if the issue is deemed fixable enough.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But _is_issue_fixable becomes irrelevant once triage signals is launched: at that point, AutofixAutomationTuningSettings is either ALWAYS or OFF.
I guess I should add an extra check here to verify that it's not set to OFF, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Made the change.

else:
# No summary yet, generate summary + run automation in one go
start_seer_automation.delay(group.id)


GROUP_CATEGORY_POST_PROCESS_PIPELINE = {
Expand Down
30 changes: 15 additions & 15 deletions tests/sentry/seer/autofix/test_issue_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
_fetch_user_preference,
_get_event,
_get_stopping_point_from_fixability,
_run_automation,
get_issue_summary,
run_automation,
)
from sentry.seer.autofix.utils import AutofixStoppingPoint
from sentry.seer.models import SummarizeIssueResponse, SummarizeIssueScores
Expand Down Expand Up @@ -611,7 +611,7 @@ def test_get_issue_summary_with_web_vitals_issue(
mock_trigger_autofix_task.assert_called_once()

@patch("sentry.seer.autofix.issue_summary.get_seer_org_acknowledgement")
@patch("sentry.seer.autofix.issue_summary._run_automation")
@patch("sentry.seer.autofix.issue_summary.run_automation")
@patch("sentry.seer.autofix.issue_summary._get_trace_tree_for_event")
@patch("sentry.seer.autofix.issue_summary._call_seer")
@patch("sentry.seer.autofix.issue_summary._get_event")
Expand All @@ -623,7 +623,7 @@ def test_get_issue_summary_continues_when_automation_fails(
mock_run_automation,
mock_get_acknowledgement,
):
"""Test that issue summary is still returned when _run_automation throws an exception."""
"""Test that issue summary is still returned when run_automation throws an exception."""
mock_get_acknowledgement.return_value = True

# Set up event and seer response
Expand All @@ -641,7 +641,7 @@ def test_get_issue_summary_continues_when_automation_fails(
)
mock_call_seer.return_value = mock_summary

# Make _run_automation raise an exception
# Make run_automation raise an exception
mock_run_automation.side_effect = Exception("Automation failed")

# Call get_issue_summary and verify it still returns successfully
Expand All @@ -652,7 +652,7 @@ def test_get_issue_summary_continues_when_automation_fails(
expected_response["event_id"] = event.event_id
assert summary_data == convert_dict_key_case(expected_response, snake_to_camel_case)

# Verify _run_automation was called and failed
# Verify run_automation was called and failed
mock_run_automation.assert_called_once()
mock_call_seer.assert_called_once()

Expand Down Expand Up @@ -681,7 +681,7 @@ def test_get_issue_summary_handles_trace_tree_errors(
possible_cause="cause",
),
) as mock_call_seer,
patch("sentry.seer.autofix.issue_summary._run_automation"),
patch("sentry.seer.autofix.issue_summary.run_automation"),
patch(
"sentry.seer.autofix.issue_summary.get_seer_org_acknowledgement",
return_value=True,
Expand All @@ -693,7 +693,7 @@ def test_get_issue_summary_handles_trace_tree_errors(
mock_call_seer.assert_called_once_with(self.group, serialized_event, None)

@patch("sentry.seer.autofix.issue_summary.get_seer_org_acknowledgement")
@patch("sentry.seer.autofix.issue_summary._run_automation")
@patch("sentry.seer.autofix.issue_summary.run_automation")
@patch("sentry.seer.autofix.issue_summary._get_trace_tree_for_event")
@patch("sentry.seer.autofix.issue_summary._call_seer")
@patch("sentry.seer.autofix.issue_summary._get_event")
Expand All @@ -705,7 +705,7 @@ def test_get_issue_summary_with_should_run_automation_false(
mock_run_automation,
mock_get_acknowledgement,
):
"""Test that should_run_automation=False prevents _run_automation from being called."""
"""Test that should_run_automation=False prevents run_automation from being called."""
mock_get_acknowledgement.return_value = True
event = Mock(
event_id="test_event_id",
Expand Down Expand Up @@ -743,7 +743,7 @@ def test_get_issue_summary_with_should_run_automation_false(
mock_call_seer.assert_called_once_with(self.group, serialized_event, {"trace": "tree"})
mock_get_acknowledgement.assert_called_once_with(self.group.organization)

# Verify that _run_automation was NOT called
# Verify that run_automation was NOT called
mock_run_automation.assert_not_called()

# Check if the cache was set correctly
Expand Down Expand Up @@ -798,7 +798,7 @@ def test_high_fixability_code_changes(
possible_cause="c",
scores=SummarizeIssueScores(fixability_score=0.70),
)
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
mock_trigger.assert_called_once()
assert mock_trigger.call_args[1]["stopping_point"] == AutofixStoppingPoint.CODE_CHANGES

Expand All @@ -822,7 +822,7 @@ def test_medium_fixability_solution(
possible_cause="c",
scores=SummarizeIssueScores(fixability_score=0.50),
)
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
mock_trigger.assert_called_once()
assert mock_trigger.call_args[1]["stopping_point"] == AutofixStoppingPoint.SOLUTION

Expand All @@ -848,7 +848,7 @@ def test_without_feature_flag(self, mock_gen, mock_budget, mock_state, mock_rate
with self.feature(
{"organizations:gen-ai-features": True, "projects:triage-signals-v0": False}
):
_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)

mock_trigger.assert_called_once()
assert mock_trigger.call_args[1]["stopping_point"] is None
Expand Down Expand Up @@ -1001,7 +1001,7 @@ def test_user_preference_limits_high_fixability(
)
mock_fetch.return_value = "solution"

_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)

mock_trigger.assert_called_once()
# Should be limited to SOLUTION by user preference
Expand Down Expand Up @@ -1031,7 +1031,7 @@ def test_fixability_limits_permissive_user_preference(
)
mock_fetch.return_value = "open_pr"

_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)

mock_trigger.assert_called_once()
# Should use SOLUTION from fixability, not OPEN_PR from user
Expand Down Expand Up @@ -1061,7 +1061,7 @@ def test_no_user_preference_uses_fixability_only(
)
mock_fetch.return_value = None

_run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)
run_automation(self.group, self.user, self.event, SeerAutomationSource.ALERT)

mock_trigger.assert_called_once()
# Should use OPEN_PR from fixability
Expand Down
Loading
Loading