diff --git a/tasks/label_analysis.py b/tasks/label_analysis.py index 0ecebbaea..24bae155f 100644 --- a/tasks/label_analysis.py +++ b/tasks/label_analysis.py @@ -2,12 +2,10 @@ from typing import List, Optional, Set, Tuple import sentry_sdk -from shared import torngit from shared.celery_config import label_analysis_task_name from shared.labelanalysis import LabelAnalysisRequestState from app import celery_app -from database.models.core import Commit from database.models.labelanalysis import LabelAnalysisRequest from database.models.staticanalysis import StaticAnalysisSuite from helpers.labels import get_all_report_labels, get_labels_per_session @@ -31,7 +29,7 @@ class LabelAnalysisRequestProcessingTask(BaseCodecovTask): name = label_analysis_task_name async def run_async(self, db_session, request_id, *args, **kwargs): - label_analysis_request = ( + label_analysis_request: LabelAnalysisRequest = ( db_session.query(LabelAnalysisRequest) .filter(LabelAnalysisRequest.id_ == request_id) .first() @@ -51,9 +49,13 @@ async def run_async(self, db_session, request_id, *args, **kwargs): "Starting label analysis request", extra=dict( request_id=request_id, + external_id=label_analysis_request.external_id, commit=label_analysis_request.head_commit.commitid, ), ) + if label_analysis_request.state_id == LabelAnalysisRequestState.FINISHED.db_id: + return self._handle_larq_already_calculated(label_analysis_request) + try: lines_relevant_to_diff = await self._get_lines_relevant_to_diff( label_analysis_request @@ -90,6 +92,7 @@ async def run_async(self, db_session, request_id, *args, **kwargs): extra=dict( request_id=request_id, commit=label_analysis_request.head_commit.commitid, + external_id=label_analysis_request.external_id, ), ) label_analysis_request.result = None @@ -107,6 +110,8 @@ async def run_async(self, db_session, request_id, *args, **kwargs): has_relevant_lines=(lines_relevant_to_diff is not None), has_base_report=(base_report is not None), commit=label_analysis_request.head_commit.commitid, + external_id=label_analysis_request.external_id, + request_id=request_id, ), ) label_analysis_request.state_id = LabelAnalysisRequestState.FINISHED.db_id @@ -120,6 +125,43 @@ async def run_async(self, db_session, request_id, *args, **kwargs): label_analysis_request.result = result return result + def _handle_larq_already_calculated(self, larq: LabelAnalysisRequest): + # This means we already calculated everything + # Except possibly the absent labels + log.info( + "Label analysis was request already calculated", + extra=dict( + request_id=larq.id, + external_id=larq.external_id, + commit=larq.head_commit.commitid, + ), + ) + if larq.requested_labels: + saved_result = larq.result + all_saved_labels = set( + saved_result.get("present_report_labels", []) + + saved_result.get("present_diff_labels", []) + + saved_result.get("global_level_labels", []) + ) + executable_lines_saved_labels = set( + saved_result.get("present_diff_labels", []) + ) + global_saved_labels = set(saved_result.get("global_level_labels", [])) + result = self.calculate_final_result( + requested_labels=larq.requested_labels, + existing_labels=( + all_saved_labels, + executable_lines_saved_labels, + global_saved_labels, + ), + commit_sha=larq.head_commit.commitid, + ) + larq.result = result # Save the new result + return { **result, 'success': True } + # No requested labels mean we don't have to calculate again + # This shouldn't actually happen + return { **larq.result, 'success': True } + def _get_requested_labels( self, label_analysis_request: LabelAnalysisRequest, dbsession ): @@ -155,6 +197,8 @@ async def _get_lines_relevant_to_diff( extra=dict( lines_relevant_to_diff=executable_lines_relevant_to_diff, commit=label_analysis_request.head_commit.commitid, + external_id=label_analysis_request.external_id, + request_id=label_analysis_request.id_, ), ) return executable_lines_relevant_to_diff @@ -179,6 +223,7 @@ async def _get_parsed_git_diff( "Label analysis failed to parse git diff", extra=dict( request_id=label_analysis_request.id, + external_id=label_analysis_request.external_id, commit=label_analysis_request.head_commit.commitid, ), exc_info=True, diff --git a/tasks/tests/unit/test_label_analysis.py b/tasks/tests/unit/test_label_analysis.py index 9d06f10d0..f49fc26cf 100644 --- a/tasks/tests/unit/test_label_analysis.py +++ b/tasks/tests/unit/test_label_analysis.py @@ -405,7 +405,7 @@ def sample_report_with_labels(): @pytest.mark.asyncio -async def test_simple_call_without_requested_labels( +async def test_simple_call_without_requested_labels_then_with_requested_labels( dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider ): mocker.patch.object( @@ -500,6 +500,29 @@ async def test_simple_call_without_requested_labels( "present_report_labels": expected_present_report_labels, "global_level_labels": ["applejuice", "justjuice", "orangejuice"], } + # Now we call the task again, this time with the requested labels. + # This illustrates what should happen if we patch the labels after calculating + # And trigger the task again to save the new results + larf.requested_labels = ["tangerine", "pear", "banana", "apple"] + dbsession.flush() + res = await task.run_async(dbsession, larf.id) + expected_present_diff_labels = ["banana"] + expected_present_report_labels = ["apple", "banana"] + expected_absent_labels = ["pear", "tangerine"] + assert res == { + "absent_labels": expected_absent_labels, + "present_diff_labels": expected_present_diff_labels, + "present_report_labels": expected_present_report_labels, + "success": True, + "global_level_labels": [], + } + assert larf.result == { + "absent_labels": expected_absent_labels, + "present_diff_labels": expected_present_diff_labels, + "present_report_labels": expected_present_report_labels, + "global_level_labels": [], + } + @pytest.mark.asyncio