From 66ab117fdcb04c2772c6265c95c79eb798a345c5 Mon Sep 17 00:00:00 2001 From: Gguidini Date: Wed, 19 Jul 2023 16:46:01 +0200 Subject: [PATCH] Different handling label analysis if result already calculated This change is intended to work with the PATCHing of labels by the CLI after collecting them. Currently doing that doesn't trigger another task in the worker. So if the labels arrive after we have finished calculating we are left with incomplete results on the database. That is fine as the CLI will calculate again. I think this change might have been a bit premature, but the idea is to have a different way of handling the calculation if we already have results. Then we simply calculate the final result using the saved labels and add in the requested labels that we might not have had the first time around. This would be much much faster than calculating everything again. --- tasks/label_analysis.py | 47 +++++++++++++++++++++++++ tasks/tests/unit/test_label_analysis.py | 25 ++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/tasks/label_analysis.py b/tasks/label_analysis.py index 26ddfc4a3..421613548 100644 --- a/tasks/label_analysis.py +++ b/tasks/label_analysis.py @@ -62,9 +62,13 @@ async def run_async(self, db_session, request_id, *args, **kwargs): "Starting label analysis request", extra=dict( request_id=request_id, + external_id=label_analysis_request.external_id, commit=label_analysis_request.head_commit.commitid, ), ) + if label_analysis_request.state_id == LabelAnalysisRequestState.FINISHED.db_id: + return self._handle_larq_already_calculated(label_analysis_request) + try: lines_relevant_to_diff = await self._get_lines_relevant_to_diff( label_analysis_request @@ -100,6 +104,7 @@ async def run_async(self, db_session, request_id, *args, **kwargs): extra=dict( request_id=request_id, commit=label_analysis_request.head_commit.commitid, + external_id=label_analysis_request.external_id, ), ) label_analysis_request.result = None @@ -124,6 +129,8 
@@ async def run_async(self, db_session, request_id, *args, **kwargs): has_relevant_lines=(lines_relevant_to_diff is not None), has_base_report=(base_report is not None), commit=label_analysis_request.head_commit.commitid, + external_id=label_analysis_request.external_id, + request_id=request_id, ), ) label_analysis_request.state_id = LabelAnalysisRequestState.FINISHED.db_id @@ -153,6 +160,43 @@ def add_processing_error( self.errors.append(error.to_representation()) self.dbsession.add(error) + def _handle_larq_already_calculated(self, larq: LabelAnalysisRequest): + # This means we already calculated everything + # Except possibly the absent labels + log.info( + "Label analysis was request already calculated", + extra=dict( + request_id=larq.id, + external_id=larq.external_id, + commit=larq.head_commit.commitid, + ), + ) + if larq.requested_labels: + saved_result = larq.result + all_saved_labels = set( + saved_result.get("present_report_labels", []) + + saved_result.get("present_diff_labels", []) + + saved_result.get("global_level_labels", []) + ) + executable_lines_saved_labels = set( + saved_result.get("present_diff_labels", []) + ) + global_saved_labels = set(saved_result.get("global_level_labels", [])) + result = self.calculate_final_result( + requested_labels=larq.requested_labels, + existing_labels=( + all_saved_labels, + executable_lines_saved_labels, + global_saved_labels, + ), + commit_sha=larq.head_commit.commitid, + ) + larq.result = result # Save the new result + return { **result, 'success': True } + # No requested labels mean we don't have to calculate again + # This shouldn't actually happen + return { **larq.result, 'success': True } + def _get_requested_labels(self, label_analysis_request: LabelAnalysisRequest): if label_analysis_request.requested_labels: return label_analysis_request.requested_labels @@ -186,6 +230,8 @@ async def _get_lines_relevant_to_diff( extra=dict( lines_relevant_to_diff=executable_lines_relevant_to_diff, 
commit=label_analysis_request.head_commit.commitid, + external_id=label_analysis_request.external_id, + request_id=label_analysis_request.id_, ), ) return executable_lines_relevant_to_diff @@ -210,6 +256,7 @@ async def _get_parsed_git_diff( "Label analysis failed to parse git diff", extra=dict( request_id=label_analysis_request.id, + external_id=label_analysis_request.external_id, commit=label_analysis_request.head_commit.commitid, ), ) diff --git a/tasks/tests/unit/test_label_analysis.py b/tasks/tests/unit/test_label_analysis.py index cc06d2f16..3eea09283 100644 --- a/tasks/tests/unit/test_label_analysis.py +++ b/tasks/tests/unit/test_label_analysis.py @@ -406,7 +406,7 @@ def sample_report_with_labels(): @pytest.mark.asyncio -async def test_simple_call_without_requested_labels( +async def test_simple_call_without_requested_labels_then_with_requested_labels( dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider ): mocker.patch.object( @@ -502,6 +502,29 @@ async def test_simple_call_without_requested_labels( "present_report_labels": expected_present_report_labels, "global_level_labels": ["applejuice", "justjuice", "orangejuice"], } + # Now we call the task again, this time with the requested labels. 
+ # This illustrates what should happen if we patch the labels after calculating + # And trigger the task again to save the new results + larf.requested_labels = ["tangerine", "pear", "banana", "apple"] + dbsession.flush() + res = await task.run_async(dbsession, larf.id) + expected_present_diff_labels = ["banana"] + expected_present_report_labels = ["apple", "banana"] + expected_absent_labels = ["pear", "tangerine"] + assert res == { + "absent_labels": expected_absent_labels, + "present_diff_labels": expected_present_diff_labels, + "present_report_labels": expected_present_report_labels, + "success": True, + "global_level_labels": [], + } + assert larf.result == { + "absent_labels": expected_absent_labels, + "present_diff_labels": expected_present_diff_labels, + "present_report_labels": expected_present_report_labels, + "global_level_labels": [], + } + @pytest.mark.asyncio