Different handling label analysis if result already calculated (#21)

* Different handling label analysis if result already calculated This change is intended to work with the PATCHing of labels by the CLI after collecting them. Currently, doing that doesn't trigger another task in the worker. So if the labels arrive after we finished calculating, we are left with incomplete results in the database. That is fine, as the CLI will calculate again. I think this change might have been a bit premature, but the idea is to have a different way of handling the calculation if we already have results. Then we simply calculate the final result using the saved labels and add in the requested labels that we might not have had the first time around. This would be much, much faster than calculating everything again. * chore: Update branch Updating branch and making sure all tests are OK.
codecov · Sep 11, 2023 · 6d39922 · 6d39922
1 parent 3056f09
commit 6d39922
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 1 deletion.
diff --git a/tasks/label_analysis.py b/tasks/label_analysis.py
@@ -62,9 +62,16 @@ async def run_async(self, db_session, request_id, *args, **kwargs):
             "Starting label analysis request",
             extra=dict(
                 request_id=request_id,
+                external_id=label_analysis_request.external_id,
                 commit=label_analysis_request.head_commit.commitid,
             ),
         )
+
+        if label_analysis_request.state_id == LabelAnalysisRequestState.FINISHED.db_id:
+            # Indicates that this request has been calculated already
+            # We might need to update the requested labels
+            return self._handle_larq_already_calculated(label_analysis_request)
+
         try:
             lines_relevant_to_diff = await self._get_lines_relevant_to_diff(
                 label_analysis_request
@@ -100,6 +107,7 @@ async def run_async(self, db_session, request_id, *args, **kwargs):
                 extra=dict(
                     request_id=request_id,
                     commit=label_analysis_request.head_commit.commitid,
+                    external_id=label_analysis_request.external_id,
                 ),
             )
             label_analysis_request.result = None
@@ -124,6 +132,8 @@ async def run_async(self, db_session, request_id, *args, **kwargs):
                 has_relevant_lines=(lines_relevant_to_diff is not None),
                 has_base_report=(base_report is not None),
                 commit=label_analysis_request.head_commit.commitid,
+                external_id=label_analysis_request.external_id,
+                request_id=request_id,
             ),
         )
         label_analysis_request.state_id = LabelAnalysisRequestState.FINISHED.db_id
@@ -153,6 +163,44 @@ def add_processing_error(
         self.errors.append(error.to_representation())
         self.dbsession.add(error)
 
+    def _handle_larq_already_calculated(self, larq: LabelAnalysisRequest):
+        # This means we already calculated everything
+        # Except possibly the absent labels
+        log.info(
+            "Label analysis request was already calculated",
+            extra=dict(
+                request_id=larq.id,
+                external_id=larq.external_id,
+                commit=larq.head_commit.commitid,
+            ),
+        )
+        if larq.requested_labels:
+            saved_result = larq.result
+            all_saved_labels = set(
+                saved_result.get("present_report_labels", [])
+                + saved_result.get("present_diff_labels", [])
+                + saved_result.get("global_level_labels", [])
+            )
+            executable_lines_saved_labels = set(
+                saved_result.get("present_diff_labels", [])
+            )
+            global_saved_labels = set(saved_result.get("global_level_labels", []))
+            result = self.calculate_final_result(
+                requested_labels=larq.requested_labels,
+                existing_labels=(
+                    all_saved_labels,
+                    executable_lines_saved_labels,
+                    global_saved_labels,
+                ),
+                commit_sha=larq.head_commit.commitid,
+            )
+            larq.result = result  # Save the new result
+            return {**result, "success": True, "errors": []}
+        # No requested labels mean we don't have any new information
+        # So we don't need to calculate again
+        # This shouldn't actually happen
+        return {**larq.result, "success": True, "errors": []}
+
     def _get_requested_labels(self, label_analysis_request: LabelAnalysisRequest):
         if label_analysis_request.requested_labels:
             return label_analysis_request.requested_labels
@@ -186,6 +234,8 @@ async def _get_lines_relevant_to_diff(
                 extra=dict(
                     lines_relevant_to_diff=executable_lines_relevant_to_diff,
                     commit=label_analysis_request.head_commit.commitid,
+                    external_id=label_analysis_request.external_id,
+                    request_id=label_analysis_request.id_,
                 ),
             )
             return executable_lines_relevant_to_diff
@@ -210,6 +260,7 @@ async def _get_parsed_git_diff(
                 "Label analysis failed to parse git diff",
                 extra=dict(
                     request_id=label_analysis_request.id,
+                    external_id=label_analysis_request.external_id,
                     commit=label_analysis_request.head_commit.commitid,
                 ),
             )

diff --git a/tasks/tests/unit/test_label_analysis.py b/tasks/tests/unit/test_label_analysis.py
@@ -406,7 +406,7 @@ def sample_report_with_labels():
 
 
 @pytest.mark.asyncio
-async def test_simple_call_without_requested_labels(
+async def test_simple_call_without_requested_labels_then_with_requested_labels(
     dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
 ):
     mocker.patch.object(
@@ -502,6 +502,29 @@ async def test_simple_call_without_requested_labels(
         "present_report_labels": expected_present_report_labels,
         "global_level_labels": ["applejuice", "justjuice", "orangejuice"],
     }
+    # Now we call the task again, this time with the requested labels.
+    # This illustrates what should happen if we patch the labels after calculating
+    # And trigger the task again to save the new results
+    larf.requested_labels = ["tangerine", "pear", "banana", "apple"]
+    dbsession.flush()
+    res = await task.run_async(dbsession, larf.id)
+    expected_present_diff_labels = ["banana"]
+    expected_present_report_labels = ["apple", "banana"]
+    expected_absent_labels = ["pear", "tangerine"]
+    assert res == {
+        "absent_labels": expected_absent_labels,
+        "present_diff_labels": expected_present_diff_labels,
+        "present_report_labels": expected_present_report_labels,
+        "success": True,
+        "global_level_labels": [],
+        "errors": [],
+    }
+    assert larf.result == {
+        "absent_labels": expected_absent_labels,
+        "present_diff_labels": expected_present_diff_labels,
+        "present_report_labels": expected_present_report_labels,
+        "global_level_labels": [],
+    }
 
 
 @pytest.mark.asyncio