Skip to content

Commit

Permalink
Different handling label analysis if result already calculated (#21)
Browse files Browse the repository at this point in the history
* Different handling label analysis if result already calculated

This change is intended to work with the PATCHing of labels by the CLI after collecting them.
Currently, doing that doesn't trigger another task in the worker. So if the labels arrive after we have finished calculating, we are left with incomplete results in the database. That is fine, as the CLI will calculate again.

I think this change might have been a bit premature, but the idea is to have a different way of handling the calculation if we already have results. Then we simply calculate the final result using the saved labels and add in the requested labels that we might not have had the first time around. This would be much much faster than calculating everything again.

* chore: Update branch

Updating branch and making sure all tests are OK.
  • Loading branch information
giovanni-guidini committed Sep 11, 2023
1 parent 3056f09 commit 6d39922
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 1 deletion.
51 changes: 51 additions & 0 deletions tasks/label_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,16 @@ async def run_async(self, db_session, request_id, *args, **kwargs):
"Starting label analysis request",
extra=dict(
request_id=request_id,
external_id=label_analysis_request.external_id,
commit=label_analysis_request.head_commit.commitid,
),
)

if label_analysis_request.state_id == LabelAnalysisRequestState.FINISHED.db_id:
# Indicates that this request has been calculated already
# We might need to update the requested labels
return self._handle_larq_already_calculated(label_analysis_request)

try:
lines_relevant_to_diff = await self._get_lines_relevant_to_diff(
label_analysis_request
Expand Down Expand Up @@ -100,6 +107,7 @@ async def run_async(self, db_session, request_id, *args, **kwargs):
extra=dict(
request_id=request_id,
commit=label_analysis_request.head_commit.commitid,
external_id=label_analysis_request.external_id,
),
)
label_analysis_request.result = None
Expand All @@ -124,6 +132,8 @@ async def run_async(self, db_session, request_id, *args, **kwargs):
has_relevant_lines=(lines_relevant_to_diff is not None),
has_base_report=(base_report is not None),
commit=label_analysis_request.head_commit.commitid,
external_id=label_analysis_request.external_id,
request_id=request_id,
),
)
label_analysis_request.state_id = LabelAnalysisRequestState.FINISHED.db_id
Expand Down Expand Up @@ -153,6 +163,44 @@ def add_processing_error(
self.errors.append(error.to_representation())
self.dbsession.add(error)

def _handle_larq_already_calculated(self, larq: LabelAnalysisRequest):
    """Rebuild the final result of a request that already has a saved result.

    Instead of running the whole analysis again, the label lists stored in
    ``larq.result`` are reused as the "existing labels" and combined with
    ``larq.requested_labels`` through ``self.calculate_final_result``. The
    fresh result is written back to ``larq.result``.

    Args:
        larq: the label analysis request whose result was already saved.

    Returns:
        A dict with the (possibly recomputed) result entries plus
        ``"success": True`` and ``"errors": []``.
    """
    log.info(
        "Label analysis request was already calculated",
        extra={
            "request_id": larq.id,
            "external_id": larq.external_id,
            "commit": larq.head_commit.commitid,
        },
    )

    if not larq.requested_labels:
        # Nothing new was requested, so the stored result is returned as-is.
        # Not expected to happen in practice.
        return {**larq.result, "success": True, "errors": []}

    # Everything was calculated before, except possibly the absent labels,
    # so the stored label lists stand in for a full recalculation.
    previous = larq.result
    report_labels = previous.get("present_report_labels", [])
    diff_labels = previous.get("present_diff_labels", [])
    global_labels = previous.get("global_level_labels", [])

    # Order matters: (all labels, labels on executable diff lines, global labels).
    existing_labels = (
        set(report_labels + diff_labels + global_labels),
        set(diff_labels),
        set(global_labels),
    )
    refreshed = self.calculate_final_result(
        requested_labels=larq.requested_labels,
        existing_labels=existing_labels,
        commit_sha=larq.head_commit.commitid,
    )
    larq.result = refreshed  # Persist the updated result on the request
    return {**refreshed, "success": True, "errors": []}

def _get_requested_labels(self, label_analysis_request: LabelAnalysisRequest):
if label_analysis_request.requested_labels:
return label_analysis_request.requested_labels
Expand Down Expand Up @@ -186,6 +234,8 @@ async def _get_lines_relevant_to_diff(
extra=dict(
lines_relevant_to_diff=executable_lines_relevant_to_diff,
commit=label_analysis_request.head_commit.commitid,
external_id=label_analysis_request.external_id,
request_id=label_analysis_request.id_,
),
)
return executable_lines_relevant_to_diff
Expand All @@ -210,6 +260,7 @@ async def _get_parsed_git_diff(
"Label analysis failed to parse git diff",
extra=dict(
request_id=label_analysis_request.id,
external_id=label_analysis_request.external_id,
commit=label_analysis_request.head_commit.commitid,
),
)
Expand Down
25 changes: 24 additions & 1 deletion tasks/tests/unit/test_label_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ def sample_report_with_labels():


@pytest.mark.asyncio
async def test_simple_call_without_requested_labels(
async def test_simple_call_without_requested_labels_then_with_requested_labels(
dbsession, mock_storage, mocker, sample_report_with_labels, mock_repo_provider
):
mocker.patch.object(
Expand Down Expand Up @@ -502,6 +502,29 @@ async def test_simple_call_without_requested_labels(
"present_report_labels": expected_present_report_labels,
"global_level_labels": ["applejuice", "justjuice", "orangejuice"],
}
# Now we call the task again, this time with the requested labels.
# This illustrates what should happen if we patch the labels after calculating
# And trigger the task again to save the new results
larf.requested_labels = ["tangerine", "pear", "banana", "apple"]
dbsession.flush()
res = await task.run_async(dbsession, larf.id)
expected_present_diff_labels = ["banana"]
expected_present_report_labels = ["apple", "banana"]
expected_absent_labels = ["pear", "tangerine"]
assert res == {
"absent_labels": expected_absent_labels,
"present_diff_labels": expected_present_diff_labels,
"present_report_labels": expected_present_report_labels,
"success": True,
"global_level_labels": [],
"errors": [],
}
assert larf.result == {
"absent_labels": expected_absent_labels,
"present_diff_labels": expected_present_diff_labels,
"present_report_labels": expected_present_report_labels,
"global_level_labels": [],
}


@pytest.mark.asyncio
Expand Down

0 comments on commit 6d39922

Please sign in to comment.