Skip to content

Commit

Permalink
fix: don't eager merge sessions into report (#336)
Browse files Browse the repository at this point in the history
context: https://l.codecov.dev/xvGZaM

Because we eagerly merge sessions when processing a report the following scenario can happen:
1. You upload an empty report with a flag set to carryforward
2. There will be a new session for that upload (even though it's empty)
3. In the next commit the session will be carried forward

If no real upload was made to clean up old carriedforward sessions from empty uploads
they would accumulate in the report (because `_adjust_sessions` only runs *after* the
processing is done, and *doesn't run* if it's an empty upload). What prompted this
investigation is that a customer was fund with thousands of carryforward session in
some of their commits.

So I'm exploiting the fact that merges into the report need to be made from within a lock
and getting the ID for the session _without_ actually adding it to the original report.

If the report is successful we add it and run `_adjust_sessions`. If the report was empty,
we just never add it to the report, as if it didn't existed.

OBS.: Even though I use the litle exploit I believe that passing the `sessionid` to
`ReportBuilder` is unecessary. We can likely not get a sessionid at all when processin the report
and it would still work. I decided to leave this for it's own change tho.

Also I decided to bump the severity of the messages that indicate a report is empty.
Users are not expected to upload empty reports, so it's at least sensible it should be
a warning.
  • Loading branch information
giovanni-guidini authored Mar 18, 2024
1 parent 97ce1f7 commit 77e34ba
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 39 deletions.
2 changes: 1 addition & 1 deletion services/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,7 +893,7 @@ def build_report_from_raw_content(
upload_obj=upload,
)
except ReportEmptyError:
log.info(
log.warning(
"Report %s is empty",
reportid,
extra=dict(repoid=commit.repoid, commit=commit.commitid),
Expand Down
10 changes: 9 additions & 1 deletion services/report/raw_upload_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,14 @@ def process_raw_upload(
else:
ignored_file_lines = None

session = session or Session()
# Get a sesisonid to merge into
# anything merged into the original_report
# will take on this sessionid
sessionid, session = original_report.add_session(session or Session())
# But we don't actually merge yet in case the report is empty.
# This is done to avoid garbage sessions to build up in the report
# How can you be sure this will be the sessionid used when you actually merge it? Remember that this piece of code runs inside a lock u.u
sessionid = original_report.next_session_number()
session.id = sessionid
if env:
session.env = dict([e.split("=", 1) for e in env.split("\n") if "=" in e])
Expand Down Expand Up @@ -182,6 +186,10 @@ def process_raw_upload(
# none of the reports processed contributed any new labels to it.
# So we assume there are no labels and just reset the _labels_index of temporary_report
temporary_report.labels_index = None
# Now we actually add the session to the original_report
# Because we know that the processing was successful
sessionid, session = original_report.add_session(session)
# Adjust sessions removed carryforward sessions that are being replaced
session_manipulation_result = _adjust_sessions(
original_report,
temporary_report,
Expand Down
2 changes: 1 addition & 1 deletion services/report/report_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def process_report(
f"worker.services.report.processors.{processor.name}.failure"
)
raise
log.info(
log.warning(
"File format could not be recognized",
extra=dict(
report_filename=name, first_line=first_line[:100], report_type=report_type
Expand Down
96 changes: 60 additions & 36 deletions services/report/tests/unit/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ def test_process_raw_upload_empty_report(self):
session=Session(flags=["fruits"]),
flags=[],
)
assert len(original_report.sessions) == 2
assert sorted(original_report.flags.keys()) == ["fruits", "unit"]
assert len(original_report.sessions) == 1
assert sorted(original_report.flags.keys()) == ["unit"]
assert original_report.files == ["file_1.go", "file_2.py"]
assert original_report.flags["unit"].totals == ReportTotals(
files=2,
Expand All @@ -215,21 +215,7 @@ def test_process_raw_upload_empty_report(self):
complexity_total=0,
diff=0,
)
assert original_report.flags["fruits"].totals == ReportTotals(
files=0,
lines=0,
hits=0,
misses=0,
partials=0,
coverage=None,
branches=0,
methods=0,
messages=0,
sessions=1,
complexity=0,
complexity_total=0,
diff=0,
)
assert original_report.flags.get("fruits") is None
general_totals, json_data = original_report.to_database()
assert general_totals == {
"f": 2,
Expand All @@ -241,7 +227,7 @@ def test_process_raw_upload_empty_report(self):
"b": 1,
"d": 0,
"M": 0,
"s": 2,
"s": 1,
"C": 10,
"N": 2,
"diff": None,
Expand Down Expand Up @@ -279,22 +265,7 @@ def test_process_raw_upload_empty_report(self):
"e": None,
"st": "uploaded",
"se": {},
},
"1": {
"t": None,
"d": None,
"a": None,
"f": ["fruits"],
"c": None,
"n": None,
"N": None,
"j": None,
"u": None,
"p": None,
"e": None,
"st": "uploaded",
"se": {},
},
}
},
}

Expand Down Expand Up @@ -1015,9 +986,8 @@ def test_process_raw_upload_multiple_raw_reports(self, mocker):


class TestProcessRawUploadCarryforwardFlags(BaseTestCase):
def test_process_raw_upload_with_carryforwarded_flags(self):
def _generate_sample_report(self) -> EditableReport:
original_report = EditableReport()
upload_flags = ["somethingold", "flag_two"]
first_file = EditableReportFile("banana.py")
first_file.append(100, ReportLine.create(1, sessions=[LineSession(0, 1)]))
first_file.append(200, ReportLine.create(0, sessions=[LineSession(0, 0)]))
Expand Down Expand Up @@ -1062,6 +1032,11 @@ def test_process_raw_upload_with_carryforwarded_flags(self):
],
"third.c": [(2, 1, None, [[1, 1, None, None, None]], None, None)],
}
return original_report

def test_process_raw_upload_with_carryforwarded_flags(self):
original_report = self._generate_sample_report()
upload_flags = ["somethingold", "flag_two"]
raw_content = dumps(
{
"coverage": {
Expand Down Expand Up @@ -1229,3 +1204,52 @@ def test_process_raw_upload_with_carryforwarded_flags(self):
(1, 1, None, [[session.id, 1, None, None, None]], None, None)
],
}

def test_process_raw_upload_carryforward_flag_empty_upload(self):
"""Tests that an upload with carryforward flag doesn't create a new session
in the original report for the flag it was uploaded with.
That is, a new session is created in the commit's report only if the upload
was properly processed
"""
original_report = self._generate_sample_report()
original_sessions = original_report.sessions
expected_sessions = {
0: Session(flags=["super"]),
1: Session(flags=["somethingold"], session_type=SessionType.carriedforward),
}
assert isinstance(original_sessions, dict)
assert list(original_sessions.keys()) == [0, 1]
assert original_sessions == expected_sessions
empty_uploaded_report = LegacyParsedRawReport(
toc=None,
env=None,
report_fixes=None,
uploaded_files=[
ParsedUploadedReportFile(
filename="/Users/path/to/app.coverage.json",
file_contents=BytesIO(
"Bogus report that shouldn't match any parser".encode()
),
),
],
)
session = Session(flags=["somethingold"])
with pytest.raises(ReportEmptyError):
process.process_raw_upload(
UserYaml(
{
"flag_management": {
"individual_flags": [
{"name": "somethingold", "carryforward": True}
]
}
}
),
original_report,
empty_uploaded_report,
["somethingold"],
session=session,
)
assert list(original_report.sessions.keys()) == [0, 1]
assert original_report.sessions == expected_sessions

0 comments on commit 77e34ba

Please sign in to comment.