Skip to content

Commit

Permalink
feat: move Pull.flare to storage (#57)
Browse files Browse the repository at this point in the history
This commit is the initial work to move `Pull.flare` to storage.
This column has crashed the DB on only one occasion in the past.

Notice that Pull doesn't have `commit` (because multiple commits refer to the same pull).
To deal with that, I changed the data-writing logic to have two paths: one with a commit and one
without. So pull data will be written at the repo level (in storage).

closes codecov/engineering-team#70
  • Loading branch information
giovanni-guidini committed Aug 15, 2023
1 parent d16beed commit 3dbbeed
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 20 deletions.
30 changes: 30 additions & 0 deletions database/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,36 @@ def get_head_commit_notifications(self):
)
return []

def get_repository(self):
    """Return the repository row this pull request belongs to."""
    repository = self.repository
    return repository

def get_commitid(self):
    """A pull spans many commits, so there is no single associated commitid."""
    return None

@property
def external_id(self):
    """Identifier used when building this pull's archive-storage path."""
    return self.pullid

@property
def id(self):
    """Expose the primary key (stored as ``id_``) under the conventional name."""
    return self.id_

def should_write_to_storage(self) -> bool:
    """Return True when this pull's flare should be persisted to archive
    storage rather than the database, per the feature-flag configuration.

    Defensively returns False when the repository or its owner is missing.
    """
    repo = self.repository
    if repo is None or repo.owner is None:
        return False
    return should_write_data_to_storage_config_check(
        master_switch_key="pull_flare",
        is_codecov_repo=(repo.owner.username == "codecov"),
        repoid=repo.repoid,
    )

# Legacy in-database JSON column; kept so rows written before the move to
# storage can still be read.
_flare = Column("flare", postgresql.JSON)
# Location of the flare blob in archive storage; NULL until first write there.
_flare_storage_path = Column("flare_storage_path", types.Text, nullable=True)
# Unified accessor for flare. Presumably ArchiveField reads from whichever
# backing field is populated and routes writes via the gating function —
# TODO confirm against ArchiveField's implementation.
flare = ArchiveField(
    should_write_to_storage_fn=should_write_to_storage, default_value={}
)


class CommitNotification(CodecovBaseModel):
__tablename__ = "commit_notifications"
Expand Down
11 changes: 9 additions & 2 deletions database/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import logging
from functools import lru_cache
from typing import Any, Callable
from typing import Any, Callable, Optional

from shared.storage.exceptions import FileNotInStorageError
from shared.utils.ReportEncoder import ReportEncoder
Expand All @@ -18,16 +18,23 @@ def __subclasscheck__(cls, subclass):
and callable(subclass.get_repository)
and hasattr(subclass, "get_commitid")
and callable(subclass.get_commitid)
and hasattr(subclass, "external_id")
)


class ArchiveFieldInterface(metaclass=ArchiveFieldInterfaceMeta):
    """Structural contract for any class that declares an ArchiveField.

    Implementers must provide ``external_id`` plus the two accessors below;
    conformance is checked structurally via the metaclass.
    """

    # Identifier used to build the object's storage path.
    external_id: str

    def get_repository(self):
        """Return the repository object associated with self."""
        raise NotImplementedError()

    def get_commitid(self) -> Optional[str]:
        """Return the commitid associated with self, or None when no single
        commit applies (e.g. a pull spans multiple commits).
        """
        raise NotImplementedError()


Expand Down
35 changes: 22 additions & 13 deletions services/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
class MinioEndpoints(Enum):
chunks = "{version}/repos/{repo_hash}/commits/{commitid}/{chunks_file_name}.txt"
json_data = "{version}/repos/{repo_hash}/commits/{commitid}/json_data/{table}/{field}/{external_id}.json"
json_data_no_commit = (
"{version}/repos/{repo_hash}/json_data/{table}/{field}/{external_id}.json"
)
profiling_summary = "{version}/repos/{repo_hash}/profilingsummaries/{profiling_commit_id}/{location}"
raw = "v4/raw/{date}/{repo_hash}/{commit_sha}/{reportid}.txt"
profiling_collection = "{version}/repos/{repo_hash}/profilingcollections/{profiling_commit_id}/{location}"
Expand Down Expand Up @@ -200,20 +203,26 @@ def write_json_data_to_storage(
*,
encoder=ReportEncoder,
):
path = MinioEndpoints.json_data.get_path(
version="v4",
repo_hash=self.storage_hash,
commitid=commit_id,
table=table,
field=field,
external_id=external_id,
)
stringified_data = json.dumps(data, cls=encoder)
if table == "reports_reportdetails" and not ("meta" in stringified_data):
log.warning(
"Saving files_array data without meta",
extra=dict(commit=commit_id, data=stringified_data, path=path),
if commit_id is None:
# Some classes don't have a commit associated with them
# For example Pull belongs to multiple commits.
path = MinioEndpoints.json_data_no_commit.get_path(
version="v4",
repo_hash=self.storage_hash,
table=table,
field=field,
external_id=external_id,
)
else:
path = MinioEndpoints.json_data.get_path(
version="v4",
repo_hash=self.storage_hash,
commitid=commit_id,
table=table,
field=field,
external_id=external_id,
)
stringified_data = json.dumps(data, cls=encoder)
self.write_file(path, stringified_data)
return path

Expand Down
15 changes: 10 additions & 5 deletions services/tests/test_repository_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,8 @@ async def test_fetch_and_update_pull_request_information_from_commit_new_pull_co
assert res.head == commit.commitid
assert res.commentid is None
assert res.diff is None
assert res.flare is None
assert res._flare is None
assert res._flare_storage_path is None
assert (
res.author
== dbsession.query(Owner)
Expand Down Expand Up @@ -1007,7 +1008,8 @@ async def test_fetch_and_update_pull_request_information_from_commit_existing_pu
assert res.head == commit.commitid
assert res.commentid is None
assert res.diff is None
assert res.flare is None
assert res._flare is None
assert res._flare_storage_path is None
assert (
res.author
== dbsession.query(Owner)
Expand Down Expand Up @@ -1091,7 +1093,8 @@ async def test_fetch_and_update_pull_request_multiple_pulls_same_repo(
assert res.head == commit.commitid
assert res.commentid is None
assert res.diff is None
assert res.flare is None
assert res._flare is None
assert res._flare_storage_path is None
assert (
res.author
== dbsession.query(Owner)
Expand Down Expand Up @@ -1181,7 +1184,8 @@ async def test_fetch_and_update_pull_request_information_from_commit_different_c
assert res.head == commit.commitid
assert res.commentid is None
assert res.diff is None
assert res.flare is None
assert res._flare is None
assert res._flare_storage_path is None
assert (
res.author
== dbsession.query(Owner)
Expand Down Expand Up @@ -1252,7 +1256,8 @@ async def test_fetch_and_update_pull_request_information_no_compared_to(
assert res.head is None
assert res.commentid is None
assert res.diff is None
assert res.flare is None
assert res._flare is None
assert res._flare_storage_path is None
assert (
res.author
== dbsession.query(Owner)
Expand Down
50 changes: 50 additions & 0 deletions services/tests/unit/test_archive_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,53 @@ def test_write_report_details_to_storage(self, mocker, dbsession):
is_already_gzipped=False,
reduced_redundancy=False,
)

def test_write_report_details_to_storage_no_commitid(self, mocker, dbsession):
    """Writing json data with commit_id=None must land at the repo-level
    (commit-less) json_data storage path."""
    repo = RepositoryFactory()
    dbsession.add(repo)
    dbsession.flush()
    mocked_write_file = mocker.patch.object(MinioStorageService, "write_file")

    files_array = [
        {
            "filename": "file_1.go",
            "file_index": 0,
            "file_totals": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2, 0],
            "session_totals": {
                "0": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2],
                "meta": {"session_count": 1},
            },
            "diff_totals": None,
        },
        {
            "filename": "file_2.py",
            "file_index": 1,
            "file_totals": [0, 2, 1, 0, 1, "50.00000", 1, 0, 0, 0, 0, 0, 0],
            "session_totals": {
                "0": [0, 2, 1, 0, 1, "50.00000", 1],
                "meta": {"session_count": 1},
            },
            "diff_totals": None,
        },
    ]
    service = ArchiveService(repository=repo)
    external_id = "some-uuid4-id"
    path = service.write_json_data_to_storage(
        commit_id=None,
        table="reports_reportdetails",
        field="files_array",
        external_id=external_id,
        data=files_array,
    )
    expected_path = f"v4/repos/{service.storage_hash}/json_data/reports_reportdetails/files_array/{external_id}.json"
    assert path == expected_path
    mocked_write_file.assert_called_with(
        service.root,
        path,
        json.dumps(files_array),
        is_already_gzipped=False,
        reduced_redundancy=False,
    )

0 comments on commit 3dbbeed

Please sign in to comment.