Skip to content

Commit

Permalink
Merge pull request #10 from codecov/matt/platform-team-issue-91
Browse files Browse the repository at this point in the history
fix: sync_repos: add new repos to db when sync_repos=True instead of only updating existing
  • Loading branch information
matt-codecov authored Jul 19, 2023
2 parents 5fce60c + 4c3d00a commit bdf7ceb
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 65 deletions.
6 changes: 3 additions & 3 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
git+ssh://[email protected]/codecov/shared.git@30212ca03f4cc53e1dd64434b7f77cec2e300e3b#egg=shared
git+ssh://[email protected]/codecov/shared.git@bc9933c6d61555ab36f310103feb6edd381d928f#egg=shared
git+ssh://[email protected]/codecov/[email protected]#egg=codecovopentelem
boto3
celery
click
coverage
factory-boy
google-cloud-storage
google-cloud-storage>=2.10.0
httpx
analytics-python==1.3.0b1
lxml>=4.9.1
Expand All @@ -32,4 +32,4 @@ stripe
timestring
vcrpy
opentelemetry-instrumentation-celery
opentelemetry-sdk
opentelemetry-sdk
35 changes: 15 additions & 20 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,32 +77,33 @@ deprecated==1.2.12
# via opentelemetry-api
ecdsa==0.16.1
# via tlslite-ng
exceptiongroup==1.0.0
# via pytest
factory-boy==3.2.0
# via -r requirements.in
faker==8.8.2
# via factory-boy
freezegun==1.2.2
# via pytest-freezegun
google-api-core==1.26.1
# via google-cloud-core
google-auth==1.27.1
google-api-core==2.11.1
# via
# google-cloud-core
# google-cloud-storage
google-auth==2.21.0
# via
# google-api-core
# google-cloud-core
# google-cloud-storage
google-cloud-core==1.6.0
# shared
google-cloud-core==2.3.3
# via google-cloud-storage
google-cloud-storage==1.36.2
google-cloud-storage==2.10.0
# via
# -r requirements.in
# shared
google-crc32c==1.1.2
# via google-resumable-media
google-resumable-media==1.2.0
google-resumable-media==2.5.0
# via google-cloud-storage
googleapis-common-protos==1.53.0
googleapis-common-protos==1.59.1
# via google-api-core
h11==0.12.0
# via httpcore
Expand Down Expand Up @@ -162,16 +163,15 @@ opentelemetry-semantic-conventions==0.23b2
# opentelemetry-instrumentation-celery
# opentelemetry-sdk
packaging==20.9
# via
# google-api-core
# pytest
# via pytest
pluggy==0.13.1
# via pytest
prompt-toolkit==3.0.28
# via click-repl
protobuf==3.20.3
# via
# google-api-core
# googleapis-common-protos
# shared
psycopg2==2.9.3
# via -r requirements.in
Expand Down Expand Up @@ -221,9 +221,8 @@ python-json-logger==0.1.11
pytz==2022.1
# via
# celery
# google-api-core
# timestring
pyyaml==6.0
pyyaml==6.0.1
# via
# -r requirements.in
# vcrpy
Expand All @@ -247,17 +246,14 @@ s3transfer==0.3.4
# via boto3
sentry-sdk==1.19.1
# via -r requirements.in
shared @ git+ssh://[email protected]/codecov/shared.git@30212ca03f4cc53e1dd64434b7f77cec2e300e3b
shared @ git+ssh://[email protected]/codecov/shared.git@bc9933c6d61555ab36f310103feb6edd381d928f
# via -r requirements.in
six==1.15.0
# via
# analytics-python
# click-repl
# ecdsa
# google-api-core
# google-auth
# google-cloud-core
# google-resumable-media
# python-dateutil
# sqlalchemy-utils
# vcrpy
Expand Down Expand Up @@ -287,15 +283,14 @@ timestring==1.6.4
# via -r requirements.in
tlslite-ng==0.8.0-alpha39
# via shared
tomli==2.0.1
# via pytest
typing==3.7.4.3
# via shared
typing-extensions==4.6.3
# via shared
urllib3==1.26.13
# via
# botocore
# google-auth
# minio
# requests
# sentry-sdk
Expand Down
51 changes: 41 additions & 10 deletions tasks/sync_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from redis.exceptions import LockError
from shared.celery_config import sync_repos_task_name
from shared.torngit.exceptions import TorngitClientError
from sqlalchemy import and_

from app import celery_app
from database.models import Owner, Repository
Expand Down Expand Up @@ -77,17 +78,47 @@ async def run_async(
log.warning("Unable to sync repos because another task is already doing it")

async def sync_repos_using_integration(self, db_session, git, ownerid, username):
repo_service_ids = await git.list_repos_using_installation(username)
if repo_service_ids:
repo_service_ids = list(map(str, repo_service_ids))
if repo_service_ids:
db_session.query(Repository).filter(
Repository.ownerid == ownerid,
Repository.service_id.in_(repo_service_ids),
Repository.using_integration.isnot(True),
).update(
{Repository.using_integration: True}, synchronize_session=False
repos = await git.list_repos_using_installation(username)
if repos:
service_ids = {repo["id"] for repo in repos}
if service_ids:
# Querying through the `Repository` model is cleaner, but we
# need to go through the table object instead if we want to
# use a Postgres `RETURNING` clause like this.
table = Repository.__table__
update_statement = (
table.update()
.returning(table.columns.service_id)
.where(
and_(
table.columns.ownerid == ownerid,
table.columns.service_id.in_(service_ids),
)
)
.values(using_integration=True)
)
result = db_session.execute(update_statement)
updated_service_ids = {r[0] for r in result.fetchall()}

# The set of repos our app can see minus the set of repos we
# just updated = the set of repos we need to insert.
missing_service_ids = service_ids - updated_service_ids
missing_repos = [
repo for repo in repos if repo["id"] in missing_service_ids
]

for repo in missing_repos:
new_repo = Repository(
ownerid=ownerid,
service_id=repo["id"],
name=repo["name"],
language=repo["language"],
private=repo["private"],
branch=repo["default_branch"],
using_integration=True,
)
db_session.add(new_repo)
db_session.flush()
else:
db_session.query(Repository).filter(
Repository.ownerid == ownerid, Repository.using_integration.is_(True)
Expand Down
78 changes: 46 additions & 32 deletions tasks/tests/unit/test_sync_repos_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,11 @@ async def test_only_public_repos_not_in_db(

@pytest.mark.asyncio
async def test_sync_repos_using_integration(
self, mocker, mock_configuration, dbsession, codecov_vcr, mock_redis
self,
mocker,
dbsession,
mock_owner_provider,
mock_redis,
):
token = "ecd73a086eadc85db68747a66bdbd662a785a072"
user = OwnerFactory.create(
Expand All @@ -495,48 +499,58 @@ async def test_sync_repos_using_integration(
)
dbsession.add(user)

repo_pytest = RepositoryFactory.create(
private=False,
name="pytest",
using_integration=False,
service_id="159089634",
owner=user,
)
repo_spack = RepositoryFactory.create(
private=False,
name="spack",
using_integration=True,
service_id="164948070",
owner=user,
)
repo_pub = RepositoryFactory.create(
private=False,
name="pub",
using_integration=None,
service_id="213786132",
owner=user,
)
dbsession.add(repo_pytest)
dbsession.add(repo_spack)
dbsession.add(repo_pub)
def repo_obj(service_id, name, language, private, branch, using_integration):
return {
"id": service_id,
"name": name,
"language": language,
"private": private,
"default_branch": branch,
"using_integration": using_integration,
}

mock_repos = [
repo_obj("159089634", "pytest", "python", False, "main", True),
repo_obj("164948070", "spack", "python", False, "develop", False),
repo_obj("213786132", "pub", "dart", False, "master", None),
repo_obj("555555555", "soda", "python", False, "main", None),
]

# Mock GitHub response for repos that are visible to our app
mock_owner_provider.list_repos_using_installation.return_value = mock_repos

# Three of the four repositories we can see are already in the database.
# Will we update `using_integration` correctly?
preseeded_repos = []
for repo in mock_repos[:-1]:
preseeded_repos.append(
RepositoryFactory.create(
private=repo["private"],
name=repo["name"],
using_integration=repo["using_integration"],
service_id=repo["id"],
owner=user,
)
)

for repo in preseeded_repos:
dbsession.add(repo)
dbsession.flush()

await SyncReposTask().run_async(
dbsession, ownerid=user.ownerid, using_integration=True
)

dbsession.commit()

repos = (
dbsession.query(Repository)
.filter(
Repository.service_id.in_(
(repo_pytest.service_id, repo_spack.service_id, repo_pub.service_id)
)
)
.filter(Repository.service_id.in_((repo["id"] for repo in mock_repos)))
.all()
)
assert len(repos) == 3

# We pre-seeded 3 repos in the database, but we should have added the
# 4th based on our GitHub response
assert len(repos) == 4

assert user.permission == [] # there were no private repos
for repo in repos:
Expand Down

0 comments on commit bdf7ceb

Please sign in to comment.