Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: update sync_pull experiment #507

Merged
merged 2 commits into from
Jun 20, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 27 additions & 13 deletions tasks/sync_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import sqlalchemy.orm
from asgiref.sync import async_to_sync
from redis.exceptions import LockError
from sentry_sdk import metrics as sentry_metrics
from shared.celery_config import notify_task_name, pulls_task_name
from shared.metrics import Counter, Histogram
from shared.reports.types import Change
from shared.torngit.exceptions import TorngitClientError
from shared.yaml import UserYaml
Expand All @@ -35,6 +35,25 @@

log = logging.getLogger(__name__)

TASK_RUN_COUNT = Counter(
"sync_pull_task_run_count",
"Number of SyncPull tasks that were executed (per wait group)",
["wait_group"],
)

TASK_REJECTED_COUNT = Counter(
"sync_pull_task_rejected_count",
"Number of SyncPull tasks that were _not_ executed because they couldn't get a lock (per wait group)",
["wait_group"],
)

TIME_FOR_LOCK_HISTOGRAM = Histogram(
"sync_pull_time_to_get_lock",
"Distribution of the time a SyncPull task was waiting for the lock (per wait group)",
["wait_group"],
unit="milliseconds",
)


class PullSyncTask(BaseCodecovTask, name=pulls_task_name):
"""
Expand Down Expand Up @@ -67,26 +86,20 @@ def run_impl(
pullid = int(pullid)
repoid = int(repoid)
lock_name = f"pullsync_{repoid}_{pullid}"
wait_for_lock_timeout_seconds = 5
metrics_tag = "long_timeout"
if SYNC_PULL_LOCK_TIMEOUT.check_value(identifier=repoid):
wait_for_lock_timeout_seconds = 1
metrics_tag = "short_timeout"
wait_for_lock_timeout_seconds, metrics_tag = SYNC_PULL_LOCK_TIMEOUT.check_value(
identifier=repoid, default=(5, "long_timeout")
)
start_wait = time.monotonic()
try:
with redis_connection.lock(
lock_name,
timeout=60 * 5,
blocking_timeout=wait_for_lock_timeout_seconds,
):
sentry_metrics.incr(
"sync_pull_task_run_count", tags={"wait": metrics_tag}
)
TASK_RUN_COUNT.labels(wait_group=metrics_tag).inc()
time_to_get_lock_seconds = time.monotonic() - start_wait
sentry_metrics.distribution(
"sync_pull_task_time_to_get_lock_seconds",
time_to_get_lock_seconds,
tags={"wait": metrics_tag},
TIME_FOR_LOCK_HISTOGRAM.labels(wait_group=metrics_tag).observe(
time_to_get_lock_seconds * 1000
)
return self.run_impl_within_lock(
db_session,
Expand All @@ -101,6 +114,7 @@ def run_impl(
"Unable to acquire PullSync lock. Not retrying because pull is being synced already",
extra=dict(pullid=pullid, repoid=repoid),
)
TASK_REJECTED_COUNT.labels(wait_group=metrics_tag).inc()
return {
"notifier_called": False,
"commit_updates_done": {"merged_count": 0, "soft_deleted_count": 0},
Expand Down
Loading