Skip to content

Commit

Permalink
docs: Add comment in case we observe the manageprocess command being too slow due to the synchronous deletion of large numbers of files
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Nov 7, 2024
1 parent 332f039 commit f223dbc
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion data_registry/process_manager/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,14 @@ def process(collection: models.Collection) -> None:

logger.debug("Job %s has succeeded (%s: %s)", job, country, collection)

other_jobs = collection.job_set.exclude(pk=job.pk)
# Keep the other most recent successful job as backup.
other_jobs = collection.job_set.exclude(pk=job.pk)
backup_job = other_jobs.successful().order_by("start").values_list("pk", flat=True).last()
if backup_job:
other_jobs = other_jobs.exclude(pk=backup_job)

# There must be at most one incomplete job per collection, for deletion to not conflict with iteration.
for old_job in other_jobs.filter(start__lt=now() - datetime.timedelta(days=365)):
# Note: The Collect task's wipe() method can be slow.
old_job.delete()
logger.debug("Old job %s has been deleted (%s: %s)", old_job, country, collection)

0 comments on commit f223dbc

Please sign in to comment.