Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove files after build #5680

Merged
merged 31 commits into from
Jun 7, 2019
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
061066a
Fix tests
stsewd May 8, 2019
11debc3
Add task to remove files after build
stsewd May 8, 2019
0a6445e
Fix more tests
stsewd May 8, 2019
e44120b
Merge branch 'master' into remove-files-after-build
stsewd May 9, 2019
96ef713
Fix docstring
stsewd May 9, 2019
b3c8d63
Fix typos
stsewd May 9, 2019
9bde8d3
Only remove files from the current builder
stsewd May 13, 2019
726c7ea
Merge branch 'master' into remove-files-after-build
stsewd May 13, 2019
ac79017
Merge branch 'master' into remove-files-after-build
stsewd May 14, 2019
47f2b67
Update commands
stsewd May 14, 2019
c425869
Clean files after sync versions via webhooks
stsewd May 14, 2019
3f6095d
Add feature flag and lock
stsewd May 14, 2019
2bf07e5
Remove comment
stsewd May 14, 2019
b613bbb
Remove artifacts too
stsewd May 14, 2019
955286a
Merge branch 'master' into remove-files-after-build
stsewd May 17, 2019
9ba356c
Move lock to top level tasks
stsewd May 17, 2019
f806708
Merge branch 'master' into remove-files-after-build
stsewd May 30, 2019
ce44698
Put log.info
stsewd May 30, 2019
3245cd0
Merge branch 'master' into remove-files-after-build
stsewd Jun 3, 2019
cd350c2
Use try/finally blocks to run clean_build_files in the same server
stsewd Jun 4, 2019
8558c20
Revert lock
stsewd Jun 4, 2019
caeb3ab
Untouch files
stsewd Jun 4, 2019
faedd02
Merge branch 'master' into remove-files-after-build
stsewd Jun 5, 2019
eca17fe
Merge branch 'master' into remove-files-after-build
stsewd Jun 5, 2019
a65dc38
Refactor
stsewd Jun 6, 2019
2d6352d
Merge branch 'master' into remove-files-after-build
stsewd Jun 6, 2019
167dcbf
Fix tests
stsewd Jun 6, 2019
478935d
Fix mock
stsewd Jun 6, 2019
9ca1107
Add tests
stsewd Jun 6, 2019
3e3a532
Ignore artifacts path
stsewd Jun 6, 2019
2d045fa
Typo
stsewd Jun 7, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions readthedocs/core/management/commands/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ def handle(self, *args, **options):
for slug in args:
version = utils.version_from_slug(slug, LATEST)
tasks.sync_repository_task(version.pk)
tasks.clean_build_task(version.pk)
6 changes: 6 additions & 0 deletions readthedocs/core/management/commands/update_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def handle(self, *args, **options):
build_pk=build.pk,
version_pk=version.pk,
)
tasks.clean_build_task(version.pk)
else:
p = Project.all_objects.get(slug=slug)
log.info('Building %s', p)
Expand All @@ -94,6 +95,7 @@ def handle(self, *args, **options):
force=force,
version_pk=version.pk,
)
tasks.clean_build_task(version.pk)
else:
log.info('Updating all docs')
for project in Project.objects.all():
Expand All @@ -102,3 +104,7 @@ def handle(self, *args, **options):
project.pk,
force=force,
)
version = project.versions.get(
slug=project.get_default_version()
)
tasks.clean_build_task(version.pk)
3 changes: 2 additions & 1 deletion readthedocs/core/management/commands/update_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.core.management.base import BaseCommand

from readthedocs.builds.models import Version
from readthedocs.projects.tasks import update_docs_task
from readthedocs.projects.tasks import clean_build_task, update_docs_task


class Command(BaseCommand):
Expand All @@ -20,3 +20,4 @@ def handle(self, *args, **options):
record=False,
version_pk=version.pk,
)
clean_build_task(version.pk)
2 changes: 1 addition & 1 deletion readthedocs/core/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def prepare_build(
),
clean_build_task.signature(
humitos marked this conversation as resolved.
Show resolved Hide resolved
args=(version.pk,),
inmutable=True,
immutable=True,
)
),
build,
Expand Down
15 changes: 13 additions & 2 deletions readthedocs/core/views/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import re

from celery import chain
from django.http import HttpResponse, HttpResponseNotFound
from django.shortcuts import redirect
from django.views.decorators.csrf import csrf_exempt
Expand All @@ -12,7 +13,7 @@
from readthedocs.core.utils import trigger_build
from readthedocs.projects import constants
from readthedocs.projects.models import Feature, Project
from readthedocs.projects.tasks import sync_repository_task
from readthedocs.projects.tasks import clean_build_task, sync_repository_task


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -100,7 +101,17 @@ def sync_versions(project):
if not version:
log.info('Unable to sync from %s version', version_identifier)
return None
sync_repository_task.delay(version.pk)
task = chain(
sync_repository_task.signature(
args=(version.pk,),
immutable=True,
),
clean_build_task.signature(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a doubt here.

I understand that when we receive a webhook this task is called to sync the versions. So, if the default_version is triggered for a build as well, it could happen that this clean_build_task removes the files while the build is still building.

In other words, since calling sync_versions may trigger a build for stable version, I think that there is one case where this could break. Consider this scenario:

  1. the project default_version is set as stable
  2. we receive a webhook that calls sync_versions and it triggers a build for stable
  3. the build for the stable version starts in the same builder
  4. clean_build_task is called with stable version because it was the default_version of the project
  5. files for stable version are removed from the builder while it was building

I understand that if we merge #5695 as well (lock the whole build), step number 5) won't happen because clean_build_task tries to acquire the lock at this line: https://github.com/rtfd/readthedocs.org/pull/5680/files/0a6445edb9621256f28f673e1a9ae8d4a3831704..b613bbb98f99fb507fbe84826d8fa90de2db4358#diff-b9399e1d3499066c5564f98a620e8881R1434

Please, double check this and confirm if I'm right or wrong. Although, it seems to be a very improbable case :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably we should only be cleaning up the files for the specific version we're building?

This is another vote for single build per builder in my mind. We're wasting a lot of time worrying about this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm acquiring a lock for the clean build task in

https://github.com/rtfd/readthedocs.org/pull/5680/files#diff-b9399e1d3499066c5564f98a620e8881R1434

And skipping the deletion if there is another task with a lock. That avoids the problem of deleting stuff if we have another task expecting files to be there.

I only need to update the code acquire a lock in the sync_versions task after #5695, that way clean_build will skip the deletion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably we should only be cleaning up the files for the specific version we're building?

We are doing that

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And skipping the deletion if there is another task with a lock. That avoids the problem of deleting stuff if we have another task expecting files to be there.

Since we are not locking the whole build, this can still happen in between the locks, right?

args=(version.pk,),
immutable=True,
),
)
task.delay()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line will trigger a chain of sync_repository_task and clean_build_task. How do we guarantee that these tasks are going to be executed both in the same server?

return version.slug
except Exception:
log.exception('Unknown sync versions exception')
Expand Down
7 changes: 6 additions & 1 deletion readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,7 @@ def add_features(sender, **kwargs):
SHARE_SPHINX_DOCTREE = 'share_sphinx_doctree'
USE_PDF_LATEXMK = 'use_pdf_latexmk'
DEFAULT_TO_MKDOCS_0_17_3 = 'default_to_mkdocs_0_17_3'
CLEAN_AFTER_BUILD = 'clean_after_build'

FEATURES = (
(USE_SPHINX_LATEST, _('Use latest version of Sphinx')),
Expand Down Expand Up @@ -1395,7 +1396,11 @@ def add_features(sender, **kwargs):
),
(
DEFAULT_TO_MKDOCS_0_17_3,
_('Install mkdocs 0.17.3 by default')
_('Install mkdocs 0.17.3 by default'),
),
(
CLEAN_AFTER_BUILD,
_('Clean all files used in the build process'),
),
)

Expand Down
21 changes: 14 additions & 7 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
)
from readthedocs.doc_builder.loader import get_builder_class
from readthedocs.doc_builder.python_environments import Conda, Virtualenv
from readthedocs.projects.models import APIProject
from readthedocs.projects.models import APIProject, Feature
from readthedocs.sphinx_domains.models import SphinxDomain
from readthedocs.vcs_support import utils as vcs_support_utils
from readthedocs.worker import app
Expand Down Expand Up @@ -1417,18 +1417,25 @@ def warn(self, msg):
delete_queryset.delete()


@app.task(max_retries=5, default_retry_delay=7 * 60)
@app.task(max_retries=3, default_retry_delay=7 * 60)
def clean_build_task(version_id):
stsewd marked this conversation as resolved.
Show resolved Hide resolved
"""Clean the build artifacts of the given version."""
"""Clean the files used in the build of the given version."""
version = Version.objects.get_object_or_log(pk=version_id)
if not version:
if (
not version or
not version.project.has_feature(Feature.CLEAN_AFTER_BUILD)
):
return
stsewd marked this conversation as resolved.
Show resolved Hide resolved
del_dirs = [
os.path.join(version.project.doc_path, dir_, version.slug)
for dir_ in ('checkout', 'envs', 'conda')
for dir_ in ('checkouts', 'artifacts', 'envs', 'conda')
stsewd marked this conversation as resolved.
Show resolved Hide resolved
]
for del_dir in del_dirs:
broadcast(type='build', task=remove_dirs, args=[(del_dir,)])
try:
with version.project.repo_nonblockinglock(version):
log.info('Removing: %s', del_dirs)
remove_dirs(del_dirs)
except vcs_support_utils.LockTimeout:
log.info('Another task is running. Not removing: %s', del_dirs)


def _manage_imported_files(version, path, commit):
Expand Down
124 changes: 108 additions & 16 deletions readthedocs/rtd_tests/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,8 +873,12 @@ def test_github_webhook_for_tags(self, trigger_build):
[mock.call(force=True, version=self.version_tag, project=self.project)],
)

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
def test_github_create_event(self, sync_repository_task, trigger_build):
@mock.patch('readthedocs.core.views.hooks.chain')
def test_github_create_event(
self, chain, sync_repository_task, clean_build_task, trigger_build
):
client = APIClient()

headers = {GITHUB_EVENT_HEADER: GITHUB_CREATE}
Expand All @@ -890,10 +894,23 @@ def test_github_create_event(self, sync_repository_task, trigger_build):
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
def test_github_delete_event(self, sync_repository_task, trigger_build):
@mock.patch('readthedocs.core.views.hooks.chain')
def test_github_delete_event(
self, chain, sync_repository_task, clean_build_task, trigger_build
):
client = APIClient()

headers = {GITHUB_EVENT_HEADER: GITHUB_DELETE}
Expand All @@ -909,7 +926,16 @@ def test_github_delete_event(self, sync_repository_task, trigger_build):
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

def test_github_parse_ref(self, trigger_build):
wh = GitHubWebhookView()
Expand Down Expand Up @@ -1097,9 +1123,11 @@ def test_gitlab_webhook_for_tags(self, trigger_build):
)
trigger_build.assert_not_called()

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
@mock.patch('readthedocs.core.views.hooks.chain')
def test_gitlab_push_hook_creation(
self, sync_repository_task, trigger_build,
self, chain, sync_repository_task, clean_build_task, trigger_build,
):
client = APIClient()
self.gitlab_payload.update(
Expand All @@ -1117,11 +1145,22 @@ def test_gitlab_push_hook_creation(
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
@mock.patch('readthedocs.core.views.hooks.chain')
def test_gitlab_push_hook_deletion(
self, sync_repository_task, trigger_build,
self, chain, sync_repository_task, clean_build_task, trigger_build,
):
client = APIClient()
self.gitlab_payload.update(
Expand All @@ -1139,11 +1178,22 @@ def test_gitlab_push_hook_deletion(
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
@mock.patch('readthedocs.core.views.hooks.chain')
def test_gitlab_tag_push_hook_creation(
self, sync_repository_task, trigger_build,
self, chain, sync_repository_task, clean_build_task, trigger_build,
):
client = APIClient()
self.gitlab_payload.update(
Expand All @@ -1162,11 +1212,22 @@ def test_gitlab_tag_push_hook_creation(
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
@mock.patch('readthedocs.core.views.hooks.chain')
def test_gitlab_tag_push_hook_deletion(
self, sync_repository_task, trigger_build,
self, chain, sync_repository_task, clean_build_task, trigger_build,
):
client = APIClient()
self.gitlab_payload.update(
Expand All @@ -1185,7 +1246,16 @@ def test_gitlab_tag_push_hook_deletion(
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

def test_gitlab_invalid_webhook(self, trigger_build):
"""GitLab webhook unhandled event."""
Expand Down Expand Up @@ -1335,9 +1405,11 @@ def test_bitbucket_webhook(self, trigger_build):
)
self.assertEqual(trigger_build_call_count, trigger_build.call_count)

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
@mock.patch('readthedocs.core.views.hooks.chain')
def test_bitbucket_push_hook_creation(
self, sync_repository_task, trigger_build,
self, chain, sync_repository_task, clean_build_task, trigger_build,
):
client = APIClient()
self.bitbucket_payload['push']['changes'][0]['old'] = None
Expand All @@ -1352,11 +1424,22 @@ def test_bitbucket_push_hook_creation(
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

@mock.patch('readthedocs.core.views.hooks.clean_build_task')
@mock.patch('readthedocs.core.views.hooks.sync_repository_task')
@mock.patch('readthedocs.core.views.hooks.chain')
def test_bitbucket_push_hook_deletion(
self, sync_repository_task, trigger_build,
self, chain, sync_repository_task, clean_build_task, trigger_build,
):
client = APIClient()
self.bitbucket_payload['push']['changes'][0]['new'] = None
Expand All @@ -1371,7 +1454,16 @@ def test_bitbucket_push_hook_deletion(
self.assertEqual(resp.data['versions'], [LATEST])
trigger_build.assert_not_called()
latest_version = self.project.versions.get(slug=LATEST)
sync_repository_task.delay.assert_called_with(latest_version.pk)
chain.assert_called_with(
sync_repository_task.signature(
args=(latest_version.pk,),
immutable=True,
),
clean_build_task.signature(
args=(latest_version.pk,),
immutable=True,
),
)

def test_bitbucket_invalid_webhook(self, trigger_build):
"""Bitbucket webhook unhandled event."""
Expand Down