This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix empty url_cache_thumbnails/yyyy-mm-dd/ directories being left behind #10924

Merged
merged 6 commits on Sep 29, 2021
Changes from all commits
1 change: 1 addition & 0 deletions changelog.d/10924.bugfix
@@ -0,0 +1 @@
+Fix a bug where empty `yyyy-mm-dd/` directories would be left behind in the media store's `url_cache_thumbnails/` directory.
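
For context: thumbnails for a cached URL preview are stored under a per-day directory, e.g. url_cache_thumbnails/2021-09-29/<media id>/. The old expiry code removed a media item's own thumbnail directory with shutil.rmtree and then tried to rmdir its parent directories inside a single try/except, so the very first failure (the deepest directory was already gone) silently aborted the whole loop and the empty yyyy-mm-dd/ directory stayed behind. A minimal, self-contained sketch of that failure mode (the layout and paths here are illustrative, not Synapse's real ones):

import os
import shutil
import tempfile

# Toy layout resembling url_cache_thumbnails/<yyyy-mm-dd>/<media id>/
root = tempfile.mkdtemp()
media_dir = os.path.join(root, "url_cache_thumbnails", "2021-09-29", "abcdefgh")
os.makedirs(media_dir)

# The expiry job removes the media item's thumbnail directory first...
shutil.rmtree(media_dir)

# ...and then, as the old code did, tries to remove the parents,
# deepest first, inside one try/except.
parents = [
    media_dir,  # already removed by the rmtree above
    os.path.join(root, "url_cache_thumbnails", "2021-09-29"),
    os.path.join(root, "url_cache_thumbnails"),
]
try:
    for d in parents:
        os.rmdir(d)  # raises FileNotFoundError on the first entry
except Exception:
    pass  # swallows the error, so the date directory is never attempted

# The empty per-day directory is left behind.
print(os.listdir(os.path.join(root, "url_cache_thumbnails")))  # ['2021-09-29']

shutil.rmtree(root)  # tidy up the demo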
74 changes: 43 additions & 31 deletions synapse/rest/media/v1/preview_url_resource.py
@@ -73,6 +73,7 @@
 
 ONE_HOUR = 60 * 60 * 1000
 ONE_DAY = 24 * ONE_HOUR
+IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
@@ -496,6 +497,27 @@ async def _expire_url_cache_data(self) -> None:
             logger.info("Still running DB updates; skipping expiry")
             return
 
+        def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
+            """Attempt to remove the given chain of parent directories
+
+            Args:
+                dirs: The list of directory paths to delete, with children appearing
+                    before their parents.
+            """
+            for dir in dirs:
+                try:
+                    os.rmdir(dir)
+                except FileNotFoundError:
+                    # Already deleted, continue with deleting the rest
+                    pass
+                except OSError as e:
+                    # Failed, skip deleting the rest of the parent dirs
+                    if e.errno != errno.ENOTEMPTY:
+                        logger.warning(
+                            "Failed to remove media directory: %r: %s", dir, e
+                        )
+                    break
+
         # First we delete expired url cache entries
         media_ids = await self.store.get_expired_url_cache(now)
 
@@ -504,20 +526,16 @@ async def _expire_url_cache_data(self) -> None:
             fname = self.filepaths.url_cache_filepath(media_id)
             try:
                 os.remove(fname)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
             except OSError as e:
-                # If the path doesn't exist, meh
-                if e.errno != errno.ENOENT:
-                    logger.warning("Failed to remove media: %r: %s", media_id, e)
-                    continue
+                logger.warning("Failed to remove media: %r: %s", media_id, e)
+                continue
 
             removed_media.append(media_id)
 
-            try:
-                dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
-                for dir in dirs:
-                    os.rmdir(dir)
-            except Exception:
-                pass
+            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            try_remove_parent_dirs(dirs)
 
         await self.store.delete_url_cache(removed_media)
 
@@ -530,44 +548,38 @@ async def _expire_url_cache_data(self) -> None:
         # These may be cached for a bit on the client (i.e., they
         # may have a room open with a preview url thing open).
         # So we wait a couple of days before deleting, just in case.
-        expire_before = now - 2 * ONE_DAY
+        expire_before = now - IMAGE_CACHE_EXPIRY_MS
         media_ids = await self.store.get_url_cache_media_before(expire_before)
 
         removed_media = []
         for media_id in media_ids:
             fname = self.filepaths.url_cache_filepath(media_id)
             try:
                 os.remove(fname)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
             except OSError as e:
-                # If the path doesn't exist, meh
-                if e.errno != errno.ENOENT:
-                    logger.warning("Failed to remove media: %r: %s", media_id, e)
-                    continue
+                logger.warning("Failed to remove media: %r: %s", media_id, e)
+                continue
 
-            try:
-                dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
-                for dir in dirs:
-                    os.rmdir(dir)
-            except Exception:
-                pass
+            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            try_remove_parent_dirs(dirs)
 
             thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
             try:
                 shutil.rmtree(thumbnail_dir)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
             except OSError as e:
-                # If the path doesn't exist, meh
-                if e.errno != errno.ENOENT:
-                    logger.warning("Failed to remove media: %r: %s", media_id, e)
-                    continue
+                logger.warning("Failed to remove media: %r: %s", media_id, e)
+                continue
 
             removed_media.append(media_id)
 
-            try:
-                dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
-                for dir in dirs:
-                    os.rmdir(dir)
-            except Exception:
-                pass
+            dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
+            # Note that one of the directories to be deleted has already been
+            # removed by the `rmtree` above.
+            try_remove_parent_dirs(dirs)
 
         await self.store.delete_url_cache_media(removed_media)
 
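The new try_remove_parent_dirs helper above avoids that trap by attempting each directory in child-to-parent order, skipping ones that are already gone and stopping only when it reaches a parent that cannot be removed. A standalone sketch of the same idea, usable outside Synapse (the function name here is ours, not Synapse's):

import errno
import logging
import os
from typing import Iterable

logger = logging.getLogger(__name__)


def remove_empty_parents(dirs: Iterable[str]) -> None:
    """Remove a chain of directories, children listed before their parents.

    Directories that are already gone are skipped; the first parent that
    cannot be removed (usually because it is still non-empty) ends the walk.
    """
    for d in dirs:
        try:
            os.rmdir(d)
        except FileNotFoundError:
            continue  # already deleted, keep going up the chain
        except OSError as e:
            if e.errno != errno.ENOTEMPTY:
                logger.warning("Failed to remove directory %r: %s", d, e)
            break  # stop at the first parent we cannot remove

Applied to the toy layout in the earlier sketch, this removes the empty 2021-09-29/ directory (and url_cache_thumbnails/ itself once nothing is left in it), while leaving any directory that still holds other media untouched.
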
31 changes: 31 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
@@ -21,11 +21,13 @@
 from twisted.test.proto_helpers import AccumulatingProtocol
 
 from synapse.config.oembed import OEmbedEndpointConfig
+from synapse.rest.media.v1.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
 from synapse.util.stringutils import parse_and_validate_mxc_uri
 
 from tests import unittest
 from tests.server import FakeTransport
 from tests.test_utils import SMALL_PNG
+from tests.utils import MockClock
 
 try:
     import lxml
@@ -851,3 +853,32 @@ def test_storage_providers_exclude_thumbnails(self):
             404,
             "URL cache thumbnail was unexpectedly retrieved from a storage provider",
         )
+
+    def test_cache_expiry(self):
+        """Test that URL cache files and thumbnails are cleaned up properly on expiry."""
+        self.preview_url.clock = MockClock()
+
+        _host, media_id = self._download_image()
+
+        file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
+        file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
+            media_id
+        )
+        thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
+            media_id
+        )
+        thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
+            media_id
+        )
+
+        self.assertTrue(os.path.isfile(file_path))
+        self.assertTrue(os.path.isdir(thumbnail_dir))
+
+        self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
+        self.get_success(self.preview_url._expire_url_cache_data())
+
+        for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
+            self.assertFalse(
+                os.path.exists(path),
+                f"{os.path.relpath(path, self.media_store_path)} was not deleted",
+            )
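
As a closing aside, the property the new test asserts for specific paths (nothing empty left behind) could also be checked globally with a bottom-up walk of the media store; a sketch of such an extra assertion (not part of this PR; media_store_path is the test's media store root):

import os
from typing import List


def find_empty_dirs(root: str) -> List[str]:
    """Return every directory under `root` that contains no files or subdirectories."""
    empty = []
    for dirpath, dirnames, filenames in os.walk(root, topdown=False):
        if not dirnames and not filenames and dirpath != root:
            empty.append(dirpath)
    return empty


# e.g. in a test: self.assertEqual(find_empty_dirs(self.media_store_path), [])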