From d387b8eaf1d6b03d9d816eff9e6df4384e8c9151 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 27 Sep 2021 12:48:32 +0100 Subject: [PATCH 1/6] Catch `FileNotFoundError`s instead of errnos in `_expire_url_cache_data` --- synapse/rest/media/v1/preview_url_resource.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 79a42b24556e..3402b03b286a 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import datetime -import errno import fnmatch import itertools import logging @@ -504,11 +503,11 @@ async def _expire_url_cache_data(self) -> None: fname = self.filepaths.url_cache_filepath(media_id) try: os.remove(fname) + except FileNotFoundError: + pass # If the path doesn't exist, meh except OSError as e: - # If the path doesn't exist, meh - if e.errno != errno.ENOENT: - logger.warning("Failed to remove media: %r: %s", media_id, e) - continue + logger.warning("Failed to remove media: %r: %s", media_id, e) + continue removed_media.append(media_id) @@ -538,11 +537,11 @@ async def _expire_url_cache_data(self) -> None: fname = self.filepaths.url_cache_filepath(media_id) try: os.remove(fname) + except FileNotFoundError: + pass # If the path doesn't exist, meh except OSError as e: - # If the path doesn't exist, meh - if e.errno != errno.ENOENT: - logger.warning("Failed to remove media: %r: %s", media_id, e) - continue + logger.warning("Failed to remove media: %r: %s", media_id, e) + continue try: dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) @@ -554,11 +553,11 @@ async def _expire_url_cache_data(self) -> None: thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id) try: shutil.rmtree(thumbnail_dir) + except FileNotFoundError: + pass # If the path doesn't exist, meh except OSError as e: - # If the path doesn't exist, meh - if e.errno != errno.ENOENT: - logger.warning("Failed to remove media: %r: %s", media_id, e) - continue + logger.warning("Failed to remove media: %r: %s", media_id, e) + continue removed_media.append(media_id) From ed23655a3d2c25feecad5974be5f70bf328226f3 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 27 Sep 2021 12:50:13 +0100 Subject: [PATCH 2/6] Extract 2 day expiry threshold into a constant --- synapse/rest/media/v1/preview_url_resource.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 3402b03b286a..e2e9e052bbad 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -72,6 +72,7 @@ ONE_HOUR = 60 * 60 * 1000 ONE_DAY = 24 * ONE_HOUR +IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -529,7 +530,7 @@ async def _expire_url_cache_data(self) -> None: # These may be cached for a bit on the client (i.e., they # may have a room open with a preview url thing open). # So we wait a couple of days before deleting, just in case. - expire_before = now - 2 * ONE_DAY + expire_before = now - IMAGE_CACHE_EXPIRY_MS media_ids = await self.store.get_url_cache_media_before(expire_before) removed_media = [] From 02d6f54abffa9d0e2910d5ba67e5ea4d5b77d336 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 27 Sep 2021 12:50:38 +0100 Subject: [PATCH 3/6] Fix empty `url_cache_thumbnails/yyyy-mm-dd/` directories being left behind --- synapse/rest/media/v1/preview_url_resource.py | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index e2e9e052bbad..8df617230306 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -512,12 +512,14 @@ async def _expire_url_cache_data(self) -> None: removed_media.append(media_id) - try: - dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) - for dir in dirs: + dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) + for dir in dirs: + try: os.rmdir(dir) - except Exception: - pass + except FileNotFoundError: + pass # Already deleted, continue with deleting the rest + except Exception: + break # Failed, skip deleting the rest of the parent dirs await self.store.delete_url_cache(removed_media) @@ -544,12 +546,14 @@ async def _expire_url_cache_data(self) -> None: logger.warning("Failed to remove media: %r: %s", media_id, e) continue - try: - dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) - for dir in dirs: + dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) + for dir in dirs: + try: os.rmdir(dir) - except Exception: - pass + except FileNotFoundError: + pass # Already deleted, continue with deleting the rest + except Exception: + break # Failed, skip deleting the rest of the parent dirs thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id) try: @@ -562,12 +566,16 @@ async def _expire_url_cache_data(self) -> None: removed_media.append(media_id) - try: - dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id) - for dir in dirs: + dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id) + # Note that one of the directories to be deleted has already been + # removed by the `rmtree` above. + for dir in dirs: + try: os.rmdir(dir) - except Exception: - pass + except FileNotFoundError: + pass # Already deleted, continue with deleting the rest + except Exception: + break # Failed, skip deleting the rest of the parent dirs await self.store.delete_url_cache_media(removed_media) From ecd5148f2a446bf5bd8e738aa0ea5c8756db8e97 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 27 Sep 2021 12:53:34 +0100 Subject: [PATCH 4/6] Add test for URL cache expiry --- tests/rest/media/v1/test_url_preview.py | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 4d09b5d07ef7..ce43de780b51 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -21,11 +21,13 @@ from twisted.test.proto_helpers import AccumulatingProtocol from synapse.config.oembed import OEmbedEndpointConfig +from synapse.rest.media.v1.preview_url_resource import IMAGE_CACHE_EXPIRY_MS from synapse.util.stringutils import parse_and_validate_mxc_uri from tests import unittest from tests.server import FakeTransport from tests.test_utils import SMALL_PNG +from tests.utils import MockClock try: import lxml @@ -851,3 +853,32 @@ def test_storage_providers_exclude_thumbnails(self): 404, "URL cache thumbnail was unexpectedly retrieved from a storage provider", ) + + def test_cache_expiry(self): + """Test that URL cache files and thumbnails are cleaned up properly on expiry.""" + self.preview_url.clock = MockClock() + + _host, media_id = self._download_image() + + file_path = self.preview_url.filepaths.url_cache_filepath(media_id) + file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete( + media_id + ) + thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory( + media_id + ) + thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete( + media_id + ) + + self.assertTrue(os.path.isfile(file_path)) + self.assertTrue(os.path.isdir(thumbnail_dir)) + + self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1) + self.get_success(self.preview_url._expire_url_cache_data()) + + for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs: + self.assertFalse( + os.path.exists(path), + f"{os.path.relpath(path, self.media_store_path)} was not deleted", + ) From 64733e9a0070dcc2ebbe4f149fb6b3fd767573ae Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 27 Sep 2021 13:03:25 +0100 Subject: [PATCH 5/6] Add newsfile --- changelog.d/10924.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10924.bugfix diff --git a/changelog.d/10924.bugfix b/changelog.d/10924.bugfix new file mode 100644 index 000000000000..c73a51e32fe2 --- /dev/null +++ b/changelog.d/10924.bugfix @@ -0,0 +1 @@ +Fix a bug where empty `yyyy-mm-dd/` directories would be left behind in the media store's `url_cache_thumbnails/` directory. From 01492d6cc3c3dbfb187cde2955431e41a18336b0 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 28 Sep 2021 14:37:32 +0100 Subject: [PATCH 6/6] Log failure to remove directories --- synapse/rest/media/v1/preview_url_resource.py | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 8df617230306..044f44a3977e 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import datetime +import errno import fnmatch import itertools import logging @@ -496,6 +497,27 @@ async def _expire_url_cache_data(self) -> None: logger.info("Still running DB updates; skipping expiry") return + def try_remove_parent_dirs(dirs: Iterable[str]) -> None: + """Attempt to remove the given chain of parent directories + + Args: + dirs: The list of directory paths to delete, with children appearing + before their parents. + """ + for dir in dirs: + try: + os.rmdir(dir) + except FileNotFoundError: + # Already deleted, continue with deleting the rest + pass + except OSError as e: + # Failed, skip deleting the rest of the parent dirs + if e.errno != errno.ENOTEMPTY: + logger.warning( + "Failed to remove media directory: %r: %s", dir, e + ) + break + # First we delete expired url cache entries media_ids = await self.store.get_expired_url_cache(now) @@ -513,13 +535,7 @@ async def _expire_url_cache_data(self) -> None: removed_media.append(media_id) dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) - for dir in dirs: - try: - os.rmdir(dir) - except FileNotFoundError: - pass # Already deleted, continue with deleting the rest - except Exception: - break # Failed, skip deleting the rest of the parent dirs + try_remove_parent_dirs(dirs) await self.store.delete_url_cache(removed_media) @@ -547,13 +563,7 @@ async def _expire_url_cache_data(self) -> None: continue dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) - for dir in dirs: - try: - os.rmdir(dir) - except FileNotFoundError: - pass # Already deleted, continue with deleting the rest - except Exception: - break # Failed, skip deleting the rest of the parent dirs + try_remove_parent_dirs(dirs) thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id) try: @@ -569,13 +579,7 @@ async def _expire_url_cache_data(self) -> None: dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id) # Note that one of the directories to be deleted has already been # removed by the `rmtree` above. - for dir in dirs: - try: - os.rmdir(dir) - except FileNotFoundError: - pass # Already deleted, continue with deleting the rest - except Exception: - break # Failed, skip deleting the rest of the parent dirs + try_remove_parent_dirs(dirs) await self.store.delete_url_cache_media(removed_media)