From 1780fb101767d1c5a35c0af7613672b3058ac72f Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 8 Jul 2024 14:27:40 -0600 Subject: [PATCH 1/3] Refund unused rate limit on downloads --- synapse/http/matrixfederationclient.py | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 749b01dd0ea..2df5d958cca 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -1488,9 +1488,11 @@ async def get_file( headers = dict(response.headers.getAllRawHeaders()) expected_size = response.length + using_max_size = False # if we don't get an expected length then use the max length if expected_size == UNKNOWN_LENGTH: expected_size = max_size + using_max_size = True logger.debug( f"File size unknown, assuming file is max allowable size: {max_size}" ) @@ -1550,6 +1552,20 @@ async def get_file( e, ) raise + + # We need to refund the bucket difference if the expected size was unknown + # at the start of the request. + if using_max_size: + excess_bytes = max_size - length + logger.debug( + f"File size now known: {length}. Refunding {excess_bytes} back to user." + ) + download_ratelimiter.record_action( + requester=None, + key=ip_address, + n_actions=excess_bytes * -1, # perform a refund + ) + logger.info( "{%s} [%s] Completed: %d %s [%d bytes] %s %s", request.txn_id, @@ -1632,9 +1648,11 @@ async def federation_get_file( headers = dict(response.headers.getAllRawHeaders()) expected_size = response.length + using_max_size = False # if we don't get an expected length then use the max length if expected_size == UNKNOWN_LENGTH: expected_size = max_size + using_max_size = True logger.debug( f"File size unknown, assuming file is max allowable size: {max_size}" ) @@ -1733,6 +1751,19 @@ async def federation_get_file( str_url, output_stream, expected_size ) + # We need to refund the bucket difference if the expected size was unknown + # at the start of the request. + if using_max_size: + excess_bytes = max_size - length + logger.debug( + f"File size now known: {length}. Refunding {excess_bytes} back to user." + ) + download_ratelimiter.record_action( + requester=None, + key=ip_address, + n_actions=excess_bytes * -1, # perform a refund + ) + logger.info( "{%s} [%s] Completed: %d %s [%d bytes] %s %s", request.txn_id, From d667f0863f7a88ecc451332703cbdcde96a804fc Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 8 Jul 2024 15:02:08 -0600 Subject: [PATCH 2/3] Test new behaviour --- tests/media/test_media_storage.py | 33 +++++++++++++++++++++------ tests/rest/client/test_media.py | 37 ++++++++++++++++++++++++------- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index 70912e22f8d..5bfd48e3052 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -1086,10 +1086,29 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse: ) assert channel2.code == 200 - # eleventh will hit ratelimit - channel3 = self.make_request( - "GET", - "/_matrix/media/v3/download/remote.org/abcdefghijklmnopqrstuvwxyx", - shorthand=False, - ) - assert channel3.code == 429 + # If the refund code is working correctly, there should be ~200MB of free space + # in the limit. This is because we're assuming each file is 50MB, but are only + # consuming 30MB files (per the @patch on this test). We should be able to get + # 6 more requests through now, before rate limiting on the 7th or 8th (we can + # expect that 6.667 requests will make it at 200MB / 30MB, which may be just + # enough for a slow test run to get an additional request through). + for i in range(6): + channel3 = self.make_request( + "GET", + f"/_matrix/media/v3/download/remote.org/abcd{i}", + shorthand=False, + ) + assert channel3.code == 200 + + # Try the rate limit again, testing the eighth request fails. We discard the + # seventh request because it may or may not be rate limited (see above). The + # eighth request would *definitely* be rate limited though, so we test that. + for i in range(2): + channel4 = self.make_request( + "GET", + f"/_matrix/media/v3/download/remote.org/abcde{i}", + shorthand=False, + ) + if i == 1: # test for second request (which will be #8) + assert channel4.code == 429 + # else, discard 7th request because it's unhelpful diff --git a/tests/rest/client/test_media.py b/tests/rest/client/test_media.py index 7f2caed7d5d..b3ffb346749 100644 --- a/tests/rest/client/test_media.py +++ b/tests/rest/client/test_media.py @@ -1816,6 +1816,7 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse: def test_download_ratelimit_max_size_sub(self) -> None: """ Test that if no content-length is provided, the default max size is applied instead + and the difference is refunded to the user. """ # mock out actually sending the request @@ -1841,14 +1842,34 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse: ) assert channel2.code == 200 - # eleventh will hit ratelimit - channel3 = self.make_request( - "GET", - "/_matrix/client/v1/media/download/remote.org/abcd", - shorthand=False, - access_token=self.tok, - ) - assert channel3.code == 429 + # If the refund code is working correctly, there should be ~200MB of free space + # in the limit. This is because we're assuming each file is 50MB, but are only + # consuming 30MB files (per the @patch on this test). We should be able to get + # 6 more requests through now, before rate limiting on the 7th or 8th (we can + # expect that 6.667 requests will make it at 200MB / 30MB, which may be just + # enough for a slow test run to get an additional request through). + for i in range(6): + channel3 = self.make_request( + "GET", + f"/_matrix/client/v1/media/download/remote.org/abcd{i}", + shorthand=False, + access_token=self.tok, + ) + assert channel3.code == 200 + + # Try the rate limit again, testing the eighth request fails. We discard the + # seventh request because it may or may not be rate limited (see above). The + # eighth request would *definitely* be rate limited though, so we test that. + for i in range(2): + channel4 = self.make_request( + "GET", + f"/_matrix/client/v1/media/download/remote.org/abcde{i}", + shorthand=False, + access_token=self.tok, + ) + if i == 1: # test for second request (which will be #8) + assert channel4.code == 429 + # else, discard 7th request because it's unhelpful def test_file_download(self) -> None: content = io.BytesIO(b"file_to_stream") From 4e494d895516f22c6bbee0e9a2ad1ec399f39805 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 8 Jul 2024 15:08:37 -0600 Subject: [PATCH 3/3] Changelog --- changelog.d/17417.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17417.bugfix diff --git a/changelog.d/17417.bugfix b/changelog.d/17417.bugfix new file mode 100644 index 00000000000..2cdd8b02aac --- /dev/null +++ b/changelog.d/17417.bugfix @@ -0,0 +1 @@ +Relax rate limiting behaviour on federated media downloads of unknown size. \ No newline at end of file