Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add a maximum size for well-known lookups. #8950

Merged
merged 5 commits into from
Dec 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/8950.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a maximum size of 50 kilobytes to .well-known lookups.
32 changes: 18 additions & 14 deletions synapse/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,11 +720,14 @@ async def get_file(

try:
length = await make_deferred_yieldable(
readBodyToFile(response, output_stream, max_size)
read_body_with_max_size(response, output_stream, max_size)
)
except BodyExceededMaxSize:
SynapseError(
502,
"Requested file is too large > %r bytes" % (max_size,),
Codes.TOO_LARGE,
)
except SynapseError:
# This can happen e.g. because the body is too large.
raise
except Exception as e:
raise SynapseError(502, ("Failed to download remote body: %s" % e)) from e

Expand All @@ -748,7 +751,11 @@ def _timeout_to_request_timed_out_error(f: Failure):
return f


class _ReadBodyToFileProtocol(protocol.Protocol):
class BodyExceededMaxSize(Exception):
clokep marked this conversation as resolved.
Show resolved Hide resolved
"""The maximum allowed size of the HTTP body was exceeded."""


class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
def __init__(
self, stream: BinaryIO, deferred: defer.Deferred, max_size: Optional[int]
):
Expand All @@ -761,13 +768,7 @@ def dataReceived(self, data: bytes) -> None:
self.stream.write(data)
self.length += len(data)
if self.max_size is not None and self.length >= self.max_size:
self.deferred.errback(
SynapseError(
502,
"Requested file is too large > %r bytes" % (self.max_size,),
Codes.TOO_LARGE,
)
)
self.deferred.errback(BodyExceededMaxSize())
self.deferred = defer.Deferred()
self.transport.loseConnection()

Expand All @@ -782,12 +783,15 @@ def connectionLost(self, reason: Failure) -> None:
self.deferred.errback(reason)


def readBodyToFile(
def read_body_with_max_size(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to make this async and handle the Deferred logic internally, but the call from the MatrixFederationHttpClient adds a timeout, which involves having both a timeout and a reactor. The timeout could be made optional easily, but needed both or neither is kind of meh.

response: IResponse, stream: BinaryIO, max_size: Optional[int]
) -> defer.Deferred:
"""
Read a HTTP response body to a file-object. Optionally enforcing a maximum file size.

If the maximum file size is reached, the returned Deferred will resolve to a
Failure with a BodyExceededMaxSize exception.

Args:
response: The HTTP response to read from.
stream: The file-object to write to.
Expand All @@ -798,7 +802,7 @@ def readBodyToFile(
"""

d = defer.Deferred()
response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size))
response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size))
return d


Expand Down
25 changes: 23 additions & 2 deletions synapse/http/federation/well_known_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@
import logging
import random
import time
from io import BytesIO
from typing import Callable, Dict, Optional, Tuple

import attr

from twisted.internet import defer
from twisted.internet.interfaces import IReactorTime
from twisted.web.client import RedirectAgent, readBody
from twisted.web.client import RedirectAgent
from twisted.web.http import stringToDatetime
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent, IResponse

from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size
from synapse.logging.context import make_deferred_yieldable
from synapse.util import Clock, json_decoder
from synapse.util.caches.ttlcache import TTLCache
Expand Down Expand Up @@ -53,6 +55,9 @@
# lower bound for .well-known cache period
WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60

# The maximum size (in bytes) to allow a well-known file to be.
WELL_KNOWN_MAX_SIZE = 50 * 1024 # 50 KiB

# Attempt to refetch a cached well-known N% of the TTL before it expires.
# e.g. if set to 0.2 and we have a cached entry with a TTL of 5mins, then
# we'll start trying to refetch 1 minute before it expires.
Expand Down Expand Up @@ -229,6 +234,9 @@ async def _make_well_known_request(
server_name: name of the server, from the requested url
retry: Whether to retry the request if it fails.

Raises:
_FetchWellKnownFailure if we fail to lookup a result

Returns:
Returns the response object and body. Response may be a non-200 response.
"""
Expand All @@ -250,7 +258,11 @@ async def _make_well_known_request(
b"GET", uri, headers=Headers(headers)
)
)
body = await make_deferred_yieldable(readBody(response))
body_stream = BytesIO()
await make_deferred_yieldable(
read_body_with_max_size(response, body_stream, WELL_KNOWN_MAX_SIZE)
)
body = body_stream.getvalue()

if 500 <= response.code < 600:
raise Exception("Non-200 response %s" % (response.code,))
Expand All @@ -259,6 +271,15 @@ async def _make_well_known_request(
except defer.CancelledError:
# Bail if we've been cancelled
raise
except BodyExceededMaxSize:
# If the well-known file was too large, do not keep attempting
# to download it, but consider it a temporary error.
logger.warning(
"Requested .well-known file for %s is too large > %r bytes",
server_name.decode("ascii"),
WELL_KNOWN_MAX_SIZE,
)
raise _FetchWellKnownFailure(temporary=True)
except Exception as e:
if not retry or i >= WELL_KNOWN_RETRY_ATTEMPTS:
logger.info("Error fetching %s: %s", uri_str, e)
Expand Down
13 changes: 11 additions & 2 deletions synapse/http/matrixfederationclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,19 @@
import synapse.metrics
import synapse.util.retryutils
from synapse.api.errors import (
Codes,
FederationDeniedError,
HttpResponseException,
RequestSendFailed,
SynapseError,
)
from synapse.http import QuieterFileBodyProducer
from synapse.http.client import (
BlacklistingAgentWrapper,
BlacklistingReactorWrapper,
BodyExceededMaxSize,
encode_query_args,
readBodyToFile,
read_body_with_max_size,
)
from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
from synapse.logging.context import make_deferred_yieldable
Expand Down Expand Up @@ -975,9 +978,15 @@ async def get_file(
headers = dict(response.headers.getAllRawHeaders())

try:
d = readBodyToFile(response, output_stream, max_size)
d = read_body_with_max_size(response, output_stream, max_size)
d.addTimeout(self.default_timeout, self.reactor)
length = await make_deferred_yieldable(d)
except BodyExceededMaxSize:
msg = "Requested file is too large > %r bytes" % (max_size,)
logger.warning(
"{%s} [%s] %s", request.txn_id, request.destination, msg,
)
SynapseError(502, msg, Codes.TOO_LARGE)
except Exception as e:
logger.warning(
"{%s} [%s] Error reading response: %s",
Expand Down
27 changes: 27 additions & 0 deletions tests/http/federation/test_matrix_federation_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
from synapse.http.federation.srv_resolver import Server
from synapse.http.federation.well_known_resolver import (
WELL_KNOWN_MAX_SIZE,
WellKnownResolver,
_cache_period_from_headers,
)
Expand Down Expand Up @@ -1107,6 +1108,32 @@ def test_well_known_cache_with_temp_failure(self):
r = self.successResultOf(fetch_d)
self.assertEqual(r.delegated_server, None)

def test_well_known_too_large(self):
"""A well-known query that returns a result which is too large should be rejected."""
self.reactor.lookups["testserv"] = "1.2.3.4"

fetch_d = defer.ensureDeferred(
self.well_known_resolver.get_well_known(b"testserv")
)

# there should be an attempt to connect on port 443 for the .well-known
clients = self.reactor.tcpClients
self.assertEqual(len(clients), 1)
(host, port, client_factory, _timeout, _bindAddress) = clients.pop(0)
self.assertEqual(host, "1.2.3.4")
self.assertEqual(port, 443)

self._handle_well_known_connection(
client_factory,
expected_sni=b"testserv",
response_headers={b"Cache-Control": b"max-age=1000"},
content=b'{ "m.server": "' + (b"a" * WELL_KNOWN_MAX_SIZE) + b'" }',
)

# The result is sucessful, but disabled delegation.
r = self.successResultOf(fetch_d)
self.assertIsNone(r.delegated_server)

def test_srv_fallbacks(self):
"""Test that other SRV results are tried if the first one fails.
"""
Expand Down