Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Allow specifying the value of Accept-Language header for URL previews #7265

Merged
merged 6 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/7265.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a config option for specifying the value of the Accept-Language HTTP header when generating URL previews.
25 changes: 25 additions & 0 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,31 @@ media_store_path: "DATADIR/media_store"
#
#max_spider_size: 10M

# A list of values for the Accept-Language HTTP header used when
# downloading webpages during URL preview generation. This allows
# Synapse to specify the preferred languages that URL previews should
# be in when communicating with remote servers.
#
# Each value is an IETF language tag; a 2-3 letter identifier for a
# language, optionally followed by subtags separated by '-', specifying
# a country or region variant.
#
# Multiple values can be provided, and a weight can be added to each by
# using quality value syntax (;q=). '*' translates to any language.
#
# Defaults to "en".
#
# Example:
#
# url_preview_accept_language:
# - en-GB
# - en-US;q=0.9
# - fr;q=0.8
# - '*;q=0.7'
#
url_preview_accept_language:
# - en


## Captcha ##
# See docs/CAPTCHA_SETUP for full details of configuring this.
Expand Down
29 changes: 29 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ def read_config(self, config, **kwargs):

self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ())

self.url_preview_accept_language = config.get(
"url_preview_accept_language"
) or ["en"]

def generate_config_section(self, data_dir_path, **kwargs):
media_store = os.path.join(data_dir_path, "media_store")
uploads_path = os.path.join(data_dir_path, "uploads")
Expand Down Expand Up @@ -329,6 +333,31 @@ def generate_config_section(self, data_dir_path, **kwargs):
# The largest allowed URL preview spidering size in bytes
#
#max_spider_size: 10M

# A list of values for the Accept-Language HTTP header used when
# downloading webpages during URL preview generation. This allows
# Synapse to specify the preferred languages that URL previews should
# be in when communicating with remote servers.
#
# Each value is a IETF language tag; a 2-3 letter identifier for a
# language, optionally followed by subtags separated by '-', specifying
# a country or region variant.
#
# Multiple values can be provided, and a weight can be added to each by
# using quality value syntax (;q=). '*' translates to any language.
#
# Defaults to "en".
#
# Example:
#
# url_preview_accept_language:
# - en-UK
# - en-US;q=0.9
# - fr;q=0.8
# - *;q=0.7
#
url_preview_accept_language:
# - en
"""
% locals()
)
8 changes: 6 additions & 2 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(self, hs, media_repo, media_storage):
self.media_storage = media_storage

self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
self.url_preview_accept_language = hs.config.url_preview_accept_language

# memory cache mapping urls to an ObservableDeferred returning
# JSON-encoded OG metadata
Expand Down Expand Up @@ -315,9 +316,12 @@ async def _download_url(self, url, user):

with self.media_storage.store_into_file(file_info) as (f, fname, finish):
try:
logger.debug("Trying to get url '%s'", url)
logger.debug("Trying to get preview for url '%s'", url)
length, headers, uri, code = await self.client.get_file(
url, output_stream=f, max_size=self.max_spider_size
url,
output_stream=f,
max_size=self.max_spider_size,
headers={"Accept Language": self.url_preview_accept_language},
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
)
except SynapseError:
# Pass SynapseErrors through directly, so that the servlet
Expand Down
55 changes: 55 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ def make_homeserver(self, reactor, clock):
)
config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
config["url_preview_url_blacklist"] = []
config["url_preview_accept_language"] = [
"en-UK",
"en-US;q=0.9",
"fr;q=0.8",
"*;q=0.7",
]

self.storage_path = self.mktemp()
self.media_store_path = self.mktemp()
Expand Down Expand Up @@ -507,3 +513,52 @@ def test_OPTIONS(self):
self.pump()
self.assertEqual(channel.code, 200)
self.assertEqual(channel.json_body, {})

def test_accept_language_config_option(self):
    """
    Accept-Language header is sent to the remote server.

    Requests a URL preview for http://example.com, answers Synapse's
    outbound fetch with a canned HTML response, and then checks that the
    raw bytes received by the fake remote server contain one
    Accept-Language header line per configured value, in order.
    """
    self.lookups["example.com"] = [(IPv4Address, "8.8.8.8")]

    # Build and make a request to the server
    request, channel = self.make_request(
        "GET", "url_preview?url=http://example.com", shorthand=False
    )
    request.render(self.preview_url)
    self.pump()

    # Extract Synapse's tcp client
    client = self.reactor.tcpClients[0][2].buildProtocol(None)

    # Build a fake remote server to reply with
    server = AccumulatingProtocol()

    # Connect the two together
    server.makeConnection(FakeTransport(client, self.reactor))
    client.makeConnection(FakeTransport(server, self.reactor))

    # Tell Synapse that it has received some data from the remote server
    client.dataReceived(
        b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
        % (len(self.end_content),)
        + self.end_content
    )

    # Move the reactor along until we get a response on our original channel
    self.pump()
    self.assertEqual(channel.code, 200)
    self.assertEqual(
        channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
    )

    # Check that the server received the Accept-Language header as part
    # of the request from Synapse. The field name must be the hyphenated
    # "Accept-Language": HTTP field names are tokens and cannot contain a
    # space, so a remote server would not recognise "Accept language".
    self.assertIn(
        (
            b"Accept-Language: en-UK\r\n"
            b"Accept-Language: en-US;q=0.9\r\n"
            b"Accept-Language: fr;q=0.8\r\n"
            b"Accept-Language: *;q=0.7"
        ),
        server.data,
    )