Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

allow specifying https:// proxy #10411

Merged
merged 31 commits into from
Jul 27, 2021
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8529878
allow specifying https:// proxy
Bubu Jan 13, 2021
c9cb0a8
add back note about supported schemes
Bubu Jun 17, 2021
f889cdf
isort
Bubu Jun 17, 2021
9326de3
Add test for connections to https proxy
richvdh Jun 23, 2021
ef657bf
fix comment wrapping
Bubu Jun 24, 2021
76698d8
tls_options_factory isn't Optional
Bubu Jun 24, 2021
bca0c04
make proxy_parse tests a separate class
Bubu Jun 24, 2021
728bbfb
Merge branch 'https_proxy' of github.com:Bubu/synapse into rework_pro…
Bubu Jun 24, 2021
9275a29
add comment about supported proxy protocols to proxyagent
Bubu Jun 24, 2021
59e6f5a
fix scheme, username:password order confusion in comment
Bubu Jun 24, 2021
35378a0
Update synapse/http/proxyagent.py
richvdh Jul 7, 2021
d455b74
Merge remote-tracking branch 'synapse/develop' into https_proxy_new
dklimpel Jul 15, 2021
486d9ae
fix merge
dklimpel Jul 15, 2021
faa93d3
add type hints to test
dklimpel Jul 15, 2021
7aba100
add type hints
dklimpel Jul 16, 2021
8ce213d
newsfile
dklimpel Jul 16, 2021
62a98e7
`parse_proxy` use urllib return credentials
dklimpel Jul 16, 2021
df66024
remove `parse_username_password`
dklimpel Jul 16, 2021
8fdfc41
update `ProxyParserTests`
dklimpel Jul 16, 2021
1aaedaa
lint
dklimpel Jul 16, 2021
2ace68b
fix imports in tests
dklimpel Jul 16, 2021
82b18d9
2nd fix imports in tests
dklimpel Jul 16, 2021
3524c80
fix and add new tests
dklimpel Jul 16, 2021
292bdf0
small typo in tests
dklimpel Jul 16, 2021
9c205e1
update f-strings
dklimpel Jul 19, 2021
af0eb94
Apply suggestions from code review
dklimpel Jul 20, 2021
047e3a2
update after review
dklimpel Jul 20, 2021
584904f
update doc string
dklimpel Jul 20, 2021
4176286
lint
dklimpel Jul 20, 2021
6da1b9a
Fix up https proxy tests
richvdh Jul 27, 2021
b2a4928
change from `RuntimeError` to `ValueError`
dklimpel Jul 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10411.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for https connections to a proxy server. Contributed by @Bubu and @dklimpel.
184 changes: 109 additions & 75 deletions synapse/http/proxyagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,32 @@
import base64
import logging
import re
from typing import Optional, Tuple
from urllib.request import getproxies_environment, proxy_bypass_environment
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlparse
from urllib.request import ( # type: ignore[attr-defined]
getproxies_environment,
proxy_bypass_environment,
)

import attr
from zope.interface import implementer

from twisted.internet import defer
from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
from twisted.python.failure import Failure
from twisted.web.client import URI, BrowserLikePolicyForHTTPS, _AgentBase
from twisted.web.client import (
URI,
BrowserLikePolicyForHTTPS,
HTTPConnectionPool,
_AgentBase,
)
from twisted.web.error import SchemeNotSupported
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent, IPolicyForHTTPS
from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS

from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint
from synapse.types import ISynapseReactor

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -63,35 +74,38 @@ class ProxyAgent(_AgentBase):
reactor might have some blacklisting applied (i.e. for DNS queries),
but we need unblocked access to the proxy.

contextFactory (IPolicyForHTTPS): A factory for TLS contexts, to control the
contextFactory: A factory for TLS contexts, to control the
verification parameters of OpenSSL. The default is to use a
`BrowserLikePolicyForHTTPS`, so unless you have special
requirements you can leave this as-is.

connectTimeout (Optional[float]): The amount of time that this Agent will wait
connectTimeout: The amount of time that this Agent will wait
for the peer to accept a connection, in seconds. If 'None',
HostnameEndpoint's default (30s) will be used.

This is used for connections to both proxies and destination servers.

bindAddress (bytes): The local address for client sockets to bind to.
bindAddress: The local address for client sockets to bind to.

pool (HTTPConnectionPool|None): connection pool to be used. If None, a
pool: connection pool to be used. If None, a
non-persistent pool instance will be created.

use_proxy (bool): Whether proxy settings should be discovered and used
use_proxy: Whether proxy settings should be discovered and used
from conventional environment variables.

Raises:
RuntimeError if use_proxy is set and the environment variables
contain an invalid proxy specification.
"""

def __init__(
self,
reactor,
proxy_reactor=None,
reactor: IReactorCore,
proxy_reactor: Optional[ISynapseReactor] = None,
contextFactory: Optional[IPolicyForHTTPS] = None,
connectTimeout=None,
bindAddress=None,
pool=None,
use_proxy=False,
connectTimeout: Optional[float] = None,
bindAddress: Optional[bytes] = None,
pool: Optional[HTTPConnectionPool] = None,
use_proxy: bool = False,
):
contextFactory = contextFactory or BrowserLikePolicyForHTTPS()

Expand All @@ -102,7 +116,7 @@ def __init__(
else:
self.proxy_reactor = proxy_reactor

self._endpoint_kwargs = {}
self._endpoint_kwargs: Dict[str, Any] = {}
if connectTimeout is not None:
self._endpoint_kwargs["timeout"] = connectTimeout
if bindAddress is not None:
Expand All @@ -117,24 +131,26 @@ def __init__(
https_proxy = proxies["https"].encode() if "https" in proxies else None
no_proxy = proxies["no"] if "no" in proxies else None

# Parse credentials from http and https proxy connection string if present
self.http_proxy_creds, http_proxy = parse_username_password(http_proxy)
self.https_proxy_creds, https_proxy = parse_username_password(https_proxy)

self.http_proxy_endpoint = _http_proxy_endpoint(
http_proxy, self.proxy_reactor, **self._endpoint_kwargs
self.http_proxy_endpoint, self.http_proxy_creds = _http_proxy_endpoint(
http_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
)

self.https_proxy_endpoint = _http_proxy_endpoint(
https_proxy, self.proxy_reactor, **self._endpoint_kwargs
self.https_proxy_endpoint, self.https_proxy_creds = _http_proxy_endpoint(
https_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
)

self.no_proxy = no_proxy

self._policy_for_https = contextFactory
self._reactor = reactor

def request(self, method, uri, headers=None, bodyProducer=None):
def request(
self,
method: bytes,
uri: bytes,
headers: Optional[Headers] = None,
bodyProducer: Optional[IBodyProducer] = None,
) -> defer.Deferred:
"""
Issue a request to the server indicated by the given uri.

Expand All @@ -146,16 +162,15 @@ def request(self, method, uri, headers=None, bodyProducer=None):
See also: twisted.web.iweb.IAgent.request

Args:
method (bytes): The request method to use, such as `GET`, `POST`, etc
method: The request method to use, such as `GET`, `POST`, etc

uri (bytes): The location of the resource to request.
uri: The location of the resource to request.

headers (Headers|None): Extra headers to send with the request
headers: Extra headers to send with the request

bodyProducer (IBodyProducer|None): An object which can generate bytes to
make up the body of this request (for example, the properly encoded
contents of a file for a file upload). Or, None if the request is to
have no body.
bodyProducer: An object which can generate bytes to make up the body of
this request (for example, the properly encoded contents of a file for
a file upload). Or, None if the request is to have no body.

Returns:
Deferred[IResponse]: completes when the header of the response has
Expand Down Expand Up @@ -253,70 +268,89 @@ def request(self, method, uri, headers=None, bodyProducer=None):
)


def _http_proxy_endpoint(proxy: Optional[bytes], reactor, **kwargs):
def _http_proxy_endpoint(
proxy: Optional[bytes],
reactor: IReactorCore,
tls_options_factory: IPolicyForHTTPS,
**kwargs,
) -> Tuple[Optional[IStreamClientEndpoint], Optional[ProxyCredentials]]:
"""Parses an http proxy setting and returns an endpoint for the proxy

Args:
proxy: the proxy setting in the form: [<username>:<password>@]<host>[:<port>]
Note that compared to other apps, this function currently lacks support
for specifying a protocol schema (i.e. protocol://...).
proxy: the proxy setting in the form: [scheme://][<username>:<password>@]<host>[:<port>]
This currently supports http:// and https:// proxies.
A hostname without scheme is assumed to be http.

reactor: reactor to be used to connect to the proxy

tls_options_factory: the TLS options to use when connecting through a https proxy

kwargs: other args to be passed to HostnameEndpoint

Returns:
interfaces.IStreamClientEndpoint|None: endpoint to use to connect to the proxy,
or None
a tuple of
endpoint to use to connect to the proxy, or None
ProxyCredentials or if no credentials were found, or None

Raise:
RuntimeError if proxy has no hostname or unsupported scheme.
"""
if proxy is None:
return None
return None, None

# Parse the connection string
host, port = parse_host_port(proxy, default_port=1080)
return HostnameEndpoint(reactor, host, port, **kwargs)
# Note: urlsplit/urlparse cannot be used here as that does not work (for Python
# 3.9+) on scheme-less proxies, e.g. host:port.
scheme, host, port, credentials = parse_proxy(proxy)

proxy_endpoint = HostnameEndpoint(reactor, host, port, **kwargs)

def parse_username_password(proxy: bytes) -> Tuple[Optional[ProxyCredentials], bytes]:
"""
Parses the username and password from a proxy declaration e.g
username:password@hostname:port.
if scheme == b"https":
tls_options = tls_options_factory.creatorForNetloc(host, port)
proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)

Args:
proxy: The proxy connection string.
return proxy_endpoint, credentials

Returns
An instance of ProxyCredentials and the proxy connection string with any credentials
stripped, i.e u:p@host:port -> host:port. If no credentials were found, the
ProxyCredentials instance is replaced with None.
"""
if proxy and b"@" in proxy:
# We use rsplit here as the password could contain an @ character
credentials, proxy_without_credentials = proxy.rsplit(b"@", 1)
return ProxyCredentials(credentials), proxy_without_credentials

return None, proxy
def parse_proxy(
proxy: bytes, default_scheme: bytes = b"http", default_port: int = 1080
) -> Tuple[bytes, bytes, int, Optional[ProxyCredentials]]:
"""
Parse a proxy connection string.

Given a HTTP proxy URL, breaks it down into components and checks that it
has a hostname (otherwise it is not useful to us when trying to find a
proxy) and asserts that the URL has a scheme we support.

def parse_host_port(hostport: bytes, default_port: int = None) -> Tuple[bytes, int]:
"""
Parse the hostname and port from a proxy connection byte string.

Args:
hostport: The proxy connection string. Must be in the form 'host[:port]'.
default_port: The default port to return if one is not found in `hostport`.
proxy: The proxy connection string. Must be in the form '[scheme://][<username>:<password>@]host[:port]'.
default_scheme: The default scheme to return if one is not found in `proxy`. Defaults to http
default_port: The default port to return if one is not found in `proxy`. Defaults to 1080

Returns:
A tuple containing the hostname and port. Uses `default_port` if one was not found.
A tuple containing the scheme, hostname, port and ProxyCredentials.
If no credentials were found, the ProxyCredentials instance is replaced with None.

Raise:
RuntimeError if proxy has no hostname or unsupported scheme.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why RuntimeError rather than ValueError?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, but I see that as a bug in sygnal rather than something we ought to replicate here.

"""
if b":" in hostport:
host, port = hostport.rsplit(b":", 1)
try:
port = int(port)
return host, port
except ValueError:
# the thing after the : wasn't a valid port; presumably this is an
# IPv6 address.
pass
# First check if we have a scheme present
# Note: urlsplit/urlparse cannot be used (for Python # 3.9+) on scheme-less proxies, e.g. host:port.
if b"://" not in proxy:
proxy = b"".join([default_scheme, b"://", proxy])

url = urlparse(proxy)

if not url.hostname:
raise RuntimeError("Proxy URL did not contain a hostname! Please specify one.")

if url.scheme not in (b"http", b"https"):
raise RuntimeError(
f"Unknown proxy scheme {url.scheme !s}; only 'http' and 'https' is supported."
)

credentials = None
if url.username and url.password:
credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))

return hostport, default_port
return url.scheme, url.hostname, url.port or default_port, credentials
Loading