Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 076dead

Browse files
authored
allow specifying https:// proxy (#10411)
1 parent e16eab2 commit 076dead

File tree

3 files changed

+450
-133
lines changed

3 files changed

+450
-133
lines changed

changelog.d/10411.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add support for https connections to a proxy server. Contributed by @Bubu and @dklimpel.

synapse/http/proxyagent.py

Lines changed: 109 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,32 @@
1414
import base64
1515
import logging
1616
import re
17-
from typing import Optional, Tuple
18-
from urllib.request import getproxies_environment, proxy_bypass_environment
17+
from typing import Any, Dict, Optional, Tuple
18+
from urllib.parse import urlparse
19+
from urllib.request import ( # type: ignore[attr-defined]
20+
getproxies_environment,
21+
proxy_bypass_environment,
22+
)
1923

2024
import attr
2125
from zope.interface import implementer
2226

2327
from twisted.internet import defer
2428
from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
29+
from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
2530
from twisted.python.failure import Failure
26-
from twisted.web.client import URI, BrowserLikePolicyForHTTPS, _AgentBase
31+
from twisted.web.client import (
32+
URI,
33+
BrowserLikePolicyForHTTPS,
34+
HTTPConnectionPool,
35+
_AgentBase,
36+
)
2737
from twisted.web.error import SchemeNotSupported
2838
from twisted.web.http_headers import Headers
29-
from twisted.web.iweb import IAgent, IPolicyForHTTPS
39+
from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS
3040

3141
from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint
42+
from synapse.types import ISynapseReactor
3243

3344
logger = logging.getLogger(__name__)
3445

@@ -63,35 +74,38 @@ class ProxyAgent(_AgentBase):
6374
reactor might have some blacklisting applied (i.e. for DNS queries),
6475
but we need unblocked access to the proxy.
6576
66-
contextFactory (IPolicyForHTTPS): A factory for TLS contexts, to control the
77+
contextFactory: A factory for TLS contexts, to control the
6778
verification parameters of OpenSSL. The default is to use a
6879
`BrowserLikePolicyForHTTPS`, so unless you have special
6980
requirements you can leave this as-is.
7081
71-
connectTimeout (Optional[float]): The amount of time that this Agent will wait
82+
connectTimeout: The amount of time that this Agent will wait
7283
for the peer to accept a connection, in seconds. If 'None',
7384
HostnameEndpoint's default (30s) will be used.
74-
7585
This is used for connections to both proxies and destination servers.
7686
77-
bindAddress (bytes): The local address for client sockets to bind to.
87+
bindAddress: The local address for client sockets to bind to.
7888
79-
pool (HTTPConnectionPool|None): connection pool to be used. If None, a
89+
pool: connection pool to be used. If None, a
8090
non-persistent pool instance will be created.
8191
82-
use_proxy (bool): Whether proxy settings should be discovered and used
92+
use_proxy: Whether proxy settings should be discovered and used
8393
from conventional environment variables.
94+
95+
Raises:
96+
ValueError if use_proxy is set and the environment variables
97+
contain an invalid proxy specification.
8498
"""
8599

86100
def __init__(
87101
self,
88-
reactor,
89-
proxy_reactor=None,
102+
reactor: IReactorCore,
103+
proxy_reactor: Optional[ISynapseReactor] = None,
90104
contextFactory: Optional[IPolicyForHTTPS] = None,
91-
connectTimeout=None,
92-
bindAddress=None,
93-
pool=None,
94-
use_proxy=False,
105+
connectTimeout: Optional[float] = None,
106+
bindAddress: Optional[bytes] = None,
107+
pool: Optional[HTTPConnectionPool] = None,
108+
use_proxy: bool = False,
95109
):
96110
contextFactory = contextFactory or BrowserLikePolicyForHTTPS()
97111

@@ -102,7 +116,7 @@ def __init__(
102116
else:
103117
self.proxy_reactor = proxy_reactor
104118

105-
self._endpoint_kwargs = {}
119+
self._endpoint_kwargs: Dict[str, Any] = {}
106120
if connectTimeout is not None:
107121
self._endpoint_kwargs["timeout"] = connectTimeout
108122
if bindAddress is not None:
@@ -117,24 +131,26 @@ def __init__(
117131
https_proxy = proxies["https"].encode() if "https" in proxies else None
118132
no_proxy = proxies["no"] if "no" in proxies else None
119133

120-
# Parse credentials from http and https proxy connection string if present
121-
self.http_proxy_creds, http_proxy = parse_username_password(http_proxy)
122-
self.https_proxy_creds, https_proxy = parse_username_password(https_proxy)
123-
124-
self.http_proxy_endpoint = _http_proxy_endpoint(
125-
http_proxy, self.proxy_reactor, **self._endpoint_kwargs
134+
self.http_proxy_endpoint, self.http_proxy_creds = _http_proxy_endpoint(
135+
http_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
126136
)
127137

128-
self.https_proxy_endpoint = _http_proxy_endpoint(
129-
https_proxy, self.proxy_reactor, **self._endpoint_kwargs
138+
self.https_proxy_endpoint, self.https_proxy_creds = _http_proxy_endpoint(
139+
https_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
130140
)
131141

132142
self.no_proxy = no_proxy
133143

134144
self._policy_for_https = contextFactory
135145
self._reactor = reactor
136146

137-
def request(self, method, uri, headers=None, bodyProducer=None):
147+
def request(
148+
self,
149+
method: bytes,
150+
uri: bytes,
151+
headers: Optional[Headers] = None,
152+
bodyProducer: Optional[IBodyProducer] = None,
153+
) -> defer.Deferred:
138154
"""
139155
Issue a request to the server indicated by the given uri.
140156
@@ -146,16 +162,15 @@ def request(self, method, uri, headers=None, bodyProducer=None):
146162
See also: twisted.web.iweb.IAgent.request
147163
148164
Args:
149-
method (bytes): The request method to use, such as `GET`, `POST`, etc
165+
method: The request method to use, such as `GET`, `POST`, etc
150166
151-
uri (bytes): The location of the resource to request.
167+
uri: The location of the resource to request.
152168
153-
headers (Headers|None): Extra headers to send with the request
169+
headers: Extra headers to send with the request
154170
155-
bodyProducer (IBodyProducer|None): An object which can generate bytes to
156-
make up the body of this request (for example, the properly encoded
157-
contents of a file for a file upload). Or, None if the request is to
158-
have no body.
171+
bodyProducer: An object which can generate bytes to make up the body of
172+
this request (for example, the properly encoded contents of a file for
173+
a file upload). Or, None if the request is to have no body.
159174
160175
Returns:
161176
Deferred[IResponse]: completes when the header of the response has
@@ -253,70 +268,89 @@ def request(self, method, uri, headers=None, bodyProducer=None):
253268
)
254269

255270

256-
def _http_proxy_endpoint(proxy: Optional[bytes], reactor, **kwargs):
271+
def _http_proxy_endpoint(
272+
proxy: Optional[bytes],
273+
reactor: IReactorCore,
274+
tls_options_factory: IPolicyForHTTPS,
275+
**kwargs,
276+
) -> Tuple[Optional[IStreamClientEndpoint], Optional[ProxyCredentials]]:
257277
"""Parses an http proxy setting and returns an endpoint for the proxy
258278
259279
Args:
260-
proxy: the proxy setting in the form: [<username>:<password>@]<host>[:<port>]
261-
Note that compared to other apps, this function currently lacks support
262-
for specifying a protocol schema (i.e. protocol://...).
280+
proxy: the proxy setting in the form: [scheme://][<username>:<password>@]<host>[:<port>]
281+
This currently supports http:// and https:// proxies.
282+
A hostname without scheme is assumed to be http.
263283
264284
reactor: reactor to be used to connect to the proxy
265285
286+
tls_options_factory: the TLS options to use when connecting through an https proxy
287+
266288
kwargs: other args to be passed to HostnameEndpoint
267289
268290
Returns:
269-
interfaces.IStreamClientEndpoint|None: endpoint to use to connect to the proxy,
270-
or None
291+
a tuple of
292+
endpoint to use to connect to the proxy, or None
293+
ProxyCredentials, or None if no credentials were found
294+
295+
Raises:
296+
ValueError if proxy has no hostname or unsupported scheme.
271297
"""
272298
if proxy is None:
273-
return None
299+
return None, None
274300

275-
# Parse the connection string
276-
host, port = parse_host_port(proxy, default_port=1080)
277-
return HostnameEndpoint(reactor, host, port, **kwargs)
301+
# Note: urlsplit/urlparse cannot be used here as that does not work (for Python
302+
# 3.9+) on scheme-less proxies, e.g. host:port.
303+
scheme, host, port, credentials = parse_proxy(proxy)
278304

305+
proxy_endpoint = HostnameEndpoint(reactor, host, port, **kwargs)
279306

280-
def parse_username_password(proxy: bytes) -> Tuple[Optional[ProxyCredentials], bytes]:
281-
"""
282-
Parses the username and password from a proxy declaration e.g
283-
username:password@hostname:port.
307+
if scheme == b"https":
308+
tls_options = tls_options_factory.creatorForNetloc(host, port)
309+
proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)
284310

285-
Args:
286-
proxy: The proxy connection string.
311+
return proxy_endpoint, credentials
287312

288-
Returns
289-
An instance of ProxyCredentials and the proxy connection string with any credentials
290-
stripped, i.e u:p@host:port -> host:port. If no credentials were found, the
291-
ProxyCredentials instance is replaced with None.
292-
"""
293-
if proxy and b"@" in proxy:
294-
# We use rsplit here as the password could contain an @ character
295-
credentials, proxy_without_credentials = proxy.rsplit(b"@", 1)
296-
return ProxyCredentials(credentials), proxy_without_credentials
297313

298-
return None, proxy
314+
def parse_proxy(
315+
proxy: bytes, default_scheme: bytes = b"http", default_port: int = 1080
316+
) -> Tuple[bytes, bytes, int, Optional[ProxyCredentials]]:
317+
"""
318+
Parse a proxy connection string.
299319
320+
Given an HTTP proxy URL, breaks it down into components and checks that it
321+
has a hostname (otherwise it is not useful to us when trying to find a
322+
proxy) and asserts that the URL has a scheme we support.
300323
301-
def parse_host_port(hostport: bytes, default_port: int = None) -> Tuple[bytes, int]:
302-
"""
303-
Parse the hostname and port from a proxy connection byte string.
304324
305325
Args:
306-
hostport: The proxy connection string. Must be in the form 'host[:port]'.
307-
default_port: The default port to return if one is not found in `hostport`.
326+
proxy: The proxy connection string. Must be in the form '[scheme://][<username>:<password>@]host[:port]'.
327+
default_scheme: The default scheme to return if one is not found in `proxy`. Defaults to http
328+
default_port: The default port to return if one is not found in `proxy`. Defaults to 1080
308329
309330
Returns:
310-
A tuple containing the hostname and port. Uses `default_port` if one was not found.
331+
A tuple containing the scheme, hostname, port and ProxyCredentials.
332+
If no credentials were found, the ProxyCredentials instance is replaced with None.
333+
334+
Raises:
335+
ValueError if proxy has no hostname or unsupported scheme.
311336
"""
312-
if b":" in hostport:
313-
host, port = hostport.rsplit(b":", 1)
314-
try:
315-
port = int(port)
316-
return host, port
317-
except ValueError:
318-
# the thing after the : wasn't a valid port; presumably this is an
319-
# IPv6 address.
320-
pass
337+
# First check if we have a scheme present
338+
# Note: urlsplit/urlparse cannot be used (for Python 3.9+) on scheme-less proxies, e.g. host:port.
339+
if b"://" not in proxy:
340+
proxy = b"".join([default_scheme, b"://", proxy])
341+
342+
url = urlparse(proxy)
343+
344+
if not url.hostname:
345+
raise ValueError("Proxy URL did not contain a hostname! Please specify one.")
346+
347+
if url.scheme not in (b"http", b"https"):
348+
raise ValueError(
349+
f"Unknown proxy scheme {url.scheme!s}; only 'http' and 'https' is supported."
350+
)
351+
352+
credentials = None
353+
if url.username and url.password:
354+
credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))
321355

322-
return hostport, default_port
356+
return url.scheme, url.hostname, url.port or default_port, credentials

0 commit comments

Comments
 (0)