1414import base64
1515import logging
1616import re
17- from typing import Optional , Tuple
18- from urllib .request import getproxies_environment , proxy_bypass_environment
17+ from typing import Any , Dict , Optional , Tuple
18+ from urllib .parse import urlparse
19+ from urllib .request import ( # type: ignore[attr-defined]
20+ getproxies_environment ,
21+ proxy_bypass_environment ,
22+ )
1923
2024import attr
2125from zope .interface import implementer
2226
2327from twisted .internet import defer
2428from twisted .internet .endpoints import HostnameEndpoint , wrapClientTLS
29+ from twisted .internet .interfaces import IReactorCore , IStreamClientEndpoint
2530from twisted .python .failure import Failure
26- from twisted .web .client import URI , BrowserLikePolicyForHTTPS , _AgentBase
31+ from twisted .web .client import (
32+ URI ,
33+ BrowserLikePolicyForHTTPS ,
34+ HTTPConnectionPool ,
35+ _AgentBase ,
36+ )
2737from twisted .web .error import SchemeNotSupported
2838from twisted .web .http_headers import Headers
29- from twisted .web .iweb import IAgent , IPolicyForHTTPS
39+ from twisted .web .iweb import IAgent , IBodyProducer , IPolicyForHTTPS
3040
3141from synapse .http .connectproxyclient import HTTPConnectProxyEndpoint
42+ from synapse .types import ISynapseReactor
3243
3344logger = logging .getLogger (__name__ )
3445
@@ -63,35 +74,38 @@ class ProxyAgent(_AgentBase):
6374 reactor might have some blacklisting applied (i.e. for DNS queries),
6475 but we need unblocked access to the proxy.
6576
66- contextFactory (IPolicyForHTTPS) : A factory for TLS contexts, to control the
77+ contextFactory: A factory for TLS contexts, to control the
6778 verification parameters of OpenSSL. The default is to use a
6879 `BrowserLikePolicyForHTTPS`, so unless you have special
6980 requirements you can leave this as-is.
7081
71- connectTimeout (Optional[float]) : The amount of time that this Agent will wait
82+ connectTimeout: The amount of time that this Agent will wait
7283 for the peer to accept a connection, in seconds. If 'None',
7384 HostnameEndpoint's default (30s) will be used.
74-
7585 This is used for connections to both proxies and destination servers.
7686
77- bindAddress (bytes) : The local address for client sockets to bind to.
87+ bindAddress: The local address for client sockets to bind to.
7888
79- pool (HTTPConnectionPool|None) : connection pool to be used. If None, a
89+ pool: connection pool to be used. If None, a
8090 non-persistent pool instance will be created.
8191
82- use_proxy (bool) : Whether proxy settings should be discovered and used
92+ use_proxy: Whether proxy settings should be discovered and used
8393 from conventional environment variables.
94+
95+ Raises:
96+ ValueError if use_proxy is set and the environment variables
97+ contain an invalid proxy specification.
8498 """
8599
86100 def __init__ (
87101 self ,
88- reactor ,
89- proxy_reactor = None ,
102+ reactor : IReactorCore ,
103+ proxy_reactor : Optional [ ISynapseReactor ] = None ,
90104 contextFactory : Optional [IPolicyForHTTPS ] = None ,
91- connectTimeout = None ,
92- bindAddress = None ,
93- pool = None ,
94- use_proxy = False ,
105+ connectTimeout : Optional [ float ] = None ,
106+ bindAddress : Optional [ bytes ] = None ,
107+ pool : Optional [ HTTPConnectionPool ] = None ,
108+ use_proxy : bool = False ,
95109 ):
96110 contextFactory = contextFactory or BrowserLikePolicyForHTTPS ()
97111
@@ -102,7 +116,7 @@ def __init__(
102116 else :
103117 self .proxy_reactor = proxy_reactor
104118
105- self ._endpoint_kwargs = {}
119+ self ._endpoint_kwargs : Dict [ str , Any ] = {}
106120 if connectTimeout is not None :
107121 self ._endpoint_kwargs ["timeout" ] = connectTimeout
108122 if bindAddress is not None :
@@ -117,24 +131,26 @@ def __init__(
117131 https_proxy = proxies ["https" ].encode () if "https" in proxies else None
118132 no_proxy = proxies ["no" ] if "no" in proxies else None
119133
120- # Parse credentials from http and https proxy connection string if present
121- self .http_proxy_creds , http_proxy = parse_username_password (http_proxy )
122- self .https_proxy_creds , https_proxy = parse_username_password (https_proxy )
123-
124- self .http_proxy_endpoint = _http_proxy_endpoint (
125- http_proxy , self .proxy_reactor , ** self ._endpoint_kwargs
134+ self .http_proxy_endpoint , self .http_proxy_creds = _http_proxy_endpoint (
135+ http_proxy , self .proxy_reactor , contextFactory , ** self ._endpoint_kwargs
126136 )
127137
128- self .https_proxy_endpoint = _http_proxy_endpoint (
129- https_proxy , self .proxy_reactor , ** self ._endpoint_kwargs
138+ self .https_proxy_endpoint , self . https_proxy_creds = _http_proxy_endpoint (
139+ https_proxy , self .proxy_reactor , contextFactory , ** self ._endpoint_kwargs
130140 )
131141
132142 self .no_proxy = no_proxy
133143
134144 self ._policy_for_https = contextFactory
135145 self ._reactor = reactor
136146
137- def request (self , method , uri , headers = None , bodyProducer = None ):
147+ def request (
148+ self ,
149+ method : bytes ,
150+ uri : bytes ,
151+ headers : Optional [Headers ] = None ,
152+ bodyProducer : Optional [IBodyProducer ] = None ,
153+ ) -> defer .Deferred :
138154 """
139155 Issue a request to the server indicated by the given uri.
140156
@@ -146,16 +162,15 @@ def request(self, method, uri, headers=None, bodyProducer=None):
146162 See also: twisted.web.iweb.IAgent.request
147163
148164 Args:
149- method (bytes) : The request method to use, such as `GET`, `POST`, etc
165+ method: The request method to use, such as `GET`, `POST`, etc
150166
151- uri (bytes) : The location of the resource to request.
167+ uri: The location of the resource to request.
152168
153- headers (Headers|None) : Extra headers to send with the request
169+ headers: Extra headers to send with the request
154170
155- bodyProducer (IBodyProducer|None): An object which can generate bytes to
156- make up the body of this request (for example, the properly encoded
157- contents of a file for a file upload). Or, None if the request is to
158- have no body.
171+ bodyProducer: An object which can generate bytes to make up the body of
172+ this request (for example, the properly encoded contents of a file for
173+ a file upload). Or, None if the request is to have no body.
159174
160175 Returns:
161176 Deferred[IResponse]: completes when the header of the response has
@@ -253,70 +268,89 @@ def request(self, method, uri, headers=None, bodyProducer=None):
253268 )
254269
255270
def _http_proxy_endpoint(
    proxy: Optional[bytes],
    reactor: IReactorCore,
    tls_options_factory: IPolicyForHTTPS,
    **kwargs: object,
) -> Tuple[Optional[IStreamClientEndpoint], Optional[ProxyCredentials]]:
    """Parses an http proxy setting and returns an endpoint for the proxy

    Args:
        proxy: the proxy setting in the form:
            [scheme://][<username>:<password>@]<host>[:<port>]
            This currently supports http:// and https:// proxies.
            A hostname without scheme is assumed to be http.

        reactor: reactor to be used to connect to the proxy

        tls_options_factory: the TLS options to use when connecting through
            a https proxy

        kwargs: other args to be passed to HostnameEndpoint

    Returns:
        a tuple of
            the endpoint to use to connect to the proxy, or None if `proxy`
                is None
            the ProxyCredentials parsed from `proxy`, or None if no
                credentials were found

    Raises:
        ValueError if proxy has no hostname or unsupported scheme.
    """
    if proxy is None:
        return None, None

    # The setting may be scheme-less (e.g. host:port), which urlsplit/urlparse
    # do not handle on their own (for Python 3.9+); parse_proxy takes care of
    # that and validates the scheme and hostname.
    scheme, host, port, credentials = parse_proxy(proxy)

    proxy_endpoint = HostnameEndpoint(reactor, host, port, **kwargs)

    if scheme == b"https":
        # The connection to the proxy itself is made over TLS.
        tls_options = tls_options_factory.creatorForNetloc(host, port)
        proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)

    return proxy_endpoint, credentials
def parse_proxy(
    proxy: bytes, default_scheme: bytes = b"http", default_port: int = 1080
) -> Tuple[bytes, bytes, int, Optional["ProxyCredentials"]]:
    """
    Parse a proxy connection string.

    Given a HTTP proxy URL, breaks it down into components and checks that it
    has a hostname (otherwise it is not useful to us when trying to find a
    proxy) and checks that the URL has a scheme we support.

    Args:
        proxy: The proxy connection string. Must be in the form
            '[scheme://][<username>:<password>@]host[:port]'.
        default_scheme: The default scheme to return if one is not found in
            `proxy`. Defaults to http
        default_port: The default port to return if one is not found in
            `proxy`. Defaults to 1080

    Returns:
        A tuple containing the scheme, hostname, port and ProxyCredentials.
        Credentials are only returned when both a username and a password are
        present; otherwise the ProxyCredentials instance is replaced with None.

    Raises:
        ValueError if proxy has no hostname, an unsupported scheme, or a port
        that is not a valid integer.
    """
    # First check if we have a scheme present.
    # Note: urlsplit/urlparse cannot be used as-is (for Python 3.9+) on
    # scheme-less proxies, e.g. host:port, so prepend the default scheme.
    if b"://" not in proxy:
        proxy = b"".join([default_scheme, b"://", proxy])

    url = urlparse(proxy)

    if not url.hostname:
        raise ValueError("Proxy URL did not contain a hostname! Please specify one.")

    if url.scheme not in (b"http", b"https"):
        # The parsed components are bytes; decode the scheme so that the message
        # reads e.g. `ftp` rather than the repr `b'ftp'`.
        scheme = url.scheme.decode("ascii", errors="replace")
        raise ValueError(
            f"Unknown proxy scheme {scheme}; only 'http' and 'https' is supported."
        )

    credentials = None
    if url.username and url.password:
        credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))

    # Accessing `url.port` raises ValueError if the port is not a valid integer.
    return url.scheme, url.hostname, url.port or default_port, credentials
0 commit comments