diff --git a/.coveragerc b/.coveragerc index 3d57cf76..f7570d40 100644 --- a/.coveragerc +++ b/.coveragerc @@ -4,7 +4,6 @@ omit = hyper/compat.py hyper/httplib_compat.py hyper/ssl_compat.py - hyper/packages/* [report] fail_under = 100 diff --git a/.travis.yml b/.travis.yml index 18b52dbf..4ea96d6f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ matrix: install: - ".travis/install.sh" -before_script: "flake8 --max-complexity 15 --exclude 'hyper/packages/*' hyper test" +before_script: "flake8 --max-complexity 15 hyper test" script: - ".travis/run.sh" diff --git a/hyper/common/util.py b/hyper/common/util.py index a2278b54..2f286e10 100644 --- a/hyper/common/util.py +++ b/hyper/common/util.py @@ -8,7 +8,7 @@ from enum import Enum from hyper.compat import unicode, bytes, imap -from ..packages.rfc3986.uri import URIReference +from rfc3986 import URIReference from ..compat import is_py3 diff --git a/hyper/packages/__init__.py b/hyper/packages/__init__.py deleted file mode 100644 index 4cf1e653..00000000 --- a/hyper/packages/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# -*- coding: utf-8 -*- -""" -hyper/packages -~~~~~~~~~~~~~~ - -This module contains external packages that are vendored into hyper. -""" diff --git a/hyper/packages/rfc3986/LICENSE b/hyper/packages/rfc3986/LICENSE deleted file mode 100644 index 72ce24cf..00000000 --- a/hyper/packages/rfc3986/LICENSE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright 2014 Ian Cordasco, Rackspace - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/hyper/packages/rfc3986/__init__.py b/hyper/packages/rfc3986/__init__.py deleted file mode 100644 index a3aea4c4..00000000 --- a/hyper/packages/rfc3986/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -rfc3986 -======= - -An implementation of semantics and validations described in RFC 3986. See -http://rfc3986.rtfd.org/ for documentation. - -:copyright: (c) 2014 Rackspace -:license: Apache v2.0, see LICENSE for details -""" - -__title__ = 'rfc3986' -__author__ = 'Ian Cordasco' -__author_email__ = 'ian.cordasco@rackspace.com' -__license__ = 'Apache v2.0' -__copyright__ = 'Copyright 2014 Rackspace' -__version__ = '0.3.0' - -from .api import (URIReference, uri_reference, is_valid_uri, normalize_uri, - urlparse) -from .parseresult import ParseResult - -__all__ = ( - 'ParseResult', - 'URIReference', - 'is_valid_uri', - 'normalize_uri', - 'uri_reference', - 'urlparse', -) diff --git a/hyper/packages/rfc3986/api.py b/hyper/packages/rfc3986/api.py deleted file mode 100644 index 3e9e401a..00000000 --- a/hyper/packages/rfc3986/api.py +++ /dev/null @@ -1,92 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -rfc3986.api -~~~~~~~~~~~ - -This defines the simple API to rfc3986. This module defines 3 functions and -provides access to the class ``URIReference``. -""" - -from .uri import URIReference -from .parseresult import ParseResult - - -def uri_reference(uri, encoding='utf-8'): - """Parse a URI string into a URIReference. - - This is a convenience function. You could achieve the same end by using - ``URIReference.from_string(uri)``. - - :param str uri: The URI which needs to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: A parsed URI - :rtype: :class:`URIReference` - """ - return URIReference.from_string(uri, encoding) - - -def is_valid_uri(uri, encoding='utf-8', **kwargs): - """Determine if the URI given is valid. - - This is a convenience function. You could use either - ``uri_reference(uri).is_valid()`` or - ``URIReference.from_string(uri).is_valid()`` to achieve the same result. - - :param str uri: The URI to be validated. - :param str encoding: The encoding of the string provided - :param bool require_scheme: Set to ``True`` if you wish to require the - presence of the scheme component. - :param bool require_authority: Set to ``True`` if you wish to require the - presence of the authority component. - :param bool require_path: Set to ``True`` if you wish to require the - presence of the path component. - :param bool require_query: Set to ``True`` if you wish to require the - presence of the query component. - :param bool require_fragment: Set to ``True`` if you wish to require the - presence of the fragment component. - :returns: ``True`` if the URI is valid, ``False`` otherwise. - :rtype: bool - """ - return URIReference.from_string(uri, encoding).is_valid(**kwargs) - - -def normalize_uri(uri, encoding='utf-8'): - """Normalize the given URI. - - This is a convenience function. You could use either - ``uri_reference(uri).normalize().unsplit()`` or - ``URIReference.from_string(uri).normalize().unsplit()`` instead. - - :param str uri: The URI to be normalized. - :param str encoding: The encoding of the string provided - :returns: The normalized URI. - :rtype: str - """ - normalized_reference = URIReference.from_string(uri, encoding).normalize() - return normalized_reference.unsplit() - - -def urlparse(uri, encoding='utf-8'): - """Parse a given URI and return a ParseResult. - - This is a partial replacement of the standard library's urlparse function. - - :param str uri: The URI to be parsed. - :param str encoding: The encoding of the string provided. - :returns: A parsed URI - :rtype: :class:`~rfc3986.parseresult.ParseResult` - """ - return ParseResult.from_string(uri, encoding, strict=False) diff --git a/hyper/packages/rfc3986/compat.py b/hyper/packages/rfc3986/compat.py deleted file mode 100644 index 6fc7f6d8..00000000 --- a/hyper/packages/rfc3986/compat.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys - - -if sys.version_info >= (3, 0): - unicode = str # Python 3.x - - -def to_str(b, encoding): - if hasattr(b, 'decode') and not isinstance(b, unicode): - b = b.decode('utf-8') - return b - - -def to_bytes(s, encoding): - if hasattr(s, 'encode') and not isinstance(s, bytes): - s = s.encode('utf-8') - return s diff --git a/hyper/packages/rfc3986/exceptions.py b/hyper/packages/rfc3986/exceptions.py deleted file mode 100644 index f9adbde7..00000000 --- a/hyper/packages/rfc3986/exceptions.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- -class RFC3986Exception(Exception): - pass - - -class InvalidAuthority(RFC3986Exception): - def __init__(self, authority): - super(InvalidAuthority, self).__init__( - "The authority ({0}) is not valid.".format(authority)) - - -class InvalidPort(RFC3986Exception): - def __init__(self, port): - super(InvalidPort, self).__init__( - 'The port ("{0}") is not valid.'.format(port)) - - -class ResolutionError(RFC3986Exception): - def __init__(self, uri): - super(ResolutionError, self).__init__( - "{0} is not an absolute URI.".format(uri.unsplit())) diff --git a/hyper/packages/rfc3986/misc.py b/hyper/packages/rfc3986/misc.py deleted file mode 100644 index c599434c..00000000 --- a/hyper/packages/rfc3986/misc.py +++ /dev/null @@ -1,214 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -rfc3986.misc -~~~~~~~~~~~~ - -This module contains important constants, patterns, and compiled regular -expressions for parsing and validating URIs and their components. -""" - -import re - -# These are enumerated for the named tuple used as a superclass of -# URIReference -URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment'] - -important_characters = { - 'generic_delimiters': ":/?#[]@", - 'sub_delimiters': "!$&'()*+,;=", - # We need to escape the '*' in this case - 're_sub_delimiters': "!$&'()\*+,;=", - 'unreserved_chars': ('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' - '0123456789._~-'), - # We need to escape the '-' in this case: - 're_unreserved': 'A-Za-z0-9._~\-', - } -# For details about delimiters and reserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.2 -GENERIC_DELIMITERS = set(important_characters['generic_delimiters']) -SUB_DELIMITERS = set(important_characters['sub_delimiters']) -RESERVED_CHARS = GENERIC_DELIMITERS.union(SUB_DELIMITERS) -# For details about unreserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED_CHARS = set(important_characters['unreserved_chars']) -NON_PCT_ENCODED = RESERVED_CHARS.union(UNRESERVED_CHARS).union('%') - -# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B -component_pattern_dict = { - 'scheme': '[^:/?#]+', - 'authority': '[^/?#]*', - 'path': '[^?#]*', - 'query': '[^#]*', - 'fragment': '.*', - } - -# See http://tools.ietf.org/html/rfc3986#appendix-B -# In this case, we name each of the important matches so we can use -# SRE_Match#groupdict to parse the values out if we so choose. This is also -# modified to ignore other matches that are not important to the parsing of -# the reference so we can also simply use SRE_Match#groups. -expression = ('(?:(?P{scheme}):)?(?://(?P{authority}))?' - '(?P{path})(?:\?(?P{query}))?' - '(?:#(?P{fragment}))?' - ).format(**component_pattern_dict) - -URI_MATCHER = re.compile(expression) - -# ######################### -# Authority Matcher Section -# ######################### - -# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 -# The pattern for a regular name, e.g., www.google.com, api.github.com -reg_name = '(({0})*|[{1}]*)'.format( - '%[0-9A-Fa-f]{2}', - important_characters['re_sub_delimiters'] + - important_characters['re_unreserved'] - ) -# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, -ipv4 = '(\d{1,3}.){3}\d{1,3}' -# Hexadecimal characters used in each piece of an IPv6 address -hexdig = '[0-9A-Fa-f]{1,4}' -# Least-significant 32 bits of an IPv6 address -ls32 = '({hex}:{hex}|{ipv4})'.format(hex=hexdig, ipv4=ipv4) -# Substitutions into the following patterns for IPv6 patterns defined -# http://tools.ietf.org/html/rfc3986#page-20 -subs = {'hex': hexdig, 'ls32': ls32} - -# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details -# about ABNF (Augmented Backus-Naur Form) use in the comments -variations = [ - # 6( h16 ":" ) ls32 - '(%(hex)s:){6}%(ls32)s' % subs, - # "::" 5( h16 ":" ) ls32 - '::(%(hex)s:){5}%(ls32)s' % subs, - # [ h16 ] "::" 4( h16 ":" ) ls32 - '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % subs, - # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 - '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % subs, - # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 - '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % subs, - # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 - '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % subs, - # [ *4( h16 ":" ) h16 ] "::" ls32 - '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % subs, - # [ *5( h16 ":" ) h16 ] "::" h16 - '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % subs, - # [ *6( h16 ":" ) h16 ] "::" - '((%(hex)s:){0,6}%(hex)s)?::' % subs, - ] - -ipv6 = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7}))'.format(*variations) - -ipv_future = 'v[0-9A-Fa-f]+.[%s]+' % ( - important_characters['re_unreserved'] + - important_characters['re_sub_delimiters'] + - ':') - -ip_literal = '\[({0}|{1})\]'.format(ipv6, ipv_future) - -# Pattern for matching the host piece of the authority -HOST_PATTERN = '({0}|{1}|{2})'.format(reg_name, ipv4, ip_literal) - -SUBAUTHORITY_MATCHER = re.compile(( - '^(?:(?P[A-Za-z0-9_.~\-%:]+)@)?' # userinfo - '(?P{0}?)' # host - ':?(?P\d+)?$' # port - ).format(HOST_PATTERN)) - -IPv4_MATCHER = re.compile('^' + ipv4 + '$') - - -# #################### -# Path Matcher Section -# #################### - -# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information -# about the path patterns defined below. - -# Percent encoded character values -pct_encoded = '%[A-Fa-f0-9]{2}' -pchar = ('([' + important_characters['re_unreserved'] - + important_characters['re_sub_delimiters'] - + ':@]|%s)' % pct_encoded) -segments = { - 'segment': pchar + '*', - # Non-zero length segment - 'segment-nz': pchar + '+', - # Non-zero length segment without ":" - 'segment-nz-nc': pchar.replace(':', '') + '+' - } - -# Path types taken from Section 3.3 (linked above) -path_empty = '^$' -path_rootless = '%(segment-nz)s(/%(segment)s)*' % segments -path_noscheme = '%(segment-nz-nc)s(/%(segment)s)*' % segments -path_absolute = '/(%s)?' % path_rootless -path_abempty = '(/%(segment)s)*' % segments - -# Matcher used to validate path components -PATH_MATCHER = re.compile('^(%s|%s|%s|%s|%s)$' % ( - path_abempty, path_absolute, path_noscheme, path_rootless, path_empty - )) - - -# ################################## -# Query and Fragment Matcher Section -# ################################## - -QUERY_MATCHER = re.compile( - '^([/?:@' + important_characters['re_unreserved'] - + important_characters['re_sub_delimiters'] - + ']|%s)*$' % pct_encoded) - -FRAGMENT_MATCHER = QUERY_MATCHER - -# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 -SCHEME_MATCHER = re.compile('^[A-Za-z][A-Za-z0-9+.\-]*$') - -# Relative reference matcher - -# See http://tools.ietf.org/html/rfc3986#section-4.2 for details -relative_part = '(//%s%s|%s|%s|%s)' % ( - component_pattern_dict['authority'], path_abempty, path_absolute, - path_noscheme, path_empty - ) - -RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % ( - relative_part, QUERY_MATCHER.pattern, FRAGMENT_MATCHER.pattern - )) - -# See http://tools.ietf.org/html/rfc3986#section-3 for definition -hier_part = '(//%s%s|%s|%s|%s)' % ( - component_pattern_dict['authority'], path_abempty, path_absolute, - path_rootless, path_empty - ) - -# See http://tools.ietf.org/html/rfc3986#section-4.3 -ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)?$' % ( - component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern[1:-1] - )) - - -# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 -def merge_paths(base_uri, relative_path): - """Merge a base URI's path with a relative URI's path.""" - if base_uri.path is None and base_uri.authority is not None: - return '/' + relative_path - else: - path = base_uri.path or '' - index = path.rfind('/') - return path[:index] + '/' + relative_path diff --git a/hyper/packages/rfc3986/normalizers.py b/hyper/packages/rfc3986/normalizers.py deleted file mode 100644 index bb0630cb..00000000 --- a/hyper/packages/rfc3986/normalizers.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import re - -from .compat import to_bytes -from .misc import NON_PCT_ENCODED - - -def normalize_scheme(scheme): - return scheme.lower() - - -def normalize_authority(authority): - userinfo, host, port = authority - result = '' - if userinfo: - result += normalize_percent_characters(userinfo) + '@' - if host: - result += host.lower() - if port: - result += ':' + port - return result - - -def normalize_path(path): - if not path: - return path - - path = normalize_percent_characters(path) - return remove_dot_segments(path) - - -def normalize_query(query): - return normalize_percent_characters(query) - - -def normalize_fragment(fragment): - return normalize_percent_characters(fragment) - - -PERCENT_MATCHER = re.compile('%[A-Fa-f0-9]{2}') - - -def normalize_percent_characters(s): - """All percent characters should be upper-cased. - - For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``. - """ - matches = set(PERCENT_MATCHER.findall(s)) - for m in matches: - if not m.isupper(): - s = s.replace(m, m.upper()) - return s - - -def remove_dot_segments(s): - # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code - segments = s.split('/') # Turn the path into a list of segments - output = [] # Initialize the variable to use to store output - - for segment in segments: - # '.' is the current directory, so ignore it, it is superfluous - if segment == '.': - continue - # Anything other than '..', should be appended to the output - elif segment != '..': - output.append(segment) - # In this case segment == '..', if we can, we should pop the last - # element - elif output: - output.pop() - - # If the path starts with '/' and the output is empty or the first string - # is non-empty - if s.startswith('/') and (not output or output[0]): - output.insert(0, '') - - # If the path starts with '/.' or '/..' ensure we add one more empty - # string to add a trailing '/' - if s.endswith(('/.', '/..')): - output.append('') - - return '/'.join(output) - - -def encode_component(uri_component, encoding): - if uri_component is None: - return uri_component - - uri_bytes = to_bytes(uri_component, encoding) - - encoded_uri = bytearray() - - for i in range(0, len(uri_bytes)): - # Will return a single character bytestring on both Python 2 & 3 - byte = uri_bytes[i:i+1] - byte_ord = ord(byte) - if byte_ord < 128 and byte.decode() in NON_PCT_ENCODED: - encoded_uri.extend(byte) - continue - encoded_uri.extend('%{0:02x}'.format(byte_ord).encode()) - - return encoded_uri.decode(encoding) diff --git a/hyper/packages/rfc3986/parseresult.py b/hyper/packages/rfc3986/parseresult.py deleted file mode 100644 index 2def55b6..00000000 --- a/hyper/packages/rfc3986/parseresult.py +++ /dev/null @@ -1,303 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2015 Ian Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import namedtuple - -from . import compat -from . import exceptions -from . import normalizers -from . import uri - -__all__ = ('ParseResult', 'ParseResultBytes') - -PARSED_COMPONENTS = ('scheme', 'userinfo', 'host', 'port', 'path', 'query', - 'fragment') - - -class ParseResultMixin(object): - def _generate_authority(self, attributes): - # I swear I did not align the comparisons below. That's just how they - # happened to align based on pep8 and attribute lengths. - userinfo, host, port = (attributes[p] - for p in ('userinfo', 'host', 'port')) - if (self.userinfo != userinfo or - self.host != host or - self.port != port): - if port: - port = '{0}'.format(port) - return normalizers.normalize_authority( - (compat.to_str(userinfo, self.encoding), - compat.to_str(host, self.encoding), - port) - ) - return self.authority - - def geturl(self): - """Standard library shim to the unsplit method.""" - return self.unsplit() - - @property - def hostname(self): - """Standard library shim for the host portion of the URI.""" - return self.host - - @property - def netloc(self): - """Standard library shim for the authority portion of the URI.""" - return self.authority - - @property - def params(self): - """Standard library shim for the query portion of the URI.""" - return self.query - - -class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), - ParseResultMixin): - slots = () - - def __new__(cls, scheme, userinfo, host, port, path, query, fragment, - uri_ref, encoding='utf-8'): - parse_result = super(ParseResult, cls).__new__( - cls, - scheme or None, - userinfo or None, - host, - port or None, - path or None, - query or None, - fragment or None) - parse_result.encoding = encoding - parse_result.reference = uri_ref - return parse_result - - @classmethod - def from_string(cls, uri_string, encoding='utf-8', strict=True): - """Parse a URI from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :param bool strict: Parse strictly according to :rfc:`3986` if True. - If False, parse similarly to the standard library's urlparse - function. - :returns: :class:`ParseResult` or subclass thereof - """ - reference = uri.URIReference.from_string(uri_string, encoding) - try: - subauthority = reference.authority_info() - except exceptions.InvalidAuthority: - if strict: - raise - userinfo, host, port = split_authority(reference.authority) - else: - # Thanks to Richard Barrell for this idea: - # https://twitter.com/0x2ba22e11/status/617338811975139328 - userinfo, host, port = (subauthority.get(p) - for p in ('userinfo', 'host', 'port')) - - if port: - try: - port = int(port) - except ValueError: - raise exceptions.InvalidPort(port) - - return cls(scheme=reference.scheme, - userinfo=userinfo, - host=host, - port=port, - path=reference.path, - query=reference.query, - fragment=reference.fragment, - uri_ref=reference, - encoding=encoding) - - @property - def authority(self): - """Normalized authority generated from the subauthority parts.""" - return self.reference.authority - - def copy_with(self, scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment=None): - attributes = zip(PARSED_COMPONENTS, - (scheme, userinfo, host, port, path, query, fragment)) - attrs_dict = {} - for name, value in attributes: - if value is None: - value = getattr(self, name) - attrs_dict[name] = value - authority = self._generate_authority(attrs_dict) - ref = self.reference.copy_with(scheme=attrs_dict['scheme'], - authority=authority, - path=attrs_dict['path'], - query=attrs_dict['query'], - fragment=attrs_dict['fragment']) - return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict) - - def encode(self, encoding=None): - encoding = encoding or self.encoding - attrs = dict( - zip(PARSED_COMPONENTS, - (attr.encode(encoding) if hasattr(attr, 'encode') else attr - for attr in self))) - return ParseResultBytes( - uri_ref=self.reference, - encoding=encoding, - **attrs - ) - - def unsplit(self, use_idna=False): - """Create a URI string from the components. - - :returns: The parsed URI reconstituted as a string. - :rtype: str - """ - parse_result = self - if use_idna and self.host: - hostbytes = self.host.encode('idna') - host = hostbytes.decode(self.encoding) - parse_result = self.copy_with(host=host) - return parse_result.reference.unsplit() - - -class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS), - ParseResultMixin): - def __new__(cls, scheme, userinfo, host, port, path, query, fragment, - uri_ref, encoding='utf-8'): - parse_result = super(ParseResultBytes, cls).__new__( - cls, - scheme or None, - userinfo or None, - host, - port or None, - path or None, - query or None, - fragment or None) - parse_result.encoding = encoding - parse_result.reference = uri_ref - return parse_result - - @classmethod - def from_string(cls, uri_string, encoding='utf-8', strict=True): - """Parse a URI from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :param bool strict: Parse strictly according to :rfc:`3986` if True. - If False, parse similarly to the standard library's urlparse - function. - :returns: :class:`ParseResultBytes` or subclass thereof - """ - reference = uri.URIReference.from_string(uri_string, encoding) - try: - subauthority = reference.authority_info() - except exceptions.InvalidAuthority: - if strict: - raise - userinfo, host, port = split_authority(reference.authority) - else: - # Thanks to Richard Barrell for this idea: - # https://twitter.com/0x2ba22e11/status/617338811975139328 - userinfo, host, port = (subauthority.get(p) - for p in ('userinfo', 'host', 'port')) - - if port: - try: - port = int(port) - except ValueError: - raise exceptions.InvalidPort(port) - - to_bytes = compat.to_bytes - return cls(scheme=to_bytes(reference.scheme, encoding), - userinfo=to_bytes(userinfo, encoding), - host=to_bytes(host, encoding), - port=port, - path=to_bytes(reference.path, encoding), - query=to_bytes(reference.query, encoding), - fragment=to_bytes(reference.fragment, encoding), - uri_ref=reference, - encoding=encoding) - - @property - def authority(self): - """Normalized authority generated from the subauthority parts.""" - return self.reference.authority.encode(self.encoding) - - def copy_with(self, scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment=None): - attributes = zip(PARSED_COMPONENTS, - (scheme, userinfo, host, port, path, query, fragment)) - attrs_dict = {} - for name, value in attributes: - if value is None: - value = getattr(self, name) - if not isinstance(value, bytes) and hasattr(value, 'encode'): - value = value.encode(self.encoding) - attrs_dict[name] = value - authority = self._generate_authority(attrs_dict) - to_str = compat.to_str - ref = self.reference.copy_with( - scheme=to_str(attrs_dict['scheme'], self.encoding), - authority=authority, - path=to_str(attrs_dict['path'], self.encoding), - query=to_str(attrs_dict['query'], self.encoding), - fragment=to_str(attrs_dict['fragment'], self.encoding) - ) - return ParseResultBytes( - uri_ref=ref, - encoding=self.encoding, - **attrs_dict - ) - - def unsplit(self, use_idna=False): - """Create a URI bytes object from the components. - - :returns: The parsed URI reconstituted as a string. - :rtype: bytes - """ - parse_result = self - if use_idna and self.host: - # self.host is bytes, to encode to idna, we need to decode it - # first - host = self.host.decode(self.encoding) - hostbytes = host.encode('idna') - parse_result = self.copy_with(host=hostbytes) - uri = parse_result.reference.unsplit() - return uri.encode(self.encoding) - - -def split_authority(authority): - # Initialize our expected return values - userinfo = host = port = None - # Initialize an extra var we may need to use - extra_host = None - # Set-up rest in case there is no userinfo portion - rest = authority - - if '@' in authority: - userinfo, rest = authority.rsplit('@', 1) - - # Handle IPv6 host addresses - if rest.startswith('['): - host, rest = rest.split(']', 1) - host += ']' - - if ':' in rest: - extra_host, port = rest.split(':', 1) - elif not host and rest: - host = rest - - if extra_host and not host: - host = extra_host - - return userinfo, host, port diff --git a/hyper/packages/rfc3986/uri.py b/hyper/packages/rfc3986/uri.py deleted file mode 100644 index b7f5ccb7..00000000 --- a/hyper/packages/rfc3986/uri.py +++ /dev/null @@ -1,385 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Copyright (c) 2015 Ian Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import namedtuple - -from .compat import to_str -from .exceptions import InvalidAuthority, ResolutionError -from .misc import ( - ABSOLUTE_URI_MATCHER, FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER, - QUERY_MATCHER, SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER, - URI_COMPONENTS, merge_paths - ) -from .normalizers import ( - encode_component, normalize_scheme, normalize_authority, normalize_path, - normalize_query, normalize_fragment - ) - - -class URIReference(namedtuple('URIReference', URI_COMPONENTS)): - slots = () - - def __new__(cls, scheme, authority, path, query, fragment, - encoding='utf-8'): - ref = super(URIReference, cls).__new__( - cls, - scheme or None, - authority or None, - path or None, - query or None, - fragment or None) - ref.encoding = encoding - return ref - - def __eq__(self, other): - other_ref = other - if isinstance(other, tuple): - other_ref = URIReference(*other) - elif not isinstance(other, URIReference): - try: - other_ref = URIReference.from_string(other) - except TypeError: - raise TypeError( - 'Unable to compare URIReference() to {0}()'.format( - type(other).__name__)) - - # See http://tools.ietf.org/html/rfc3986#section-6.2 - naive_equality = tuple(self) == tuple(other_ref) - return naive_equality or self.normalized_equality(other_ref) - - @classmethod - def from_string(cls, uri_string, encoding='utf-8'): - """Parse a URI reference from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: :class:`URIReference` or subclass thereof - """ - uri_string = to_str(uri_string, encoding) - - split_uri = URI_MATCHER.match(uri_string).groupdict() - return cls(split_uri['scheme'], split_uri['authority'], - encode_component(split_uri['path'], encoding), - encode_component(split_uri['query'], encoding), - encode_component(split_uri['fragment'], encoding), encoding) - - def authority_info(self): - """Returns a dictionary with the ``userinfo``, ``host``, and ``port``. - - If the authority is not valid, it will raise a ``InvalidAuthority`` - Exception. - - :returns: - ``{'userinfo': 'username:password', 'host': 'www.example.com', - 'port': '80'}`` - :rtype: dict - :raises InvalidAuthority: If the authority is not ``None`` and can not - be parsed. - """ - if not self.authority: - return {'userinfo': None, 'host': None, 'port': None} - - match = SUBAUTHORITY_MATCHER.match(self.authority) - - if match is None: - # In this case, we have an authority that was parsed from the URI - # Reference, but it cannot be further parsed by our - # SUBAUTHORITY_MATCHER. In this case it must not be a valid - # authority. - raise InvalidAuthority(self.authority.encode(self.encoding)) - - # We had a match, now let's ensure that it is actually a valid host - # address if it is IPv4 - matches = match.groupdict() - host = matches.get('host') - - if (host and IPv4_MATCHER.match(host) and not - valid_ipv4_host_address(host)): - # If we have a host, it appears to be IPv4 and it does not have - # valid bytes, it is an InvalidAuthority. - raise InvalidAuthority(self.authority.encode(self.encoding)) - - return matches - - @property - def host(self): - """If present, a string representing the host.""" - try: - authority = self.authority_info() - except InvalidAuthority: - return None - return authority['host'] - - @property - def port(self): - """If present, the port (as a string) extracted from the authority.""" - try: - authority = self.authority_info() - except InvalidAuthority: - return None - return authority['port'] - - @property - def userinfo(self): - """If present, the userinfo extracted from the authority.""" - try: - authority = self.authority_info() - except InvalidAuthority: - return None - return authority['userinfo'] - - def is_absolute(self): - """Determine if this URI Reference is an absolute URI. - - See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation. - - :returns: ``True`` if it is an absolute URI, ``False`` otherwise. - :rtype: bool - """ - return bool(ABSOLUTE_URI_MATCHER.match(self.unsplit())) - - def is_valid(self, **kwargs): - """Determines if the URI is valid. - - :param bool require_scheme: Set to ``True`` if you wish to require the - presence of the scheme component. - :param bool require_authority: Set to ``True`` if you wish to require - the presence of the authority component. - :param bool require_path: Set to ``True`` if you wish to require the - presence of the path component. - :param bool require_query: Set to ``True`` if you wish to require the - presence of the query component. - :param bool require_fragment: Set to ``True`` if you wish to require - the presence of the fragment component. - :returns: ``True`` if the URI is valid. ``False`` otherwise. - :rtype: bool - """ - validators = [ - (self.scheme_is_valid, kwargs.get('require_scheme', False)), - (self.authority_is_valid, kwargs.get('require_authority', False)), - (self.path_is_valid, kwargs.get('require_path', False)), - (self.query_is_valid, kwargs.get('require_query', False)), - (self.fragment_is_valid, kwargs.get('require_fragment', False)), - ] - return all(v(r) for v, r in validators) - - def _is_valid(self, value, matcher, require): - if require: - return (value is not None - and matcher.match(value)) - - # require is False and value is not None - return value is None or matcher.match(value) - - def authority_is_valid(self, require=False): - """Determines if the authority component is valid. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the authority is valid. ``False`` otherwise. - :rtype: bool - """ - try: - self.authority_info() - except InvalidAuthority: - return False - - is_valid = self._is_valid(self.authority, - SUBAUTHORITY_MATCHER, - require) - - # Ensure that IPv4 addresses have valid bytes - if is_valid and self.host and IPv4_MATCHER.match(self.host): - return valid_ipv4_host_address(self.host) - - # Perhaps the host didn't exist or if it did, it wasn't an IPv4-like - # address. In either case, we want to rely on the `_is_valid` check, - # so let's return that. - return is_valid - - def scheme_is_valid(self, require=False): - """Determines if the scheme component is valid. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the scheme is valid. ``False`` otherwise. - :rtype: bool - """ - return self._is_valid(self.scheme, SCHEME_MATCHER, require) - - def path_is_valid(self, require=False): - """Determines if the path component is valid. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the path is valid. ``False`` otherwise. - :rtype: bool - """ - return self._is_valid(self.path, PATH_MATCHER, require) - - def query_is_valid(self, require=False): - """Determines if the query component is valid. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the query is valid. ``False`` otherwise. - :rtype: bool - """ - return self._is_valid(self.query, QUERY_MATCHER, require) - - def fragment_is_valid(self, require=False): - """Determines if the fragment component is valid. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the fragment is valid. ``False`` otherwise. - :rtype: bool - """ - return self._is_valid(self.fragment, FRAGMENT_MATCHER, require) - - def normalize(self): - """Normalize this reference as described in Section 6.2.2 - - This is not an in-place normalization. Instead this creates a new - URIReference. - - :returns: A new reference object with normalized components. - :rtype: URIReference - """ - # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in - # this method. - return URIReference(normalize_scheme(self.scheme or ''), - normalize_authority( - (self.userinfo, self.host, self.port)), - normalize_path(self.path or ''), - normalize_query(self.query or ''), - normalize_fragment(self.fragment or '')) - - def normalized_equality(self, other_ref): - """Compare this URIReference to another URIReference. - - :param URIReference other_ref: (required), The reference with which - we're comparing. - :returns: ``True`` if the references are equal, ``False`` otherwise. - :rtype: bool - """ - return tuple(self.normalize()) == tuple(other_ref.normalize()) - - def resolve_with(self, base_uri, strict=False): - """Use an absolute URI Reference to resolve this relative reference. - - Assuming this is a relative reference that you would like to resolve, - use the provided base URI to resolve it. - - See http://tools.ietf.org/html/rfc3986#section-5 for more information. - - :param base_uri: Either a string or URIReference. It must be an - absolute URI or it will raise an exception. - :returns: A new URIReference which is the result of resolving this - reference using ``base_uri``. - :rtype: :class:`URIReference` - :raises ResolutionError: If the ``base_uri`` is not an absolute URI. - """ - if not isinstance(base_uri, URIReference): - base_uri = URIReference.from_string(base_uri) - - if not base_uri.is_absolute(): - raise ResolutionError(base_uri) - - # This is optional per - # http://tools.ietf.org/html/rfc3986#section-5.2.1 - base_uri = base_uri.normalize() - - # The reference we're resolving - resolving = self - - if not strict and resolving.scheme == base_uri.scheme: - resolving = resolving.copy_with(scheme=None) - - # http://tools.ietf.org/html/rfc3986#page-32 - if resolving.scheme is not None: - target = resolving.copy_with(path=normalize_path(resolving.path)) - else: - if resolving.authority is not None: - target = resolving.copy_with( - scheme=base_uri.scheme, - path=normalize_path(resolving.path) - ) - else: - if resolving.path is None: - if resolving.query is not None: - query = resolving.query - else: - query = base_uri.query - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=base_uri.path, - query=query - ) - else: - if resolving.path.startswith('/'): - path = normalize_path(resolving.path) - else: - path = normalize_path( - merge_paths(base_uri, resolving.path) - ) - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=path, - query=resolving.query - ) - return target - - def unsplit(self): - """Create a URI string from the components. - - :returns: The URI Reference reconstituted as a string. - :rtype: str - """ - # See http://tools.ietf.org/html/rfc3986#section-5.3 - result_list = [] - if self.scheme: - result_list.extend([self.scheme, ':']) - if self.authority: - result_list.extend(['//', self.authority]) - if self.path: - result_list.append(self.path) - if self.query: - result_list.extend(['?', self.query]) - if self.fragment: - result_list.extend(['#', self.fragment]) - return ''.join(result_list) - - def copy_with(self, scheme=None, authority=None, path=None, query=None, - fragment=None): - attributes = { - 'scheme': scheme, - 'authority': authority, - 'path': path, - 'query': query, - 'fragment': fragment, - } - for key, value in list(attributes.items()): - if value is None: - del attributes[key] - return self._replace(**attributes) - - -def valid_ipv4_host_address(host): - # If the host exists, and it might be IPv4, check each byte in the - # address. - return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')]) diff --git a/setup.py b/setup.py index 861c883a..a2578a6b 100644 --- a/setup.py +++ b/setup.py @@ -49,8 +49,6 @@ def run_tests(self): 'hyper.http20', 'hyper.common', 'hyper.http11', - 'hyper.packages', - 'hyper.packages.rfc3986' ] setup( @@ -78,7 +76,9 @@ def run_tests(self): 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: Implementation :: CPython', ], - install_requires=['h2>=2.4,<3.0,!=2.5.0', 'hyperframe>=3.2,<4.0'], + install_requires=[ + 'h2>=2.4,<3.0,!=2.5.0', 'hyperframe>=3.2,<4.0', 'rfc3986>=1.1.0,<2.0' + ], tests_require=['pytest', 'requests', 'mock'], cmdclass={'test': PyTest}, entry_points={ diff --git a/test/test_hyper.py b/test/test_hyper.py index 0556bb0c..76a68cfe 100644 --- a/test/test_hyper.py +++ b/test/test_hyper.py @@ -67,6 +67,16 @@ def test_connections_accept_proxy_hosts_and_ports(self): assert c.proxy_host == 'localhost' assert c.proxy_port == 8443 + def test_connections_can_parse_proxy_hosts_with_userinfo(self): + c = HTTP20Connection('www.google.com', + proxy_host='azAz09!==:fakepaswd@localhost:8443') + # Note that the userinfo part is getting stripped out, + # it's not automatically added as Basic Auth header to + # the proxy_headers! It should be done manually. + assert c.host == 'www.google.com' + assert c.proxy_host == 'localhost' + assert c.proxy_port == 8443 + def test_connections_can_parse_proxy_hosts_and_ports(self): c = HTTP20Connection('www.google.com', proxy_host='localhost', diff --git a/tox.ini b/tox.ini index a35f850f..311f9c97 100644 --- a/tox.ini +++ b/tox.ini @@ -14,4 +14,4 @@ commands= py.test {toxinidir}/test/ [testenv:lint] basepython=python3.5 deps = flake8==2.5.4 -commands = flake8 --max-complexity 15 --exclude "hyper/packages/*" hyper test +commands = flake8 --max-complexity 15 hyper test