diff --git a/docs/source/changes.rst b/docs/source/changes.rst index da8fe0c..1500861 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -10,6 +10,11 @@ Bug fixes: * Always return headers as ``bytes`` objects (`#60 `__) +* Allow a broader range of characters in header values. This violates + the RFC, but is apparently required for compatibility with + real-world code, like Google Analytics cookies (`#57 + `__, `#58 + `__) Other changes: diff --git a/h11/_abnf.py b/h11/_abnf.py index 57e2804..39b0c8b 100644 --- a/h11/_abnf.py +++ b/h11/_abnf.py @@ -44,7 +44,14 @@ # See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 # # So our definition of field_content attempts to fix it up... -vchar_or_obs_text = r"[\x21-\xff]" +# +# Also, we allow lots of control characters, because apparently people assume +# that they're legal in practice (e.g., google analytics makes cookies with +# \x01 in them!): +# https://github.com/python-hyper/h11/issues/57 +# We still don't allow NUL or whitespace, because those are often treated as +# meta-characters and letting them through can lead to nasty issues like SSRF. +vchar_or_obs_text = r"[^\x00\s]" field_vchar = vchar_or_obs_text field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals()) diff --git a/h11/tests/test_events.py b/h11/tests/test_events.py index adfe7bb..d5cd5fd 100644 --- a/h11/tests/test_events.py +++ b/h11/tests/test_events.py @@ -82,10 +82,17 @@ def test_events(): http_version="1.0") # Header values are validated - with pytest.raises(LocalProtocolError): - req = Request(method="GET", target="/", - headers=[("Host", "a"), ("Foo", " asd\x00")], - http_version="1.0") + for bad_char in "\x00\r\n\f\v": + with pytest.raises(LocalProtocolError): + req = Request(method="GET", target="/", + headers=[("Host", "a"), ("Foo", "asd" + bad_char)], + http_version="1.0") + + # But for compatibility we allow non-whitespace control characters, even + # though they're forbidden by the spec. + Request(method="GET", target="/", + headers=[("Host", "a"), ("Foo", "asd\x01\x02\x7f")], + http_version="1.0") ir = InformationalResponse(status_code=100, headers=[("Host", "a")]) assert ir.status_code == 100 diff --git a/h11/tests/test_headers.py b/h11/tests/test_headers.py index 39e442a..497aedd 100644 --- a/h11/tests/test_headers.py +++ b/h11/tests/test_headers.py @@ -18,6 +18,12 @@ def test_normalize_and_validate(): assert "foo bar" in str(excinfo.value) with pytest.raises(LocalProtocolError): normalize_and_validate([(b"foo\x00bar", b"baz")]) + # Not even 8-bit characters: + with pytest.raises(LocalProtocolError): + normalize_and_validate([(b"foo\xffbar", b"baz")]) + # And not even the control characters we allow in values: + with pytest.raises(LocalProtocolError): + normalize_and_validate([(b"foo\x01bar", b"baz")]) # no return or NUL characters in values with pytest.raises(LocalProtocolError) as excinfo: @@ -29,7 +35,13 @@ def test_normalize_and_validate(): normalize_and_validate([("foo", "bar\x00baz")]) # no leading/trailing whitespace with pytest.raises(LocalProtocolError): - normalize_and_validate([("foo", " barbaz ")]) + normalize_and_validate([("foo", "barbaz ")]) + with pytest.raises(LocalProtocolError): + normalize_and_validate([("foo", " barbaz")]) + with pytest.raises(LocalProtocolError): + normalize_and_validate([("foo", "barbaz\t")]) + with pytest.raises(LocalProtocolError): + normalize_and_validate([("foo", "\tbarbaz")]) # content-length assert (normalize_and_validate([("Content-Length", "1")]) diff --git a/h11/tests/test_io.py b/h11/tests/test_io.py index b2b9014..3ff07bc 100644 --- a/h11/tests/test_io.py +++ b/h11/tests/test_io.py @@ -388,6 +388,19 @@ def test_reject_garbage_in_header_line(): b"Host: foo\x00bar\r\n\r\n", None) +# https://github.com/python-hyper/h11/issues/57 +def test_allow_some_garbage_in_cookies(): + tr(READERS[CLIENT, IDLE], + b"HEAD /foo HTTP/1.1\r\n" + b"Host: foo\r\n" + b"Set-Cookie: ___utmvafIumyLc=kUd\x01UpAt; path=/; Max-Age=900\r\n" + b"\r\n", + Request(method="HEAD", target="/foo", + headers=[ + ("Host", "foo"), + ("Set-Cookie", "___utmvafIumyLc=kUd\x01UpAt; path=/; Max-Age=900"), + ])) + def test_host_comes_first(): tw(write_headers, normalize_and_validate([("foo", "bar"), ("Host", "example.com")]),