From 4e72ec4ab2e554989d4ca186ccde7e9a49ff66f7 Mon Sep 17 00:00:00 2001 From: "Nathaniel J. Smith" Date: Sat, 5 May 2018 17:11:32 -0700 Subject: [PATCH] Allow more characters in header values The RFC says we should reject any header value that contains control characters. But apparently in the real world, you have to both accept and produce these sometimes (e.g. Google Analytics cookies use them). As a compromise, we now accept most control characters, but continue to disallow NUL (\x00) and all whitespace (\t\n\r\f\v and space), except that space and tab are allowed inside header values when surrounded by non-whitespace characters. Closes: gh-57, gh-58 --- docs/source/changes.rst | 5 +++++ h11/_abnf.py | 9 ++++++++- h11/tests/test_events.py | 15 +++++++++++---- h11/tests/test_headers.py | 14 +++++++++++++- h11/tests/test_io.py | 13 +++++++++++++ 5 files changed, 50 insertions(+), 6 deletions(-) diff --git a/docs/source/changes.rst b/docs/source/changes.rst index da8fe0c..1500861 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -10,6 +10,11 @@ Bug fixes: * Always return headers as ``bytes`` objects (`#60 `__) +* Allow a broader range of characters in header values. This violates + the RFC, but is apparently required for compatibility with + real-world code, like Google Analytics cookies (`#57 + `__, `#58 + `__) Other changes: diff --git a/h11/_abnf.py b/h11/_abnf.py index 57e2804..39b0c8b 100644 --- a/h11/_abnf.py +++ b/h11/_abnf.py @@ -44,7 +44,14 @@ # See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 # # So our definition of field_content attempts to fix it up... -vchar_or_obs_text = r"[\x21-\xff]" +# +# Also, we allow lots of control characters, because apparently people assume +# that they're legal in practice (e.g., google analytics makes cookies with +# \x01 in them!): +# https://github.com/python-hyper/h11/issues/57 +# We still don't allow NUL or whitespace, because those are often treated as +# meta-characters and letting them through can lead to nasty issues like SSRF. +vchar_or_obs_text = r"[^\x00\s]" field_vchar = vchar_or_obs_text field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals()) diff --git a/h11/tests/test_events.py b/h11/tests/test_events.py index adfe7bb..d5cd5fd 100644 --- a/h11/tests/test_events.py +++ b/h11/tests/test_events.py @@ -82,10 +82,17 @@ def test_events(): http_version="1.0") # Header values are validated - with pytest.raises(LocalProtocolError): - req = Request(method="GET", target="/", - headers=[("Host", "a"), ("Foo", " asd\x00")], - http_version="1.0") + for bad_char in "\x00\r\n\f\v": + with pytest.raises(LocalProtocolError): + req = Request(method="GET", target="/", + headers=[("Host", "a"), ("Foo", "asd" + bad_char)], + http_version="1.0") + + # But for compatibility we allow non-whitespace control characters, even + # though they're forbidden by the spec. + Request(method="GET", target="/", + headers=[("Host", "a"), ("Foo", "asd\x01\x02\x7f")], + http_version="1.0") ir = InformationalResponse(status_code=100, headers=[("Host", "a")]) assert ir.status_code == 100 diff --git a/h11/tests/test_headers.py b/h11/tests/test_headers.py index 39e442a..497aedd 100644 --- a/h11/tests/test_headers.py +++ b/h11/tests/test_headers.py @@ -18,6 +18,12 @@ def test_normalize_and_validate(): assert "foo bar" in str(excinfo.value) with pytest.raises(LocalProtocolError): normalize_and_validate([(b"foo\x00bar", b"baz")]) + # Not even 8-bit characters: + with pytest.raises(LocalProtocolError): + normalize_and_validate([(b"foo\xffbar", b"baz")]) + # And not even the control characters we allow in values: + with pytest.raises(LocalProtocolError): + normalize_and_validate([(b"foo\x01bar", b"baz")]) # no return or NUL characters in values with pytest.raises(LocalProtocolError) as excinfo: @@ -29,7 +35,13 @@ def test_normalize_and_validate(): normalize_and_validate([("foo", "bar\x00baz")]) # no leading/trailing whitespace with pytest.raises(LocalProtocolError): - normalize_and_validate([("foo", " barbaz ")]) + normalize_and_validate([("foo", "barbaz ")]) + with pytest.raises(LocalProtocolError): + normalize_and_validate([("foo", " barbaz")]) + with pytest.raises(LocalProtocolError): + normalize_and_validate([("foo", "barbaz\t")]) + with pytest.raises(LocalProtocolError): + normalize_and_validate([("foo", "\tbarbaz")]) # content-length assert (normalize_and_validate([("Content-Length", "1")]) diff --git a/h11/tests/test_io.py b/h11/tests/test_io.py index b2b9014..3ff07bc 100644 --- a/h11/tests/test_io.py +++ b/h11/tests/test_io.py @@ -388,6 +388,19 @@ def test_reject_garbage_in_header_line(): b"Host: foo\x00bar\r\n\r\n", None) +# https://github.com/python-hyper/h11/issues/57 +def test_allow_some_garbage_in_cookies(): + tr(READERS[CLIENT, IDLE], + b"HEAD /foo HTTP/1.1\r\n" + b"Host: foo\r\n" + b"Set-Cookie: ___utmvafIumyLc=kUd\x01UpAt; path=/; Max-Age=900\r\n" + b"\r\n", + Request(method="HEAD", target="/foo", + headers=[ + ("Host", "foo"), + ("Set-Cookie", "___utmvafIumyLc=kUd\x01UpAt; path=/; Max-Age=900"), + ])) + def test_host_comes_first(): tw(write_headers, normalize_and_validate([("foo", "bar"), ("Host", "example.com")]),