Skip to content

Commit 1ee66de

Browse files
hrnciarencukoubasbloemsaatserhiy-storchakabsiem
committed
00435: pythongh-121650: Encode newlines in headers, and verify
headers are sound (pythonGH-122233) Per RFC 2047: > [...] these encoding schemes allow the > encoding of arbitrary octet values, mail readers that implement this > decoding should also ensure that display of the decoded data on the > recipient's terminal will not cause unwanted side-effects It seems that the "quoted-word" scheme is a valid way to include a newline character in a header value, just like we already allow undecodable bytes or control characters. They do need to be properly quoted when serialized to text, though. This should fail for custom fold() implementations that aren't careful about newlines. (cherry picked from commit 0976339) This patch also contains modified commit cherry picked from c5bba85. This commit was backported to simplify the backport of the other commit fixing CVE. The only modification is a removal of one test case which tests multiple changes in Python 3.7 and it wasn't working properly with Python 3.6 where we backported only one change. Co-authored-by: Petr Viktorin <[email protected]> Co-authored-by: Bas Bloemsaat <[email protected]> Co-authored-by: Serhiy Storchaka <[email protected]> Co-authored-by: bsiem <[email protected]>
1 parent a03f753 commit 1ee66de

11 files changed

+170
-1
lines changed

Doc/library/email.errors.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ The following exception classes are defined in the :mod:`email.errors` module:
5959
:class:`~email.mime.image.MIMEImage`).
6060

6161

62+
.. exception:: HeaderWriteError()
63+
64+
Raised when an error occurs when the :mod:`~email.generator` outputs
65+
headers.
66+
67+
6268
Here is the list of the defects that the :class:`~email.parser.FeedParser`
6369
can find while parsing messages. Note that the defects are added to the message
6470
where the problem was found, so for example, if a message nested inside a

Doc/library/email.policy.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,24 @@ added matters. To illustrate::
229229

230230
.. versionadded:: 3.6
231231

232+
233+
.. attribute:: verify_generated_headers
234+
235+
If ``True`` (the default), the generator will raise
236+
:exc:`~email.errors.HeaderWriteError` instead of writing a header
237+
that is improperly folded or delimited, such that it would
238+
be parsed as multiple headers or joined with adjacent data.
239+
Such headers can be generated by custom header classes or bugs
240+
in the ``email`` module.
241+
242+
As it's a security feature, this defaults to ``True`` even in the
243+
:class:`~email.policy.Compat32` policy.
244+
For backwards compatible, but unsafe, behavior, it must be set to
245+
``False`` explicitly.
246+
247+
.. versionadded:: 3.8.20
248+
249+
232250
The following :class:`Policy` method is intended to be called by code using
233251
the email library to create policy instances with custom settings:
234252

Lib/email/_header_value_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@
9292
ASPECIALS = TSPECIALS | set("*'%")
9393
ATTRIBUTE_ENDS = ASPECIALS | WSP
9494
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
95+
NLSET = {'\n', '\r'}
96+
SPECIALSNL = SPECIALS | NLSET
9597

9698
def quote_string(value):
9799
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
@@ -2611,6 +2613,13 @@ def _refold_parse_tree(parse_tree, *, policy):
26112613
wrap_as_ew_blocked -= 1
26122614
continue
26132615
tstr = str(part)
2616+
if not want_encoding:
2617+
if part.token_type == 'ptext':
2618+
# Encode if tstr contains special characters.
2619+
want_encoding = not SPECIALSNL.isdisjoint(tstr)
2620+
else:
2621+
# Encode if tstr contains newlines.
2622+
want_encoding = not NLSET.isdisjoint(tstr)
26142623
try:
26152624
tstr.encode(encoding)
26162625
charset = encoding

Lib/email/_policybase.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
157157
message_factory -- the class to use to create new message objects.
158158
If the value is None, the default is Message.
159159
160+
verify_generated_headers
161+
-- if true, the generator verifies that each header
162+
they are properly folded, so that a parser won't
163+
treat it as multiple headers, start-of-body, or
164+
part of another header.
165+
This is a check against custom Header & fold()
166+
implementations.
160167
"""
161168

162169
raise_on_defect = False
@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
165172
max_line_length = 78
166173
mangle_from_ = False
167174
message_factory = None
175+
verify_generated_headers = True
168176

169177
def handle_defect(self, obj, defect):
170178
"""Based on policy, either raise defect or call register_defect.

Lib/email/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ class CharsetError(MessageError):
2929
"""An illegal charset was given."""
3030

3131

32+
class HeaderWriteError(MessageError):
33+
"""Error while writing headers."""
34+
35+
3236
# These are parsing defects which the parser was able to work around.
3337
class MessageDefect(ValueError):
3438
"""Base class for a message defect."""

Lib/email/generator.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414
from copy import deepcopy
1515
from io import StringIO, BytesIO
1616
from email.utils import _has_surrogates
17+
from email.errors import HeaderWriteError
1718

1819
UNDERSCORE = '_'
1920
NL = '\n' # XXX: no longer used by the code below.
2021

2122
NLCRE = re.compile(r'\r\n|\r|\n')
2223
fcre = re.compile(r'^From ', re.MULTILINE)
24+
NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
2325

2426

2527

@@ -219,7 +221,19 @@ def _dispatch(self, msg):
219221

220222
def _write_headers(self, msg):
221223
for h, v in msg.raw_items():
222-
self.write(self.policy.fold(h, v))
224+
folded = self.policy.fold(h, v)
225+
if self.policy.verify_generated_headers:
226+
linesep = self.policy.linesep
227+
if not folded.endswith(self.policy.linesep):
228+
raise HeaderWriteError(
229+
f'folded header does not end with {linesep!r}: {folded!r}')
230+
folded_no_linesep = folded
231+
if folded.endswith(linesep):
232+
folded_no_linesep = folded[:-len(linesep)]
233+
if NEWLINE_WITHOUT_FWSP.search(folded_no_linesep):
234+
raise HeaderWriteError(
235+
f'folded header contains newline: {folded!r}')
236+
self.write(folded)
223237
# A blank line always separates headers from body
224238
self.write(self._NL)
225239

Lib/test/test_email/test_generator.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from email.message import EmailMessage
66
from email.generator import Generator, BytesGenerator
77
from email import policy
8+
import email.errors
89
from test.test_email import TestEmailBase, parameterize
910

1011

@@ -215,6 +216,44 @@ def test_rfc2231_wrapping_switches_to_default_len_if_too_narrow(self):
215216
g.flatten(msg)
216217
self.assertEqual(s.getvalue(), self.typ(expected))
217218

219+
def test_keep_encoded_newlines(self):
220+
msg = self.msgmaker(self.typ(textwrap.dedent("""\
221+
To: nobody
222+
Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: [email protected]
223+
224+
None
225+
""")))
226+
expected = textwrap.dedent("""\
227+
To: nobody
228+
Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: [email protected]
229+
230+
None
231+
""")
232+
s = self.ioclass()
233+
g = self.genclass(s, policy=self.policy.clone(max_line_length=80))
234+
g.flatten(msg)
235+
self.assertEqual(s.getvalue(), self.typ(expected))
236+
237+
def test_keep_long_encoded_newlines(self):
238+
msg = self.msgmaker(self.typ(textwrap.dedent("""\
239+
To: nobody
240+
Subject: Bad subject =?UTF-8?Q?=0A?=Bcc: [email protected]
241+
242+
None
243+
""")))
244+
expected = textwrap.dedent("""\
245+
To: nobody
246+
Subject: Bad subject \n\
247+
=?utf-8?q?=0A?=Bcc:
248+
249+
250+
None
251+
""")
252+
s = self.ioclass()
253+
g = self.genclass(s, policy=self.policy.clone(max_line_length=30))
254+
g.flatten(msg)
255+
self.assertEqual(s.getvalue(), self.typ(expected))
256+
218257

219258
class TestGenerator(TestGeneratorBase, TestEmailBase):
220259

@@ -223,6 +262,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase):
223262
ioclass = io.StringIO
224263
typ = str
225264

265+
def test_verify_generated_headers(self):
266+
"""gh-121650: by default the generator prevents header injection"""
267+
class LiteralHeader(str):
268+
name = 'Header'
269+
def fold(self, **kwargs):
270+
return self
271+
272+
for text in (
273+
'Value\r\nBad Injection\r\n',
274+
'NoNewLine'
275+
):
276+
with self.subTest(text=text):
277+
message = message_from_string(
278+
"Header: Value\r\n\r\nBody",
279+
policy=self.policy,
280+
)
281+
282+
del message['Header']
283+
message['Header'] = LiteralHeader(text)
284+
285+
with self.assertRaises(email.errors.HeaderWriteError):
286+
message.as_string()
287+
226288

227289
class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
228290

Lib/test/test_email/test_headerregistry.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1546,6 +1546,22 @@ def test_set_message_header_from_group(self):
15461546

15471547
class TestFolding(TestHeaderBase):
15481548

1549+
def test_address_display_names(self):
1550+
"""Test the folding and encoding of address headers."""
1551+
for name, result in (
1552+
('Foo Bar, France', '"Foo Bar, France"'),
1553+
('Foo Bar (France)', '"Foo Bar (France)"'),
1554+
('Foo Bar, España', 'Foo =?utf-8?q?Bar=2C_Espa=C3=B1a?='),
1555+
('Foo Bar (España)', 'Foo Bar =?utf-8?b?KEVzcGHDsWEp?='),
1556+
('Foo, Bar España', '=?utf-8?q?Foo=2C_Bar_Espa=C3=B1a?='),
1557+
('Foo, Bar [España]', '=?utf-8?q?Foo=2C_Bar_=5BEspa=C3=B1a=5D?='),
1558+
('Foo Bär, France', 'Foo =?utf-8?q?B=C3=A4r=2C?= France'),
1559+
('Foo Bär <France>', 'Foo =?utf-8?q?B=C3=A4r_=3CFrance=3E?='),
1560+
):
1561+
h = self.make_header('To', Address(name, addr_spec='[email protected]'))
1562+
self.assertEqual(h.fold(policy=policy.default),
1563+
'To: %s <[email protected]>\n' % result)
1564+
15491565
def test_short_unstructured(self):
15501566
h = self.make_header('subject', 'this is a test')
15511567
self.assertEqual(h.fold(policy=policy.default),

Lib/test/test_email/test_policy.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase):
2626
'raise_on_defect': False,
2727
'mangle_from_': True,
2828
'message_factory': None,
29+
'verify_generated_headers': True,
2930
}
3031
# These default values are the ones set on email.policy.default.
3132
# If any of these defaults change, the docs must be updated.
@@ -265,6 +266,31 @@ def test_short_maxlen_error(self):
265266
with self.assertRaises(email.errors.HeaderParseError):
266267
policy.fold("Subject", subject)
267268

269+
def test_verify_generated_headers(self):
270+
"""Turning protection off allows header injection"""
271+
policy = email.policy.default.clone(verify_generated_headers=False)
272+
for text in (
273+
'Header: Value\r\nBad: Injection\r\n',
274+
'Header: NoNewLine'
275+
):
276+
with self.subTest(text=text):
277+
message = email.message_from_string(
278+
"Header: Value\r\n\r\nBody",
279+
policy=policy,
280+
)
281+
class LiteralHeader(str):
282+
name = 'Header'
283+
def fold(self, **kwargs):
284+
return self
285+
286+
del message['Header']
287+
message['Header'] = LiteralHeader(text)
288+
289+
self.assertEqual(
290+
message.as_string(),
291+
f"{text}\nBody",
292+
)
293+
268294
# XXX: Need subclassing tests.
269295
# For adding subclassed objects, make sure the usual rules apply (subclass
270296
# wins), but that the order still works (right overrides left).
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix serialization of display name in originator or destination address fields with both encoded words and special chars.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:mod:`email` headers with embedded newlines are now quoted on output. The
2+
:mod:`~email.generator` will now refuse to serialize (write) headers that
3+
are unsafely folded or delimited; see
4+
:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas
5+
Bloemsaat and Petr Viktorin in :gh:`121650`.)

0 commit comments

Comments
 (0)