diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index a59a5e21358e7b..86d0dde1705763 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1020,6 +1020,7 @@ def test_errors(self): (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'), (b'a+IKw-b\xff', 'a\u20acb\ufffd'), (b'a+IKw\xffb', 'a\u20ac\ufffdb'), + (b'a+@b', 'a\ufffdb'), ] for raw, expected in tests: with self.subTest(raw=raw): diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 3cc018c0cc2caa..fb7bb2d523fe6e 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1630,6 +1630,10 @@ def test_codecs_utf7(self): for c in set_o: self.assertEqual(c.encode('ascii').decode('utf7'), c) + with self.assertRaisesRegex(UnicodeDecodeError, + 'ill-formed sequence'): + b'+@'.decode('utf-7') + def test_codecs_utf8(self): self.assertEqual(''.encode('utf-8'), b'') self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac') diff --git a/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst new file mode 100644 index 00000000000000..5b113e3204c104 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst @@ -0,0 +1,3 @@ +The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed +sequences starting with "+" (as specified in RFC 2152). Patch by Zackery +Spytz. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 04fd6d03b464e8..0460d184932ee0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s, if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0) goto onError; } + else if (s < e && !IS_BASE64(*s)) { + s++; + errmsg = "ill-formed sequence"; + goto utf7Error; + } else { /* begin base64-encoded section */ inShift = 1; surrogate = 0;