Skip to content

Commit e349bf2

Browse files
ZackerySpytz authored and serhiy-storchaka committed
bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+". (GH-8741)
The UTF-7 decoder now raises UnicodeDecodeError for ill-formed sequences starting with "+" (as specified in RFC 2152).
1 parent d3d3171 commit e349bf2

File tree

4 files changed

+13
-0
lines changed

4 files changed

+13
-0
lines changed

Lib/test/test_codecs.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1020,6 +1020,7 @@ def test_errors(self):
10201020
(b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
10211021
(b'a+IKw-b\xff', 'a\u20acb\ufffd'),
10221022
(b'a+IKw\xffb', 'a\u20ac\ufffdb'),
1023+
(b'a+@b', 'a\ufffdb'),
10231024
]
10241025
for raw, expected in tests:
10251026
with self.subTest(raw=raw):

Lib/test/test_unicode.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1630,6 +1630,10 @@ def test_codecs_utf7(self):
16301630
for c in set_o:
16311631
self.assertEqual(c.encode('ascii').decode('utf7'), c)
16321632

1633+
with self.assertRaisesRegex(UnicodeDecodeError,
1634+
'ill-formed sequence'):
1635+
b'+@'.decode('utf-7')
1636+
16331637
def test_codecs_utf8(self):
16341638
self.assertEqual(''.encode('utf-8'), b'')
16351639
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed
2+
sequences starting with "+" (as specified in RFC 2152). Patch by Zackery
3+
Spytz.

Objects/unicodeobject.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
44794479
if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
44804480
goto onError;
44814481
}
4482+
else if (s < e && !IS_BASE64(*s)) {
4483+
s++;
4484+
errmsg = "ill-formed sequence";
4485+
goto utf7Error;
4486+
}
44824487
else { /* begin base64-encoded section */
44834488
inShift = 1;
44844489
surrogate = 0;

0 commit comments

Comments
 (0)