Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/library/codecs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,10 @@ In addition, the following error handler is specific to the given codecs:
The ``'backslashreplace'`` error handler now works with decoding and
translating.

.. versionchanged:: 3.14
All standard error handlers except ``'surrogateescape'`` now support
translating.

The set of allowed values can be extended by registering a new named error
handler:

Expand Down
44 changes: 26 additions & 18 deletions Lib/test/test_codeccallbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,17 +510,12 @@ def test_badandgoodxmlcharrefreplaceexceptions(self):
codecs.xmlcharrefreplace_errors,
UnicodeError("ouch")
)
# "xmlcharrefreplace" can only be used for encoding
# "xmlcharrefreplace" cannot be used for decoding
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeTranslateError("\u3042", 0, 1, "ouch")
)
# Use the correct exception
cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 99999, 100000,
999999, 1000000)
Expand All @@ -533,6 +528,13 @@ def test_badandgoodxmlcharrefreplaceexceptions(self):
),
("".join("&#%d;" % c for c in cs), 1 + len(s))
)
self.assertEqual(
codecs.xmlcharrefreplace_errors(
UnicodeTranslateError("a" + s + "b",
1, 1 + len(s), "ouch")
),
("".join("&#%d;" % c for c in cs), 1 + len(s))
)

def test_badandgoodbackslashreplaceexceptions(self):
# "backslashreplace" complains about a non-exception passed in
Expand Down Expand Up @@ -605,17 +607,12 @@ def test_badandgoodnamereplaceexceptions(self):
codecs.namereplace_errors,
UnicodeError("ouch")
)
# "namereplace" can only be used for encoding
# "namereplace" cannot be used for decoding
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeTranslateError("\u3042", 0, 1, "ouch")
)
# Use the correct exception
tests = [
("\u3042", "\\N{HIRAGANA LETTER A}"),
Expand All @@ -637,6 +634,12 @@ def test_badandgoodnamereplaceexceptions(self):
1, 1 + len(s), "ouch")),
(r, 1 + len(s))
)
self.assertEqual(
codecs.namereplace_errors(
UnicodeTranslateError("a" + s + "b",
1, 1 + len(s), "ouch")),
(r, 1 + len(s))
)

def test_badandgoodsurrogateescapeexceptions(self):
surrogateescape_errors = codecs.lookup_error('surrogateescape')
Expand Down Expand Up @@ -696,12 +699,6 @@ def test_badandgoodsurrogatepassexceptions(self):
surrogatepass_errors,
UnicodeError("ouch")
)
# "surrogatepass" can not be used for translating
self.assertRaises(
TypeError,
surrogatepass_errors,
UnicodeTranslateError("\ud800", 0, 1, "ouch")
)
# Use the correct exception
for enc in ("utf-8", "utf-16le", "utf-16be", "utf-32le", "utf-32be"):
with self.subTest(encoding=enc):
Expand All @@ -715,13 +712,24 @@ def test_badandgoodsurrogatepassexceptions(self):
surrogatepass_errors,
UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch")
)
self.assertRaises(
UnicodeTranslateError,
surrogatepass_errors,
UnicodeTranslateError("\u3042", 0, 1, "ouch")
)
for s in ("\ud800", "\udfff", "\ud800\udfff"):
with self.subTest(str=s):
self.assertRaises(
UnicodeEncodeError,
surrogatepass_errors,
UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
)
self.assertEqual(
surrogatepass_errors(
UnicodeTranslateError("a" + s + "b",
1, 1 + len(s), "ouch")),
(s, 1 + len(s))
)
tests = [
("utf-8", "\ud800", b'\xed\xa0\x80', 3),
("utf-16le", "\ud800", b'\x00\xd8', 2),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
All standard error handlers except ``'surrogateescape'`` now support
translating.
Loading