Skip to content

Commit b7c33de

Browse files
authored
PYTHON-3046 Document support for backslashreplace and surrogateescape (#836)
1 parent d6fc05a commit b7c33de

File tree

3 files changed: 27 additions and 50 deletions.

bson/codec_options.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -233,7 +233,8 @@ class CodecOptions(_options_base):
233233
- `unicode_decode_error_handler`: The error handler to apply when
234234
a Unicode-related error occurs during BSON decoding that would
235235
otherwise raise :exc:`UnicodeDecodeError`. Valid options include
236-
'strict', 'replace', and 'ignore'. Defaults to 'strict'.
236+
'strict', 'replace', 'backslashreplace', 'surrogateescape', and
237+
'ignore'. Defaults to 'strict'.
237238
- `tzinfo`: A :class:`~datetime.tzinfo` subclass that specifies the
238239
timezone to/from which :class:`~datetime.datetime` objects should be
239240
encoded/decoded.

pymongo/mongo_client.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,8 @@ def __init__(
330330
- `unicode_decode_error_handler`: The error handler to apply when
331331
a Unicode-related error occurs during BSON decoding that would
332332
otherwise raise :exc:`UnicodeDecodeError`. Valid options include
333-
'strict', 'replace', and 'ignore'. Defaults to 'strict'.
333+
'strict', 'replace', 'backslashreplace', 'surrogateescape', and
334+
'ignore'. Defaults to 'strict'.
334335
- `srvServiceName`: (string) The SRV service name to use for
335336
"mongodb+srv://" URIs. Defaults to "mongodb". Use it like so::
336337

test/test_bson.py

Lines changed: 23 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -994,57 +994,32 @@ def test_decode_all_defaults(self):
994994
def test_unicode_decode_error_handler(self):
995995
enc = encode({"keystr": "foobar"})
996996

997-
# Test handling of bad key value.
997+
# Test handling of bad key value, bad string value, and both.
998998
invalid_key = enc[:7] + b'\xe9' + enc[8:]
999-
replaced_key = b'ke\xe9str'.decode('utf-8', 'replace')
1000-
ignored_key = b'ke\xe9str'.decode('utf-8', 'ignore')
1001-
1002-
dec = decode(invalid_key,
1003-
CodecOptions(unicode_decode_error_handler="replace"))
1004-
self.assertEqual(dec, {replaced_key: "foobar"})
1005-
1006-
dec = decode(invalid_key,
1007-
CodecOptions(unicode_decode_error_handler="ignore"))
1008-
self.assertEqual(dec, {ignored_key: "foobar"})
1009-
1010-
self.assertRaises(InvalidBSON, decode, invalid_key, CodecOptions(
1011-
unicode_decode_error_handler="strict"))
1012-
self.assertRaises(InvalidBSON, decode, invalid_key, CodecOptions())
1013-
self.assertRaises(InvalidBSON, decode, invalid_key)
1014-
1015-
# Test handling of bad string value.
1016-
invalid_val = BSON(enc[:18] + b'\xe9' + enc[19:])
1017-
replaced_val = b'fo\xe9bar'.decode('utf-8', 'replace')
1018-
ignored_val = b'fo\xe9bar'.decode('utf-8', 'ignore')
1019-
1020-
dec = decode(invalid_val,
1021-
CodecOptions(unicode_decode_error_handler="replace"))
1022-
self.assertEqual(dec, {"keystr": replaced_val})
1023-
1024-
dec = decode(invalid_val,
1025-
CodecOptions(unicode_decode_error_handler="ignore"))
1026-
self.assertEqual(dec, {"keystr": ignored_val})
1027-
1028-
self.assertRaises(InvalidBSON, decode, invalid_val, CodecOptions(
1029-
unicode_decode_error_handler="strict"))
1030-
self.assertRaises(InvalidBSON, decode, invalid_val, CodecOptions())
1031-
self.assertRaises(InvalidBSON, decode, invalid_val)
1032-
1033-
# Test handling of bad key + bad value.
999+
invalid_val = enc[:18] + b'\xe9' + enc[19:]
10341000
invalid_both = enc[:7] + b'\xe9' + enc[8:18] + b'\xe9' + enc[19:]
10351001

1036-
dec = decode(invalid_both,
1037-
CodecOptions(unicode_decode_error_handler="replace"))
1038-
self.assertEqual(dec, {replaced_key: replaced_val})
1039-
1040-
dec = decode(invalid_both,
1041-
CodecOptions(unicode_decode_error_handler="ignore"))
1042-
self.assertEqual(dec, {ignored_key: ignored_val})
1043-
1044-
self.assertRaises(InvalidBSON, decode, invalid_both, CodecOptions(
1045-
unicode_decode_error_handler="strict"))
1046-
self.assertRaises(InvalidBSON, decode, invalid_both, CodecOptions())
1047-
self.assertRaises(InvalidBSON, decode, invalid_both)
1002+
# Ensure that strict mode raises an error.
1003+
for invalid in [invalid_key, invalid_val, invalid_both]:
1004+
self.assertRaises(InvalidBSON, decode, invalid, CodecOptions(
1005+
unicode_decode_error_handler="strict"))
1006+
self.assertRaises(InvalidBSON, decode, invalid, CodecOptions())
1007+
self.assertRaises(InvalidBSON, decode, invalid)
1008+
1009+
# Test all other error handlers.
1010+
for handler in ['replace', 'backslashreplace', 'surrogateescape',
1011+
'ignore']:
1012+
expected_key = b'ke\xe9str'.decode('utf-8', handler)
1013+
expected_val = b'fo\xe9bar'.decode('utf-8', handler)
1014+
doc = decode(invalid_key,
1015+
CodecOptions(unicode_decode_error_handler=handler))
1016+
self.assertEqual(doc, {expected_key: "foobar"})
1017+
doc = decode(invalid_val,
1018+
CodecOptions(unicode_decode_error_handler=handler))
1019+
self.assertEqual(doc, {"keystr": expected_val})
1020+
doc = decode(invalid_both,
1021+
CodecOptions(unicode_decode_error_handler=handler))
1022+
self.assertEqual(doc, {expected_key: expected_val})
10481023

10491024
# Test handling bad error mode.
10501025
dec = decode(enc,

0 commit comments

Comments
 (0)