Skip to content

Commit 74b501c

Browse files
benfredjagerman
authored and committed
Fix passing in utf8 encoded strings with python 2
Passing utf8 encoded strings from python to a C++ function taking a std::string was broken. The previous version was trying to call 'PyUnicode_FromObject' on this data, which failed to convert the string to unicode with the default ascii codec. Also, this incurs an unnecessary conversion to unicode for data that is immediately converted back to utf8. Fix by treating python 2 strings the same as python 3 bytes objects, and just copying over the data if possible.
1 parent 0365d49 commit 74b501c

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

include/pybind11/cast.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -734,9 +734,14 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
734734
#if PY_MAJOR_VERSION >= 3
735735
return load_bytes(load_src);
736736
#else
737+
if (sizeof(CharT) == 1) {
738+
return load_bytes(load_src);
739+
}
740+
737741
// The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
738742
if (!PYBIND11_BYTES_CHECK(load_src.ptr()))
739743
return false;
744+
740745
temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
741746
if (!temp) { PyErr_Clear(); return false; }
742747
load_src = temp;
@@ -780,9 +785,8 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
780785
#endif
781786
}
782787

783-
#if PY_MAJOR_VERSION >= 3
784-
// In Python 3, when loading into a std::string or char*, accept a bytes object as-is (i.e.
785-
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op. Python 2,
788+
// When loading into a std::string or char*, accept a bytes object as-is (i.e.
789+
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
786790
// which supports loading a unicode from a str, doesn't take this path.
787791
template <typename C = CharT>
788792
bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src) {
@@ -798,9 +802,9 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
798802

799803
return false;
800804
}
805+
801806
template <typename C = CharT>
802807
bool load_bytes(enable_if_t<sizeof(C) != 1, handle>) { return false; }
803-
#endif
804808
};
805809

806810
// Type caster for C-style strings. We basically use a std::string type caster, but also add the

tests/test_python_types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,9 @@ def test_bytes_to_string():
554554
assert string_length(byte("a\x00b")) == 3
555555
assert strlen(byte("a\x00b")) == 1 # C-string limitation
556556

557+
# passing in a utf8 encoded string should work
558+
assert string_length(u'💩'.encode("utf8")) == 4
559+
557560

558561
def test_builtins_cast_return_none():
559562
"""Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None"""

0 commit comments

Comments
 (0)