Skip to content

Commit 283302f

Browse files
committed
sync with hg 940c5b63
1 parent cf0a645 commit 283302f

File tree

8 files changed

+139
-15
lines changed

8 files changed

+139
-15
lines changed

include/pybind11/cast.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,14 @@ struct pyobject_caster {
16311631

16321632
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
16331633
bool load(handle src, bool /* convert */) {
1634+
#if defined(PYBIND11_STR_NON_PERMISSIVE) && !defined(PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE)
1635+
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
1636+
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
1637+
if (!str_from_bytes) throw error_already_set();
1638+
value = reinterpret_steal<type>(str_from_bytes);
1639+
return true;
1640+
}
1641+
#endif
16341642
if (!isinstance<type>(src))
16351643
return false;
16361644
value = reinterpret_borrow<type>(src);

include/pybind11/detail/common.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,18 @@
160160
#include <typeindex>
161161
#include <type_traits>
162162

163+
#define PYBIND11_STR_NON_PERMISSIVE
164+
// If UNDEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
165+
// (probably surprising, but this is the legacy behavior). As a side-effect,
166+
// pybind11::isinstance<str>() is true for both pybind11::str and pybind11::bytes.
167+
// If DEFINED, pybind11::str can only hold PyUnicodeObject, and
168+
// pybind11::isinstance<str>() is true only for pybind11::str.
169+
170+
//#define PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
171+
// This macro has an effect only if PYBIND11_STR_NON_PERMISSIVE is defined.
172+
// If UNDEFINED, the pybind11::str caster will implicitly decode bytes to PyUnicodeObject.
173+
// If DEFINED, the pybind11::str caster will only accept PyUnicodeObject.
174+
163175
#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
164176
#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)
165177
#define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check

include/pybind11/pytypes.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,12 @@ inline bool PyIterable_Check(PyObject *obj) {
752752
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
753753
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
754754

755+
#ifdef PYBIND11_STR_NON_PERMISSIVE
756+
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
757+
#else
755758
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
759+
#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
760+
#endif
756761

757762
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
758763

@@ -796,7 +801,9 @@ PYBIND11_NAMESPACE_END(detail)
796801
Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \
797802
PYBIND11_DEPRECATED("Use py::isinstance<py::python_type>(obj) instead") \
798803
bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \
799-
static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); }
804+
static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \
805+
template <typename Policy_> \
806+
Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
800807

801808
#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
802809
PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
@@ -806,9 +813,7 @@ PYBIND11_NAMESPACE_END(detail)
806813
{ if (!m_ptr) throw error_already_set(); } \
807814
Name(object &&o) \
808815
: Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
809-
{ if (!m_ptr) throw error_already_set(); } \
810-
template <typename Policy_> \
811-
Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
816+
{ if (!m_ptr) throw error_already_set(); }
812817

813818
#define PYBIND11_OBJECT(Name, Parent, CheckFun) \
814819
PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
@@ -899,7 +904,7 @@ class bytes;
899904

900905
class str : public object {
901906
public:
902-
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
907+
PYBIND11_OBJECT_CVT(str, object, PYBIND11_STR_CHECK_FUN, raw_str)
903908

904909
str(const char *c, size_t n)
905910
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
@@ -920,7 +925,7 @@ class str : public object {
920925
Return a string representation of the object. This is analogous to
921926
the ``str()`` function in Python.
922927
\endrst */
923-
explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { }
928+
explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { if (!m_ptr) throw error_already_set(); }
924929

925930
operator std::string() const {
926931
object temp = *this;
@@ -945,8 +950,8 @@ class str : public object {
945950
/// Return string representation -- always returns a new reference, even if already a str
946951
static PyObject *raw_str(PyObject *op) {
947952
PyObject *str_value = PyObject_Str(op);
948-
if (!str_value) throw error_already_set();
949953
#if PY_MAJOR_VERSION < 3
954+
if (!str_value) throw error_already_set();
950955
PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr);
951956
Py_XDECREF(str_value); str_value = unicode;
952957
#endif

include/pybind11/stl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
144144
using value_conv = make_caster<Value>;
145145

146146
bool load(handle src, bool convert) {
147-
if (!isinstance<sequence>(src) || isinstance<str>(src))
147+
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
148148
return false;
149149
auto s = reinterpret_borrow<sequence>(src);
150150
value.clear();

tests/test_eval.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def test_evals(capture):
2222
@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
2323
def test_eval_file():
2424
filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
25+
if env.PY2:
26+
filename = filename.decode('utf-8')
2527
assert m.test_eval_file(filename)
2628

2729
assert m.test_eval_file_failure()

tests/test_exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def hook(unraisable_hook_args):
6868
# Use monkeypatch so pytest can apply and remove the patch as appropriate
6969
monkeypatch.setattr(sys, 'unraisablehook', hook)
7070

71-
assert m.python_alreadyset_in_destructor('already_set demo') is True
71+
assert m.python_alreadyset_in_destructor(u'already_set demo') is True
7272
if hooked:
7373
assert triggered[0] is True
7474

tests/test_pytypes.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ TEST_SUBMODULE(pytypes, m) {
8080
m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); });
8181
m.def("str_from_object", [](const py::object& obj) { return py::str(obj); });
8282
m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); });
83+
m.def("str_from_handle", [](py::handle h) { return py::str(h); });
8384

8485
m.def("str_format", []() {
8586
auto s1 = "{} + {} = {}"_s.format(1, 2, 3);
@@ -197,6 +198,7 @@ TEST_SUBMODULE(pytypes, m) {
197198
// test_constructors
198199
m.def("default_constructors", []() {
199200
return py::dict(
201+
"bytes"_a=py::bytes(),
200202
"str"_a=py::str(),
201203
"bool"_a=py::bool_(),
202204
"int"_a=py::int_(),
@@ -210,6 +212,7 @@ TEST_SUBMODULE(pytypes, m) {
210212

211213
m.def("converting_constructors", [](py::dict d) {
212214
return py::dict(
215+
"bytes"_a=py::bytes(d["bytes"]),
213216
"str"_a=py::str(d["str"]),
214217
"bool"_a=py::bool_(d["bool"]),
215218
"int"_a=py::int_(d["int"]),
@@ -225,6 +228,7 @@ TEST_SUBMODULE(pytypes, m) {
225228
m.def("cast_functions", [](py::dict d) {
226229
// When converting between Python types, obj.cast<T>() should be the same as T(obj)
227230
return py::dict(
231+
"bytes"_a=d["bytes"].cast<py::bytes>(),
228232
"str"_a=d["str"].cast<py::str>(),
229233
"bool"_a=d["bool"].cast<py::bool_>(),
230234
"int"_a=d["int"].cast<py::int_>(),
@@ -369,4 +373,18 @@ TEST_SUBMODULE(pytypes, m) {
369373
buf, static_cast<ssize_t>(strlen(buf)));
370374
});
371375
#endif
376+
377+
#ifdef PYBIND11_STR_NON_PERMISSIVE
378+
m.attr("has_str_non_permissive") = true;
379+
#endif
380+
#ifdef PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
381+
m.attr("has_str_caster_no_implicit_decode") = true;
382+
#endif
383+
384+
m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });
385+
m.def("isinstance_pybind11_str", [](py::object o) { return py::isinstance<py::str>(o); });
386+
387+
m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); });
388+
m.def("pass_to_pybind11_str", [](py::str s) { return py::len(s); });
389+
m.def("pass_to_std_string", [](std::string s) { return s.size(); });
372390
}

tests/test_pytypes.py

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,28 @@ def __repr__(self):
104104

105105
assert m.str_from_object(A()) == "this is a str"
106106
assert m.repr_from_object(A()) == "this is a repr"
107+
assert m.str_from_handle(A()) == "this is a str"
107108

108109
s1, s2 = m.str_format()
109110
assert s1 == "1 + 2 = 3"
110111
assert s1 == s2
111112

113+
malformed_utf8 = b"\x80"
114+
if env.PY2:
115+
if hasattr(m, "has_str_non_permissive"):
116+
with pytest.raises(UnicodeDecodeError):
117+
m.str_from_object(malformed_utf8)
118+
else:
119+
# Legacy permissive mode: the bytes object is expected to come back unchanged, so actually assert it.
assert m.str_from_object(malformed_utf8) is malformed_utf8
120+
with pytest.raises(UnicodeDecodeError):
121+
m.str_from_handle(malformed_utf8)
122+
else:
123+
if hasattr(m, "has_str_non_permissive"):
124+
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
125+
else:
126+
assert m.str_from_object(malformed_utf8) is malformed_utf8
127+
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
128+
112129

113130
def test_bytes(doc):
114131
assert m.bytes_from_string().decode() == "foo"
@@ -190,11 +207,17 @@ def func(self, x, *args):
190207

191208
def test_constructors():
192209
"""C++ default and converting constructors are equivalent to type calls in Python"""
193-
types = [str, bool, int, float, tuple, list, dict, set]
210+
types = [bytes, str, bool, int, float, tuple, list, dict, set]
194211
expected = {t.__name__: t() for t in types}
212+
if env.PY2:
213+
# Note that bytes.__name__ == 'str' in Python 2.
214+
# pybind11::str is unicode even under Python 2.
215+
expected["bytes"] = bytes()
216+
expected["str"] = u"" # flake8 complains about unicode().
195217
assert m.default_constructors() == expected
196218

197219
data = {
220+
bytes: b'41', # Currently no supported or working conversions.
198221
str: 42,
199222
bool: "Not empty",
200223
int: "42",
@@ -207,6 +230,11 @@ def test_constructors():
207230
}
208231
inputs = {k.__name__: v for k, v in data.items()}
209232
expected = {k.__name__: k(v) for k, v in data.items()}
233+
if env.PY2: # Similar to the above. See comments above.
234+
inputs["bytes"] = b'41'
235+
inputs["str"] = 42
236+
expected["bytes"] = b'41'
237+
expected["str"] = u"42"
210238

211239
assert m.converting_constructors(inputs) == expected
212240
assert m.cast_functions(inputs) == expected
@@ -245,13 +273,26 @@ def test_pybind11_str_raw_str():
245273
valid_orig = u"DZ"
246274
valid_utf8 = valid_orig.encode("utf-8")
247275
valid_cvt = cvt(valid_utf8)
248-
assert type(valid_cvt) == bytes # Probably surprising.
249-
assert valid_cvt == b'\xc7\xb1'
276+
if hasattr(m, "has_str_non_permissive"):
277+
# Parenthesize the conditional so the type is really compared on Python 3 (otherwise the assert tests the truthy `str` class).
assert type(valid_cvt) is (unicode if env.PY2 else str)  # noqa: F821
278+
if env.PY2:
279+
assert valid_cvt == valid_orig
280+
else:
281+
assert valid_cvt == u"b'\\xc7\\xb1'"
282+
else:
283+
assert valid_cvt is valid_utf8
250284

251285
malformed_utf8 = b'\x80'
252-
malformed_cvt = cvt(malformed_utf8)
253-
assert type(malformed_cvt) == bytes # Probably surprising.
254-
assert malformed_cvt == b'\x80'
286+
if hasattr(m, "has_str_non_permissive"):
287+
if env.PY2:
288+
with pytest.raises(UnicodeDecodeError):
289+
cvt(malformed_utf8)
290+
else:
291+
malformed_cvt = cvt(malformed_utf8)
292+
# Parenthesize the conditional so the type is really compared (otherwise the assert tests the truthy `str` class).
assert type(malformed_cvt) is (unicode if env.PY2 else str)  # noqa: F821
293+
assert malformed_cvt == u"b'\\x80'"
294+
else:
295+
assert cvt(malformed_utf8) is malformed_utf8
255296

256297

257298
def test_implicit_casting():
@@ -379,3 +420,41 @@ def test_memoryview_from_memory():
379420
assert isinstance(view, memoryview)
380421
assert view.format == 'B'
381422
assert bytes(view) == b'\xff\xe1\xab\x37'
423+
424+
425+
def test_isinstance_string_types():
426+
assert m.isinstance_pybind11_bytes(b"")
427+
assert not m.isinstance_pybind11_bytes(u"")
428+
429+
assert m.isinstance_pybind11_str(u"")
430+
if hasattr(m, "has_str_non_permissive"):
431+
assert not m.isinstance_pybind11_str(b"")
432+
else:
433+
assert m.isinstance_pybind11_str(b"")
434+
435+
436+
def test_pass_bytes_or_unicode_to_string_types():
437+
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
438+
with pytest.raises(TypeError):
439+
m.pass_to_pybind11_bytes(u"Str")
440+
441+
if hasattr(m, "has_str_caster_no_implicit_decode"):
442+
with pytest.raises(TypeError):
443+
m.pass_to_pybind11_str(b"Bytes")
444+
else:
445+
assert m.pass_to_pybind11_str(b"Bytes") == 5
446+
assert m.pass_to_pybind11_str(u"Str") == 3
447+
448+
assert m.pass_to_std_string(b"Bytes") == 5
449+
assert m.pass_to_std_string(u"Str") == 3
450+
451+
malformed_utf8 = b"\x80"
452+
if hasattr(m, "has_str_non_permissive"):
453+
if hasattr(m, "has_str_caster_no_implicit_decode"):
454+
with pytest.raises(TypeError):
455+
m.pass_to_pybind11_str(malformed_utf8)
456+
else:
457+
with pytest.raises(UnicodeDecodeError):
458+
m.pass_to_pybind11_str(malformed_utf8)
459+
else:
460+
assert m.pass_to_pybind11_str(malformed_utf8) == 1

0 commit comments

Comments
 (0)