Skip to content

Commit 255f9ad

Browse files
committed
Meta PR for Google Patches
1 parent d3c999c commit 255f9ad

File tree

8 files changed

+109
-12
lines changed

8 files changed

+109
-12
lines changed

include/pybind11/cast.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,6 +1632,14 @@ struct pyobject_caster {
16321632

16331633
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
16341634
bool load(handle src, bool /* convert */) {
1635+
#if defined(PYBIND11_STR_NON_PERMISSIVE) && !defined(PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE)
1636+
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
1637+
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
1638+
if (!str_from_bytes) throw error_already_set();
1639+
value = reinterpret_steal<type>(str_from_bytes);
1640+
return true;
1641+
}
1642+
#endif
16351643
if (!isinstance<type>(src))
16361644
return false;
16371645
value = reinterpret_borrow<type>(src);

include/pybind11/detail/common.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,18 @@
161161
#include <typeindex>
162162
#include <type_traits>
163163

164+
#define PYBIND11_STR_NON_PERMISSIVE
165+
// If UNDEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
166+
// (probably surprising, but this is the legacy behavior). As a side-effect,
167+
// pybind11::isinstance<str>() is true for both pybind11::str and pybind11::bytes.
168+
// If DEFINED, pybind11::str can only hold PyUnicodeObject, and
169+
// pybind11::isinstance<str>() is true only for pybind11::str.
170+
171+
//#define PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
172+
// This macro has an effect only if PYBIND11_STR_NON_PERMISSIVE is defined.
173+
// If UNDEFINED, the pybind11::str caster will implicitly decode bytes to PyUnicodeObject.
174+
// If DEFINED, the pybind11::str caster will only accept PyUnicodeObject.
175+
164176
#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
165177
#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)
166178
#define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check

include/pybind11/pytypes.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,12 @@ inline bool PyIterable_Check(PyObject *obj) {
754754
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
755755
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
756756

757+
#ifdef PYBIND11_STR_NON_PERMISSIVE
758+
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
759+
#else
757760
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
761+
#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
762+
#endif
758763

759764
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
760765

@@ -927,7 +932,7 @@ class bytes;
927932

928933
class str : public object {
929934
public:
930-
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
935+
PYBIND11_OBJECT_CVT(str, object, PYBIND11_STR_CHECK_FUN, raw_str)
931936

932937
str(const char *c, size_t n)
933938
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {

include/pybind11/stl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
144144
using value_conv = make_caster<Value>;
145145

146146
bool load(handle src, bool convert) {
147-
if (!isinstance<sequence>(src) || isinstance<str>(src))
147+
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
148148
return false;
149149
auto s = reinterpret_borrow<sequence>(src);
150150
value.clear();

tests/test_eval.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def test_evals(capture):
2222
@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
2323
def test_eval_file():
2424
filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
25+
if env.PY2:
26+
filename = filename.decode('utf-8')
2527
assert m.test_eval_file(filename)
2628

2729
assert m.test_eval_file_failure()

tests/test_exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def hook(unraisable_hook_args):
6868
# Use monkeypatch so pytest can apply and remove the patch as appropriate
6969
monkeypatch.setattr(sys, 'unraisablehook', hook)
7070

71-
assert m.python_alreadyset_in_destructor('already_set demo') is True
71+
assert m.python_alreadyset_in_destructor(u'already_set demo') is True
7272
if hooked:
7373
assert triggered[0] is True
7474

tests/test_pytypes.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,4 +383,18 @@ TEST_SUBMODULE(pytypes, m) {
383383
buf, static_cast<ssize_t>(strlen(buf)));
384384
});
385385
#endif
386+
387+
#ifdef PYBIND11_STR_NON_PERMISSIVE
388+
m.attr("has_str_non_permissive") = true;
389+
#endif
390+
#ifdef PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
391+
m.attr("has_str_caster_no_implicit_decode") = true;
392+
#endif
393+
394+
m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });
395+
m.def("isinstance_pybind11_str", [](py::object o) { return py::isinstance<py::str>(o); });
396+
397+
m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); });
398+
m.def("pass_to_pybind11_str", [](py::str s) { return py::len(s); });
399+
m.def("pass_to_std_string", [](std::string s) { return s.size(); });
386400
}

tests/test_pytypes.py

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,19 @@ def __repr__(self):
111111
assert s1 == s2
112112

113113
malformed_utf8 = b"\x80"
114-
assert m.str_from_object(malformed_utf8) is malformed_utf8 # To be fixed; see #2380
115114
if env.PY2:
116-
# with pytest.raises(UnicodeDecodeError):
117-
# m.str_from_object(malformed_utf8)
115+
if hasattr(m, "has_str_non_permissive"):
116+
with pytest.raises(UnicodeDecodeError):
117+
m.str_from_object(malformed_utf8)
118+
else:
119+
# Legacy (permissive) mode: the bytes object is expected to come back unchanged.
# The bare comparison was a no-op; assert it so the expectation is actually checked.
assert m.str_from_object(malformed_utf8) is malformed_utf8  # To be fixed; see #2380
118120
with pytest.raises(UnicodeDecodeError):
119121
m.str_from_handle(malformed_utf8)
120122
else:
121-
# assert m.str_from_object(malformed_utf8) == "b'\\x80'"
123+
if hasattr(m, "has_str_non_permissive"):
124+
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
125+
else:
126+
assert m.str_from_object(malformed_utf8) is malformed_utf8 # To be fixed; see #2380
122127
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
123128

124129

@@ -268,13 +273,26 @@ def test_pybind11_str_raw_str():
268273
valid_orig = u"DZ"
269274
valid_utf8 = valid_orig.encode("utf-8")
270275
valid_cvt = cvt(valid_utf8)
271-
assert type(valid_cvt) == bytes # Probably surprising.
272-
assert valid_cvt == b'\xc7\xb1'
276+
if hasattr(m, "has_str_non_permissive"):
277+
# Parenthesize the conditional: without it the statement parses as
# `(type(valid_cvt) is unicode) if env.PY2 else str`, which is always truthy on Python 3.
assert type(valid_cvt) is (unicode if env.PY2 else str)  # noqa: F821
278+
if env.PY2:
279+
assert valid_cvt == valid_orig
280+
else:
281+
assert valid_cvt == u"b'\\xc7\\xb1'"
282+
else:
283+
assert valid_cvt is valid_utf8
273284

274285
malformed_utf8 = b'\x80'
275-
malformed_cvt = cvt(malformed_utf8)
276-
assert type(malformed_cvt) == bytes # Probably surprising.
277-
assert malformed_cvt == b'\x80'
286+
if hasattr(m, "has_str_non_permissive"):
287+
if env.PY2:
288+
with pytest.raises(UnicodeDecodeError):
289+
cvt(malformed_utf8)
290+
else:
291+
malformed_cvt = cvt(malformed_utf8)
292+
# Parenthesize the conditional: without it the statement parses as
# `(type(malformed_cvt) is unicode) if env.PY2 else str`, which asserts nothing on Python 3.
assert type(malformed_cvt) is (unicode if env.PY2 else str)  # noqa: F821
293+
assert malformed_cvt == u"b'\\x80'"
294+
else:
295+
assert cvt(malformed_utf8) is malformed_utf8
278296

279297

280298
def test_implicit_casting():
@@ -410,3 +428,41 @@ def test_memoryview_from_memory():
410428
assert isinstance(view, memoryview)
411429
assert view.format == 'B'
412430
assert bytes(view) == b'\xff\xe1\xab\x37'
431+
432+
433+
def test_isinstance_string_types():
434+
assert m.isinstance_pybind11_bytes(b"")
435+
assert not m.isinstance_pybind11_bytes(u"")
436+
437+
assert m.isinstance_pybind11_str(u"")
438+
if hasattr(m, "has_str_non_permissive"):
439+
assert not m.isinstance_pybind11_str(b"")
440+
else:
441+
assert m.isinstance_pybind11_str(b"")
442+
443+
444+
def test_pass_bytes_or_unicode_to_string_types():
445+
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
446+
with pytest.raises(TypeError):
447+
m.pass_to_pybind11_bytes(u"Str")
448+
449+
if hasattr(m, "has_str_caster_no_implicit_decode"):
450+
with pytest.raises(TypeError):
451+
m.pass_to_pybind11_str(b"Bytes")
452+
else:
453+
assert m.pass_to_pybind11_str(b"Bytes") == 5
454+
assert m.pass_to_pybind11_str(u"Str") == 3
455+
456+
assert m.pass_to_std_string(b"Bytes") == 5
457+
assert m.pass_to_std_string(u"Str") == 3
458+
459+
malformed_utf8 = b"\x80"
460+
if hasattr(m, "has_str_non_permissive"):
461+
if hasattr(m, "has_str_caster_no_implicit_decode"):
462+
with pytest.raises(TypeError):
463+
m.pass_to_pybind11_str(malformed_utf8)
464+
else:
465+
with pytest.raises(UnicodeDecodeError):
466+
m.pass_to_pybind11_str(malformed_utf8)
467+
else:
468+
assert m.pass_to_pybind11_str(malformed_utf8) == 1

0 commit comments

Comments
 (0)