Skip to content

Commit 2039372

Browse files
committed
Reducing from two to only one macro: PYBIND11_STR_LEGACY_PERMISSIVE
1 parent a64fd32 commit 2039372

File tree

9 files changed

+85
-61
lines changed

9 files changed

+85
-61
lines changed

docs/changelog.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ Changelog
66
Starting with version 1.8.0, pybind11 releases use a `semantic versioning
77
<http://semver.org>`_ policy.
88

9+
v2.7.0 (WIP)
10+
------------------------------
11+
12+
* ``py::str`` changed to exclusively hold `PyUnicodeObject`. Previously
13+
``py::str`` could also hold `bytes`, which is probably surprising, was
14+
never documented, and can mask bugs (e.g. accidental use of ``py::str``
15+
instead of ``py::bytes``).
16+
`#2409 <https://github.com/pybind/pybind11/pull/2409>`_
17+
918

1019
v2.6.2 (TBA, not yet released)
1120
------------------------------

docs/upgrade.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,31 @@ modernization and other useful information.
1010

1111
.. _upgrade-guide-2.6:
1212

13+
v2.7
14+
====
15+
16+
*Before* v2.7, ``py::str`` can hold ``PyUnicodeObject`` or ``PyBytesObject``,
17+
and ``py::isinstance<str>()`` is ``true`` for both ``py::str`` and
18+
``py::bytes``. Starting with v2.7, ``py::str`` exclusively holds
19+
``PyUnicodeObject`` (`#2409 <https://github.com/pybind/pybind11/pull/2409>`_),
20+
and ``py::isinstance<str>()`` is ``true`` only for ``py::str``. To help in
21+
the transition of client code, the ``PYBIND11_STR_LEGACY_PERMISSIVE`` macro
22+
is provided as an escape hatch to go back to the legacy behavior. This macro
23+
will be removed in future releases. Two types of required client-code fixes
24+
are expected to be common:
25+
26+
* Accidental use of ``py::str`` instead of ``py::bytes``, masked by the legacy
27+
behavior. These are probably very easy to fix, by changing from
28+
``py::str`` to ``py::bytes``.
29+
30+
* Reliance on py::isinstance<str>(obj) being ``true`` for
31+
``py::bytes``. This is likely to be easy to fix in most cases by adding
32+
``|| py::isinstance<bytes>(obj)``, but a fix may be more involved, e.g. if
33+
``py::isinstance<T>`` appears in a template. Such situations will require
34+
careful review and custom fixes.
35+
36+
37+
1338
v2.6
1439
====
1540

include/pybind11/cast.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1628,7 +1628,10 @@ struct pyobject_caster {
16281628

16291629
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
16301630
bool load(handle src, bool /* convert */) {
1631-
#if defined(PYBIND11_STR_NON_PERMISSIVE) && !defined(PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE)
1631+
#if PY_MAJOR_VERSION < 3 && !defined(PYBIND11_STR_LEGACY_PERMISSIVE)
1632+
// For Python 2, without this implicit conversion, Python code would
1633+
// need to be cluttered with six.ensure_text() or similar, only to be
1634+
// un-cluttered later after Python 2 support is dropped.
16321635
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
16331636
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
16341637
if (!str_from_bytes) throw error_already_set();

include/pybind11/detail/common.h

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -163,19 +163,18 @@
163163
#include <typeindex>
164164
#include <type_traits>
165165

166-
#define PYBIND11_STR_NON_PERMISSIVE
167-
// If UNDEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
168-
// (probably surprising, but this is the legacy behavior). As a side-effect,
169-
// pybind11::isinstance<str>() is true for both pybind11::str and pybind11::bytes.
170-
// If DEFINED, pybind11::str can only hold PyUnicodeObject, and
171-
// pybind11::isinstance<str>() is true only for pybind11::str.
172-
173-
#if PY_MAJOR_VERSION >= 3
174-
#define PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
175-
#endif
176-
// This macro has an effect only if PYBIND11_STR_NON_PERMISSIVE is defined.
177-
// If UNDEFINED, the pybind11::str caster will implicitly decode bytes to PyUnicodeObject.
178-
// If DEFINED, the pybind11::str caster will only accept PyUnicodeObject.
166+
// #define PYBIND11_STR_LEGACY_PERMISSIVE
167+
// If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
168+
// (probably surprising and never documented, but this was the
169+
// legacy behavior until and including v2.6.x). As a side-effect,
170+
// pybind11::isinstance<str>() is true for both pybind11::str and
171+
// pybind11::bytes.
172+
// If UNDEFINED, pybind11::str can only hold PyUnicodeObject, and
173+
// pybind11::isinstance<str>() is true only for pybind11::str.
174+
// However, for Python 2 only (!), the pybind11::str caster
175+
// implicitly decodes bytes to PyUnicodeObject. This is to ease
176+
// the transition from the legacy behavior to the non-permissive
177+
// behavior.
179178

180179
#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
181180
#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)

include/pybind11/pytypes.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -754,11 +754,11 @@ inline bool PyIterable_Check(PyObject *obj) {
754754
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
755755
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
756756

757-
#ifdef PYBIND11_STR_NON_PERMISSIVE
758-
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
759-
#else
757+
#ifdef PYBIND11_STR_LEGACY_PERMISSIVE
760758
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
761759
#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
760+
#else
761+
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
762762
#endif
763763

764764
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }

tests/test_eval.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ def test_evals(capture):
2222
@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
2323
def test_eval_file():
2424
filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
25-
if env.PY2:
26-
filename = filename.decode("utf-8")
2725
assert m.test_eval_file(filename)
2826

2927
assert m.test_eval_file_failure()

tests/test_exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def hook(unraisable_hook_args):
7979
# Use monkeypatch so pytest can apply and remove the patch as appropriate
8080
monkeypatch.setattr(sys, "unraisablehook", hook)
8181

82-
assert m.python_alreadyset_in_destructor(u"already_set demo") is True
82+
assert m.python_alreadyset_in_destructor("already_set demo") is True
8383
if hooked:
8484
assert triggered[0] is True
8585

tests/test_pytypes.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -414,11 +414,8 @@ TEST_SUBMODULE(pytypes, m) {
414414
// test_builtin_functions
415415
m.def("get_len", [](py::handle h) { return py::len(h); });
416416

417-
#ifdef PYBIND11_STR_NON_PERMISSIVE
418-
m.attr("has_str_non_permissive") = true;
419-
#endif
420-
#ifdef PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
421-
m.attr("has_str_caster_no_implicit_decode") = true;
417+
#ifdef PYBIND11_STR_LEGACY_PERMISSIVE
418+
m.attr("PYBIND11_STR_LEGACY_PERMISSIVE") = true;
422419
#endif
423420

424421
m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });

tests/test_pytypes.py

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -120,23 +120,17 @@ def __repr__(self):
120120
assert s1 == s2
121121

122122
malformed_utf8 = b"\x80"
123+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
124+
assert m.str_from_object(malformed_utf8) is malformed_utf8
125+
elif env.PY2:
126+
with pytest.raises(UnicodeDecodeError):
127+
m.str_from_object(malformed_utf8)
128+
else:
129+
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
123130
if env.PY2:
124-
if hasattr(m, "has_str_non_permissive"):
125-
with pytest.raises(UnicodeDecodeError):
126-
m.str_from_object(malformed_utf8)
127-
else:
128-
m.str_from_object(
129-
malformed_utf8
130-
) is malformed_utf8 # To be fixed; see #2380
131131
with pytest.raises(UnicodeDecodeError):
132132
m.str_from_handle(malformed_utf8)
133133
else:
134-
if hasattr(m, "has_str_non_permissive"):
135-
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
136-
else:
137-
assert (
138-
m.str_from_object(malformed_utf8) is malformed_utf8
139-
) # To be fixed; see #2380
140134
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
141135

142136

@@ -312,26 +306,26 @@ def test_pybind11_str_raw_str():
312306
valid_orig = u"DZ"
313307
valid_utf8 = valid_orig.encode("utf-8")
314308
valid_cvt = cvt(valid_utf8)
315-
if hasattr(m, "has_str_non_permissive"):
309+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
310+
assert valid_cvt is valid_utf8
311+
else:
316312
assert type(valid_cvt) is unicode if env.PY2 else str # noqa: F821
317313
if env.PY2:
318314
assert valid_cvt == valid_orig
319315
else:
320-
assert valid_cvt == u"b'\\xc7\\xb1'"
321-
else:
322-
assert valid_cvt is valid_utf8
316+
assert valid_cvt == "b'\\xc7\\xb1'"
323317

324318
malformed_utf8 = b"\x80"
325-
if hasattr(m, "has_str_non_permissive"):
319+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
320+
assert cvt(malformed_utf8) is malformed_utf8
321+
else:
326322
if env.PY2:
327323
with pytest.raises(UnicodeDecodeError):
328324
cvt(malformed_utf8)
329325
else:
330326
malformed_cvt = cvt(malformed_utf8)
331-
assert type(malformed_cvt) is unicode if env.PY2 else str # noqa: F821
332-
assert malformed_cvt == u"b'\\x80'"
333-
else:
334-
assert cvt(malformed_utf8) is malformed_utf8
327+
assert type(malformed_cvt) is str
328+
assert malformed_cvt == "b'\\x80'"
335329

336330

337331
def test_implicit_casting():
@@ -517,34 +511,33 @@ def test_isinstance_string_types():
517511
assert not m.isinstance_pybind11_bytes(u"")
518512

519513
assert m.isinstance_pybind11_str(u"")
520-
if hasattr(m, "has_str_non_permissive"):
521-
assert not m.isinstance_pybind11_str(b"")
522-
else:
514+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
523515
assert m.isinstance_pybind11_str(b"")
516+
else:
517+
assert not m.isinstance_pybind11_str(b"")
524518

525519

526520
def test_pass_bytes_or_unicode_to_string_types():
527521
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
528522
with pytest.raises(TypeError):
529523
m.pass_to_pybind11_bytes(u"Str")
530524

531-
if hasattr(m, "has_str_caster_no_implicit_decode"):
525+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE") or env.PY2:
526+
assert m.pass_to_pybind11_str(b"Bytes") == 5
527+
else:
532528
with pytest.raises(TypeError):
533529
m.pass_to_pybind11_str(b"Bytes")
534-
else:
535-
assert m.pass_to_pybind11_str(b"Bytes") == 5
536530
assert m.pass_to_pybind11_str(u"Str") == 3
537531

538532
assert m.pass_to_std_string(b"Bytes") == 5
539533
assert m.pass_to_std_string(u"Str") == 3
540534

541535
malformed_utf8 = b"\x80"
542-
if hasattr(m, "has_str_non_permissive"):
543-
if hasattr(m, "has_str_caster_no_implicit_decode"):
544-
with pytest.raises(TypeError):
545-
m.pass_to_pybind11_str(malformed_utf8)
546-
else:
547-
with pytest.raises(UnicodeDecodeError):
548-
m.pass_to_pybind11_str(malformed_utf8)
549-
else:
536+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
550537
assert m.pass_to_pybind11_str(malformed_utf8) == 1
538+
elif env.PY2:
539+
with pytest.raises(UnicodeDecodeError):
540+
m.pass_to_pybind11_str(malformed_utf8)
541+
else:
542+
with pytest.raises(TypeError):
543+
m.pass_to_pybind11_str(malformed_utf8)

0 commit comments

Comments
 (0)