Skip to content

Commit 203492a

Browse files
committed
Reducing from two to only one macro: PYBIND11_STR_LEGACY_PERMISSIVE
1 parent b1ae78f commit 203492a

File tree

9 files changed

+86
-61
lines changed

9 files changed

+86
-61
lines changed

docs/changelog.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,16 @@ Changelog
66
Starting with version 1.8.0, pybind11 releases use a `semantic versioning
77
<http://semver.org>`_ policy.
88

9+
v2.7.0 (WIP)
10+
------------------------------
11+
12+
* ``py::str`` changed to exclusively hold `PyUnicodeObject`. Previously
13+
``py::str`` could also hold `bytes`, which is probably surprising, was
14+
never documented, and can mask bugs (e.g. accidental use of ``py::str``
15+
instead of ``py::bytes``).
16+
`#2409 <https://github.com/pybind/pybind11/pull/2409>`_
17+
18+
919
v2.6.2 (TBA, not yet released)
1020
------------------------------
1121

docs/upgrade.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,31 @@ modernization and other useful information.
1010

1111
.. _upgrade-guide-2.6:
1212

13+
v2.7
14+
====
15+
16+
*Before* v2.7, ``py::str`` can hold ``PyUnicodeObject`` or ``PyBytesObject``,
17+
and ``py::isinstance<str>()`` is ``true`` for both ``py::str`` and
18+
``py::bytes``. Starting with v2.7, ``py::str`` exclusively holds
19+
``PyUnicodeObject`` (`#2409 <https://github.com/pybind/pybind11/pull/2409>`_),
20+
and ``py::isinstance<str>()`` is ``true`` only for ``py::str``. To help in
21+
the transition of client code, the ``PYBIND11_STR_LEGACY_PERMISSIVE`` macro
22+
is provided as an escape hatch to go back to the legacy behavior. This macro
23+
will be removed in future releases. Two types of required client-code fixes
24+
are expected to be common:
25+
26+
* Accidental use of ``py::str`` instead of ``py::bytes``, masked by the legacy
27+
behavior. These are probably very easy to fix, by changing from
28+
``py::str`` to ``py::bytes``.
29+
30+
* Reliance on py::isinstance<str>(obj) being ``true`` for
31+
``py::bytes``. This is likely to be easy to fix in most cases by adding
32+
``|| py::isinstance<bytes>(obj)``, but a fix may be more involved, e.g. if
33+
``py::isinstance<T>`` appears in a template. Such situations will require
34+
careful review and custom fixes.
35+
36+
37+
1338
v2.6
1439
====
1540

include/pybind11/cast.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1623,7 +1623,10 @@ struct pyobject_caster {
16231623

16241624
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
16251625
bool load(handle src, bool /* convert */) {
1626-
#if defined(PYBIND11_STR_NON_PERMISSIVE) && !defined(PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE)
1626+
#if PY_MAJOR_VERSION < 3 && !defined(PYBIND11_STR_LEGACY_PERMISSIVE)
1627+
// For Python 2, without this implicit conversion, Python code would
1628+
// need to be cluttered with six.ensure_text() or similar, only to be
1629+
// un-cluttered later after Python 2 support is dropped.
16271630
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
16281631
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
16291632
if (!str_from_bytes) throw error_already_set();

include/pybind11/detail/common.h

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -161,19 +161,18 @@
161161
#include <typeindex>
162162
#include <type_traits>
163163

164-
#define PYBIND11_STR_NON_PERMISSIVE
165-
// If UNDEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
166-
// (probably surprising, but this is the legacy behavior). As a side-effect,
167-
// pybind11::isinstance<str>() is true for both pybind11::str and pybind11::bytes.
168-
// If DEFINED, pybind11::str can only hold PyUnicodeObject, and
169-
// pybind11::isinstance<str>() is true only for pybind11::str.
170-
171-
#if PY_MAJOR_VERSION >= 3
172-
#define PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
173-
#endif
174-
// This macro has an effect only if PYBIND11_STR_NON_PERMISSIVE is defined.
175-
// If UNDEFINED, the pybind11::str caster will implicitly decode bytes to PyUnicodeObject.
176-
// If DEFINED, the pybind11::str caster will only accept PyUnicodeObject.
164+
// #define PYBIND11_STR_LEGACY_PERMISSIVE
165+
// If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
166+
// (probably surprising and never documented, but this was the
167+
// legacy behavior until and including v2.6.x). As a side-effect,
168+
// pybind11::isinstance<str>() is true for both pybind11::str and
169+
// pybind11::bytes.
170+
// If UNDEFINED, pybind11::str can only hold PyUnicodeObject, and
171+
// pybind11::isinstance<str>() is true only for pybind11::str.
172+
// However, for Python 2 only (!), the pybind11::str caster
173+
// implicitly decodes bytes to PyUnicodeObject. This is to ease
174+
// the transition from the legacy behavior to the non-permissive
175+
// behavior.
177176

178177
#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
179178
#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)

include/pybind11/pytypes.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -754,11 +754,11 @@ inline bool PyIterable_Check(PyObject *obj) {
754754
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
755755
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
756756

757-
#ifdef PYBIND11_STR_NON_PERMISSIVE
758-
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
759-
#else
757+
#ifdef PYBIND11_STR_LEGACY_PERMISSIVE
760758
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
761759
#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
760+
#else
761+
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
762762
#endif
763763

764764
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }

tests/test_eval.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ def test_evals(capture):
2222
@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
2323
def test_eval_file():
2424
filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
25-
if env.PY2:
26-
filename = filename.decode("utf-8")
2725
assert m.test_eval_file(filename)
2826

2927
assert m.test_eval_file_failure()

tests/test_exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def hook(unraisable_hook_args):
6868
# Use monkeypatch so pytest can apply and remove the patch as appropriate
6969
monkeypatch.setattr(sys, "unraisablehook", hook)
7070

71-
assert m.python_alreadyset_in_destructor(u"already_set demo") is True
71+
assert m.python_alreadyset_in_destructor("already_set demo") is True
7272
if hooked:
7373
assert triggered[0] is True
7474

tests/test_pytypes.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -411,11 +411,8 @@ TEST_SUBMODULE(pytypes, m) {
411411
// test_builtin_functions
412412
m.def("get_len", [](py::handle h) { return py::len(h); });
413413

414-
#ifdef PYBIND11_STR_NON_PERMISSIVE
415-
m.attr("has_str_non_permissive") = true;
416-
#endif
417-
#ifdef PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
418-
m.attr("has_str_caster_no_implicit_decode") = true;
414+
#ifdef PYBIND11_STR_LEGACY_PERMISSIVE
415+
m.attr("PYBIND11_STR_LEGACY_PERMISSIVE") = true;
419416
#endif
420417

421418
m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });

tests/test_pytypes.py

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -120,23 +120,17 @@ def __repr__(self):
120120
assert s1 == s2
121121

122122
malformed_utf8 = b"\x80"
123+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
124+
assert m.str_from_object(malformed_utf8) is malformed_utf8
125+
elif env.PY2:
126+
with pytest.raises(UnicodeDecodeError):
127+
m.str_from_object(malformed_utf8)
128+
else:
129+
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
123130
if env.PY2:
124-
if hasattr(m, "has_str_non_permissive"):
125-
with pytest.raises(UnicodeDecodeError):
126-
m.str_from_object(malformed_utf8)
127-
else:
128-
m.str_from_object(
129-
malformed_utf8
130-
) is malformed_utf8 # To be fixed; see #2380
131131
with pytest.raises(UnicodeDecodeError):
132132
m.str_from_handle(malformed_utf8)
133133
else:
134-
if hasattr(m, "has_str_non_permissive"):
135-
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
136-
else:
137-
assert (
138-
m.str_from_object(malformed_utf8) is malformed_utf8
139-
) # To be fixed; see #2380
140134
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
141135

142136

@@ -310,26 +304,26 @@ def test_pybind11_str_raw_str():
310304
valid_orig = u"DZ"
311305
valid_utf8 = valid_orig.encode("utf-8")
312306
valid_cvt = cvt(valid_utf8)
313-
if hasattr(m, "has_str_non_permissive"):
307+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
308+
assert valid_cvt is valid_utf8
309+
else:
314310
assert type(valid_cvt) is unicode if env.PY2 else str # noqa: F821
315311
if env.PY2:
316312
assert valid_cvt == valid_orig
317313
else:
318-
assert valid_cvt == u"b'\\xc7\\xb1'"
319-
else:
320-
assert valid_cvt is valid_utf8
314+
assert valid_cvt == "b'\\xc7\\xb1'"
321315

322316
malformed_utf8 = b"\x80"
323-
if hasattr(m, "has_str_non_permissive"):
317+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
318+
assert cvt(malformed_utf8) is malformed_utf8
319+
else:
324320
if env.PY2:
325321
with pytest.raises(UnicodeDecodeError):
326322
cvt(malformed_utf8)
327323
else:
328324
malformed_cvt = cvt(malformed_utf8)
329-
assert type(malformed_cvt) is unicode if env.PY2 else str # noqa: F821
330-
assert malformed_cvt == u"b'\\x80'"
331-
else:
332-
assert cvt(malformed_utf8) is malformed_utf8
325+
assert type(malformed_cvt) is str
326+
assert malformed_cvt == "b'\\x80'"
333327

334328

335329
def test_implicit_casting():
@@ -515,34 +509,33 @@ def test_isinstance_string_types():
515509
assert not m.isinstance_pybind11_bytes(u"")
516510

517511
assert m.isinstance_pybind11_str(u"")
518-
if hasattr(m, "has_str_non_permissive"):
519-
assert not m.isinstance_pybind11_str(b"")
520-
else:
512+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
521513
assert m.isinstance_pybind11_str(b"")
514+
else:
515+
assert not m.isinstance_pybind11_str(b"")
522516

523517

524518
def test_pass_bytes_or_unicode_to_string_types():
525519
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
526520
with pytest.raises(TypeError):
527521
m.pass_to_pybind11_bytes(u"Str")
528522

529-
if hasattr(m, "has_str_caster_no_implicit_decode"):
523+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE") or env.PY2:
524+
assert m.pass_to_pybind11_str(b"Bytes") == 5
525+
else:
530526
with pytest.raises(TypeError):
531527
m.pass_to_pybind11_str(b"Bytes")
532-
else:
533-
assert m.pass_to_pybind11_str(b"Bytes") == 5
534528
assert m.pass_to_pybind11_str(u"Str") == 3
535529

536530
assert m.pass_to_std_string(b"Bytes") == 5
537531
assert m.pass_to_std_string(u"Str") == 3
538532

539533
malformed_utf8 = b"\x80"
540-
if hasattr(m, "has_str_non_permissive"):
541-
if hasattr(m, "has_str_caster_no_implicit_decode"):
542-
with pytest.raises(TypeError):
543-
m.pass_to_pybind11_str(malformed_utf8)
544-
else:
545-
with pytest.raises(UnicodeDecodeError):
546-
m.pass_to_pybind11_str(malformed_utf8)
547-
else:
534+
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
548535
assert m.pass_to_pybind11_str(malformed_utf8) == 1
536+
elif env.PY2:
537+
with pytest.raises(UnicodeDecodeError):
538+
m.pass_to_pybind11_str(malformed_utf8)
539+
else:
540+
with pytest.raises(TypeError):
541+
m.pass_to_pybind11_str(malformed_utf8)

0 commit comments

Comments
 (0)