From 91e2cb993f4fe2bcfe2513e2a8daecde156d331b Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Wed, 1 Dec 2021 11:07:18 -0400 Subject: [PATCH 1/7] Expand string_view support to str, bytes, memoryview 1. Allows constructing a str or bytes implicitly from a string_view; this is essentially a small shortcut allowing a caller to write `py::bytes{sv}` rather than `py::bytes{sv.data(), sv.size()}`. 2. Allows implicit conversion *to* string_view from py::bytes -- this saves a fair bit more as currently there is no simple way to get such a view of the bytes without copying it (or resorting to Python API calls). (This is not done for `str` because when the str contains unicode we have to allocate to a temporary and so there might not be some string data we can properly view without owning.) 3. Allows `memoryview::from_memory` to accept a string_view. As with the other from_memory calls, it's entirely your responsibility to keep it alive. This also required moving the string_view availability detection into detail/common.h because this PR needs it in pytypes.h, which is higher up the include chain than cast.h where it was being detected currently. --- include/pybind11/cast.h | 17 --------------- include/pybind11/detail/common.h | 18 ++++++++++++++++ include/pybind11/pytypes.h | 37 ++++++++++++++++++++++++++++++++ tests/test_builtin_casters.cpp | 13 +++++++++++ tests/test_builtin_casters.py | 8 +++++++ 5 files changed, 76 insertions(+), 17 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 097d41bdaf..01dc5df73f 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -27,23 +27,6 @@ #include #include -#if defined(PYBIND11_CPP17) -# if defined(__has_include) -# if __has_include() -# define PYBIND11_HAS_STRING_VIEW -# endif -# elif defined(_MSC_VER) -# define PYBIND11_HAS_STRING_VIEW -# endif -#endif -#ifdef PYBIND11_HAS_STRING_VIEW -#include -#endif - -#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L -# define PYBIND11_HAS_U8STRING -#endif - PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 862451fd13..73821c742f 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -183,6 +183,21 @@ # define PYBIND11_HAS_VARIANT 1 #endif +#if defined(PYBIND11_CPP17) +# if defined(__has_include) +# if __has_include() +# define PYBIND11_HAS_STRING_VIEW +# endif +# elif defined(_MSC_VER) +# define PYBIND11_HAS_STRING_VIEW +# endif +#endif + +#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L +# define PYBIND11_HAS_U8STRING +#endif + + #include #include #include @@ -229,6 +244,9 @@ # include # endif #endif +#ifdef PYBIND11_HAS_STRING_VIEW +# include +#endif // #define PYBIND11_STR_LEGACY_PERMISSIVE // If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index a08fd8ceb6..c395bdf27d 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1085,6 +1085,18 @@ class str : public object { // NOLINTNEXTLINE(google-explicit-constructor) str(const std::string &s) : str(s.data(), s.size()) { } +#ifdef PYBIND11_HAS_STRING_VIEW + // NOLINTNEXTLINE(google-explicit-constructor) + str(std::string_view s) : str(s.data(), s.size()) { } + +# ifdef PYBIND11_HAS_U8STRING + // reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char) + // NOLINTNEXTLINE(google-explicit-constructor) + str(std::u8string_view s) : str(reinterpret_cast(s.data()), s.size()) { } +# endif + +#endif + explicit str(const bytes &b); /** \rst @@ -1167,6 +1179,24 @@ class bytes : public object { pybind11_fail("Unable to extract bytes contents!"); return std::string(buffer, (size_t) length); } + +#ifdef PYBIND11_HAS_STRING_VIEW + // NOLINTNEXTLINE(google-explicit-constructor) + bytes(std::string_view s) : bytes(s.data(), s.size()) { } + + // Obtain a string view that views the current `bytes` buffer value. Note that this is only + // valid so long as the `bytes` instance remains alive and so generally should not outlive the + // lifetime of the `bytes` instance. + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::string_view() const { + char *buffer = nullptr; + ssize_t length = 0; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length)) + pybind11_fail("Unable to extract bytes contents!"); + return {buffer, static_cast(length)}; + } +#endif + }; // Note: breathe >= 4.17.0 will fail to build docs if the below two constructors // are included in the doxygen group; close here and reopen after as a workaround @@ -1714,6 +1744,13 @@ class memoryview : public object { static memoryview from_memory(const void *mem, ssize_t size) { return memoryview::from_memory(const_cast(mem), size, true); } + +#ifdef PYBIND11_HAS_STRING_VIEW + static memoryview from_memory(std::string_view mem) { + return from_memory(const_cast(mem.data()), static_cast(mem.size()), true); + } +#endif + #endif }; diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index 71c778e8e2..ee24131e24 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -140,10 +140,23 @@ TEST_SUBMODULE(builtin_casters, m) { m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); }); m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); }); + // The inner lambdas here are to also test implicit conversion + using namespace std::literals; + m.def("string_view_bytes", []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); }); + m.def("string_view_str", []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); }); + m.def("string_view_from_bytes", [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); }); +#if PY_MAJOR_VERSION >= 3 + m.def("string_view_memoryview", []() { + static constexpr auto val = "Have some \360\237\216\202"sv; + return py::memoryview::from_memory(val); + }); +#endif + # ifdef PYBIND11_HAS_U8STRING m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); }); m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; }); m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); }); + m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; }); # endif #endif diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py index 9c5e17a68e..6d266b4213 100644 --- a/tests/test_builtin_casters.py +++ b/tests/test_builtin_casters.py @@ -206,6 +206,14 @@ def test_string_view(capture): """ ) + assert m.string_view_bytes() == b"abc \x80\x80 def" + assert m.string_view_str() == u"abc ‽ def" + assert m.string_view_from_bytes(u"abc ‽ def".encode("utf-8")) == u"abc ‽ def" + if hasattr(m, "has_u8string"): + assert m.string_view8_str() == u"abc ‽ def" + if not env.PY2: + assert m.string_view_memoryview() == "Have some 🎂".encode() + def test_integer_casting(): """Issue #929 - out-of-range integer values shouldn't be accepted""" From f56dc86f1545812b4557db3b48d426dc95722095 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Thu, 2 Dec 2021 13:33:45 -0400 Subject: [PATCH 2/7] Move string_view include to pytypes.h --- include/pybind11/detail/common.h | 3 --- include/pybind11/pytypes.h | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 73821c742f..eb5fb08682 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -244,9 +244,6 @@ # include # endif #endif -#ifdef PYBIND11_HAS_STRING_VIEW -# include -#endif // #define PYBIND11_STR_LEGACY_PERMISSIVE // If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index c395bdf27d..98c4d4621a 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -18,6 +18,10 @@ # include #endif +#ifdef PYBIND11_HAS_STRING_VIEW +# include +#endif + PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) /* A few forward declarations */ From add6628bfd74f23a7a01f790da2722548fb3873c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 2 Dec 2021 17:16:32 -0800 Subject: [PATCH 3/7] CI-testing a fix for the "ambiguous conversion" issue. This change is known to fix the `tensorflow::tstring` issue reported under https://github.com/pybind/pybind11/pull/3521#issuecomment-985100965 TODO: Minimal reproducer for the `tensorflow::tstring` issue. --- include/pybind11/pytypes.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 98c4d4621a..f1ae82b039 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1185,8 +1185,10 @@ class bytes : public object { } #ifdef PYBIND11_HAS_STRING_VIEW + // enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521). // NOLINTNEXTLINE(google-explicit-constructor) - bytes(std::string_view s) : bytes(s.data(), s.size()) { } + template ::value, int> = 0> + bytes(T s) : bytes(s.data(), s.size()) { } // Obtain a string view that views the current `bytes` buffer value. Note that this is only // valid so long as the `bytes` instance remains alive and so generally should not outlive the From 7f7ddaa7b242d4c023acb439e66612be7aa20d03 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 2 Dec 2021 17:28:41 -0800 Subject: [PATCH 4/7] Make clang-tidy happy (hopefully). --- include/pybind11/pytypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index f1ae82b039..78b8f4853c 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1186,8 +1186,8 @@ class bytes : public object { #ifdef PYBIND11_HAS_STRING_VIEW // enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521). - // NOLINTNEXTLINE(google-explicit-constructor) template ::value, int> = 0> + // NOLINTNEXTLINE(google-explicit-constructor) bytes(T s) : bytes(s.data(), s.size()) { } // Obtain a string view that views the current `bytes` buffer value. Note that this is only From 16e178bae3988606e07967a5cbe8c92c3ed4beb9 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 2 Dec 2021 22:28:24 -0800 Subject: [PATCH 5/7] Adding minimal reproducer for the `tensorflow::tstring` issue. Error without the enable_if trick: ``` /usr/local/google/home/rwgk/forked/pybind11/tests/test_builtin_casters.cpp:169:16: error: ambiguous conversion for functional-style cast from 'TypeWithBothOperatorStringAndStringView' to 'py::bytes' return py::bytes(TypeWithBothOperatorStringAndStringView()); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1174:5: note: candidate constructor bytes(const std::string &s) : bytes(s.data(), s.size()) { } ^ /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1191:5: note: candidate constructor bytes(std::string_view s) : bytes(s.data(), s.size()) { } ^ ``` --- tests/test_builtin_casters.cpp | 7 +++++++ tests/test_builtin_casters.py | 2 ++ 2 files changed, 9 insertions(+) diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index ee24131e24..90741856b1 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -158,6 +158,13 @@ TEST_SUBMODULE(builtin_casters, m) { m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); }); m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; }); # endif + + struct TypeWithBothOperatorStringAndStringView { + operator std::string() const { return "success"; } + operator std::string_view() const { return "failure"; } + }; + m.def("bytes_from_type_with_both_operator_string_and_string_view", + []() { return py::bytes(TypeWithBothOperatorStringAndStringView()); }); #endif // test_integer_casting diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py index 6d266b4213..844e10d0aa 100644 --- a/tests/test_builtin_casters.py +++ b/tests/test_builtin_casters.py @@ -214,6 +214,8 @@ def test_string_view(capture): if not env.PY2: assert m.string_view_memoryview() == "Have some 🎂".encode() + assert m.bytes_from_type_with_both_operator_string_and_string_view() == b"success" + def test_integer_casting(): """Issue #929 - out-of-range integer values shouldn't be accepted""" From 34281953950e46d2ca5ae207ae172ead5f30749e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 2 Dec 2021 22:59:31 -0800 Subject: [PATCH 6/7] Adding missing NOLINTNEXTLINE --- tests/test_builtin_casters.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index 90741856b1..f1ba0b8336 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -160,7 +160,9 @@ TEST_SUBMODULE(builtin_casters, m) { # endif struct TypeWithBothOperatorStringAndStringView { + // NOLINTNEXTLINE(google-explicit-constructor) operator std::string() const { return "success"; } + // NOLINTNEXTLINE(google-explicit-constructor) operator std::string_view() const { return "failure"; } }; m.def("bytes_from_type_with_both_operator_string_and_string_view", From 5709ccfae019a4b32e7b910f8f52f56352145560 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 3 Dec 2021 11:22:22 -0400 Subject: [PATCH 7/7] Also apply ambiguous conversion workaround to str() --- include/pybind11/pytypes.h | 4 +++- tests/test_builtin_casters.cpp | 2 ++ tests/test_builtin_casters.py | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 78b8f4853c..f803a05ca7 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1090,8 +1090,10 @@ class str : public object { str(const std::string &s) : str(s.data(), s.size()) { } #ifdef PYBIND11_HAS_STRING_VIEW + // enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521). + template ::value, int> = 0> // NOLINTNEXTLINE(google-explicit-constructor) - str(std::string_view s) : str(s.data(), s.size()) { } + str(T s) : str(s.data(), s.size()) { } # ifdef PYBIND11_HAS_U8STRING // reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char) diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index f1ba0b8336..fe629e7c34 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -167,6 +167,8 @@ TEST_SUBMODULE(builtin_casters, m) { }; m.def("bytes_from_type_with_both_operator_string_and_string_view", []() { return py::bytes(TypeWithBothOperatorStringAndStringView()); }); + m.def("str_from_type_with_both_operator_string_and_string_view", + []() { return py::str(TypeWithBothOperatorStringAndStringView()); }); #endif // test_integer_casting diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py index 844e10d0aa..b1f1e395a7 100644 --- a/tests/test_builtin_casters.py +++ b/tests/test_builtin_casters.py @@ -215,6 +215,7 @@ def test_string_view(capture): assert m.string_view_memoryview() == "Have some 🎂".encode() assert m.bytes_from_type_with_both_operator_string_and_string_view() == b"success" + assert m.str_from_type_with_both_operator_string_and_string_view() == "success" def test_integer_casting():