Skip to content

Add PyUnicodeWriter API #95

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
Changelog
=========

* 2024-07-18: Add functions:

* ``PyUnicodeWriter_Create()``
* ``PyUnicodeWriter_Discard()``
* ``PyUnicodeWriter_Finish()``
* ``PyUnicodeWriter_WriteChar()``
* ``PyUnicodeWriter_WriteUTF8()``
* ``PyUnicodeWriter_WriteStr()``
* ``PyUnicodeWriter_WriteRepr()``
* ``PyUnicodeWriter_WriteSubstring()``
* ``PyUnicodeWriter_WriteWideChar()``
* ``PyUnicodeWriter_Format()``

* 2024-06-03: Add ``PyLong_GetSign()``.
* 2024-04-23: Drop Python 3.5 support. It cannot be tested anymore (pip fails).
* 2024-04-02: Add ``PyDict_SetDefaultRef()`` function.
Expand Down
153 changes: 153 additions & 0 deletions pythoncapi_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,159 @@ PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value,
}
#endif

// Backport of the PyUnicodeWriter functions, implemented below on top of the
// private _PyUnicodeWriter API. Only compiled for Python 3.6 up to (but not
// including) 3.14, and not on PyPy.
#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
// Opaque public type: the functions below cast it to _PyUnicodeWriter.
typedef struct PyUnicodeWriter PyUnicodeWriter;

// Destroy a writer without building a string.
//
// Releases the internal buffer with _PyUnicodeWriter_Dealloc() and then frees
// the writer allocation itself. A NULL writer is accepted and ignored, so
// error paths can call this function unconditionally.
static inline void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{
    if (writer == _Py_NULL) {
        // Nothing to release; _PyUnicodeWriter_Dealloc() must not see NULL.
        return;
    }
    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
    PyMem_Free(writer);
}

// Allocate and initialize a new writer.
//
// 'length' is the number of characters to preallocate; it must not be
// negative (ValueError otherwise). Returns a writer that must be released
// with PyUnicodeWriter_Finish() or PyUnicodeWriter_Discard(), or a null
// pointer with an exception set on error.
static inline PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length)
{
    if (length < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "length must be positive");
        return _Py_NULL;
    }

    // The public type is opaque: the storage is really a _PyUnicodeWriter.
    const size_t size = sizeof(_PyUnicodeWriter);
    PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
    if (pub_writer == _Py_NULL) {
        PyErr_NoMemory();
        return _Py_NULL;
    }
    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;

    _PyUnicodeWriter_Init(writer);
    // Reserve room for 'length' characters; 127 is the maximum character
    // requested up front (ASCII range).
    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
        PyUnicodeWriter_Discard(pub_writer);
        return _Py_NULL;
    }
    // Enable overallocation only after the initial preallocation.
    writer->overallocate = 1;
    return pub_writer;
}

// Turn the accumulated content into a string object and free the writer.
//
// Returns the result of _PyUnicodeWriter_Finish(). The writer allocation is
// freed in every case, so it must not be used after this call.
static inline PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
{
    _PyUnicodeWriter *impl = (_PyUnicodeWriter*)writer;
    PyObject *result = _PyUnicodeWriter_Finish(impl);

    // The private Finish() call is expected to have given up the buffer.
    assert(impl->buffer == NULL);

    PyMem_Free(writer);
    return result;
}

// Append a single character to the writer.
//
// Returns the result of _PyUnicodeWriter_WriteChar() for a valid code point,
// or -1 with ValueError set when 'ch' is above U+10FFFF.
static inline int
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
{
    if (ch <= 0x10ffff) {
        return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
    }

    PyErr_SetString(PyExc_ValueError,
                    "character must be in range(0x110000)");
    return -1;
}

// Append str(obj) to the writer.
//
// Returns -1 if PyObject_Str() fails, otherwise the result of
// _PyUnicodeWriter_WriteStr().
//
// Declared 'static inline' like the other functions of this header so that
// including the header from several translation units does not produce
// duplicate external definitions at link time.
static inline int
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
{
    PyObject *str = PyObject_Str(obj);
    if (str == _Py_NULL) {
        return -1;
    }

    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
    // The temporary string is no longer needed once it has been written.
    Py_DECREF(str);
    return res;
}

// Append repr(obj) to the writer.
//
// Returns -1 if PyObject_Repr() fails, otherwise the result of
// _PyUnicodeWriter_WriteStr().
//
// Declared 'static inline' like the other functions of this header so that
// including the header from several translation units does not produce
// duplicate external definitions at link time.
static inline int
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
{
    PyObject *str = PyObject_Repr(obj);
    if (str == _Py_NULL) {
        return -1;
    }

    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
    // The temporary string is no longer needed once it has been written.
    Py_DECREF(str);
    return res;
}

// Append a byte string to the writer, going through
// PyUnicode_FromStringAndSize() to build a temporary string object.
//
// A negative 'size' means that 'str' is NUL-terminated and its length is
// computed with strlen(). Returns -1 if the temporary string cannot be
// created, otherwise the result of _PyUnicodeWriter_WriteStr().
static inline int
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
                          const char *str, Py_ssize_t size)
{
    Py_ssize_t nbytes = (size >= 0) ? size : (Py_ssize_t)strlen(str);

    PyObject *decoded = PyUnicode_FromStringAndSize(str, nbytes);
    if (decoded == _Py_NULL) {
        return -1;
    }

    int status = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, decoded);
    Py_DECREF(decoded);
    return status;
}

// Append a wide character string to the writer, going through
// PyUnicode_FromWideChar() to build a temporary string object.
//
// A negative 'size' means that 'str' is NUL-terminated and its length is
// computed with wcslen(). Returns -1 if the temporary string cannot be
// created, otherwise the result of _PyUnicodeWriter_WriteStr().
static inline int
PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer,
                              const wchar_t *str, Py_ssize_t size)
{
    Py_ssize_t nchars = (size >= 0) ? size : (Py_ssize_t)wcslen(str);

    PyObject *converted = PyUnicode_FromWideChar(str, nchars);
    if (converted == _Py_NULL) {
        return -1;
    }

    int status = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, converted);
    Py_DECREF(converted);
    return status;
}

// Append the substring str[start:end] to the writer.
//
// 'str' must be a str object (TypeError otherwise). The bounds must satisfy
// 0 <= start <= end <= len(str) (ValueError otherwise). Returns -1 with an
// exception set on invalid arguments, otherwise the result of
// _PyUnicodeWriter_WriteSubstring().
static inline int
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
                               Py_ssize_t start, Py_ssize_t end)
{
    if (!PyUnicode_Check(str)) {
        // "%T" is only understood by PyErr_Format() on Python 3.13 and
        // newer, whereas this code is compiled for older versions as well:
        // format the type name explicitly with "%s".
        PyErr_Format(PyExc_TypeError, "expect str, not %s",
                     Py_TYPE(str)->tp_name);
        return -1;
    }
    if (start < 0 || start > end) {
        PyErr_SetString(PyExc_ValueError, "invalid start argument");
        return -1;
    }
    if (end > PyUnicode_GET_LENGTH(str)) {
        PyErr_SetString(PyExc_ValueError, "invalid end argument");
        return -1;
    }

    return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
                                           start, end);
}

// Format a string with PyUnicode_FromFormatV() and append it to the writer.
//
// Returns -1 if formatting fails, otherwise the result of
// _PyUnicodeWriter_WriteStr().
static inline int
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
{
    PyObject *formatted;
    va_list ap;

    va_start(ap, format);
    formatted = PyUnicode_FromFormatV(format, ap);
    va_end(ap);

    if (formatted == _Py_NULL) {
        return -1;
    }

    int status = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, formatted);
    Py_DECREF(formatted);
    return status;
}
#endif // PY_VERSION_HEX < 0x030E0000

// gh-116560 added PyLong_GetSign() to Python 3.14.0a0
#if PY_VERSION_HEX < 0x030E00A0
Expand Down
146 changes: 146 additions & 0 deletions tests/test_pythoncapi_compat_cext.c
Original file line number Diff line number Diff line change
Expand Up @@ -1733,6 +1733,147 @@ test_get_constant(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
}


// The PyUnicodeWriter tests are only built for Python 3.6 up to (but not
// including) 3.14, and not on PyPy. TEST_UNICODEWRITER is checked again
// where the test functions are registered in the method table.
#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
#define TEST_UNICODEWRITER 1

// Exercise PyUnicodeWriter_WriteStr(), PyUnicodeWriter_WriteChar(),
// PyUnicodeWriter_WriteSubstring(), PyUnicodeWriter_WriteUTF8() and
// PyUnicodeWriter_WriteRepr(), then check the string built by
// PyUnicodeWriter_Finish().
//
// Returns None on success, or NULL with an exception set on failure. Every
// temporary string is released before its write result is checked, so the
// error path only has to discard the writer.
static PyObject *
test_unicodewriter(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }
    int ret;

    // test PyUnicodeWriter_WriteStr()
    PyObject *str = PyUnicode_FromString("var");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteStr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteChar()
    if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteSubstring()
    str = PyUnicode_FromString("[long]");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteUTF8()
    if (PyUnicodeWriter_WriteUTF8(writer, " valu\xC3\xA9", -1) < 0) {
        goto error;
    }
    if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteRepr()
    str = PyUnicode_FromString("repr");
    if (str == NULL) {
        goto error;
    }
    // Drop the temporary before looking at the result: jumping to 'error'
    // while still holding it would leak the reference.
    ret = PyUnicodeWriter_WriteRepr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // Nested scope: 'result' is declared after the gotos above.
    {
        // Finish() frees the writer even on failure: no Discard() here.
        PyObject *result = PyUnicodeWriter_Finish(writer);
        if (result == NULL) {
            return NULL;
        }
        assert(PyUnicode_EqualToUTF8(result, "var=long valu\xC3\xA9 'repr'"));
        Py_DECREF(result);
    }

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}


// Check that PyUnicodeWriter_WriteWideChar() appends a wide character string
// whose length is computed by the function itself (size of -1).
//
// Returns None on success, or NULL with an exception set on failure.
static PyObject *
test_unicodewriter_widechar(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // test PyUnicodeWriter_WriteWideChar()
    if (PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1) < 0) {
        // The writer is still owned here: release it before bailing out.
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // Finish() releases the writer whether or not it succeeds.
    PyObject *text = PyUnicodeWriter_Finish(writer);
    if (text == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text, "euro=\xe2\x82\xac"));
    Py_DECREF(text);

    Py_RETURN_NONE;
}


// Check PyUnicodeWriter_Format() followed by PyUnicodeWriter_WriteChar(),
// then compare the string built by PyUnicodeWriter_Finish().
//
// Returns None on success, or NULL with an exception set on failure.
static PyObject *
test_unicodewriter_format(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // test PyUnicodeWriter_Format(), then PyUnicodeWriter_WriteChar();
    // the second write is only attempted when the first one succeeded.
    int failed = (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0
                  || PyUnicodeWriter_WriteChar(writer, '.') < 0);
    if (failed) {
        // The writer is still owned here: release it before bailing out.
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // Finish() releases the writer whether or not it succeeds.
    PyObject *text = PyUnicodeWriter_Finish(writer);
    if (text == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text, "Hello 123."));
    Py_DECREF(text);

    Py_RETURN_NONE;
}
#endif


static struct PyMethodDef methods[] = {
{"test_object", test_object, METH_NOARGS, _Py_NULL},
{"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
Expand Down Expand Up @@ -1771,6 +1912,11 @@ static struct PyMethodDef methods[] = {
{"test_time", test_time, METH_NOARGS, _Py_NULL},
#endif
{"test_get_constant", test_get_constant, METH_NOARGS, _Py_NULL},
#ifdef TEST_UNICODEWRITER
{"test_unicodewriter", test_unicodewriter, METH_NOARGS, _Py_NULL},
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS, _Py_NULL},
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS, _Py_NULL},
#endif
{_Py_NULL, _Py_NULL, 0, _Py_NULL}
};

Expand Down