Skip to content

Commit 84b02f3

Browse files
committed
pythongh-129005: Move bytearray to use bytes as a buffer
1. add `tp_new` to guarantee `ob_bytes_head` is always set, often to the empty bytes singleton. 2. `ob_alloc` information is now redundant, added assertion to validate that, would it make sense to deprecate? 3. There's a lot of `bytearray` code very similar to `bytes` code, more could likely be just proxied to the `bytes` now. Here just focusing on the swap as that enables optimizations. 4. If `bytearray` is passed a single-reference bytes it could potentially take "ownership" of it without copying the bytes, for now not implemented. This enables adding `bytearray._detach()` which I plan to do in a separate PR. ```bash ./python -m test -M8g -uall test_largefile -m test.test_largefile.CLargeFileTest.test_large_read ./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read ``` On my machine (AMD 64 bit Linux, Optimized build): `_io` takes: ~0.791s and uses ~2GB of RAM `_pyio` current: ~1.073s and uses ~4GB of RAM `_pyio` w/ bytearray._detach: ~0.887s and uses ~2GB of RAM Perf checking no major swings in an optimized build: `./python -E -bb -Wd -m test -uall -M32G test_bytes test_capi.test_bytearray -vvv` before: ~1.4s after: ~1.5s Previous discussion: https://discuss.python.org/t/add-zero-copy-conversion-of-bytearray-to-bytes-by-providing-bytes/79164
1 parent 0ef4ffe commit 84b02f3

File tree

4 files changed

+64
-41
lines changed

4 files changed

+64
-41
lines changed

Include/cpython/bytearrayobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ typedef struct {
99
char *ob_bytes; /* Physical backing buffer */
1010
char *ob_start; /* Logical start inside ob_bytes */
1111
Py_ssize_t ob_exports; /* How many buffer exports */
12+
PyObject *ob_bytes_head; /* bytes enabling zero-copy detach. */
1213
} PyByteArrayObject;
1314

1415
PyAPI_DATA(char) _PyByteArray_empty_string[];

Lib/test/test_sys.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1547,7 +1547,7 @@ def test_objecttypes(self):
15471547
samples = [b'', b'u'*100000]
15481548
for sample in samples:
15491549
x = bytearray(sample)
1550-
check(x, vsize('n2Pi') + x.__alloc__())
1550+
check(x, vsize('n2PiP') + x.__alloc__())
15511551
# bytearray_iterator
15521552
check(iter(bytearray()), size('nP'))
15531553
# bytes
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Move :class:`bytearray` to use a buffer directly convertable to :class:`bytes`.

Objects/bytearrayobject.c

Lines changed: 61 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,20 @@ class bytearray "PyByteArrayObject *" "&PyByteArray_Type"
2121
char _PyByteArray_empty_string[] = "";
2222

2323
/* Helpers */
24+
static void
25+
bytearray_set_bytes(PyByteArrayObject *self, PyObject *bytes, Py_ssize_t size)
26+
{
27+
/* Always point to a valid bytes object, often empty singleton. */
28+
assert(bytes);
29+
assert(PyBytes_CheckExact(bytes));
30+
self->ob_bytes_head = bytes;
31+
self->ob_bytes = self->ob_start = PyBytes_AS_STRING(bytes);
32+
33+
Py_ssize_t alloc = PyBytes_GET_SIZE(bytes);
34+
alloc = (alloc == 0 ? alloc : alloc + 1);
35+
FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, alloc);
36+
Py_SET_SIZE(self, size);
37+
}
2438

2539
static int
2640
_getbytevalue(PyObject* arg, int *value)
@@ -127,7 +141,6 @@ PyObject *
127141
PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
128142
{
129143
PyByteArrayObject *new;
130-
Py_ssize_t alloc;
131144

132145
if (size < 0) {
133146
PyErr_SetString(PyExc_SystemError,
@@ -141,27 +154,17 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
141154
}
142155

143156
new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
144-
if (new == NULL)
157+
if (new == NULL) {
145158
return NULL;
146-
147-
if (size == 0) {
148-
new->ob_bytes = NULL;
149-
alloc = 0;
150159
}
151-
else {
152-
alloc = size + 1;
153-
new->ob_bytes = PyMem_Malloc(alloc);
154-
if (new->ob_bytes == NULL) {
155-
Py_DECREF(new);
156-
return PyErr_NoMemory();
157-
}
158-
if (bytes != NULL && size > 0)
159-
memcpy(new->ob_bytes, bytes, size);
160-
new->ob_bytes[size] = '\0'; /* Trailing null byte */
160+
161+
PyObject *buffer = PyBytes_FromStringAndSize(bytes, size);
162+
if (buffer == NULL) {
163+
Py_DECREF(new);
164+
return NULL;
161165
}
162-
Py_SET_SIZE(new, size);
163-
new->ob_alloc = alloc;
164-
new->ob_start = new->ob_bytes;
166+
167+
bytearray_set_bytes(new, buffer, size);
165168
new->ob_exports = 0;
166169

167170
return (PyObject *)new;
@@ -189,7 +192,6 @@ static int
189192
bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size)
190193
{
191194
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
192-
void *sval;
193195
PyByteArrayObject *obj = ((PyByteArrayObject *)self);
194196
/* All computations are done unsigned to avoid integer overflows
195197
(see issue #22335). */
@@ -245,28 +247,27 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size)
245247
}
246248

247249
if (logical_offset > 0) {
248-
sval = PyMem_Malloc(alloc);
249-
if (sval == NULL) {
250-
PyErr_NoMemory();
250+
/* Make a new non-offset buffer */
251+
PyObject *new = PyBytes_FromStringAndSize(NULL, alloc - 1);
252+
if (new == NULL) {
251253
return -1;
252254
}
253-
memcpy(sval, PyByteArray_AS_STRING(self),
255+
memcpy(PyBytes_AS_STRING(new), PyByteArray_AS_STRING(self),
254256
Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self)));
255-
PyMem_Free(obj->ob_bytes);
257+
Py_DECREF(obj->ob_bytes_head);
258+
bytearray_set_bytes(obj, new, size);
259+
obj->ob_bytes[size] = '\0'; /* Trailing null byte */
260+
return 0;
256261
}
257-
else {
258-
sval = PyMem_Realloc(obj->ob_bytes, alloc);
259-
if (sval == NULL) {
260-
PyErr_NoMemory();
261-
return -1;
262-
}
262+
263+
/* Too small, grow allocation */
264+
if (_PyBytes_Resize(&obj->ob_bytes_head, alloc - 1) < 0) {
265+
bytearray_set_bytes(obj, PyBytes_FromStringAndSize(NULL, 0), 0);
266+
return -1;
263267
}
264268

265-
obj->ob_bytes = obj->ob_start = sval;
266-
Py_SET_SIZE(self, size);
267-
FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, alloc);
269+
bytearray_set_bytes(obj, obj->ob_bytes_head, size);
268270
obj->ob_bytes[size] = '\0'; /* Trailing null byte */
269-
270271
return 0;
271272
}
272273

@@ -870,6 +871,19 @@ bytearray_ass_subscript(PyObject *op, PyObject *index, PyObject *values)
870871
return ret;
871872
}
872873

874+
static PyObject *
875+
bytearray_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds)
876+
{
877+
PyObject *obj = subtype->tp_alloc(subtype, 0);
878+
if (obj == NULL) {
879+
return NULL;
880+
}
881+
882+
PyByteArrayObject *self = _PyByteArray_CAST(obj);
883+
bytearray_set_bytes(self, PyBytes_FromStringAndSize(NULL, 0), 0);
884+
return obj;
885+
}
886+
873887
/*[clinic input]
874888
bytearray.__init__
875889
@@ -1233,9 +1247,7 @@ bytearray_dealloc(PyObject *op)
12331247
"deallocated bytearray object has exported buffers");
12341248
PyErr_Print();
12351249
}
1236-
if (self->ob_bytes != 0) {
1237-
PyMem_Free(self->ob_bytes);
1238-
}
1250+
Py_XDECREF(self->ob_bytes_head);
12391251
Py_TYPE(self)->tp_free((PyObject *)self);
12401252
}
12411253

@@ -2452,7 +2464,16 @@ static PyObject *
24522464
bytearray_alloc(PyObject *op, PyObject *Py_UNUSED(ignored))
24532465
{
24542466
PyByteArrayObject *self = _PyByteArray_CAST(op);
2455-
return PyLong_FromSsize_t(FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc));
2467+
2468+
Py_ssize_t size = PyBytes_GET_SIZE(self->ob_bytes_head);
2469+
Py_ssize_t alloc = (size == 0 ? size : size + 1);
2470+
2471+
#ifndef NDEBUG
2472+
/* Validate redundant information is redundant */
2473+
Py_ssize_t ob_alloc = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc);
2474+
assert(alloc == ob_alloc);
2475+
#endif
2476+
return PyLong_FromSsize_t(alloc);
24562477
}
24572478

24582479
/*[clinic input]
@@ -2820,7 +2841,7 @@ PyTypeObject PyByteArray_Type = {
28202841
0, /* tp_dictoffset */
28212842
(initproc)bytearray___init__, /* tp_init */
28222843
PyType_GenericAlloc, /* tp_alloc */
2823-
PyType_GenericNew, /* tp_new */
2844+
bytearray_new, /* tp_new */
28242845
PyObject_Free, /* tp_free */
28252846
.tp_version_tag = _Py_TYPE_VERSION_BYTEARRAY,
28262847
};

0 commit comments

Comments
 (0)