Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
self._blksize = getattr(fdfstat, 'st_blksize', 0)
if self._blksize <= 1:
self._blksize = DEFAULT_BUFFER_SIZE
self._estimated_size = fdfstat.st_size

if _setmode:
# don't translate newlines (\r\n <=> \n)
Expand Down Expand Up @@ -1654,14 +1655,18 @@ def readall(self):
"""
self._checkClosed()
self._checkReadable()
bufsize = DEFAULT_BUFFER_SIZE
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
end = os.fstat(self._fd).st_size
if end >= pos:
bufsize = end - pos + 1
except OSError:
pass
if self._estimated_size <= 0:
bufsize = DEFAULT_BUFFER_SIZE
else:
bufsize = self._estimated_size + 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the purpose of the "+1"? It may overallocate 1 byte which is inefficient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The read loop currently needs to do an os.read() / _py_Read of a single byte, which returns size 0, to find the end of the file and exit the loop. The very beginning of that loop does a check for "if buffer is full, grow buffer", so not over-allocating by one byte results in a much bigger allocation from that. In the _io case it then shrinks the buffer back down at the end, whereas in the _pyio case the EOF read is never appended.

Could avoid the extra byte by writing a specialized "read known size" (w/ fallback to "read until EOF"), but was trying to avoid making more variants of the read loop and limit risk a bit.

As an aside: the _pyio implementation seems to have a lot of extra memory allocation and copy in the default case because os.read() internally allocates a buffer which it then copies into its bytearray...


if self._estimated_size > 65536:
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
if self._estimated_size >= pos:
bufsize = self._estimated_size - pos + 1
except OSError:
pass

result = bytearray()
while True:
Expand Down Expand Up @@ -1737,6 +1742,7 @@ def truncate(self, size=None):
if size is None:
size = self.tell()
os.ftruncate(self._fd, size)
self._estimated_size = size
return size

def close(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce the number of system calls invoked when reading a whole file (ex. ``open('a.txt').read()``). For a sample program that reads the contents of the 400+ ``.rst`` files in the cpython repository ``Doc`` folder, the system call count is reduced by over 10%.
70 changes: 45 additions & 25 deletions Modules/_io/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
# define SMALLCHUNK BUFSIZ
#endif

/* Size at which a buffer is considered "large" and behavior should change to
avoid excessive memory allocation */
#define LARGE_BUFFER_CUTOFF_SIZE 65536

/*[clinic input]
module _io
Expand All @@ -72,6 +75,7 @@ typedef struct {
unsigned int closefd : 1;
char finalizing;
unsigned int blksize;
Py_off_t size_estimated;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to use the same name in the C and Python implementation, I suggest to rename this member to: estimated_size.

PyObject *weakreflist;
PyObject *dict;
} fileio;
Expand Down Expand Up @@ -196,6 +200,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->appending = 0;
self->seekable = -1;
self->blksize = 0;
self->size_estimated = -1;
self->closefd = 1;
self->weakreflist = NULL;
}
Expand Down Expand Up @@ -482,6 +487,9 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
if (fdfstat.st_blksize > 1)
self->blksize = fdfstat.st_blksize;
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
if (fdfstat.st_size < PY_SSIZE_T_MAX) {
self->size_estimated = (Py_off_t)fdfstat.st_size;
}
}

#if defined(MS_WINDOWS) || defined(__CYGWIN__)
Expand Down Expand Up @@ -684,7 +692,7 @@ new_buffersize(fileio *self, size_t currentsize)
giving us amortized linear-time behavior. For bigger sizes, use a
less-than-double growth factor to avoid excessive allocation. */
assert(currentsize <= PY_SSIZE_T_MAX);
if (currentsize > 65536)
if (currentsize > LARGE_BUFFER_CUTOFF_SIZE)
addend = currentsize >> 3;
else
addend = 256 + currentsize;
Expand All @@ -707,43 +715,56 @@ static PyObject *
_io_FileIO_readall_impl(fileio *self)
/*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
{
struct _Py_stat_struct status;
Py_off_t pos, end;
PyObject *result;
Py_ssize_t bytes_read = 0;
Py_ssize_t n;
size_t bufsize;
int fstat_result;

if (self->fd < 0)
if (self->fd < 0) {
return err_closed();
}

Py_BEGIN_ALLOW_THREADS
_Py_BEGIN_SUPPRESS_IPH
end = self->size_estimated;
if (end <= 0) {
/* Use a default size and resize as needed. */
bufsize = SMALLCHUNK;
}
else {
/* This is probably a real file, so we try to allocate a
buffer one byte larger than the rest of the file. If the
calculation is right then we should get EOF without having
to enlarge the buffer. */
if (end >= _PY_READ_MAX) {
bufsize = _PY_READ_MAX;
}
else {
bufsize = Py_SAFE_DOWNCAST(end, Py_off_t, size_t) + 1;
Copy link
Member

@vstinner vstinner Jul 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that this cast is safe, Py_off_t can be bigger than size_t. You should do something like:

bufsize = (size_t)Py_MIN(end, SIZE_MAX);
bufsize++;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran into issues in test_largefile on Windows x86 which caused me to add this. Py_off_t is long long on that while size_t is int

#ifdef MS_WINDOWS
/* Windows uses long long for offsets */
typedef long long Py_off_t;
# define PyLong_AsOff_t PyLong_AsLongLong
# define PyLong_FromOff_t PyLong_FromLongLong
# define PY_OFF_T_MAX LLONG_MAX
# define PY_OFF_T_MIN LLONG_MIN
# define PY_OFF_T_COMPAT long long /* type compatible with off_t */
# define PY_PRIdOFF "lld" /* format to use for that type */
#else

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oop, misread this. The if end >= _PY_READ_MAX just before should catch this. (_PY_READ_MAX <= SIZE_MAX).

https://github.com/python/cpython/blob/main/Include/internal/pycore_fileutils.h#L65-L76

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, in fact the maximum is PY_SSIZE_T_MAX:

bufsize = (size_t)Py_MIN(end, PY_SSIZE_T_MAX);
if (bufsize < PY_SSIZE_T_MAX) {
    bufsize++;
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, replace bufsize = Py_SAFE_DOWNCAST(end, Py_off_t, size_t) + 1; with just bufsize = (size_t)end + 1;. I just dislike Py_SAFE_DOWNCAST() macro, it's not safe, the name is misleading.

}

/* While a lot of code does open().read() to get the whole contents
of a file, it is possible a caller seeks/reads partway into the file
and then calls readall() to get the rest, which would result in allocating
more than required. Guard against that for larger files, where we expect
the I/O time to dominate anyway, while keeping small files fast. */
if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) {
Py_BEGIN_ALLOW_THREADS
_Py_BEGIN_SUPPRESS_IPH
#ifdef MS_WINDOWS
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
#else
pos = lseek(self->fd, 0L, SEEK_CUR);
pos = lseek(self->fd, 0L, SEEK_CUR);
#endif
_Py_END_SUPPRESS_IPH
fstat_result = _Py_fstat_noraise(self->fd, &status);
Py_END_ALLOW_THREADS

if (fstat_result == 0)
end = status.st_size;
else
end = (Py_off_t)-1;
_Py_END_SUPPRESS_IPH
Py_END_ALLOW_THREADS

if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) {
/* This is probably a real file, so we try to allocate a
buffer one byte larger than the rest of the file. If the
calculation is right then we should get EOF without having
to enlarge the buffer. */
bufsize = (size_t)(end - pos + 1);
} else {
bufsize = SMALLCHUNK;
if (end >= pos && pos >= 0 && end - pos < _PY_READ_MAX) {
bufsize = Py_SAFE_DOWNCAST(end - pos, Py_off_t, size_t) + 1;
}
}
}


result = PyBytes_FromStringAndSize(NULL, bufsize);
if (result == NULL)
return NULL;
Expand Down Expand Up @@ -783,7 +804,6 @@ _io_FileIO_readall_impl(fileio *self)
return NULL;
}
bytes_read += n;
pos += n;
}

if (PyBytes_GET_SIZE(result) > bytes_read) {
Expand Down