Skip to content

Commit 2f5f19e

Browse files
cmaloney, hauntsaninja, erlend-aasland, vstinner
authored
gh-120754: Reduce system calls in full-file FileIO.readall() case (#120755)
This reduces the system call count of a simple program[0] that reads all the `.rst` files in Doc by over 10% (5706 -> 4734 system calls on my linux system, 5813 -> 4875 on my macOS) This reduces the number of `fstat()` calls always and seek calls most the time. Stat was always called twice, once at open (to error early on directories), and a second time to get the size of the file to be able to read the whole file in one read. Now the size is cached with the first call. The code keeps an optimization that if the user had previously read a lot of data, the current position is subtracted from the number of bytes to read. That is somewhat expensive so only do it on larger files, otherwise just try and read the extra bytes and resize the PyBytes as needeed. I built a little test program to validate the behavior + assumptions around relative costs and then ran it under `strace` to get a log of the system calls. Full samples below[1]. After the changes, this is everything in one `filename.read_text()`: ```python3 openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3` fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0` ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343 read(3, "", 1) = 0 close(3) = 0 ``` This does make some tradeoffs 1. If the file size changes between open() and readall(), this will still get all the data but might have more read calls. 2. I experimented with avoiding the stat + cached result for small files in general, but on my dev workstation at least that tended to reduce performance compared to using the fstat(). 
[0] ```python3 from pathlib import Path nlines = [] for filename in Path("cpython/Doc").glob("**/*.rst"): nlines.append(len(filename.read_text())) ``` [1] Before small file: ``` openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0 ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 lseek(3, 0, SEEK_CUR) = 0 fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0 read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343 read(3, "", 1) = 0 close(3) = 0 ``` After small file: ``` openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0 ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343 read(3, "", 1) = 0 close(3) = 0 ``` Before large file: ``` openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0 ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 lseek(3, 0, SEEK_CUR) = 0 fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0 read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104 read(3, "", 1) = 0 close(3) = 0 ``` After large file: ``` openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0 ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 lseek(3, 0, SEEK_CUR) = 0 read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104 read(3, "", 1) = 0 close(3) = 0 ``` Co-authored-by: Shantanu <[email protected]> Co-authored-by: Erlend E. Aasland <[email protected]> Co-authored-by: Victor Stinner <[email protected]>
1 parent 9728ead commit 2f5f19e

File tree

3 files changed

+60
-33
lines changed

3 files changed

+60
-33
lines changed

Lib/_pyio.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,6 +1577,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
15771577
self._blksize = getattr(fdfstat, 'st_blksize', 0)
15781578
if self._blksize <= 1:
15791579
self._blksize = DEFAULT_BUFFER_SIZE
1580+
self._estimated_size = fdfstat.st_size
15801581

15811582
if _setmode:
15821583
# don't translate newlines (\r\n <=> \n)
@@ -1654,14 +1655,18 @@ def readall(self):
16541655
"""
16551656
self._checkClosed()
16561657
self._checkReadable()
1657-
bufsize = DEFAULT_BUFFER_SIZE
1658-
try:
1659-
pos = os.lseek(self._fd, 0, SEEK_CUR)
1660-
end = os.fstat(self._fd).st_size
1661-
if end >= pos:
1662-
bufsize = end - pos + 1
1663-
except OSError:
1664-
pass
1658+
if self._estimated_size <= 0:
1659+
bufsize = DEFAULT_BUFFER_SIZE
1660+
else:
1661+
bufsize = self._estimated_size + 1
1662+
1663+
if self._estimated_size > 65536:
1664+
try:
1665+
pos = os.lseek(self._fd, 0, SEEK_CUR)
1666+
if self._estimated_size >= pos:
1667+
bufsize = self._estimated_size - pos + 1
1668+
except OSError:
1669+
pass
16651670

16661671
result = bytearray()
16671672
while True:
@@ -1737,6 +1742,7 @@ def truncate(self, size=None):
17371742
if size is None:
17381743
size = self.tell()
17391744
os.ftruncate(self._fd, size)
1745+
self._estimated_size = size
17401746
return size
17411747

17421748
def close(self):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Reduce the number of system calls invoked when reading a whole file (ex. ``open('a.txt').read()``). For a sample program that reads the contents of the 400+ ``.rst`` files in the cpython repository ``Doc`` folder, there is an over 10% reduction in system call count.

Modules/_io/fileio.c

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
# define SMALLCHUNK BUFSIZ
5555
#endif
5656

57+
/* Size at which a buffer is considered "large" and behavior should change to
58+
avoid excessive memory allocation */
59+
#define LARGE_BUFFER_CUTOFF_SIZE 65536
5760

5861
/*[clinic input]
5962
module _io
@@ -72,6 +75,7 @@ typedef struct {
7275
unsigned int closefd : 1;
7376
char finalizing;
7477
unsigned int blksize;
78+
Py_off_t estimated_size;
7579
PyObject *weakreflist;
7680
PyObject *dict;
7781
} fileio;
@@ -196,6 +200,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
196200
self->appending = 0;
197201
self->seekable = -1;
198202
self->blksize = 0;
203+
self->estimated_size = -1;
199204
self->closefd = 1;
200205
self->weakreflist = NULL;
201206
}
@@ -482,6 +487,9 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
482487
if (fdfstat.st_blksize > 1)
483488
self->blksize = fdfstat.st_blksize;
484489
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
490+
if (fdfstat.st_size < PY_SSIZE_T_MAX) {
491+
self->estimated_size = (Py_off_t)fdfstat.st_size;
492+
}
485493
}
486494

487495
#if defined(MS_WINDOWS) || defined(__CYGWIN__)
@@ -684,7 +692,7 @@ new_buffersize(fileio *self, size_t currentsize)
684692
giving us amortized linear-time behavior. For bigger sizes, use a
685693
less-than-double growth factor to avoid excessive allocation. */
686694
assert(currentsize <= PY_SSIZE_T_MAX);
687-
if (currentsize > 65536)
695+
if (currentsize > LARGE_BUFFER_CUTOFF_SIZE)
688696
addend = currentsize >> 3;
689697
else
690698
addend = 256 + currentsize;
@@ -707,43 +715,56 @@ static PyObject *
707715
_io_FileIO_readall_impl(fileio *self)
708716
/*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
709717
{
710-
struct _Py_stat_struct status;
711718
Py_off_t pos, end;
712719
PyObject *result;
713720
Py_ssize_t bytes_read = 0;
714721
Py_ssize_t n;
715722
size_t bufsize;
716-
int fstat_result;
717723

718-
if (self->fd < 0)
724+
if (self->fd < 0) {
719725
return err_closed();
726+
}
720727

721-
Py_BEGIN_ALLOW_THREADS
722-
_Py_BEGIN_SUPPRESS_IPH
723-
#ifdef MS_WINDOWS
724-
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
725-
#else
726-
pos = lseek(self->fd, 0L, SEEK_CUR);
727-
#endif
728-
_Py_END_SUPPRESS_IPH
729-
fstat_result = _Py_fstat_noraise(self->fd, &status);
730-
Py_END_ALLOW_THREADS
731-
732-
if (fstat_result == 0)
733-
end = status.st_size;
734-
else
735-
end = (Py_off_t)-1;
736-
737-
if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) {
728+
end = self->estimated_size;
729+
if (end <= 0) {
730+
/* Use a default size and resize as needed. */
731+
bufsize = SMALLCHUNK;
732+
}
733+
else {
738734
/* This is probably a real file, so we try to allocate a
739735
buffer one byte larger than the rest of the file. If the
740736
calculation is right then we should get EOF without having
741737
to enlarge the buffer. */
742-
bufsize = (size_t)(end - pos + 1);
743-
} else {
744-
bufsize = SMALLCHUNK;
738+
if (end > _PY_READ_MAX - 1) {
739+
bufsize = _PY_READ_MAX;
740+
}
741+
else {
742+
bufsize = (size_t)end + 1;
743+
}
744+
745+
/* While a lot of code does open().read() to get the whole contents
746+
of a file it is possible a caller seeks/reads a ways into the file
747+
then calls readall() to get the rest, which would result in allocating
748+
more than required. Guard against that for larger files where we expect
749+
the I/O time to dominate anyways while keeping small files fast. */
750+
if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) {
751+
Py_BEGIN_ALLOW_THREADS
752+
_Py_BEGIN_SUPPRESS_IPH
753+
#ifdef MS_WINDOWS
754+
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
755+
#else
756+
pos = lseek(self->fd, 0L, SEEK_CUR);
757+
#endif
758+
_Py_END_SUPPRESS_IPH
759+
Py_END_ALLOW_THREADS
760+
761+
if (end >= pos && pos >= 0 && (end - pos) < (_PY_READ_MAX - 1)) {
762+
bufsize = (size_t)(end - pos) + 1;
763+
}
764+
}
745765
}
746766

767+
747768
result = PyBytes_FromStringAndSize(NULL, bufsize);
748769
if (result == NULL)
749770
return NULL;
@@ -783,7 +804,6 @@ _io_FileIO_readall_impl(fileio *self)
783804
return NULL;
784805
}
785806
bytes_read += n;
786-
pos += n;
787807
}
788808

789809
if (PyBytes_GET_SIZE(result) > bytes_read) {

0 commit comments

Comments
 (0)