-
-
Notifications
You must be signed in to change notification settings - Fork 33.7k
gh-120754: Reduce system calls in full-file readall case #120755
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
78c4de0
30d335e
9d7f925
dd0b294
7ad6fa8
fa9ac6a
93aee47
39e48ee
b7d3880
a4c2cb6
84bd2d8
b505334
7e276ec
9be6d1d
dc8e910
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Reduce the number of system calls invoked when reading a whole file (e.g. ``open('a.txt').read()``). For a sample program that reads the contents of the 400+ ``.rst`` files in the CPython repository's ``Doc`` folder, the system call count drops by more than 10%. |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -54,6 +54,9 @@ | |||||||||||||||||||||||||
| # define SMALLCHUNK BUFSIZ | ||||||||||||||||||||||||||
| #endif | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| /* Size at which a buffer is considered "large" and behavior should change to | ||||||||||||||||||||||||||
| avoid excessive memory allocation */ | ||||||||||||||||||||||||||
| #define LARGE_BUFFER_CUTOFF_SIZE 65536 | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| /*[clinic input] | ||||||||||||||||||||||||||
| module _io | ||||||||||||||||||||||||||
|
|
@@ -72,6 +75,7 @@ typedef struct { | |||||||||||||||||||||||||
| unsigned int closefd : 1; | ||||||||||||||||||||||||||
| char finalizing; | ||||||||||||||||||||||||||
| unsigned int blksize; | ||||||||||||||||||||||||||
| Py_off_t size_estimated; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
| PyObject *weakreflist; | ||||||||||||||||||||||||||
| PyObject *dict; | ||||||||||||||||||||||||||
| } fileio; | ||||||||||||||||||||||||||
|
|
@@ -196,6 +200,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |||||||||||||||||||||||||
| self->appending = 0; | ||||||||||||||||||||||||||
| self->seekable = -1; | ||||||||||||||||||||||||||
| self->blksize = 0; | ||||||||||||||||||||||||||
| self->size_estimated = -1; | ||||||||||||||||||||||||||
| self->closefd = 1; | ||||||||||||||||||||||||||
| self->weakreflist = NULL; | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
|
@@ -482,6 +487,9 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, | |||||||||||||||||||||||||
| if (fdfstat.st_blksize > 1) | ||||||||||||||||||||||||||
| self->blksize = fdfstat.st_blksize; | ||||||||||||||||||||||||||
| #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */ | ||||||||||||||||||||||||||
| if (fdfstat.st_size < PY_SSIZE_T_MAX) { | ||||||||||||||||||||||||||
| self->size_estimated = (Py_off_t)fdfstat.st_size; | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| #if defined(MS_WINDOWS) || defined(__CYGWIN__) | ||||||||||||||||||||||||||
|
|
@@ -684,7 +692,7 @@ new_buffersize(fileio *self, size_t currentsize) | |||||||||||||||||||||||||
| giving us amortized linear-time behavior. For bigger sizes, use a | ||||||||||||||||||||||||||
| less-than-double growth factor to avoid excessive allocation. */ | ||||||||||||||||||||||||||
| assert(currentsize <= PY_SSIZE_T_MAX); | ||||||||||||||||||||||||||
| if (currentsize > 65536) | ||||||||||||||||||||||||||
| if (currentsize > LARGE_BUFFER_CUTOFF_SIZE) | ||||||||||||||||||||||||||
| addend = currentsize >> 3; | ||||||||||||||||||||||||||
| else | ||||||||||||||||||||||||||
| addend = 256 + currentsize; | ||||||||||||||||||||||||||
|
|
@@ -707,43 +715,56 @@ static PyObject * | |||||||||||||||||||||||||
| _io_FileIO_readall_impl(fileio *self) | ||||||||||||||||||||||||||
| /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/ | ||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||
| struct _Py_stat_struct status; | ||||||||||||||||||||||||||
| Py_off_t pos, end; | ||||||||||||||||||||||||||
| PyObject *result; | ||||||||||||||||||||||||||
| Py_ssize_t bytes_read = 0; | ||||||||||||||||||||||||||
| Py_ssize_t n; | ||||||||||||||||||||||||||
| size_t bufsize; | ||||||||||||||||||||||||||
| int fstat_result; | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| if (self->fd < 0) | ||||||||||||||||||||||||||
| if (self->fd < 0) { | ||||||||||||||||||||||||||
| return err_closed(); | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| Py_BEGIN_ALLOW_THREADS | ||||||||||||||||||||||||||
| _Py_BEGIN_SUPPRESS_IPH | ||||||||||||||||||||||||||
| end = self->size_estimated; | ||||||||||||||||||||||||||
| if (end <= 0) { | ||||||||||||||||||||||||||
| /* Use a default size and resize as needed. */ | ||||||||||||||||||||||||||
| bufsize = SMALLCHUNK; | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
| else { | ||||||||||||||||||||||||||
| /* This is probably a real file, so we try to allocate a | ||||||||||||||||||||||||||
| buffer one byte larger than the rest of the file. If the | ||||||||||||||||||||||||||
| calculation is right then we should get EOF without having | ||||||||||||||||||||||||||
| to enlarge the buffer. */ | ||||||||||||||||||||||||||
erlend-aasland marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
| if (end >= _PY_READ_MAX) { | ||||||||||||||||||||||||||
cmaloney marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
| bufsize = _PY_READ_MAX; | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
| else { | ||||||||||||||||||||||||||
| bufsize = Py_SAFE_DOWNCAST(end, Py_off_t, size_t) + 1; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
| #ifdef MS_WINDOWS | |
| /* Windows uses long long for offsets */ | |
| typedef long long Py_off_t; | |
| # define PyLong_AsOff_t PyLong_AsLongLong | |
| # define PyLong_FromOff_t PyLong_FromLongLong | |
| # define PY_OFF_T_MAX LLONG_MAX | |
| # define PY_OFF_T_MIN LLONG_MIN | |
| # define PY_OFF_T_COMPAT long long /* type compatible with off_t */ | |
| # define PY_PRIdOFF "lld" /* format to use for that type */ | |
| #else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops, I misread this. The `if (end >= _PY_READ_MAX)` check just before should catch this (`_PY_READ_MAX <= SIZE_MAX`).
https://github.com/python/cpython/blob/main/Include/internal/pycore_fileutils.h#L65-L76
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, in fact the maximum is PY_SSIZE_T_MAX:
bufsize = (size_t)Py_MIN(end, PY_SSIZE_T_MAX);
if (bufsize < PY_SSIZE_T_MAX) {
bufsize++;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, replace `bufsize = Py_SAFE_DOWNCAST(end, Py_off_t, size_t) + 1;` with just `bufsize = (size_t)end + 1;`. I just dislike the `Py_SAFE_DOWNCAST()` macro: it's not safe, and the name is misleading.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the purpose of the "+1"? It may overallocate 1 byte which is inefficient.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The read loop currently needs to do one extra `os.read()` / `_Py_read()` call, of a single byte, which returns a size of 0, in order to find the end of the file and exit the loop. The very beginning of that loop does a check for "if buffer is full, grow buffer", so not over-allocating by one byte results in a much bigger allocation at that point. In the `_io` case it then shrinks the buffer back down at the end, whereas in the `_pyio` case the EOF read is never appended.

Could avoid the extra byte by writing a specialized "read known size" (w/ fallback to "read until EOF"), but was trying to avoid making more variants of the read loop and limit risk a bit.
As an aside: the `_pyio` implementation seems to have a lot of extra memory allocation and copying in the default case, because `os.read()` internally allocates a buffer which it then copies into its `bytearray`...