diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 75b5ad1b1a47d2..90a20755139c03 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -64,6 +64,15 @@ def text_encoding(encoding, stacklevel=2): return encoding +def _shortcut_isatty(raw): + """Regular files are never TTYs. In cases where we know we have a stat + result, use it to shortcut / skip a call to isatty.""" + if raw._stat_atopen and stat.S_ISREG(raw._stat_atopen.st_mode): + return False + + return raw.isatty() + + # Wrapper for builtins.open # # Trick so that open() won't become a bound method when stored @@ -238,18 +247,12 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, result = raw try: line_buffering = False - if buffering == 1 or buffering < 0 and raw.isatty(): + + if buffering == 1 or buffering < 0 and _shortcut_isatty(raw): buffering = -1 line_buffering = True if buffering < 0: - buffering = DEFAULT_BUFFER_SIZE - try: - bs = os.fstat(raw.fileno()).st_blksize - except (OSError, AttributeError): - pass - else: - if bs > 1: - buffering = bs + buffering = raw._blksize if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: @@ -1565,19 +1568,15 @@ def __init__(self, file, mode='r', closefd=True, opener=None): os.set_inheritable(fd, False) self._closefd = closefd - fdfstat = os.fstat(fd) + self._stat_atopen = os.fstat(fd) try: - if stat.S_ISDIR(fdfstat.st_mode): + if stat.S_ISDIR(self._stat_atopen.st_mode): raise IsADirectoryError(errno.EISDIR, os.strerror(errno.EISDIR), file) except AttributeError: # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR # don't exist. pass - self._blksize = getattr(fdfstat, 'st_blksize', 0) - if self._blksize <= 1: - self._blksize = DEFAULT_BUFFER_SIZE - self._estimated_size = fdfstat.st_size if _setmode: # don't translate newlines (\r\n <=> \n) @@ -1623,6 +1622,14 @@ def __repr__(self): return ('<%s name=%r mode=%r closefd=%r>' % (class_name, name, self.mode, self._closefd)) + @property + def _blksize(self): + if self._stat_atopen: + res = getattr(self._stat_atopen, "st_blksize", 0) + + # WASI sets blksize to 0 + return res if res > 0 else DEFAULT_BUFFER_SIZE + def _checkReadable(self): if not self._readable: raise UnsupportedOperation('File not open for reading') @@ -1655,16 +1662,16 @@ def readall(self): """ self._checkClosed() self._checkReadable() - if self._estimated_size <= 0: + if not self._stat_atopen or self._stat_atopen.st_size <= 0: bufsize = DEFAULT_BUFFER_SIZE else: - bufsize = self._estimated_size + 1 + bufsize = self._stat_atopen.st_size + 1 - if self._estimated_size > 65536: + if self._stat_atopen.st_size > 65536: try: pos = os.lseek(self._fd, 0, SEEK_CUR) - if self._estimated_size >= pos: - bufsize = self._estimated_size - pos + 1 + if self._stat_atopen.st_size >= pos: + bufsize = self._stat_atopen.st_size - pos + 1 except OSError: pass @@ -1742,7 +1749,7 @@ def truncate(self, size=None): if size is None: size = self.tell() os.ftruncate(self._fd, size) - self._estimated_size = size + self._stat_atopen = None return size def close(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-10-01-55-33.gh-issue-120754.KSWAmz.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-10-01-55-33.gh-issue-120754.KSWAmz.rst new file mode 100644 index 00000000000000..d8a13db3849eff --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-07-10-01-55-33.gh-issue-120754.KSWAmz.rst @@ -0,0 +1,2 @@ +Skip the ``isatty`` system call on just-opened regular files. This provides a +slight performance improvement when reading whole files. diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 5d9d87d6118a75..a141662a496133 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -74,8 +74,7 @@ typedef struct { signed int seekable : 2; /* -1 means unknown */ unsigned int closefd : 1; char finalizing; - unsigned int blksize; - Py_off_t estimated_size; + struct _Py_stat_struct *stat_atopen; PyObject *weakreflist; PyObject *dict; } fileio; @@ -199,8 +198,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->writable = 0; self->appending = 0; self->seekable = -1; - self->blksize = 0; - self->estimated_size = -1; + self->stat_atopen = NULL; self->closefd = 1; self->weakreflist = NULL; } @@ -256,7 +254,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, #elif !defined(MS_WINDOWS) int *atomic_flag_works = NULL; #endif - struct _Py_stat_struct fdfstat; int fstat_result; int async_err = 0; @@ -454,9 +451,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, #endif } - self->blksize = DEFAULT_BUFFER_SIZE; + self->stat_atopen = PyMem_New(struct _Py_stat_struct, 1); + if (self->stat_atopen == NULL) { + PyErr_Format(PyExc_MemoryError, "Unable to allocate space for stat result"); + goto error; + } Py_BEGIN_ALLOW_THREADS - fstat_result = _Py_fstat_noraise(self->fd, &fdfstat); + fstat_result = _Py_fstat_noraise(self->fd, self->stat_atopen); Py_END_ALLOW_THREADS if (fstat_result < 0) { /* Tolerate fstat() errors other than EBADF. See Issue #25717, where @@ -471,25 +472,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, #endif goto error; } + + PyMem_Free(self->stat_atopen); + self->stat_atopen = NULL; } else { #if defined(S_ISDIR) && defined(EISDIR) /* On Unix, open will succeed for directories. In Python, there should be no file objects referring to directories, so we need a check. */ - if (S_ISDIR(fdfstat.st_mode)) { + if (S_ISDIR(self->stat_atopen->st_mode)) { errno = EISDIR; PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, nameobj); goto error; } #endif /* defined(S_ISDIR) */ -#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE - if (fdfstat.st_blksize > 1) - self->blksize = fdfstat.st_blksize; -#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */ - if (fdfstat.st_size < PY_SSIZE_T_MAX) { - self->estimated_size = (Py_off_t)fdfstat.st_size; - } } #if defined(MS_WINDOWS) || defined(__CYGWIN__) @@ -521,6 +518,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, internal_close(self); _PyErr_ChainExceptions1(exc); } + if (self->stat_atopen != NULL) { + PyMem_Free(self->stat_atopen); + self->stat_atopen = NULL; + } done: #ifdef MS_WINDOWS @@ -553,6 +554,10 @@ fileio_dealloc(fileio *self) if (_PyIOBase_finalize((PyObject *) self) < 0) return; _PyObject_GC_UNTRACK(self); + if (self->stat_atopen != NULL) { + PyMem_Free(self->stat_atopen); + self->stat_atopen = NULL; + } if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); (void)fileio_clear(self); @@ -725,7 +730,13 @@ _io_FileIO_readall_impl(fileio *self) return err_closed(); } - end = self->estimated_size; + if (self->stat_atopen != NULL && self->stat_atopen->st_size < _PY_READ_MAX) { + end = (Py_off_t)self->stat_atopen->st_size; + } + else { + end = -1; + } + if (end <= 0) { /* Use a default size and resize as needed. */ bufsize = SMALLCHUNK; @@ -764,7 +775,6 @@ _io_FileIO_readall_impl(fileio *self) } } - result = PyBytes_FromStringAndSize(NULL, bufsize); if (result == NULL) return NULL; @@ -1098,7 +1108,10 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj) estimate, that it is much larger than the actual size can result in a significant over allocation and sometimes a MemoryError / running out of memory. */ - self->estimated_size = pos; + if (self->stat_atopen != NULL) { + PyMem_Free(self->stat_atopen); + self->stat_atopen = NULL; + } return posobj; } @@ -1179,6 +1192,11 @@ _io_FileIO_isatty_impl(fileio *self) if (self->fd < 0) return err_closed(); + + if (self->stat_atopen != NULL && S_ISREG(self->stat_atopen->st_mode)) { + return Py_False; + } + Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH res = isatty(self->fd); @@ -1229,16 +1247,28 @@ get_mode(fileio *self, void *closure) return PyUnicode_FromString(mode_string(self)); } +static PyObject * +get_blksize(fileio *self, void *closure) +{ +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + if (self->stat_atopen != NULL && self->stat_atopen->st_blksize > 1) { + return PyLong_FromLong(self->stat_atopen->st_blksize); + } +#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */ + return PyLong_FromLong(DEFAULT_BUFFER_SIZE); +} + + static PyGetSetDef fileio_getsetlist[] = { {"closed", (getter)get_closed, NULL, "True if the file is closed"}, {"closefd", (getter)get_closefd, NULL, "True if the file descriptor will be closed by close()."}, {"mode", (getter)get_mode, NULL, "String giving the file mode"}, + {"_blksize", (getter)get_blksize, NULL, "Stat st_blksize if available"}, {NULL}, }; static PyMemberDef fileio_members[] = { - {"_blksize", Py_T_UINT, offsetof(fileio, blksize), 0}, {"_finalizing", Py_T_BOOL, offsetof(fileio, finalizing), 0}, {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(fileio, weakreflist), Py_READONLY}, {"__dictoffset__", Py_T_PYSSIZET, offsetof(fileio, dict), Py_READONLY},