Skip to content

GH-120754: Remove isatty call during regular open #121593

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 28 additions & 21 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ def text_encoding(encoding, stacklevel=2):
return encoding


def _shortcut_isatty(raw):
"""Regular files are never TTYs. In cases where we know we have a stat
result, use it to shortcut / skip a call to isatty."""
if raw._stat_atopen and stat.S_ISREG(raw._stat_atopen.st_mode):
return False

return raw.isatty()


# Wrapper for builtins.open
#
# Trick so that open() won't become a bound method when stored
Expand Down Expand Up @@ -238,18 +247,12 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
result = raw
try:
line_buffering = False
if buffering == 1 or buffering < 0 and raw.isatty():

if buffering == 1 or buffering < 0 and _shortcut_isatty(raw):
buffering = -1
line_buffering = True
if buffering < 0:
buffering = DEFAULT_BUFFER_SIZE
try:
bs = os.fstat(raw.fileno()).st_blksize
except (OSError, AttributeError):
pass
else:
if bs > 1:
buffering = bs
buffering = raw._blksize
if buffering < 0:
raise ValueError("invalid buffering size")
if buffering == 0:
Expand Down Expand Up @@ -1565,19 +1568,15 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
os.set_inheritable(fd, False)

self._closefd = closefd
fdfstat = os.fstat(fd)
self._stat_atopen = os.fstat(fd)
try:
if stat.S_ISDIR(fdfstat.st_mode):
if stat.S_ISDIR(self._stat_atopen.st_mode):
raise IsADirectoryError(errno.EISDIR,
os.strerror(errno.EISDIR), file)
except AttributeError:
# Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
# don't exist.
pass
self._blksize = getattr(fdfstat, 'st_blksize', 0)
if self._blksize <= 1:
self._blksize = DEFAULT_BUFFER_SIZE
self._estimated_size = fdfstat.st_size

if _setmode:
# don't translate newlines (\r\n <=> \n)
Expand Down Expand Up @@ -1623,6 +1622,14 @@ def __repr__(self):
return ('<%s name=%r mode=%r closefd=%r>' %
(class_name, name, self.mode, self._closefd))

@property
def _blksize(self):
if self._stat_atopen:
res = getattr(self._stat_atopen, "st_blksize", 0)

# WASI sets blksize to 0
return res if res > 0 else DEFAULT_BUFFER_SIZE

def _checkReadable(self):
if not self._readable:
raise UnsupportedOperation('File not open for reading')
Expand Down Expand Up @@ -1655,16 +1662,16 @@ def readall(self):
"""
self._checkClosed()
self._checkReadable()
if self._estimated_size <= 0:
if not self._stat_atopen or self._stat_atopen.st_size <= 0:
bufsize = DEFAULT_BUFFER_SIZE
else:
bufsize = self._estimated_size + 1
bufsize = self._stat_atopen.st_size + 1

if self._estimated_size > 65536:
if self._stat_atopen.st_size > 65536:
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
if self._estimated_size >= pos:
bufsize = self._estimated_size - pos + 1
if self._stat_atopen.st_size >= pos:
bufsize = self._stat_atopen.st_size - pos + 1
except OSError:
pass

Expand Down Expand Up @@ -1742,7 +1749,7 @@ def truncate(self, size=None):
if size is None:
size = self.tell()
os.ftruncate(self._fd, size)
self._estimated_size = size
self._stat_atopen = None
return size

def close(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Skip the ``isatty`` system call on just-opened regular files. This provides a
slight performance improvement when reading whole files.
68 changes: 49 additions & 19 deletions Modules/_io/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ typedef struct {
signed int seekable : 2; /* -1 means unknown */
unsigned int closefd : 1;
char finalizing;
unsigned int blksize;
Py_off_t estimated_size;
struct _Py_stat_struct *stat_atopen;
PyObject *weakreflist;
PyObject *dict;
} fileio;
Expand Down Expand Up @@ -199,8 +198,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->writable = 0;
self->appending = 0;
self->seekable = -1;
self->blksize = 0;
self->estimated_size = -1;
self->stat_atopen = NULL;
self->closefd = 1;
self->weakreflist = NULL;
}
Expand Down Expand Up @@ -256,7 +254,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#elif !defined(MS_WINDOWS)
int *atomic_flag_works = NULL;
#endif
struct _Py_stat_struct fdfstat;
int fstat_result;
int async_err = 0;

Expand Down Expand Up @@ -454,9 +451,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#endif
}

self->blksize = DEFAULT_BUFFER_SIZE;
self->stat_atopen = PyMem_New(struct _Py_stat_struct, 1);
if (self->stat_atopen == NULL) {
PyErr_Format(PyExc_MemoryError, "Unable to allocate space for stat result");
goto error;
}
Py_BEGIN_ALLOW_THREADS
fstat_result = _Py_fstat_noraise(self->fd, &fdfstat);
fstat_result = _Py_fstat_noraise(self->fd, self->stat_atopen);
Py_END_ALLOW_THREADS
if (fstat_result < 0) {
/* Tolerate fstat() errors other than EBADF. See Issue #25717, where
Expand All @@ -471,25 +472,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#endif
goto error;
}

PyMem_Free(self->stat_atopen);
self->stat_atopen = NULL;
}
else {
#if defined(S_ISDIR) && defined(EISDIR)
/* On Unix, open will succeed for directories.
In Python, there should be no file objects referring to
directories, so we need a check. */
if (S_ISDIR(fdfstat.st_mode)) {
if (S_ISDIR(self->stat_atopen->st_mode)) {
errno = EISDIR;
PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, nameobj);
goto error;
}
#endif /* defined(S_ISDIR) */
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
if (fdfstat.st_blksize > 1)
self->blksize = fdfstat.st_blksize;
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
if (fdfstat.st_size < PY_SSIZE_T_MAX) {
self->estimated_size = (Py_off_t)fdfstat.st_size;
}
}

#if defined(MS_WINDOWS) || defined(__CYGWIN__)
Expand Down Expand Up @@ -521,6 +518,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
internal_close(self);
_PyErr_ChainExceptions1(exc);
}
if (self->stat_atopen != NULL) {
PyMem_Free(self->stat_atopen);
self->stat_atopen = NULL;
}

done:
#ifdef MS_WINDOWS
Expand Down Expand Up @@ -553,6 +554,10 @@ fileio_dealloc(fileio *self)
if (_PyIOBase_finalize((PyObject *) self) < 0)
return;
_PyObject_GC_UNTRACK(self);
if (self->stat_atopen != NULL) {
PyMem_Free(self->stat_atopen);
self->stat_atopen = NULL;
}
if (self->weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *) self);
(void)fileio_clear(self);
Expand Down Expand Up @@ -725,7 +730,13 @@ _io_FileIO_readall_impl(fileio *self)
return err_closed();
}

end = self->estimated_size;
if (self->stat_atopen != NULL && self->stat_atopen->st_size < _PY_READ_MAX) {
end = (Py_off_t)self->stat_atopen->st_size;
}
else {
end = -1;
}

if (end <= 0) {
/* Use a default size and resize as needed. */
bufsize = SMALLCHUNK;
Expand Down Expand Up @@ -764,7 +775,6 @@ _io_FileIO_readall_impl(fileio *self)
}
}


result = PyBytes_FromStringAndSize(NULL, bufsize);
if (result == NULL)
return NULL;
Expand Down Expand Up @@ -1098,7 +1108,10 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj)
estimate, that it is much larger than the actual size can result in a
significant over allocation and sometimes a MemoryError / running out of
memory. */
self->estimated_size = pos;
if (self->stat_atopen != NULL) {
PyMem_Free(self->stat_atopen);
self->stat_atopen = NULL;
}

return posobj;
}
Expand Down Expand Up @@ -1179,6 +1192,11 @@ _io_FileIO_isatty_impl(fileio *self)

if (self->fd < 0)
return err_closed();

if (self->stat_atopen != NULL && S_ISREG(self->stat_atopen->st_mode)) {
return Py_False;
}

Py_BEGIN_ALLOW_THREADS
_Py_BEGIN_SUPPRESS_IPH
res = isatty(self->fd);
Expand Down Expand Up @@ -1229,16 +1247,28 @@ get_mode(fileio *self, void *closure)
return PyUnicode_FromString(mode_string(self));
}

static PyObject *
get_blksize(fileio *self, void *closure)
{
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
if (self->stat_atopen != NULL && self->stat_atopen->st_blksize > 1) {
return PyLong_FromLong(self->stat_atopen->st_blksize);
}
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
return PyLong_FromLong(DEFAULT_BUFFER_SIZE);
}


static PyGetSetDef fileio_getsetlist[] = {
{"closed", (getter)get_closed, NULL, "True if the file is closed"},
{"closefd", (getter)get_closefd, NULL,
"True if the file descriptor will be closed by close()."},
{"mode", (getter)get_mode, NULL, "String giving the file mode"},
{"_blksize", (getter)get_blksize, NULL, "Stat st_blksize if available"},
{NULL},
};

static PyMemberDef fileio_members[] = {
{"_blksize", Py_T_UINT, offsetof(fileio, blksize), 0},
{"_finalizing", Py_T_BOOL, offsetof(fileio, finalizing), 0},
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(fileio, weakreflist), Py_READONLY},
{"__dictoffset__", Py_T_PYSSIZET, offsetof(fileio, dict), Py_READONLY},
Expand Down
Loading