@@ -74,8 +74,13 @@ typedef struct {
7474 signed int seekable : 2 ; /* -1 means unknown */
7575 unsigned int closefd : 1 ;
7676 char finalizing ;
77- unsigned int blksize ;
78- Py_off_t estimated_size ;
77+ /* Stat result which was grabbed at file open, useful for optimizing common
78+ File I/O patterns to be more efficient. This is only guidance / an
79+ estimate, as it is subject to Time-Of-Check to Time-Of-Use (TOCTOU)
80+ issues / bugs. Both the underlying file descriptor and file may be
81+ modified outside of the fileio object / Python (ex. gh-90102, GH-121941,
82+ gh-109523). */
83+ struct _Py_stat_struct * stat_atopen ;
7984 PyObject * weakreflist ;
8085 PyObject * dict ;
8186} fileio ;
@@ -199,8 +204,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
199204 self -> writable = 0 ;
200205 self -> appending = 0 ;
201206 self -> seekable = -1 ;
202- self -> blksize = 0 ;
203- self -> estimated_size = -1 ;
207+ self -> stat_atopen = NULL ;
204208 self -> closefd = 1 ;
205209 self -> weakreflist = NULL ;
206210 }
@@ -256,7 +260,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
256260#elif !defined(MS_WINDOWS )
257261 int * atomic_flag_works = NULL ;
258262#endif
259- struct _Py_stat_struct fdfstat ;
260263 int fstat_result ;
261264 int async_err = 0 ;
262265
@@ -454,9 +457,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
454457#endif
455458 }
456459
457- self -> blksize = DEFAULT_BUFFER_SIZE ;
460+ self -> stat_atopen = PyMem_New (struct _Py_stat_struct , 1 );
461+ if (self -> stat_atopen == NULL ) {
462+ PyErr_NoMemory ();
463+ goto error ;
464+ }
458465 Py_BEGIN_ALLOW_THREADS
459- fstat_result = _Py_fstat_noraise (self -> fd , & fdfstat );
466+ fstat_result = _Py_fstat_noraise (self -> fd , self -> stat_atopen );
460467 Py_END_ALLOW_THREADS
461468 if (fstat_result < 0 ) {
462469 /* Tolerate fstat() errors other than EBADF. See Issue #25717, where
@@ -471,25 +478,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
471478#endif
472479 goto error ;
473480 }
481+
482+ PyMem_Free (self -> stat_atopen );
483+ self -> stat_atopen = NULL ;
474484 }
475485 else {
476486#if defined(S_ISDIR ) && defined(EISDIR )
477487 /* On Unix, open will succeed for directories.
478488 In Python, there should be no file objects referring to
479489 directories, so we need a check. */
480- if (S_ISDIR (fdfstat . st_mode )) {
490+ if (S_ISDIR (self -> stat_atopen -> st_mode )) {
481491 errno = EISDIR ;
482492 PyErr_SetFromErrnoWithFilenameObject (PyExc_OSError , nameobj );
483493 goto error ;
484494 }
485495#endif /* defined(S_ISDIR) */
486- #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
487- if (fdfstat .st_blksize > 1 )
488- self -> blksize = fdfstat .st_blksize ;
489- #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
490- if (fdfstat .st_size < PY_SSIZE_T_MAX ) {
491- self -> estimated_size = (Py_off_t )fdfstat .st_size ;
492- }
493496 }
494497
495498#if defined(MS_WINDOWS ) || defined(__CYGWIN__ )
@@ -521,6 +524,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
521524 internal_close (self );
522525 _PyErr_ChainExceptions1 (exc );
523526 }
527+ if (self -> stat_atopen != NULL ) {
528+ PyMem_Free (self -> stat_atopen );
529+ self -> stat_atopen = NULL ;
530+ }
524531
525532 done :
526533#ifdef MS_WINDOWS
@@ -553,6 +560,10 @@ fileio_dealloc(fileio *self)
553560 if (_PyIOBase_finalize ((PyObject * ) self ) < 0 )
554561 return ;
555562 _PyObject_GC_UNTRACK (self );
563+ if (self -> stat_atopen != NULL ) {
564+ PyMem_Free (self -> stat_atopen );
565+ self -> stat_atopen = NULL ;
566+ }
556567 if (self -> weakreflist != NULL )
557568 PyObject_ClearWeakRefs ((PyObject * ) self );
558569 (void )fileio_clear (self );
@@ -725,20 +736,27 @@ _io_FileIO_readall_impl(fileio *self)
725736 return err_closed ();
726737 }
727738
728- end = self -> estimated_size ;
739+ if (self -> stat_atopen != NULL && self -> stat_atopen -> st_size < _PY_READ_MAX ) {
740+ end = (Py_off_t )self -> stat_atopen -> st_size ;
741+ }
742+ else {
743+ end = -1 ;
744+ }
729745 if (end <= 0 ) {
730746 /* Use a default size and resize as needed. */
731747 bufsize = SMALLCHUNK ;
732748 }
733749 else {
734- /* This is probably a real file, so we try to allocate a
735- buffer one byte larger than the rest of the file. If the
736- calculation is right then we should get EOF without having
737- to enlarge the buffer. */
750+ /* This is probably a real file. */
738751 if (end > _PY_READ_MAX - 1 ) {
739752 bufsize = _PY_READ_MAX ;
740753 }
741754 else {
755+ /* In order to detect end of file, need a read() of at
756+ least 1 byte which returns size 0. Oversize the buffer
757+ by 1 byte so the I/O can be completed with two read()
758+ calls (one for all data, one for EOF) without needing
759+ to resize the buffer. */
742760 bufsize = (size_t )end + 1 ;
743761 }
744762
@@ -1094,11 +1112,13 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj)
10941112 return NULL ;
10951113 }
10961114
1097- /* Sometimes a large file is truncated. While estimated_size is used as a
1098- estimate, that it is much larger than the actual size can result in a
1099- significant over allocation and sometimes a MemoryError / running out of
1100- memory. */
1101- self -> estimated_size = pos ;
1115+ /* Since the file was truncated, its size at open is no longer accurate
1116+ as an estimate. Clear out the stat result, and rely on dynamic resize
1117+ code if a readall is requested. */
1118+ if (self -> stat_atopen != NULL ) {
1119+ PyMem_Free (self -> stat_atopen );
1120+ self -> stat_atopen = NULL ;
1121+ }
11021122
11031123 return posobj ;
11041124}
@@ -1229,16 +1249,27 @@ get_mode(fileio *self, void *closure)
12291249 return PyUnicode_FromString (mode_string (self ));
12301250}
12311251
1252+ static PyObject *
1253+ get_blksize (fileio * self , void * closure )
1254+ {
1255+ #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
1256+ if (self -> stat_atopen != NULL && self -> stat_atopen -> st_blksize > 1 ) {
1257+ return PyLong_FromLong (self -> stat_atopen -> st_blksize );
1258+ }
1259+ #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
1260+ return PyLong_FromLong (DEFAULT_BUFFER_SIZE );
1261+ }
1262+
12321263static PyGetSetDef fileio_getsetlist [] = {
12331264 {"closed" , (getter )get_closed , NULL , "True if the file is closed" },
12341265 {"closefd" , (getter )get_closefd , NULL ,
12351266 "True if the file descriptor will be closed by close()." },
12361267 {"mode" , (getter )get_mode , NULL , "String giving the file mode" },
1268+ {"_blksize" , (getter )get_blksize , NULL , "Stat st_blksize if available" },
12371269 {NULL },
12381270};
12391271
12401272static PyMemberDef fileio_members [] = {
1241- {"_blksize" , Py_T_UINT , offsetof(fileio , blksize ), 0 },
12421273 {"_finalizing" , Py_T_BOOL , offsetof(fileio , finalizing ), 0 },
12431274 {"__weaklistoffset__" , Py_T_PYSSIZET , offsetof(fileio , weakreflist ), Py_READONLY },
12441275 {"__dictoffset__" , Py_T_PYSSIZET , offsetof(fileio , dict ), Py_READONLY },
0 commit comments