@@ -74,8 +74,13 @@ typedef struct {
74
74
signed int seekable : 2 ; /* -1 means unknown */
75
75
unsigned int closefd : 1 ;
76
76
char finalizing ;
77
- unsigned int blksize ;
78
- Py_off_t estimated_size ;
77
+ /* Stat result which was grabbed at file open, useful for optimizing common
78
+ File I/O patterns to be more efficient. This is only guidance / an
79
+ estimate, as it is subject to Time-Of-Check to Time-Of-Use (TOCTOU)
80
+ issues / bugs. Both the underlying file descriptor and file may be
81
+ modified outside of the fileio object / Python (ex. gh-90102, GH-121941,
82
+ gh-109523). */
83
+ struct _Py_stat_struct * stat_atopen ;
79
84
PyObject * weakreflist ;
80
85
PyObject * dict ;
81
86
} fileio ;
@@ -199,8 +204,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
199
204
self -> writable = 0 ;
200
205
self -> appending = 0 ;
201
206
self -> seekable = -1 ;
202
- self -> blksize = 0 ;
203
- self -> estimated_size = -1 ;
207
+ self -> stat_atopen = NULL ;
204
208
self -> closefd = 1 ;
205
209
self -> weakreflist = NULL ;
206
210
}
@@ -256,7 +260,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
256
260
#elif !defined(MS_WINDOWS )
257
261
int * atomic_flag_works = NULL ;
258
262
#endif
259
- struct _Py_stat_struct fdfstat ;
260
263
int fstat_result ;
261
264
int async_err = 0 ;
262
265
@@ -454,9 +457,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
454
457
#endif
455
458
}
456
459
457
- self -> blksize = DEFAULT_BUFFER_SIZE ;
460
+ self -> stat_atopen = PyMem_New (struct _Py_stat_struct , 1 );
461
+ if (self -> stat_atopen == NULL ) {
462
+ PyErr_NoMemory ();
463
+ goto error ;
464
+ }
458
465
Py_BEGIN_ALLOW_THREADS
459
- fstat_result = _Py_fstat_noraise (self -> fd , & fdfstat );
466
+ fstat_result = _Py_fstat_noraise (self -> fd , self -> stat_atopen );
460
467
Py_END_ALLOW_THREADS
461
468
if (fstat_result < 0 ) {
462
469
/* Tolerate fstat() errors other than EBADF. See Issue #25717, where
@@ -471,25 +478,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
471
478
#endif
472
479
goto error ;
473
480
}
481
+
482
+ PyMem_Free (self -> stat_atopen );
483
+ self -> stat_atopen = NULL ;
474
484
}
475
485
else {
476
486
#if defined(S_ISDIR ) && defined(EISDIR )
477
487
/* On Unix, open will succeed for directories.
478
488
In Python, there should be no file objects referring to
479
489
directories, so we need a check. */
480
- if (S_ISDIR (fdfstat . st_mode )) {
490
+ if (S_ISDIR (self -> stat_atopen -> st_mode )) {
481
491
errno = EISDIR ;
482
492
PyErr_SetFromErrnoWithFilenameObject (PyExc_OSError , nameobj );
483
493
goto error ;
484
494
}
485
495
#endif /* defined(S_ISDIR) */
486
- #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
487
- if (fdfstat .st_blksize > 1 )
488
- self -> blksize = fdfstat .st_blksize ;
489
- #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
490
- if (fdfstat .st_size < PY_SSIZE_T_MAX ) {
491
- self -> estimated_size = (Py_off_t )fdfstat .st_size ;
492
- }
493
496
}
494
497
495
498
#if defined(MS_WINDOWS ) || defined(__CYGWIN__ )
@@ -521,6 +524,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
521
524
internal_close (self );
522
525
_PyErr_ChainExceptions1 (exc );
523
526
}
527
+ if (self -> stat_atopen != NULL ) {
528
+ PyMem_Free (self -> stat_atopen );
529
+ self -> stat_atopen = NULL ;
530
+ }
524
531
525
532
done :
526
533
#ifdef MS_WINDOWS
@@ -553,6 +560,10 @@ fileio_dealloc(fileio *self)
553
560
if (_PyIOBase_finalize ((PyObject * ) self ) < 0 )
554
561
return ;
555
562
_PyObject_GC_UNTRACK (self );
563
+ if (self -> stat_atopen != NULL ) {
564
+ PyMem_Free (self -> stat_atopen );
565
+ self -> stat_atopen = NULL ;
566
+ }
556
567
if (self -> weakreflist != NULL )
557
568
PyObject_ClearWeakRefs ((PyObject * ) self );
558
569
(void )fileio_clear (self );
@@ -725,20 +736,27 @@ _io_FileIO_readall_impl(fileio *self)
725
736
return err_closed ();
726
737
}
727
738
728
- end = self -> estimated_size ;
739
+ if (self -> stat_atopen != NULL && self -> stat_atopen -> st_size < _PY_READ_MAX ) {
740
+ end = (Py_off_t )self -> stat_atopen -> st_size ;
741
+ }
742
+ else {
743
+ end = -1 ;
744
+ }
729
745
if (end <= 0 ) {
730
746
/* Use a default size and resize as needed. */
731
747
bufsize = SMALLCHUNK ;
732
748
}
733
749
else {
734
- /* This is probably a real file, so we try to allocate a
735
- buffer one byte larger than the rest of the file. If the
736
- calculation is right then we should get EOF without having
737
- to enlarge the buffer. */
750
+ /* This is probably a real file. */
738
751
if (end > _PY_READ_MAX - 1 ) {
739
752
bufsize = _PY_READ_MAX ;
740
753
}
741
754
else {
755
+ /* In order to detect end of file, need a read() of at
756
+ least 1 byte which returns size 0. Oversize the buffer
757
+ by 1 byte so the I/O can be completed with two read()
758
+ calls (one for all data, one for EOF) without needing
759
+ to resize the buffer. */
742
760
bufsize = (size_t )end + 1 ;
743
761
}
744
762
@@ -1094,11 +1112,13 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj)
1094
1112
return NULL ;
1095
1113
}
1096
1114
1097
- /* Sometimes a large file is truncated. While estimated_size is used as a
1098
- estimate, that it is much larger than the actual size can result in a
1099
- significant over allocation and sometimes a MemoryError / running out of
1100
- memory. */
1101
- self -> estimated_size = pos ;
1115
+ /* Since the file was truncated, its size at open is no longer accurate
1116
+ as an estimate. Clear out the stat result, and rely on dynamic resize
1117
+ code if a readall is requested. */
1118
+ if (self -> stat_atopen != NULL ) {
1119
+ PyMem_Free (self -> stat_atopen );
1120
+ self -> stat_atopen = NULL ;
1121
+ }
1102
1122
1103
1123
return posobj ;
1104
1124
}
@@ -1229,16 +1249,27 @@ get_mode(fileio *self, void *closure)
1229
1249
return PyUnicode_FromString (mode_string (self ));
1230
1250
}
1231
1251
1252
/* Getter for the "_blksize" attribute (registered in fileio_getsetlist).
   Returns a Python int: st_blksize taken from the stat result held in
   self->stat_atopen when it is usable, otherwise DEFAULT_BUFFER_SIZE.
   The closure argument is not used. */
+ static PyObject *
1253
+ get_blksize (fileio * self , void * closure )
1254
+ {
1255
+ #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
/* stat_atopen can be NULL: it starts out NULL, and is freed and reset to
   NULL when fstat fails at open and after a truncate. An st_blksize of 1
   or less is ignored and falls through to the default below. */
1256
+ if (self -> stat_atopen != NULL && self -> stat_atopen -> st_blksize > 1 ) {
1257
+ return PyLong_FromLong (self -> stat_atopen -> st_blksize );
1258
+ }
1259
+ #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
/* Fallback: no st_blksize support, no cached stat result, or a block size
   that was rejected above. */
1260
+ return PyLong_FromLong (DEFAULT_BUFFER_SIZE );
1261
+ }
1262
+
1232
1263
/* Table of computed attributes for the fileio type. Every entry supplies a
   getter and leaves the setter slot NULL, so this table defines no way to
   assign to these attributes. */
static PyGetSetDef fileio_getsetlist [] = {
1233
1264
{"closed" , (getter )get_closed , NULL , "True if the file is closed" },
1234
1265
{"closefd" , (getter )get_closefd , NULL ,
1235
1266
"True if the file descriptor will be closed by close()." },
1236
1267
{"mode" , (getter )get_mode , NULL , "String giving the file mode" },
1268
/* "_blksize" is computed on demand by get_blksize. */
+ {"_blksize" , (getter )get_blksize , NULL , "Stat st_blksize if available" },
1237
1269
/* Sentinel entry marking the end of the table. */
{NULL },
1238
1270
};
1239
1271
1240
1272
static PyMemberDef fileio_members [] = {
1241
- {"_blksize" , Py_T_UINT , offsetof(fileio , blksize ), 0 },
1242
1273
{"_finalizing" , Py_T_BOOL , offsetof(fileio , finalizing ), 0 },
1243
1274
{"__weaklistoffset__" , Py_T_PYSSIZET , offsetof(fileio , weakreflist ), Py_READONLY },
1244
1275
{"__dictoffset__" , Py_T_PYSSIZET , offsetof(fileio , dict ), Py_READONLY },
0 commit comments