Skip to content

Commit 1d97d36

Browse files
committed
pythongh-117151: IO performance improvement, increase io.DEFAULT_BUFFER_SIZE to 128k, adjust open() to use max(st_blksize, io.DEFAULT_BUFFER_SIZE)
1 parent e5c3b7e commit 1d97d36

File tree

6 files changed

+38
-16
lines changed

6 files changed

+38
-16
lines changed

Lib/_pyio.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
valid_seek_flags.add(os.SEEK_HOLE)
2424
valid_seek_flags.add(os.SEEK_DATA)
2525

26-
# open() uses st_blksize whenever we can
27-
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
26+
# open() uses max(st_blksize, io.DEFAULT_BUFFER_SIZE) when st_blksize is available
27+
DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes
2828

2929
# NOTE: Base classes defined here are registered with the "official" ABCs
3030
# defined in io.py. We don't use real inheritance though, because we don't want
@@ -123,10 +123,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
123123
the size of a fixed-size chunk buffer. When no buffering argument is
124124
given, the default buffering policy works as follows:
125125
126-
* Binary files are buffered in fixed-size chunks; the size of the buffer
127-
is chosen using a heuristic trying to determine the underlying device's
128-
"block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
129-
On many systems, the buffer will typically be 4096 or 8192 bytes long.
126+
* Binary files are buffered in fixed-size chunks; the size of the buffer
127+
is `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` when the underlying
128+
device's "block size" (`st_blksize`) can be determined, and falls back
129+
on `io.DEFAULT_BUFFER_SIZE` otherwise.
130+
On most systems, the buffer will typically be 131072 bytes (128 KiB) long.
130131
131132
* "Interactive" text files (files for which isatty() returns True)
132133
use line buffering. Other text files use the policy described above
@@ -242,7 +243,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
242243
buffering = -1
243244
line_buffering = True
244245
if buffering < 0:
245-
buffering = raw._blksize
246+
buffering = max(raw._blksize, DEFAULT_BUFFER_SIZE)
246247
if buffering < 0:
247248
raise ValueError("invalid buffering size")
248249
if buffering == 0:

Lib/test/test_file.py

+12
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,18 @@ def testSetBufferSize(self):
216216
with self.assertWarnsRegex(RuntimeWarning, 'line buffering'):
217217
self._checkBufferSize(1)
218218

219+
def testDefaultBufferSize(self):
220+
f = self.open(TESTFN, 'wb')
221+
blksize = f.raw._blksize
222+
f.write(bytes([0] * 5_000_000))
223+
f.close()
224+
225+
f = self.open(TESTFN, 'rb')
226+
data = f.read1()
227+
expected_size = max(blksize, io.DEFAULT_BUFFER_SIZE)
228+
self.assertEqual(len(data), expected_size)
229+
f.close()
230+
219231
def testTruncateOnWindows(self):
220232
# SF bug <https://bugs.python.org/issue801631>
221233
# "file.truncate fault on windows"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Increase ``io.DEFAULT_BUFFER_SIZE`` from 8 KiB to 128 KiB and adjust :func:`open` on
2+
platforms where ``fstat`` provides an ``st_blksize`` field (such as Linux) to use
3+
``max(io.DEFAULT_BUFFER_SIZE, device block size)`` rather than always using the
4+
device block size. This should improve I/O performance.
5+
Patch by Romain Morotti.

Modules/_io/_iomodule.c

+7-4
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,10 @@ the size of a fixed-size chunk buffer. When no buffering argument is
132132
given, the default buffering policy works as follows:
133133
134134
* Binary files are buffered in fixed-size chunks; the size of the buffer
135-
is chosen using a heuristic trying to determine the underlying device's
136-
"block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
137-
On many systems, the buffer will typically be 4096 or 8192 bytes long.
135+
is `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` when the underlying
136+
device's "block size" (`st_blksize`) can be determined, and falls back
137+
on `io.DEFAULT_BUFFER_SIZE` otherwise.
138+
On most systems, the buffer will typically be 131072 bytes (128 KiB) long.
138139
139140
* "Interactive" text files (files for which isatty() returns True)
140141
use line buffering. Other text files use the policy described above
@@ -200,7 +201,7 @@ static PyObject *
200201
_io_open_impl(PyObject *module, PyObject *file, const char *mode,
201202
int buffering, const char *encoding, const char *errors,
202203
const char *newline, int closefd, PyObject *opener)
203-
/*[clinic end generated code: output=aefafc4ce2b46dc0 input=cd034e7cdfbf4e78]*/
204+
/*[clinic end generated code: output=aefafc4ce2b46dc0 input=bac1cd70f431fe9a]*/
204205
{
205206
size_t i;
206207

@@ -368,6 +369,8 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
368369
if (blksize_obj == NULL)
369370
goto error;
370371
buffering = PyLong_AsLong(blksize_obj);
372+
if (buffering < DEFAULT_BUFFER_SIZE)
373+
buffering = DEFAULT_BUFFER_SIZE;
371374
Py_DECREF(blksize_obj);
372375
if (buffering == -1 && PyErr_Occurred())
373376
goto error;

Modules/_io/_iomodule.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ extern Py_ssize_t _PyIO_find_line_ending(
7878
*/
7979
extern int _PyIO_trap_eintr(void);
8080

81-
#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */
81+
#define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */
8282

8383
/*
8484
* Offset type for positioning.

Modules/_io/clinic/_iomodule.c.h

+5-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)