Skip to content

Commit a3c014b

Browse files
committed
Raise a warning when encoding is omitted
The warning is raised only in dev mode. pathlib uses the new `io.text_encoding` helper function. Other libraries will follow.
1 parent 23a567c commit a3c014b

File tree

6 files changed

+142
-11
lines changed

6 files changed

+142
-11
lines changed

Lib/_pyio.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,28 @@
4040
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
4141

4242

43+
def text_encoding(encoding, stacklevel=1):
    """
    Pick the text encoding for an API whose encoding argument is optional.

    An explicit encoding (anything other than None) is returned unchanged.
    When encoding is None, the current default, "locale", is returned and,
    if the interpreter runs in dev mode, a DeprecationWarning is issued.

    stacklevel is increased by 2 before being passed to warnings.warn().

    Intended for APIs that accept encoding=None; new APIs should
    consider defaulting to encoding="utf-8" instead.
    """
    # Explicit choice: nothing to decide and nothing to warn about.
    if encoding is not None:
        return encoding

    if sys.flags.dev_mode:
        # Imported lazily so the warnings machinery is only loaded when a
        # warning is actually going to be emitted.
        import warnings
        message = ("'encoding' option is not specified. The default encoding "
                   "will be changed to 'utf-8' in the future")
        warnings.warn(message, DeprecationWarning, stacklevel + 2)
    return "locale"
63+
64+
4365
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
4466
newline=None, closefd=True, opener=None):
4567

@@ -248,6 +270,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
248270
result = buffer
249271
if binary:
250272
return result
273+
encoding = text_encoding(encoding)
251274
text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
252275
result = text
253276
text.mode = mode
@@ -2004,19 +2027,22 @@ class TextIOWrapper(TextIOBase):
20042027
def __init__(self, buffer, encoding=None, errors=None, newline=None,
20052028
line_buffering=False, write_through=False):
20062029
self._check_newline(newline)
2007-
if encoding is None:
2030+
encoding = text_encoding(encoding)
2031+
2032+
if encoding == "locale":
20082033
try:
2009-
encoding = os.device_encoding(buffer.fileno())
2034+
encoding = os.device_encoding(buffer.fileno()) or "locale"
20102035
except (AttributeError, UnsupportedOperation):
20112036
pass
2012-
if encoding is None:
2013-
try:
2014-
import locale
2015-
except ImportError:
2016-
# Importing locale may fail if Python is being built
2017-
encoding = "ascii"
2018-
else:
2019-
encoding = locale.getpreferredencoding(False)
2037+
2038+
if encoding == "locale":
2039+
try:
2040+
import locale
2041+
except ImportError:
2042+
# Importing locale may fail if Python is being built
2043+
encoding = "utf-8"
2044+
else:
2045+
encoding = locale.getpreferredencoding(False)
20202046

20212047
if not isinstance(encoding, str):
20222048
raise ValueError("invalid encoding: %r" % encoding)

Lib/io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation,
5555
open, open_code, FileIO, BytesIO, StringIO, BufferedReader,
5656
BufferedWriter, BufferedRWPair, BufferedRandom,
57-
IncrementalNewlineDecoder, TextIOWrapper)
57+
IncrementalNewlineDecoder, text_encoding, TextIOWrapper)
5858

5959
OpenWrapper = _io.open # for compatibility with _pyio
6060

Lib/pathlib.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,8 @@ def open(self, mode='r', buffering=-1, encoding=None,
12411241
Open the file pointed by this path and return a file object, as
12421242
the built-in open() function does.
12431243
"""
1244+
if "b" not in mode:
1245+
encoding = io.text_encoding(encoding)
12441246
return io.open(self, mode, buffering, encoding, errors, newline,
12451247
opener=self._opener)
12461248

@@ -1255,6 +1257,7 @@ def read_text(self, encoding=None, errors=None):
12551257
"""
12561258
Open the file in text mode, read it, and close the file.
12571259
"""
1260+
encoding = io.text_encoding(encoding)
12581261
with self.open(mode='r', encoding=encoding, errors=errors) as f:
12591262
return f.read()
12601263

@@ -1274,6 +1277,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
12741277
if not isinstance(data, str):
12751278
raise TypeError('data must be str, not %s' %
12761279
data.__class__.__name__)
1280+
encoding = io.text_encoding(encoding)
12771281
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
12781282
return f.write(data)
12791283

Modules/_io/_iomodule.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ PyObject *_PyIO_str_fileno = NULL;
3333
PyObject *_PyIO_str_flush = NULL;
3434
PyObject *_PyIO_str_getstate = NULL;
3535
PyObject *_PyIO_str_isatty = NULL;
36+
PyObject *_PyIO_str_locale = NULL;
3637
PyObject *_PyIO_str_newlines = NULL;
3738
PyObject *_PyIO_str_nl = NULL;
3839
PyObject *_PyIO_str_peek = NULL;
@@ -504,6 +505,43 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
504505
return NULL;
505506
}
506507

508+
509+
/*[clinic input]
510+
_io.text_encoding
511+
encoding: object = NULL
512+
stacklevel: int = 1
513+
/
514+
515+
Helper function to choose the text encoding.
516+
517+
When encoding is not None, just return it.
518+
Otherwise, return the default text encoding ("locale" for now)
519+
and raise a DeprecationWarning in dev mode.
520+
521+
This function can be used in APIs having encoding=None option.
522+
But please consider encoding="utf-8" for new APIs.
523+
[clinic start generated code]*/
524+
525+
static PyObject *
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
/*[clinic end generated code: output=91b2cfea6934cc0c input=46b896c6a7111a95]*/
{
    /* Choose the text encoding for an API whose encoding is optional.
     *
     * encoding:   the caller-supplied encoding; NULL (argument omitted)
     *             and None both mean "not specified".
     * stacklevel: passed to PyErr_WarnEx() as stacklevel + 1.
     *
     * Returns a new reference: the given encoding when one was specified,
     * otherwise the interned "locale" string.  Returns NULL with an
     * exception set if emitting the DeprecationWarning raised.
     */
    if (encoding == NULL || encoding == Py_None) {
        PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
        if (interp->config.dev_mode) {
            /* PyErr_WarnEx() returns -1 with an exception set when the
             * warning is turned into an error (e.g. -W error).  That
             * failure must be propagated: returning a valid object while
             * an exception is pending would be reported as a SystemError.
             */
            if (PyErr_WarnEx(PyExc_DeprecationWarning,
                             "'encoding' option is not specified. The default encoding "
                             "will be changed to 'utf-8' in the future",
                             stacklevel + 1) < 0) {
                return NULL;
            }
        }
        Py_INCREF(_PyIO_str_locale);
        return _PyIO_str_locale;
    }
    /* An explicit encoding is handed back untouched. */
    Py_INCREF(encoding);
    return encoding;
}
543+
544+
507545
/*[clinic input]
508546
_io.open_code
509547
@@ -629,6 +667,7 @@ iomodule_free(PyObject *mod) {
629667

630668
static PyMethodDef module_methods[] = {
631669
_IO_OPEN_METHODDEF
670+
_IO_TEXT_ENCODING_METHODDEF
632671
_IO_OPEN_CODE_METHODDEF
633672
{NULL, NULL}
634673
};
@@ -747,6 +786,7 @@ PyInit__io(void)
747786
ADD_INTERNED(flush)
748787
ADD_INTERNED(getstate)
749788
ADD_INTERNED(isatty)
789+
ADD_INTERNED(locale)
750790
ADD_INTERNED(newlines)
751791
ADD_INTERNED(peek)
752792
ADD_INTERNED(read)

Modules/_io/clinic/_iomodule.c.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,55 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw
272272
return return_value;
273273
}
274274

275+
PyDoc_STRVAR(_io_text_encoding__doc__,
276+
"text_encoding($module, encoding=<unrepresentable>, stacklevel=1, /)\n"
277+
"--\n"
278+
"\n"
279+
"Select text encoding for TextIOWrapper.\n"
280+
"\n"
281+
"Returns the default text encoding for TextIOWrapper when encoding is None.\n"
282+
"The default text encoding is \"locale\" for now, but it will be changed\n"
283+
"to \"utf-8\" in the future.");
284+
285+
#define _IO_TEXT_ENCODING_METHODDEF \
286+
{"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__},
287+
288+
static PyObject *
289+
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel);
290+
291+
static PyObject *
292+
_io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
293+
{
294+
PyObject *return_value = NULL;
295+
PyObject *encoding = NULL;
296+
int stacklevel = 1;
297+
298+
if (!_PyArg_CheckPositional("text_encoding", nargs, 0, 2)) {
299+
goto exit;
300+
}
301+
if (nargs < 1) {
302+
goto skip_optional;
303+
}
304+
encoding = args[0];
305+
if (nargs < 2) {
306+
goto skip_optional;
307+
}
308+
if (PyFloat_Check(args[1])) {
309+
PyErr_SetString(PyExc_TypeError,
310+
"integer argument expected, got float" );
311+
goto exit;
312+
}
313+
stacklevel = _PyLong_AsInt(args[1]);
314+
if (stacklevel == -1 && PyErr_Occurred()) {
315+
goto exit;
316+
}
317+
skip_optional:
318+
return_value = _io_text_encoding_impl(module, encoding, stacklevel);
319+
320+
exit:
321+
return return_value;
322+
}
323+
275324
PyDoc_STRVAR(_io_open_code__doc__,
276325
"open_code($module, /, path)\n"
277326
"--\n"

Modules/_io/textio.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,18 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
11231123
self->encodefunc = NULL;
11241124
self->b2cratio = 0.0;
11251125

1126+
if (encoding == NULL) {
1127+
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
1128+
if (interp->config.dev_mode) {
1129+
PyErr_WarnEx(PyExc_DeprecationWarning,
1130+
"'encoding' option is not specified. The default encoding "
1131+
"will be changed to 'utf-8' in the future", 1);
1132+
}
1133+
}
1134+
else if (strcmp(encoding, "locale") == 0) {
1135+
encoding = NULL;
1136+
}
1137+
11261138
if (encoding == NULL) {
11271139
/* Try os.device_encoding(fileno) */
11281140
PyObject *fileno;

0 commit comments

Comments
 (0)