From cd4bccc51bc8eeca5ff4bd8329770b76c09ce03e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 20 Mar 2022 16:58:59 +0900 Subject: [PATCH 01/13] bpo-47000: Add locale.get_encoding() --- Lib/locale.py | 20 ++++++++++---------- Modules/_localemodule.c | 8 ++++---- Modules/clinic/_localemodule.c.h | 16 ++++++++-------- Python/fileutils.c | 7 +++---- Python/initconfig.c | 8 +++++++- 5 files changed, 32 insertions(+), 27 deletions(-) diff --git a/Lib/locale.py b/Lib/locale.py index a710f27a807b09..d729c3f394bdcb 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -28,7 +28,7 @@ "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", "str", "atof", "atoi", "format", "format_string", "currency", "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", - "LC_NUMERIC", "LC_ALL", "CHAR_MAX"] + "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "get_encoding"] def _strcoll(a,b): """ strcoll(string,string) -> int. @@ -637,19 +637,17 @@ def resetlocale(category=LC_ALL): try: - from _locale import _get_locale_encoding + from _locale import get_encoding except ImportError: - def _get_locale_encoding(): + def get_encoding(): if hasattr(sys, 'getandroidapilevel'): # On Android langinfo.h and CODESET are missing, and UTF-8 is # always used in mbstowcs() and wcstombs(). return 'UTF-8' - if sys.flags.utf8_mode: - return 'UTF-8' encoding = getdefaultlocale()[1] if encoding is None: - # LANG not set, default conservatively to ASCII - encoding = 'ascii' + # LANG not set, default to UTF-8 + encoding = 'UTF-8' return encoding try: @@ -657,7 +655,9 @@ def _get_locale_encoding(): except NameError: def getpreferredencoding(do_setlocale=True): """Return the charset that the user is likely using.""" - return _get_locale_encoding() + if sys.flags.utf8_mode: + return 'UTF-8' + return get_encoding() else: # On Unix, if CODESET is available, use that. def getpreferredencoding(do_setlocale=True): @@ -667,7 +667,7 @@ def getpreferredencoding(do_setlocale=True): return 'UTF-8' if not do_setlocale: - return _get_locale_encoding() + return get_encoding() old_loc = setlocale(LC_CTYPE) try: @@ -675,7 +675,7 @@ def getpreferredencoding(do_setlocale=True): setlocale(LC_CTYPE, "") except Error: pass - return _get_locale_encoding() + return get_encoding() finally: setlocale(LC_CTYPE, old_loc) diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 564f5598edcc66..ff6623f9e3a48a 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -773,14 +773,14 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain, /*[clinic input] -_locale._get_locale_encoding +_locale.get_encoding Get the current locale encoding. [clinic start generated code]*/ static PyObject * -_locale__get_locale_encoding_impl(PyObject *module) -/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/ +_locale_get_encoding_impl(PyObject *module) +/*[clinic end generated code: output=7811932060493a1b input=163753befd041d95]*/ { return _Py_GetLocaleEncodingObject(); } @@ -811,7 +811,7 @@ static struct PyMethodDef PyLocale_Methods[] = { _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif #endif - _LOCALE__GET_LOCALE_ENCODING_METHODDEF + _LOCALE_GET_ENCODING_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h index 703d034c32e801..c556bb58cb0cb3 100644 --- a/Modules/clinic/_localemodule.c.h +++ b/Modules/clinic/_localemodule.c.h @@ -545,22 +545,22 @@ _locale_bind_textdomain_codeset(PyObject *module, PyObject *const *args, Py_ssiz #endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */ -PyDoc_STRVAR(_locale__get_locale_encoding__doc__, -"_get_locale_encoding($module, /)\n" +PyDoc_STRVAR(_locale_get_encoding__doc__, +"get_encoding($module, /)\n" "--\n" "\n" "Get the current locale encoding."); -#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \ - {"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__}, +#define _LOCALE_GET_ENCODING_METHODDEF \ + {"get_encoding", (PyCFunction)_locale_get_encoding, METH_NOARGS, _locale_get_encoding__doc__}, static PyObject * -_locale__get_locale_encoding_impl(PyObject *module); +_locale_get_encoding_impl(PyObject *module); static PyObject * -_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) +_locale_get_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return _locale__get_locale_encoding_impl(module); + return _locale_get_encoding_impl(module); } #ifndef _LOCALE_STRCOLL_METHODDEF @@ -602,4 +602,4 @@ _locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */ -/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=08f5d9744143d9f2 input=a9049054013a1b77]*/ diff --git a/Python/fileutils.c b/Python/fileutils.c index 111d7fa84b1882..ecf22ba1c6798b 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -93,6 +93,9 @@ _Py_device_encoding(int fd) return PyUnicode_FromFormat("cp%u", (unsigned int)cp); #else + if (_PyRuntime.preconfig.utf8_mode) { + return PyUnicode_FromString("utf-8"); //TODO: Use _Py_STR + } return _Py_GetLocaleEncodingObject(); #endif } @@ -890,10 +893,6 @@ _Py_GetLocaleEncoding(void) // and UTF-8 is always used in mbstowcs() and wcstombs(). return _PyMem_RawWcsdup(L"UTF-8"); #else - const PyPreConfig *preconfig = &_PyRuntime.preconfig; - if (preconfig->utf8_mode) { - return _PyMem_RawWcsdup(L"UTF-8"); - } #ifdef MS_WINDOWS wchar_t encoding[23]; diff --git a/Python/initconfig.c b/Python/initconfig.c index 47ebc64c8470a9..d9a9e429b81f51 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -1779,7 +1779,13 @@ static PyStatus config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig, wchar_t **locale_encoding) { - wchar_t *encoding = _Py_GetLocaleEncoding(); + wchar_t *encoding; + if (preconfig->utf8_mode) { + encoding = _PyMem_RawWcsdup(L"UTF-8"); + } + else { + encoding = _Py_GetLocaleEncoding(); + } if (encoding == NULL) { return _PyStatus_NO_MEMORY(); } From aaa89568fd9ea76c5fa5216f9dd1faeca7e5555f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 31 Mar 2022 15:17:57 +0900 Subject: [PATCH 02/13] get_encoding() -> getencoding() --- Lib/locale.py | 6 +++--- Modules/_localemodule.c | 8 ++++---- Modules/clinic/_localemodule.c.h | 16 ++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Lib/locale.py b/Lib/locale.py index d729c3f394bdcb..ceffe8370ac1e2 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -28,7 +28,7 @@ "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", "str", "atof", "atoi", "format", "format_string", "currency", "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", - "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "get_encoding"] + "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"] def _strcoll(a,b): """ strcoll(string,string) -> int. @@ -637,9 +637,9 @@ def resetlocale(category=LC_ALL): try: - from _locale import get_encoding + from _locale import getencoding except ImportError: - def get_encoding(): + def getencoding(): if hasattr(sys, 'getandroidapilevel'): # On Android langinfo.h and CODESET are missing, and UTF-8 is # always used in mbstowcs() and wcstombs(). diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index ff6623f9e3a48a..23c38e14d997d1 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -773,14 +773,14 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain, /*[clinic input] -_locale.get_encoding +_locale.getencoding Get the current locale encoding. [clinic start generated code]*/ static PyObject * -_locale_get_encoding_impl(PyObject *module) -/*[clinic end generated code: output=7811932060493a1b input=163753befd041d95]*/ +_locale_getencoding_impl(PyObject *module) +/*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/ { return _Py_GetLocaleEncodingObject(); } @@ -811,7 +811,7 @@ static struct PyMethodDef PyLocale_Methods[] = { _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif #endif - _LOCALE_GET_ENCODING_METHODDEF + _LOCALE_GETENCODING_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h index c556bb58cb0cb3..2958127e430de5 100644 --- a/Modules/clinic/_localemodule.c.h +++ b/Modules/clinic/_localemodule.c.h @@ -545,22 +545,22 @@ _locale_bind_textdomain_codeset(PyObject *module, PyObject *const *args, Py_ssiz #endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */ -PyDoc_STRVAR(_locale_get_encoding__doc__, -"get_encoding($module, /)\n" +PyDoc_STRVAR(_locale_getencoding__doc__, +"getencoding($module, /)\n" "--\n" "\n" "Get the current locale encoding."); -#define _LOCALE_GET_ENCODING_METHODDEF \ - {"get_encoding", (PyCFunction)_locale_get_encoding, METH_NOARGS, _locale_get_encoding__doc__}, +#define _LOCALE_GETENCODING_METHODDEF \ + {"getencoding", (PyCFunction)_locale_getencoding, METH_NOARGS, _locale_getencoding__doc__}, static PyObject * -_locale_get_encoding_impl(PyObject *module); +_locale_getencoding_impl(PyObject *module); static PyObject * -_locale_get_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) +_locale_getencoding(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return _locale_get_encoding_impl(module); + return _locale_getencoding_impl(module); } #ifndef _LOCALE_STRCOLL_METHODDEF @@ -602,4 +602,4 @@ _locale_get_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */ -/*[clinic end generated code: output=08f5d9744143d9f2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ea71e9b94bdaa47d input=a9049054013a1b77]*/ From 104206a17ca897533da83e85634970a79a82f51b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 31 Mar 2022 15:22:09 +0900 Subject: [PATCH 03/13] fix TextIOWrapper.__init__ --- Modules/_io/textio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d9d1c881418742..f0d74b5fe758a2 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1145,7 +1145,14 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, } } if (encoding == NULL && self->encoding == NULL) { - self->encoding = _Py_GetLocaleEncodingObject(); + const PyPreConfig *preconfig = &_PyRuntime.preconfig; + if (preconfig->utf8_mode) { + // TODO: Use _Py_STR + self->encoding = PyUnicode_FromString("utf-8"); + } + else { + self->encoding = _Py_GetLocaleEncodingObject(); + } if (self->encoding == NULL) { goto error; } From 8b50eb10de0805d0eda6a3e9aeb8865d32c6f7b7 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 31 Mar 2022 15:31:43 +0900 Subject: [PATCH 04/13] get_encoding -> getencoding --- Lib/locale.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/locale.py b/Lib/locale.py index ceffe8370ac1e2..bc11164015245e 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -657,7 +657,7 @@ def getpreferredencoding(do_setlocale=True): """Return the charset that the user is likely using.""" if sys.flags.utf8_mode: return 'UTF-8' - return get_encoding() + return getencoding() else: # On Unix, if CODESET is available, use that. def getpreferredencoding(do_setlocale=True): @@ -667,7 +667,7 @@ def getpreferredencoding(do_setlocale=True): return 'UTF-8' if not do_setlocale: - return get_encoding() + return getencoding() old_loc = setlocale(LC_CTYPE) try: @@ -675,7 +675,7 @@ def getpreferredencoding(do_setlocale=True): setlocale(LC_CTYPE, "") except Error: pass - return get_encoding() + return getencoding() finally: setlocale(LC_CTYPE, old_loc) From a0204c25f4cc6bd91beba56cba0395f6d4906ac6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 4 Apr 2022 12:23:32 +0900 Subject: [PATCH 05/13] Use _Py_STR for "utf-8" --- Modules/_io/textio.c | 8 ++++---- Python/fileutils.c | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index f0d74b5fe758a2..ea0f9b3d45a374 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1145,10 +1145,10 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, } } if (encoding == NULL && self->encoding == NULL) { - const PyPreConfig *preconfig = &_PyRuntime.preconfig; - if (preconfig->utf8_mode) { - // TODO: Use _Py_STR - self->encoding = PyUnicode_FromString("utf-8"); + if (_PyRuntime.preconfig.utf8_mode) { + _Py_DECLARE_STR(utf_8, "utf-8"); + self->encoding = &_Py_STR(utf_8); + Py_INCREF(self->encoding); } else { self->encoding = _Py_GetLocaleEncodingObject(); diff --git a/Python/fileutils.c b/Python/fileutils.c index ecf22ba1c6798b..f880375b0d3aec 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -94,7 +94,10 @@ _Py_device_encoding(int fd) return PyUnicode_FromFormat("cp%u", (unsigned int)cp); #else if (_PyRuntime.preconfig.utf8_mode) { - return PyUnicode_FromString("utf-8"); //TODO: Use _Py_STR + _Py_DECLARE_STR(utf_8, "utf-8"); + PyObject *encoding = &_Py_STR(utf_8); + Py_INCREF(encoding); + return encoding; } return _Py_GetLocaleEncodingObject(); #endif From 08413f1208c5cdfcfd10e6ddc2d4c5bf9dad914d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 6 Apr 2022 11:54:23 +0900 Subject: [PATCH 06/13] Update the doc. --- Doc/library/locale.rst | 15 ++++++++++++++- Doc/whatsnew/3.11.rst | 6 ++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 01e14a151d299c..5783c5c78be5f2 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -334,10 +334,23 @@ The :mod:`locale` module defines the following exception and functions: locale. See also the :term:`filesystem encoding and error handler`. .. versionchanged:: 3.7 - The function now always returns ``UTF-8`` on Android or if the + The function now always returns ``"UTF-8"`` on Android or if the :ref:`Python UTF-8 Mode ` is enabled. +.. function:: getencoding() + + Returns the :term:`locale encoding`. + + On Android, it always returns ``"UTF-8"``, the :term:`locale encoding` is + ignored. + + This function is same to ``getpreferredencoding(False)`` except this + function ignore the :ref:`UTF-8 Mode `. + + .. versionadded:: 3.11 + + .. function:: normalize(localename) Returns a normalized locale code for the given locale name. The returned locale diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 809cbd556c8c7f..d91b83ee494bc8 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -274,6 +274,12 @@ inspect * Add :func:`inspect.ismethodwrapper` for checking if the type of an object is a :class:`~types.MethodWrapperType`. (Contributed by Hakan Çelik in :issue:`29418`.) +locale +------ + +* Add :func:`locale.getencoding` that is same to + ``locale.getpreferredencoding(False)`` but ignores :ref:`UTF-8 Mode `. + math ---- From f61b62269e6ae38886c1ca6c4a5bd4aa6f3c436b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 6 Apr 2022 11:54:56 +0900 Subject: [PATCH 07/13] Add news --- .../NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst diff --git a/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst b/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst new file mode 100644 index 00000000000000..c1d655068da74d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst @@ -0,0 +1 @@ +Add :func:`locale.getencoding`. From 1c60e5b6bfae00a47fd1eb63581a49aacb7a7953 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 6 Apr 2022 17:16:17 +0900 Subject: [PATCH 08/13] Apply proposed changes. --- Doc/glossary.rst | 5 +++-- Doc/library/locale.rst | 9 ++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 258a06f7217035..233a129faaefba 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -710,8 +710,9 @@ Glossary On Windows, it is the ANSI code page (ex: ``cp1252``). - ``locale.getpreferredencoding(False)`` can be used to get the locale - encoding. + On Android and VxWorks, return ``"UTF-8"``. + + ``locale.getencoding()`` can be used to get the locale encoding. Python uses the :term:`filesystem encoding and error handler` to convert between Unicode filenames and bytes filenames. diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 5783c5c78be5f2..08e8b6266d0e1e 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -340,10 +340,13 @@ The :mod:`locale` module defines the following exception and functions: .. function:: getencoding() - Returns the :term:`locale encoding`. + Get the current :term:`locale encoding`: - On Android, it always returns ``"UTF-8"``, the :term:`locale encoding` is - ignored. + * On Android and VxWorks, return ``"UTF-8"``. + * On Unix, return the encoding of the current :data:`LC_CTYPE` locale. + Return ``"UTF-8"`` if :ref:`nl_langinfo(CODESET) ` + returns an empty string: for example, if the current LC_CTYPE locale is not supported. + * On Windows, return the ANSI code page. This function is same to ``getpreferredencoding(False)`` except this function ignore the :ref:`UTF-8 Mode `. From 7397493344e6ebd16a655e4319550a3df78b7acb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 6 Apr 2022 17:35:26 +0900 Subject: [PATCH 09/13] Fix doc error. --- Doc/library/locale.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 08e8b6266d0e1e..acd772ace61ce3 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -344,8 +344,8 @@ The :mod:`locale` module defines the following exception and functions: * On Android and VxWorks, return ``"UTF-8"``. * On Unix, return the encoding of the current :data:`LC_CTYPE` locale. - Return ``"UTF-8"`` if :ref:`nl_langinfo(CODESET) ` - returns an empty string: for example, if the current LC_CTYPE locale is not supported. + Return ``"UTF-8"`` if ``nl_langinfo(CODESET)`` returns an empty string: + for example, if the current LC_CTYPE locale is not supported. * On Windows, return the ANSI code page. This function is same to ``getpreferredencoding(False)`` except this From 44730dd227b4381f65653c3783b6dfb59871da3a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 7 Apr 2022 12:39:30 +0900 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: Victor Stinner --- Doc/glossary.rst | 4 ++-- Doc/library/locale.rst | 4 ++-- Doc/whatsnew/3.11.rst | 2 +- Modules/_io/textio.c | 3 +-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 233a129faaefba..54f32810a6e6f9 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -708,9 +708,9 @@ Glossary On Unix, it is the encoding of the LC_CTYPE locale. It can be set with ``locale.setlocale(locale.LC_CTYPE, new_locale)``. - On Windows, it is the ANSI code page (ex: ``cp1252``). + On Windows, it is the ANSI code page (ex: ``"cp1252"``). - On Android and VxWorks, return ``"UTF-8"``. + On Android and VxWorks, Python uses ``"UTF-8"`` as the locale encoding. ``locale.getencoding()`` can be used to get the locale encoding. diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index acd772ace61ce3..efe72fa38c4ccd 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -348,8 +348,8 @@ The :mod:`locale` module defines the following exception and functions: for example, if the current LC_CTYPE locale is not supported. * On Windows, return the ANSI code page. - This function is same to ``getpreferredencoding(False)`` except this - function ignore the :ref:`UTF-8 Mode `. + This function is similar to :func:`getpreferredencoding(False) ` except this + function ignores the :ref:`Python UTF-8 Mode `. .. versionadded:: 3.11 diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index d91b83ee494bc8..29f8196f52caa2 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -277,7 +277,7 @@ inspect locale ------ -* Add :func:`locale.getencoding` that is same to +* Add :func:`locale.getencoding` to get the current locale encoding. It is similar to ``locale.getpreferredencoding(False)`` but ignores :ref:`UTF-8 Mode `. math diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index ea0f9b3d45a374..0e207413257f49 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1147,8 +1147,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, if (encoding == NULL && self->encoding == NULL) { if (_PyRuntime.preconfig.utf8_mode) { _Py_DECLARE_STR(utf_8, "utf-8"); - self->encoding = &_Py_STR(utf_8); - Py_INCREF(self->encoding); + self->encoding = Py_NewRef(&_Py_STR(utf_8)); } else { self->encoding = _Py_GetLocaleEncodingObject(); From 7720b109f491879dea4a93cc3955e4ae6895a918 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 7 Apr 2022 13:16:21 +0900 Subject: [PATCH 11/13] UTF-8 -> utf-8 --- Doc/glossary.rst | 4 ++-- Doc/library/locale.rst | 13 +++++++++---- Lib/locale.py | 8 ++++---- Python/fileutils.c | 14 ++++++-------- Python/initconfig.c | 2 +- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 54f32810a6e6f9..479071436eb3e7 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -706,11 +706,11 @@ Glossary locale encoding On Unix, it is the encoding of the LC_CTYPE locale. It can be set with - ``locale.setlocale(locale.LC_CTYPE, new_locale)``. + :func:`locale.setlocale(locale.LC_CTYPE, new_locale) `. On Windows, it is the ANSI code page (ex: ``"cp1252"``). - On Android and VxWorks, Python uses ``"UTF-8"`` as the locale encoding. + On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding. ``locale.getencoding()`` can be used to get the locale encoding. diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index efe72fa38c4ccd..f0a86651f9a93f 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -327,7 +327,7 @@ The :mod:`locale` module defines the following exception and functions: is not necessary or desired, *do_setlocale* should be set to ``False``. On Android or if the :ref:`Python UTF-8 Mode ` is enabled, always - return ``'UTF-8'``, the :term:`locale encoding` and the *do_setlocale* + return ``'utf-8'``, the :term:`locale encoding` and the *do_setlocale* argument are ignored. The :ref:`Python preinitialization ` configures the LC_CTYPE @@ -337,18 +337,23 @@ The :mod:`locale` module defines the following exception and functions: The function now always returns ``"UTF-8"`` on Android or if the :ref:`Python UTF-8 Mode ` is enabled. + .. versionchanged:: 3.11 + The function now returns ``"utf-8"`` instead of ``"UTF-8"`` on Android + or if the :ref:`Python UTF-8 Mode ` is enabled. + .. function:: getencoding() Get the current :term:`locale encoding`: - * On Android and VxWorks, return ``"UTF-8"``. + * On Android and VxWorks, return ``"utf-8"``. * On Unix, return the encoding of the current :data:`LC_CTYPE` locale. - Return ``"UTF-8"`` if ``nl_langinfo(CODESET)`` returns an empty string: + Return ``"utf-8"`` if ``nl_langinfo(CODESET)`` returns an empty string: for example, if the current LC_CTYPE locale is not supported. * On Windows, return the ANSI code page. - This function is similar to :func:`getpreferredencoding(False) ` except this + This function is similar to + :func:`getpreferredencoding(False) ` except this function ignores the :ref:`Python UTF-8 Mode `. .. versionadded:: 3.11 diff --git a/Lib/locale.py b/Lib/locale.py index bc11164015245e..496cc803c88f7c 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -643,11 +643,11 @@ def getencoding(): if hasattr(sys, 'getandroidapilevel'): # On Android langinfo.h and CODESET are missing, and UTF-8 is # always used in mbstowcs() and wcstombs(). - return 'UTF-8' + return 'utf-8' encoding = getdefaultlocale()[1] if encoding is None: # LANG not set, default to UTF-8 - encoding = 'UTF-8' + encoding = 'utf-8' return encoding try: @@ -656,7 +656,7 @@ def getencoding(): def getpreferredencoding(do_setlocale=True): """Return the charset that the user is likely using.""" if sys.flags.utf8_mode: - return 'UTF-8' + return 'utf-8' return getencoding() else: # On Unix, if CODESET is available, use that. @@ -664,7 +664,7 @@ def getpreferredencoding(do_setlocale=True): """Return the charset that the user is likely using, according to the system configuration.""" if sys.flags.utf8_mode: - return 'UTF-8' + return 'utf-8' if not do_setlocale: return getencoding() diff --git a/Python/fileutils.c b/Python/fileutils.c index f880375b0d3aec..f1b253446bd178 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -95,9 +95,7 @@ _Py_device_encoding(int fd) #else if (_PyRuntime.preconfig.utf8_mode) { _Py_DECLARE_STR(utf_8, "utf-8"); - PyObject *encoding = &_Py_STR(utf_8); - Py_INCREF(encoding); - return encoding; + return Py_NewRef(&_Py_STR(utf_8)); } return _Py_GetLocaleEncodingObject(); #endif @@ -879,10 +877,10 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str, // Get the current locale encoding name: // -// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android) -// - Return "UTF-8" if the UTF-8 Mode is enabled +// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android) +// - Return "utf-8" if the UTF-8 Mode is enabled // - On Windows, return the ANSI code page (ex: "cp1250") -// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string. +// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string. // - Otherwise, return nl_langinfo(CODESET). // // Return NULL on memory allocation failure. @@ -894,7 +892,7 @@ _Py_GetLocaleEncoding(void) #ifdef _Py_FORCE_UTF8_LOCALE // On Android langinfo.h and CODESET are missing, // and UTF-8 is always used in mbstowcs() and wcstombs(). - return _PyMem_RawWcsdup(L"UTF-8"); + return _PyMem_RawWcsdup(L"utf-8"); #else #ifdef MS_WINDOWS @@ -908,7 +906,7 @@ _Py_GetLocaleEncoding(void) if (!encoding || encoding[0] == '\0') { // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on // macOS if the LC_CTYPE locale is not supported. - return _PyMem_RawWcsdup(L"UTF-8"); + return _PyMem_RawWcsdup(L"utf-8"); } wchar_t *wstr; diff --git a/Python/initconfig.c b/Python/initconfig.c index d9a9e429b81f51..d2e74f5878a513 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -1781,7 +1781,7 @@ config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig, { wchar_t *encoding; if (preconfig->utf8_mode) { - encoding = _PyMem_RawWcsdup(L"UTF-8"); + encoding = _PyMem_RawWcsdup(L"utf-8"); } else { encoding = _Py_GetLocaleEncoding(); From 104008dfdb1b0807439db93522221f4a04b91a8b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 7 Apr 2022 13:43:22 +0900 Subject: [PATCH 12/13] Fix test_utf8_mode --- Lib/test/test_utf8_mode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 308e8e8aea6c22..ec29ba6d51b127 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -203,12 +203,12 @@ def test_pyio_encoding(self): def test_locale_getpreferredencoding(self): code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))' out = self.get_output('-X', 'utf8', '-c', code) - self.assertEqual(out, 'UTF-8 UTF-8') + self.assertEqual(out, 'utf-8 utf-8') for loc in POSIX_LOCALES: with self.subTest(LC_ALL=loc): out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc) - self.assertEqual(out, 'UTF-8 UTF-8') + self.assertEqual(out, 'utf-8 utf-8') @unittest.skipIf(MS_WINDOWS, 'test specific to Unix') def test_cmd_line(self): @@ -276,7 +276,7 @@ def test_device_encoding(self): # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY with open(filename, encoding="utf8") as fp: out = fp.read().rstrip() - self.assertEqual(out, 'True UTF-8') + self.assertEqual(out, 'True utf-8') if __name__ == "__main__": From a8e50bd0112e79768c8360256bfc7b1ca965c840 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 8 Apr 2022 16:20:37 +0900 Subject: [PATCH 13/13] Update based on suggestions --- Doc/glossary.rst | 3 +-- Doc/library/locale.rst | 9 ++++----- Doc/whatsnew/3.11.rst | 3 ++- .../Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst | 4 +++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 479071436eb3e7..75db433abd6007 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -714,8 +714,7 @@ Glossary ``locale.getencoding()`` can be used to get the locale encoding. - Python uses the :term:`filesystem encoding and error handler` to convert - between Unicode filenames and bytes filenames. + See also the :term:`filesystem encoding and error handler`. list A built-in Python :term:`sequence`. Despite its name it is more akin diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index f0a86651f9a93f..77a3e036841baa 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -334,13 +334,9 @@ The :mod:`locale` module defines the following exception and functions: locale. See also the :term:`filesystem encoding and error handler`. .. versionchanged:: 3.7 - The function now always returns ``"UTF-8"`` on Android or if the + The function now always returns ``"utf-8"`` on Android or if the :ref:`Python UTF-8 Mode ` is enabled. - .. versionchanged:: 3.11 - The function now returns ``"utf-8"`` instead of ``"UTF-8"`` on Android - or if the :ref:`Python UTF-8 Mode ` is enabled. - .. function:: getencoding() @@ -352,6 +348,9 @@ The :mod:`locale` module defines the following exception and functions: for example, if the current LC_CTYPE locale is not supported. * On Windows, return the ANSI code page. + The :ref:`Python preinitialization ` configures the LC_CTYPE + locale. See also the :term:`filesystem encoding and error handler`. + This function is similar to :func:`getpreferredencoding(False) ` except this function ignores the :ref:`Python UTF-8 Mode `. diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 29f8196f52caa2..9256642c981a59 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -278,7 +278,8 @@ locale ------ * Add :func:`locale.getencoding` to get the current locale encoding. It is similar to - ``locale.getpreferredencoding(False)`` but ignores :ref:`UTF-8 Mode `. + ``locale.getpreferredencoding(False)`` but ignores the + :ref:`Python UTF-8 Mode `. math ---- diff --git a/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst b/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst index c1d655068da74d..0dd3d416c51e59 100644 --- a/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst +++ b/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst @@ -1 +1,3 @@ -Add :func:`locale.getencoding`. +Add :func:`locale.getencoding` to get the current locale encoding. +It is similar to ``locale.getpreferredencoding(False)`` but ignores the +:ref:`Python UTF-8 Mode `.