Skip to content

Commit 710e826

Browse files
authored
bpo-42208: Add _Py_GetLocaleEncoding() (GH-23050)
_io.TextIOWrapper no longer calls getpreferredencoding(False) of _bootlocale to get the locale encoding, but calls _Py_GetLocaleEncoding() instead. Add the config_get_fs_encoding() sub-function. Also reorganize the config_get_locale_encoding() code.
1 parent 06f8c33 commit 710e826

File tree

6 files changed

+112
-110
lines changed

6 files changed

+112
-110
lines changed

Include/internal/pycore_fileutils.h

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
5050

5151
PyAPI_FUNC(void) _Py_closerange(int first, int last);
5252

53+
PyAPI_FUNC(PyObject*) _Py_GetLocaleEncoding(void);
54+
5355
#ifdef __cplusplus
5456
}
5557
#endif

Modules/_io/_iomodule.c

-25
Original file line numberDiff line numberDiff line change
@@ -593,31 +593,6 @@ _PyIO_get_module_state(void)
593593
return state;
594594
}
595595

596-
PyObject *
597-
_PyIO_get_locale_module(_PyIO_State *state)
598-
{
599-
PyObject *mod;
600-
if (state->locale_module != NULL) {
601-
assert(PyWeakref_CheckRef(state->locale_module));
602-
mod = PyWeakref_GET_OBJECT(state->locale_module);
603-
if (mod != Py_None) {
604-
Py_INCREF(mod);
605-
return mod;
606-
}
607-
Py_CLEAR(state->locale_module);
608-
}
609-
mod = PyImport_ImportModule("_bootlocale");
610-
if (mod == NULL)
611-
return NULL;
612-
state->locale_module = PyWeakref_NewRef(mod, NULL);
613-
if (state->locale_module == NULL) {
614-
Py_DECREF(mod);
615-
return NULL;
616-
}
617-
return mod;
618-
}
619-
620-
621596
static int
622597
iomodule_traverse(PyObject *mod, visitproc visit, void *arg) {
623598
_PyIO_State *state = get_io_state(mod);

Modules/_io/_iomodule.h

-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ typedef struct {
150150
#define IO_STATE() _PyIO_get_module_state()
151151

152152
extern _PyIO_State *_PyIO_get_module_state(void);
153-
extern PyObject *_PyIO_get_locale_module(_PyIO_State *);
154153

155154
#ifdef MS_WINDOWS
156155
extern char _PyIO_get_console_type(PyObject *);

Modules/_io/textio.c

+4-22
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "Python.h"
1111
#include "pycore_interp.h" // PyInterpreterState.fs_codec
1212
#include "pycore_long.h" // _PyLong_GetZero()
13+
#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
1314
#include "pycore_object.h"
1415
#include "pycore_pystate.h" // _PyInterpreterState_GET()
1516
#include "structmember.h" // PyMemberDef
@@ -27,7 +28,6 @@ _Py_IDENTIFIER(_dealloc_warn);
2728
_Py_IDENTIFIER(decode);
2829
_Py_IDENTIFIER(fileno);
2930
_Py_IDENTIFIER(flush);
30-
_Py_IDENTIFIER(getpreferredencoding);
3131
_Py_IDENTIFIER(isatty);
3232
_Py_IDENTIFIER(mode);
3333
_Py_IDENTIFIER(name);
@@ -1155,29 +1155,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
11551155
}
11561156
}
11571157
if (encoding == NULL && self->encoding == NULL) {
1158-
PyObject *locale_module = _PyIO_get_locale_module(state);
1159-
if (locale_module == NULL)
1160-
goto catch_ImportError;
1161-
self->encoding = _PyObject_CallMethodIdOneArg(
1162-
locale_module, &PyId_getpreferredencoding, Py_False);
1163-
Py_DECREF(locale_module);
1158+
self->encoding = _Py_GetLocaleEncoding();
11641159
if (self->encoding == NULL) {
1165-
catch_ImportError:
1166-
/*
1167-
Importing locale can raise an ImportError because of
1168-
_functools, and locale.getpreferredencoding can raise an
1169-
ImportError if _locale is not available. These will happen
1170-
during module building.
1171-
*/
1172-
if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1173-
PyErr_Clear();
1174-
self->encoding = PyUnicode_FromString("ascii");
1175-
}
1176-
else
1177-
goto error;
1160+
goto error;
11781161
}
1179-
else if (!PyUnicode_Check(self->encoding))
1180-
Py_CLEAR(self->encoding);
1162+
assert(PyUnicode_Check(self->encoding));
11811163
}
11821164
if (self->encoding != NULL) {
11831165
encoding = PyUnicode_AsUTF8(self->encoding);

Python/fileutils.c

+42-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "Python.h"
2-
#include "pycore_fileutils.h"
2+
#include "pycore_fileutils.h" // fileutils definitions
3+
#include "pycore_runtime.h" // _PyRuntime
34
#include "osdefs.h" // SEP
45
#include <locale.h>
56

@@ -820,6 +821,46 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
820821
}
821822

822823

824+
// Get the current locale encoding: locale.getpreferredencoding(False).
825+
// See also config_get_locale_encoding()
826+
PyObject *
827+
_Py_GetLocaleEncoding(void)
828+
{
829+
#ifdef _Py_FORCE_UTF8_LOCALE
830+
// On Android langinfo.h and CODESET are missing,
831+
// and UTF-8 is always used in mbstowcs() and wcstombs().
832+
return PyUnicode_FromString("UTF-8");
833+
#else
834+
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
835+
if (preconfig->utf8_mode) {
836+
return PyUnicode_FromString("UTF-8");
837+
}
838+
839+
#if defined(MS_WINDOWS)
840+
return PyUnicode_FromFormat("cp%u", GetACP());
841+
#else
842+
const char *encoding = nl_langinfo(CODESET);
843+
if (!encoding || encoding[0] == '\0') {
844+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
845+
// nl_langinfo() can return an empty string when the LC_CTYPE locale is
846+
// not supported. Default to UTF-8 in that case, because UTF-8 is the
847+
// default charset on macOS.
848+
encoding = "UTF-8";
849+
#else
850+
PyErr_SetString(PyExc_ValueError,
851+
"failed to get the locale encoding: "
852+
"nl_langinfo(CODESET) returns an empty string");
853+
return NULL;
854+
#endif
855+
}
856+
// Decode from UTF-8
857+
return PyUnicode_FromString(encoding);
858+
#endif // !MS_WINDOWS
859+
860+
#endif
861+
}
862+
863+
823864
#ifdef MS_WINDOWS
824865
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
825866

Python/initconfig.c

+64-61
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,7 @@ config_set_bytes_string(PyConfig *config, wchar_t **config_str,
766766
configured. */
767767
PyStatus
768768
PyConfig_SetBytesString(PyConfig *config, wchar_t **config_str,
769-
const char *str)
769+
const char *str)
770770
{
771771
return CONFIG_SET_BYTES_STR(config, config_str, str, "string");
772772
}
@@ -1466,8 +1466,13 @@ config_read_complex_options(PyConfig *config)
14661466

14671467

14681468
static const wchar_t *
1469-
config_get_stdio_errors(void)
1469+
config_get_stdio_errors(const PyPreConfig *preconfig)
14701470
{
1471+
if (preconfig->utf8_mode) {
1472+
/* UTF-8 Mode uses UTF-8/surrogateescape */
1473+
return L"surrogateescape";
1474+
}
1475+
14711476
#ifndef MS_WINDOWS
14721477
const char *loc = setlocale(LC_CTYPE, NULL);
14731478
if (loc != NULL) {
@@ -1492,26 +1497,41 @@ config_get_stdio_errors(void)
14921497
}
14931498

14941499

1500+
// See also _Py_GetLocaleEncoding() and config_get_fs_encoding()
14951501
static PyStatus
1496-
config_get_locale_encoding(PyConfig *config, wchar_t **locale_encoding)
1502+
config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
1503+
wchar_t **locale_encoding)
14971504
{
1505+
#ifdef _Py_FORCE_UTF8_LOCALE
1506+
return PyConfig_SetString(config, locale_encoding, L"utf-8");
1507+
#else
1508+
if (preconfig->utf8_mode) {
1509+
return PyConfig_SetString(config, locale_encoding, L"utf-8");
1510+
}
1511+
14981512
#ifdef MS_WINDOWS
14991513
char encoding[20];
15001514
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
15011515
return PyConfig_SetBytesString(config, locale_encoding, encoding);
1502-
#elif defined(_Py_FORCE_UTF8_LOCALE)
1503-
return PyConfig_SetString(config, locale_encoding, L"utf-8");
15041516
#else
15051517
const char *encoding = nl_langinfo(CODESET);
15061518
if (!encoding || encoding[0] == '\0') {
1519+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
1520+
// nl_langinfo() can return an empty string when the LC_CTYPE locale is
1521+
// not supported. Default to UTF-8 in that case, because UTF-8 is the
1522+
// default charset on macOS.
1523+
encoding = "UTF-8";
1524+
#else
15071525
return _PyStatus_ERR("failed to get the locale encoding: "
1508-
"nl_langinfo(CODESET) failed");
1526+
"nl_langinfo(CODESET) returns an empty string");
1527+
#endif
15091528
}
15101529
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
15111530
return CONFIG_SET_BYTES_STR(config,
15121531
locale_encoding, encoding,
15131532
"nl_langinfo(CODESET)");
1514-
#endif
1533+
#endif // !MS_WINDOWS
1534+
#endif // !_Py_FORCE_UTF8_LOCALE
15151535
}
15161536

15171537

@@ -1596,33 +1616,16 @@ config_init_stdio_encoding(PyConfig *config,
15961616
PyMem_RawFree(pythonioencoding);
15971617
}
15981618

1599-
/* UTF-8 Mode uses UTF-8/surrogateescape */
1600-
if (preconfig->utf8_mode) {
1601-
if (config->stdio_encoding == NULL) {
1602-
status = PyConfig_SetString(config, &config->stdio_encoding,
1603-
L"utf-8");
1604-
if (_PyStatus_EXCEPTION(status)) {
1605-
return status;
1606-
}
1607-
}
1608-
if (config->stdio_errors == NULL) {
1609-
status = PyConfig_SetString(config, &config->stdio_errors,
1610-
L"surrogateescape");
1611-
if (_PyStatus_EXCEPTION(status)) {
1612-
return status;
1613-
}
1614-
}
1615-
}
1616-
16171619
/* Choose the default error handler based on the current locale. */
16181620
if (config->stdio_encoding == NULL) {
1619-
status = config_get_locale_encoding(config, &config->stdio_encoding);
1621+
status = config_get_locale_encoding(config, preconfig,
1622+
&config->stdio_encoding);
16201623
if (_PyStatus_EXCEPTION(status)) {
16211624
return status;
16221625
}
16231626
}
16241627
if (config->stdio_errors == NULL) {
1625-
const wchar_t *errors = config_get_stdio_errors();
1628+
const wchar_t *errors = config_get_stdio_errors(preconfig);
16261629
assert(errors != NULL);
16271630

16281631
status = PyConfig_SetString(config, &config->stdio_errors, errors);
@@ -1635,46 +1638,46 @@ config_init_stdio_encoding(PyConfig *config,
16351638
}
16361639

16371640

1641+
// See also config_get_locale_encoding()
1642+
static PyStatus
1643+
config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig,
1644+
wchar_t **fs_encoding)
1645+
{
1646+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
1647+
return PyConfig_SetString(config, fs_encoding, L"utf-8");
1648+
#elif defined(MS_WINDOWS)
1649+
const wchar_t *encoding;
1650+
if (preconfig->legacy_windows_fs_encoding) {
1651+
// Legacy Windows filesystem encoding: mbcs/replace
1652+
encoding = L"mbcs";
1653+
}
1654+
else {
1655+
// Windows defaults to utf-8/surrogatepass (PEP 529)
1656+
encoding = L"utf-8";
1657+
}
1658+
return PyConfig_SetString(config, fs_encoding, encoding);
1659+
#else // !MS_WINDOWS
1660+
if (preconfig->utf8_mode) {
1661+
return PyConfig_SetString(config, fs_encoding, L"utf-8");
1662+
}
1663+
else if (_Py_GetForceASCII()) {
1664+
return PyConfig_SetString(config, fs_encoding, L"ascii");
1665+
}
1666+
else {
1667+
return config_get_locale_encoding(config, preconfig, fs_encoding);
1668+
}
1669+
#endif // !MS_WINDOWS
1670+
}
1671+
1672+
16381673
static PyStatus
16391674
config_init_fs_encoding(PyConfig *config, const PyPreConfig *preconfig)
16401675
{
16411676
PyStatus status;
16421677

16431678
if (config->filesystem_encoding == NULL) {
1644-
#ifdef _Py_FORCE_UTF8_FS_ENCODING
1645-
status = PyConfig_SetString(config, &config->filesystem_encoding, L"utf-8");
1646-
#else
1647-
1648-
#ifdef MS_WINDOWS
1649-
if (preconfig->legacy_windows_fs_encoding) {
1650-
/* Legacy Windows filesystem encoding: mbcs/replace */
1651-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1652-
L"mbcs");
1653-
}
1654-
else
1655-
#endif
1656-
if (preconfig->utf8_mode) {
1657-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1658-
L"utf-8");
1659-
}
1660-
#ifndef MS_WINDOWS
1661-
else if (_Py_GetForceASCII()) {
1662-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1663-
L"ascii");
1664-
}
1665-
#endif
1666-
else {
1667-
#ifdef MS_WINDOWS
1668-
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
1669-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1670-
L"utf-8");
1671-
#else
1672-
status = config_get_locale_encoding(config,
1673-
&config->filesystem_encoding);
1674-
#endif
1675-
}
1676-
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
1677-
1679+
status = config_get_fs_encoding(config, preconfig,
1680+
&config->filesystem_encoding);
16781681
if (_PyStatus_EXCEPTION(status)) {
16791682
return status;
16801683
}

0 commit comments

Comments
 (0)