Skip to content

Commit 3d17c04

Browse files
authored
bpo-40521: Add PyInterpreterState.unicode (GH-20081)
Move PyInterpreterState.fs_codec into a new PyInterpreterState.unicode structure. Give a name to the fs_codec structure and use this structure in unicodeobject.c.
1 parent 75cd8e4 commit 3d17c04

File tree

3 files changed

+48
-40
lines changed

3 files changed

+48
-40
lines changed

Include/internal/pycore_interp.h

+14-8
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,19 @@ struct _ceval_state {
5151
#endif
5252
};
5353

54+
/* fs_codec.encoding is initialized to NULL.
55+
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
56+
struct _Py_unicode_fs_codec {
57+
char *encoding; // Filesystem encoding (encoded to UTF-8)
58+
int utf8; // encoding=="utf-8"?
59+
char *errors; // Filesystem errors (encoded to UTF-8)
60+
_Py_error_handler error_handler;
61+
};
62+
63+
struct _Py_unicode_state {
64+
struct _Py_unicode_fs_codec fs_codec;
65+
};
66+
5467

5568
/* interpreter state */
5669

@@ -97,14 +110,7 @@ struct _is {
97110
PyObject *codec_error_registry;
98111
int codecs_initialized;
99112

100-
/* fs_codec.encoding is initialized to NULL.
101-
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
102-
struct {
103-
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
104-
int utf8; /* encoding=="utf-8"? */
105-
char *errors; /* Filesystem errors (encoded to UTF-8) */
106-
_Py_error_handler error_handler;
107-
} fs_codec;
113+
struct _Py_unicode_state unicode;
108114

109115
PyConfig config;
110116
#ifdef HAVE_DLOPEN

Modules/_io/textio.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ io_check_errors(PyObject *errors)
10071007

10081008
/* Avoid calling PyCodec_LookupError() before the codec registry is ready:
10091009
before _PyUnicode_InitEncodings() is called. */
1010-
if (!interp->fs_codec.encoding) {
1010+
if (!interp->unicode.fs_codec.encoding) {
10111011
return 0;
10121012
}
10131013

Objects/unicodeobject.c

+33-31
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ unicode_check_encoding_errors(const char *encoding, const char *errors)
463463

464464
/* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the
465465
codec registry is ready: before _PyUnicode_InitEncodings() is called. */
466-
if (!interp->fs_codec.encoding) {
466+
if (!interp->unicode.fs_codec.encoding) {
467467
return 0;
468468
}
469469

@@ -3650,16 +3650,17 @@ PyObject *
36503650
PyUnicode_EncodeFSDefault(PyObject *unicode)
36513651
{
36523652
PyInterpreterState *interp = _PyInterpreterState_GET();
3653-
if (interp->fs_codec.utf8) {
3653+
struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
3654+
if (fs_codec->utf8) {
36543655
return unicode_encode_utf8(unicode,
3655-
interp->fs_codec.error_handler,
3656-
interp->fs_codec.errors);
3656+
fs_codec->error_handler,
3657+
fs_codec->errors);
36573658
}
36583659
#ifndef _Py_FORCE_UTF8_FS_ENCODING
3659-
else if (interp->fs_codec.encoding) {
3660+
else if (fs_codec->encoding) {
36603661
return PyUnicode_AsEncodedString(unicode,
3661-
interp->fs_codec.encoding,
3662-
interp->fs_codec.errors);
3662+
fs_codec->encoding,
3663+
fs_codec->errors);
36633664
}
36643665
#endif
36653666
else {
@@ -3886,17 +3887,18 @@ PyObject*
38863887
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
38873888
{
38883889
PyInterpreterState *interp = _PyInterpreterState_GET();
3889-
if (interp->fs_codec.utf8) {
3890+
struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
3891+
if (fs_codec->utf8) {
38903892
return unicode_decode_utf8(s, size,
3891-
interp->fs_codec.error_handler,
3892-
interp->fs_codec.errors,
3893+
fs_codec->error_handler,
3894+
fs_codec->errors,
38933895
NULL);
38943896
}
38953897
#ifndef _Py_FORCE_UTF8_FS_ENCODING
3896-
else if (interp->fs_codec.encoding) {
3898+
else if (fs_codec->encoding) {
38973899
return PyUnicode_Decode(s, size,
3898-
interp->fs_codec.encoding,
3899-
interp->fs_codec.errors);
3900+
fs_codec->encoding,
3901+
fs_codec->errors);
39003902
}
39013903
#endif
39023904
else {
@@ -16071,16 +16073,17 @@ init_fs_codec(PyInterpreterState *interp)
1607116073
return -1;
1607216074
}
1607316075

16074-
PyMem_RawFree(interp->fs_codec.encoding);
16075-
interp->fs_codec.encoding = encoding;
16076+
struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
16077+
PyMem_RawFree(fs_codec->encoding);
16078+
fs_codec->encoding = encoding;
1607616079
/* encoding has been normalized by init_fs_encoding() */
16077-
interp->fs_codec.utf8 = (strcmp(encoding, "utf-8") == 0);
16078-
PyMem_RawFree(interp->fs_codec.errors);
16079-
interp->fs_codec.errors = errors;
16080-
interp->fs_codec.error_handler = error_handler;
16080+
fs_codec->utf8 = (strcmp(encoding, "utf-8") == 0);
16081+
PyMem_RawFree(fs_codec->errors);
16082+
fs_codec->errors = errors;
16083+
fs_codec->error_handler = error_handler;
1608116084

1608216085
#ifdef _Py_FORCE_UTF8_FS_ENCODING
16083-
assert(interp->fs_codec.utf8 == 1);
16086+
assert(fs_codec->utf8 == 1);
1608416087
#endif
1608516088

1608616089
/* At this point, PyUnicode_EncodeFSDefault() and
@@ -16089,8 +16092,8 @@ init_fs_codec(PyInterpreterState *interp)
1608916092

1609016093
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
1609116094
global configuration variables. */
16092-
if (_Py_SetFileSystemEncoding(interp->fs_codec.encoding,
16093-
interp->fs_codec.errors) < 0) {
16095+
if (_Py_SetFileSystemEncoding(fs_codec->encoding,
16096+
fs_codec->errors) < 0) {
1609416097
PyErr_NoMemory();
1609516098
return -1;
1609616099
}
@@ -16133,15 +16136,14 @@ _PyUnicode_InitEncodings(PyThreadState *tstate)
1613316136

1613416137

1613516138
static void
16136-
_PyUnicode_FiniEncodings(PyThreadState *tstate)
16139+
_PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec)
1613716140
{
16138-
PyInterpreterState *interp = tstate->interp;
16139-
PyMem_RawFree(interp->fs_codec.encoding);
16140-
interp->fs_codec.encoding = NULL;
16141-
interp->fs_codec.utf8 = 0;
16142-
PyMem_RawFree(interp->fs_codec.errors);
16143-
interp->fs_codec.errors = NULL;
16144-
interp->fs_codec.error_handler = _Py_ERROR_UNKNOWN;
16141+
PyMem_RawFree(fs_codec->encoding);
16142+
fs_codec->encoding = NULL;
16143+
fs_codec->utf8 = 0;
16144+
PyMem_RawFree(fs_codec->errors);
16145+
fs_codec->errors = NULL;
16146+
fs_codec->error_handler = _Py_ERROR_UNKNOWN;
1614516147
}
1614616148

1614716149

@@ -16199,7 +16201,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
1619916201
unicode_clear_static_strings();
1620016202
}
1620116203

16202-
_PyUnicode_FiniEncodings(tstate);
16204+
_PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
1620316205
}
1620416206

1620516207

0 commit comments

Comments
 (0)