Skip to content

Commit 47e1afd

Browse files
authored
bpo-1635741: _PyUnicode_Name_CAPI moves to internal C API (GH-22713)
The private _PyUnicode_Name_CAPI structure of the PyCapsule API unicodedata.ucnhash_CAPI moves to the internal C API. Moreover, the structure gets a new state member which must be passed to the getcode() and getname() functions.
* Move Include/ucnhash.h to Include/internal/pycore_ucnhash.h.
* The unicodedata module is now built with Py_BUILD_CORE_MODULE.
* unicodedata: move the hashAPI variable into unicodedata_module_state.
1 parent b510e10 commit 47e1afd

File tree

11 files changed

+74
-49
lines changed

11 files changed

+74
-49
lines changed

Doc/whatsnew/3.10.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,12 @@ Porting to Python 3.10
407407
Unicode object without initial data.
408408
(Contributed by Inada Naoki in :issue:`36346`.)
409409

410+
* The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
411+
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover,
412+
the structure gets a new ``state`` member which must be passed to the
413+
``getcode()`` and ``getname()`` functions.
414+
(Contributed by Victor Stinner in :issue:`1635741`.)
415+
410416
Deprecated
411417
----------
412418

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
/* Unicode name database interface */
2-
#ifndef Py_LIMITED_API
3-
#ifndef Py_UCNHASH_H
4-
#define Py_UCNHASH_H
2+
#ifndef Py_INTERNAL_UCNHASH_H
3+
#define Py_INTERNAL_UCNHASH_H
54
#ifdef __cplusplus
65
extern "C" {
76
#endif
87

8+
#ifndef Py_BUILD_CORE
9+
# error "this header requires Py_BUILD_CORE define"
10+
#endif
11+
912
/* revised ucnhash CAPI interface (exported through a "wrapper") */
1013

1114
#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
@@ -15,22 +18,27 @@ typedef struct {
1518
/* Size of this struct */
1619
int size;
1720

21+
// state which must be passed as the first parameter to getname()
22+
// and getcode()
23+
void *state;
24+
1825
/* Get name for a given character code. Returns non-zero if
1926
success, zero if not. Does not set Python exceptions.
2027
If self is NULL, data come from the default version of the database.
2128
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
22-
int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
29+
int (*getname)(void *state, PyObject *self, Py_UCS4 code,
30+
char* buffer, int buflen,
2331
int with_alias_and_seq);
2432

2533
/* Get character code for a given name. Same error handling
2634
as for getname. */
27-
int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code,
35+
int (*getcode)(void *state, PyObject *self,
36+
const char* name, int namelen, Py_UCS4* code,
2837
int with_named_seq);
2938

3039
} _PyUnicode_Name_CAPI;
3140

3241
#ifdef __cplusplus
3342
}
3443
#endif
35-
#endif /* !Py_UCNHASH_H */
36-
#endif /* !Py_LIMITED_API */
44+
#endif /* !Py_INTERNAL_UCNHASH_H */

Makefile.pre.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,6 @@ PYTHON_HEADERS= \
10651065
$(srcdir)/Include/traceback.h \
10661066
$(srcdir)/Include/tracemalloc.h \
10671067
$(srcdir)/Include/tupleobject.h \
1068-
$(srcdir)/Include/ucnhash.h \
10691068
$(srcdir)/Include/unicodeobject.h \
10701069
$(srcdir)/Include/warnings.h \
10711070
$(srcdir)/Include/weakrefobject.h \
@@ -1129,6 +1128,7 @@ PYTHON_HEADERS= \
11291128
$(srcdir)/Include/internal/pycore_sysmodule.h \
11301129
$(srcdir)/Include/internal/pycore_traceback.h \
11311130
$(srcdir)/Include/internal/pycore_tuple.h \
1131+
$(srcdir)/Include/internal/pycore_ucnhash.h \
11321132
$(srcdir)/Include/internal/pycore_unionobject.h \
11331133
$(srcdir)/Include/internal/pycore_warnings.h \
11341134
$(DTRACE_HEADERS)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
2+
``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
3+
structure gets a new ``state`` member which must be passed to the
4+
``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.

Modules/Setup

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ _symtable symtablemodule.c
185185
#_json -I$(srcdir)/Include/internal -DPy_BUILD_CORE_BUILTIN _json.c # _json speedups
186186
#_statistics _statisticsmodule.c # statistics accelerator
187187

188-
#unicodedata unicodedata.c # static Unicode character database
188+
#unicodedata unicodedata.c -DPy_BUILD_CORE_BUILTIN # static Unicode character database
189189

190190

191191
# Modules with some UNIX dependencies -- on by default:

Modules/unicodedata.c

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#define PY_SSIZE_T_CLEAN
1717

1818
#include "Python.h"
19-
#include "ucnhash.h"
19+
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
2020
#include "structmember.h" // PyMemberDef
2121

2222
#include <stdbool.h>
@@ -97,6 +97,8 @@ typedef struct {
9797
// Borrowed reference to &UCD_Type. It is used to prepare the code
9898
// to convert the UCD_Type static type to a heap type.
9999
PyTypeObject *ucd_type;
100+
101+
_PyUnicode_Name_CAPI capi;
100102
} unicodedata_module_state;
101103

102104
// bpo-1635741: Temporary global state until the unicodedata module
@@ -1180,10 +1182,11 @@ _getucname(unicodedata_module_state *state, PyObject *self,
11801182
}
11811183

11821184
static int
1183-
capi_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
1185+
capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code,
1186+
char* buffer, int buflen,
11841187
int with_alias_and_seq)
11851188
{
1186-
unicodedata_module_state *state = &global_module_state;
1189+
unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
11871190
return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);
11881191

11891192
}
@@ -1323,21 +1326,15 @@ _getcode(unicodedata_module_state *state, PyObject* self,
13231326
}
13241327

13251328
static int
1326-
capi_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
1329+
capi_getcode(void *state_raw, PyObject* self,
1330+
const char* name, int namelen, Py_UCS4* code,
13271331
int with_named_seq)
13281332
{
1329-
unicodedata_module_state *state = &global_module_state;
1333+
unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
13301334
return _getcode(state, self, name, namelen, code, with_named_seq);
13311335

13321336
}
13331337

1334-
static const _PyUnicode_Name_CAPI hashAPI =
1335-
{
1336-
sizeof(_PyUnicode_Name_CAPI),
1337-
capi_getucname,
1338-
capi_getcode
1339-
};
1340-
13411338
/* -------------------------------------------------------------------- */
13421339
/* Python bindings */
13431340

@@ -1510,6 +1507,11 @@ PyInit_unicodedata(void)
15101507
PyObject *m, *v;
15111508
unicodedata_module_state *state = &global_module_state;
15121509

1510+
state->capi.size = sizeof(_PyUnicode_Name_CAPI);
1511+
state->capi.state = state;
1512+
state->capi.getname = capi_getucname;
1513+
state->capi.getcode = capi_getcode;
1514+
15131515
Py_SET_TYPE(&UCD_Type, &PyType_Type);
15141516
state->ucd_type = &UCD_Type;
15151517

@@ -1528,7 +1530,7 @@ PyInit_unicodedata(void)
15281530
PyModule_AddObject(m, "ucd_3_2_0", v);
15291531

15301532
/* Export C API */
1531-
v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL);
1533+
v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
15321534
if (v != NULL)
15331535
PyModule_AddObject(m, "ucnhash_CAPI", v);
15341536
return m;

Objects/unicodeobject.c

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4040

4141
#define PY_SSIZE_T_CLEAN
4242
#include "Python.h"
43-
#include "pycore_abstract.h" // _PyIndex_Check()
44-
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
45-
#include "pycore_initconfig.h" // _PyStatus_OK()
46-
#include "pycore_interp.h" // PyInterpreterState.fs_codec
47-
#include "pycore_object.h" // _PyObject_GC_TRACK()
48-
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
49-
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
50-
#include "pycore_pystate.h" // _PyInterpreterState_GET()
51-
#include "ucnhash.h" // _PyUnicode_Name_CAPI
52-
#include "stringlib/eq.h" // unicode_eq()
43+
#include "pycore_abstract.h" // _PyIndex_Check()
44+
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
45+
#include "pycore_initconfig.h" // _PyStatus_OK()
46+
#include "pycore_interp.h" // PyInterpreterState.fs_codec
47+
#include "pycore_object.h" // _PyObject_GC_TRACK()
48+
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
49+
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
50+
#include "pycore_pystate.h" // _PyInterpreterState_GET()
51+
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
52+
#include "stringlib/eq.h" // unicode_eq()
5353

5454
#ifdef MS_WINDOWS
5555
#include <windows.h>
@@ -6344,7 +6344,7 @@ PyUnicode_AsUTF16String(PyObject *unicode)
63446344

63456345
/* --- Unicode Escape Codec ----------------------------------------------- */
63466346

6347-
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
6347+
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
63486348

63496349
PyObject *
63506350
_PyUnicode_DecodeUnicodeEscape(const char *s,
@@ -6497,11 +6497,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
64976497

64986498
/* \N{name} */
64996499
case 'N':
6500-
if (ucnhash_CAPI == NULL) {
6500+
if (ucnhash_capi == NULL) {
65016501
/* load the unicode data module */
6502-
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
6502+
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
65036503
PyUnicodeData_CAPSULE_NAME, 1);
6504-
if (ucnhash_CAPI == NULL) {
6504+
if (ucnhash_capi == NULL) {
65056505
PyErr_SetString(
65066506
PyExc_UnicodeError,
65076507
"\\N escapes not supported (can't load unicodedata module)"
@@ -6523,7 +6523,8 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
65236523
s++;
65246524
ch = 0xffffffff; /* in case 'getcode' messes up */
65256525
if (namelen <= INT_MAX &&
6526-
ucnhash_CAPI->getcode(NULL, start, (int)namelen,
6526+
ucnhash_capi->getcode(ucnhash_capi->state, NULL,
6527+
start, (int)namelen,
65276528
&ch, 0)) {
65286529
assert(ch <= MAX_UNICODE);
65296530
WRITE_CHAR(ch);

PCbuild/pythoncore.vcxproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@
196196
<ClInclude Include="..\Include\internal\pycore_sysmodule.h" />
197197
<ClInclude Include="..\Include\internal\pycore_traceback.h" />
198198
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
199+
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
199200
<ClInclude Include="..\Include\internal\pycore_unionobject.h" />
200201
<ClInclude Include="..\Include\internal\pycore_warnings.h" />
201202
<ClInclude Include="..\Include\interpreteridobject.h" />
@@ -252,7 +253,6 @@
252253
<ClInclude Include="..\Include\traceback.h" />
253254
<ClInclude Include="..\Include\tracemalloc.h" />
254255
<ClInclude Include="..\Include\tupleobject.h" />
255-
<ClInclude Include="..\Include\ucnhash.h" />
256256
<ClInclude Include="..\Include\unicodeobject.h" />
257257
<ClInclude Include="..\Include\weakrefobject.h" />
258258
<ClInclude Include="..\Modules\_math.h" />

PCbuild/pythoncore.vcxproj.filters

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,9 +273,6 @@
273273
<ClInclude Include="..\Include\tupleobject.h">
274274
<Filter>Include</Filter>
275275
</ClInclude>
276-
<ClInclude Include="..\Include\ucnhash.h">
277-
<Filter>Include</Filter>
278-
</ClInclude>
279276
<ClInclude Include="..\Include\unicodeobject.h">
280277
<Filter>Include</Filter>
281278
</ClInclude>
@@ -573,6 +570,9 @@
573570
<ClInclude Include="..\Include\internal\pycore_tuple.h">
574571
<Filter>Include\internal</Filter>
575572
</ClInclude>
573+
<ClInclude Include="..\Include\internal\pycore_ucnhash.h">
574+
<Filter>Include\internal</Filter>
575+
</ClInclude>
576576
<ClInclude Include="..\Include\internal\pycore_unionobject.h">
577577
<Filter>Include\internal</Filter>
578578
</ClInclude>

Python/codecs.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
1111
#include "Python.h"
1212
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
1313
#include "pycore_pystate.h" // _PyInterpreterState_GET()
14-
#include "ucnhash.h"
14+
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
1515
#include <ctype.h>
1616

1717
const char *Py_hexdigits = "0123456789abcdef";
@@ -954,7 +954,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
954954
return Py_BuildValue("(Nn)", res, end);
955955
}
956956

957-
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
957+
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
958958

959959
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
960960
{
@@ -976,17 +976,19 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
976976
return NULL;
977977
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
978978
return NULL;
979-
if (!ucnhash_CAPI) {
979+
if (!ucnhash_capi) {
980980
/* load the unicode data module */
981-
ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
981+
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
982982
PyUnicodeData_CAPSULE_NAME, 1);
983-
if (!ucnhash_CAPI)
983+
if (!ucnhash_capi) {
984984
return NULL;
985+
}
985986
}
986987
for (i = start, ressize = 0; i < end; ++i) {
987988
/* object is guaranteed to be "ready" */
988989
c = PyUnicode_READ_CHAR(object, i);
989-
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
990+
if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
991+
c, buffer, sizeof(buffer), 1)) {
990992
replsize = 1+1+1+(int)strlen(buffer)+1;
991993
}
992994
else if (c >= 0x10000) {
@@ -1009,7 +1011,8 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
10091011
i < end; ++i) {
10101012
c = PyUnicode_READ_CHAR(object, i);
10111013
*outp++ = '\\';
1012-
if (ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
1014+
if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
1015+
c, buffer, sizeof(buffer), 1)) {
10131016
*outp++ = 'N';
10141017
*outp++ = '{';
10151018
strcpy((char *)outp, buffer);

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,8 @@ def detect_simple_extensions(self):
878878
self.add(Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']))
879879
# static Unicode character database
880880
self.add(Extension('unicodedata', ['unicodedata.c'],
881-
depends=['unicodedata_db.h', 'unicodename_db.h']))
881+
depends=['unicodedata_db.h', 'unicodename_db.h'],
882+
extra_compile_args=['-DPy_BUILD_CORE_MODULE']))
882883
# _opcode module
883884
self.add(Extension('_opcode', ['_opcode.c']))
884885
# asyncio speedups

0 commit comments

Comments
 (0)