From 954fd688137ec09d88ab1437d44c442a00816a08 Mon Sep 17 00:00:00 2001 From: Mohamed Koubaa Date: Mon, 6 Jul 2020 21:07:36 -0500 Subject: [PATCH 1/4] port unicodedata to multi-phase init --- Modules/unicodedata.c | 157 +++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 79 deletions(-) diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 8a1198a2b712d9..8ede48f21ad66d 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -26,6 +26,18 @@ _Py_IDENTIFIER(NFD); _Py_IDENTIFIER(NFKC); _Py_IDENTIFIER(NFKD); +typedef struct { + PyTypeObject *ucd_type; +} UnicodeDataState; + +static inline UnicodeDataState* +unicodedata_get_state(PyObject *module) +{ + void *state = PyModule_GetState(module); + assert(state != NULL); + return (UnicodeDataState *)state; +} + /*[clinic input] module unicodedata class unicodedata.UCD 'PreviousDBVersion *' '&UCD_Type' @@ -90,22 +102,28 @@ static PyMemberDef DB_members[] = { {NULL} }; -/* forward declaration */ -static PyTypeObject UCD_Type; -#define UCD_Check(o) Py_IS_TYPE(o, &UCD_Type) +inline int UCD_Check(PyObject *o) +{ + UnicodeDataState *state; + state = PyType_GetModuleState(Py_TYPE(o)); + return Py_IS_TYPE(o, state->ucd_type); +} static PyObject* -new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4), +new_previous_version(PyObject *module, const char*name, const change_record* (*getrecord)(Py_UCS4), Py_UCS4 (*normalization)(Py_UCS4)) { - PreviousDBVersion *self; - self = PyObject_New(PreviousDBVersion, &UCD_Type); - if (self == NULL) - return NULL; - self->name = name; - self->getrecord = getrecord; - self->normalization = normalization; - return (PyObject*)self; + UnicodeDataState* st; + PreviousDBVersion *self; + + st = unicodedata_get_state(module); + self = PyObject_New(PreviousDBVersion, st->ucd_type); + if (self == NULL) + return NULL; + self->name = name; + self->getrecord = getrecord; + self->normalization = normalization; + return (PyObject*)self; } @@ 
-1384,50 +1402,19 @@ static PyMethodDef unicodedata_functions[] = { {NULL, NULL} /* sentinel */ }; -static PyTypeObject UCD_Type = { - /* The ob_type field must be initialized in the module init function - * to be portable to Windows without using C++. */ - PyVarObject_HEAD_INIT(NULL, 0) - "unicodedata.UCD", /*tp_name*/ - sizeof(PreviousDBVersion), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)PyObject_Del, /*tp_dealloc*/ - 0, /*tp_vectorcall_offset*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_as_async*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - PyObject_GenericGetAttr,/*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - unicodedata_functions, /*tp_methods*/ - DB_members, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - 0, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ +static PyType_Slot unicodedata_ucd_type_slots[] = { + {Py_tp_dealloc, PyObject_Del}, + {Py_tp_getattro, PyObject_GenericGetAttr}, + {Py_tp_methods, unicodedata_functions}, + {Py_tp_members, DB_members}, + {0,0} +}; + +static PyType_Spec unicodedata_ucd_type_spec = { + .name = "unicodedata.UCD", + .basicsize = sizeof(PreviousDBVersion), + .flags = Py_TPFLAGS_DEFAULT, + .slots = unicodedata_ucd_type_slots }; PyDoc_STRVAR(unicodedata_docstring, @@ -1439,35 +1426,28 @@ this database is based on the UnicodeData.txt file version\n\ The module uses the same names and symbols as defined by the\n\ UnicodeData File Format " UNIDATA_VERSION "."); -static struct PyModuleDef unicodedatamodule = { - PyModuleDef_HEAD_INIT, - "unicodedata", - unicodedata_docstring, - -1, - 
unicodedata_functions, - NULL, - NULL, - NULL, - NULL -}; - -PyMODINIT_FUNC -PyInit_unicodedata(void) +static int unicodedata_exec(PyObject *m) { - PyObject *m, *v; + PyObject *v; + UnicodeDataState* st; - Py_SET_TYPE(&UCD_Type, &PyType_Type); + st = unicodedata_get_state(m); - m = PyModule_Create(&unicodedatamodule); - if (!m) - return NULL; + st->ucd_type = (PyTypeObject *)PyType_FromModuleAndSpec(m, &unicodedata_ucd_type_spec, NULL); + if (st->ucd_type == NULL) { + return -1; + } PyModule_AddStringConstant(m, "unidata_version", UNIDATA_VERSION); - Py_INCREF(&UCD_Type); - PyModule_AddObject(m, "UCD", (PyObject*)&UCD_Type); + + Py_INCREF(st->ucd_type); + if (PyModule_AddObject(m, "UCD", (PyObject*)st->ucd_type) < 0) { + Py_DECREF(st->ucd_type); + return -1; + } /* Previous versions */ - v = new_previous_version("3.2.0", get_change_3_2_0, normalization_3_2_0); + v = new_previous_version(m, "3.2.0", get_change_3_2_0, normalization_3_2_0); if (v != NULL) PyModule_AddObject(m, "ucd_3_2_0", v); @@ -1475,7 +1455,26 @@ PyInit_unicodedata(void) v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL); if (v != NULL) PyModule_AddObject(m, "ucnhash_CAPI", v); - return m; + return 0; +} + +static PyModuleDef_Slot unicodedata_slots[] = { + {Py_mod_exec, unicodedata_exec}, + {0, NULL} +}; + +static struct PyModuleDef unicodedata_module = { + PyModuleDef_HEAD_INIT, + .m_name = "unicodedata", + .m_size = sizeof(UnicodeDataState), + .m_methods = unicodedata_functions, + .m_slots = unicodedata_slots, +}; + +PyMODINIT_FUNC +PyInit_unicodedata(void) +{ + return PyModuleDef_Init(&unicodedata_module); } /* From 0b3b0c0442e047b668ae91c3fcfaf52638056941 Mon Sep 17 00:00:00 2001 From: Mohamed Koubaa Date: Tue, 7 Jul 2020 09:35:26 -0500 Subject: [PATCH 2/4] fix type check --- Modules/unicodedata.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 8ede48f21ad66d..e0c258dbf1c75b 100644 --- 
a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -102,12 +102,7 @@ static PyMemberDef DB_members[] = { {NULL} }; -inline int UCD_Check(PyObject *o) -{ - UnicodeDataState *state; - state = PyType_GetModuleState(Py_TYPE(o)); - return Py_IS_TYPE(o, state->ucd_type); -} +#define UCD_Check(o) PyModule_Check(o) ? 0 : 1 static PyObject* new_previous_version(PyObject *module, const char*name, const change_record* (*getrecord)(Py_UCS4), @@ -1439,7 +1434,7 @@ static int unicodedata_exec(PyObject *m) } PyModule_AddStringConstant(m, "unidata_version", UNIDATA_VERSION); - + Py_INCREF(st->ucd_type); if (PyModule_AddObject(m, "UCD", (PyObject*)st->ucd_type) < 0) { Py_DECREF(st->ucd_type); From 751081e4f12770640a880ce0b6acd71f26830bd2 Mon Sep 17 00:00:00 2001 From: Mohamed Koubaa Date: Tue, 7 Jul 2020 11:40:51 -0500 Subject: [PATCH 3/4] add NEWS entry and make UCD_Check a static inline function --- .../2020-07-07-11-40-34.bpo-1635461.Yq22Ql.rst | 1 + Modules/unicodedata.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-07-07-11-40-34.bpo-1635461.Yq22Ql.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-07-07-11-40-34.bpo-1635461.Yq22Ql.rst b/Misc/NEWS.d/next/Core and Builtins/2020-07-07-11-40-34.bpo-1635461.Yq22Ql.rst new file mode 100644 index 00000000000000..7a7251113ba37c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-07-07-11-40-34.bpo-1635461.Yq22Ql.rst @@ -0,0 +1 @@ +Port :mod:`unicodedata` to multiphase initialization diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index e0c258dbf1c75b..ec2ca91cc2f2cc 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -102,7 +102,9 @@ static PyMemberDef DB_members[] = { {NULL} }; -#define UCD_Check(o) PyModule_Check(o) ? 0 : 1 +static inline int UCD_Check(PyObject *o) { + return PyModule_Check(o) ?
0 : 1; +} static PyObject* new_previous_version(PyObject *module, const char*name, const change_record* (*getrecord)(Py_UCS4), From 1cba0bc10899e04e2a5c12b449d882fb1d8aa91a Mon Sep 17 00:00:00 2001 From: Mohamed Koubaa Date: Tue, 7 Jul 2020 21:42:30 -0500 Subject: [PATCH 4/4] propagate errors from unicodedata_exec and set m_doc --- Modules/unicodedata.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index ec2ca91cc2f2cc..2bd1cbeede1fc2 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1426,9 +1426,8 @@ UnicodeData File Format " UNIDATA_VERSION "."); static int unicodedata_exec(PyObject *m) { PyObject *v; - UnicodeDataState* st; - st = unicodedata_get_state(m); + UnicodeDataState *st = unicodedata_get_state(m); st->ucd_type = (PyTypeObject *)PyType_FromModuleAndSpec(m, &unicodedata_ucd_type_spec, NULL); if (st->ucd_type == NULL) { @@ -1445,13 +1444,25 @@ static int unicodedata_exec(PyObject *m) /* Previous versions */ v = new_previous_version(m, "3.2.0", get_change_3_2_0, normalization_3_2_0); - if (v != NULL) - PyModule_AddObject(m, "ucd_3_2_0", v); + if (v == NULL) + return -1; + + if (PyModule_AddObject(m, "ucd_3_2_0", v) < 0) { + Py_DECREF(v); + return -1; + } /* Export C API */ v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME, NULL); - if (v != NULL) - PyModule_AddObject(m, "ucnhash_CAPI", v); + if (v == NULL) + return -1; + + if (PyModule_AddObject(m, "ucnhash_CAPI", v) < 0) + { + Py_DECREF(v); + return -1; + } + return 0; } @@ -1463,6 +1474,7 @@ static PyModuleDef_Slot unicodedata_slots[] = { static struct PyModuleDef unicodedata_module = { PyModuleDef_HEAD_INIT, .m_name = "unicodedata", + .m_doc = unicodedata_docstring, .m_size = sizeof(UnicodeDataState), .m_methods = unicodedata_functions, .m_slots = unicodedata_slots,