From 5906d65b21b97f1dcf78409eb365345a48a40f29 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Mon, 28 Mar 2022 09:11:21 +0000 Subject: [PATCH 1/2] share global strings in deepfreeze --- Modules/_io/textio.c | 1 + Modules/_pickle.c | 2 +- Objects/unicodeobject.c | 1 + Python/compile.c | 3 +++ Tools/scripts/deepfreeze.py | 4 +++- Tools/scripts/generate_global_objects.py | 19 ++++++------------- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d9d1c881418742..e856a541c4384b 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1989,6 +1989,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) if (chunks != NULL) { if (result != NULL && PyList_Append(chunks, result) < 0) goto fail; + _Py_DECLARE_STR(empty, ""); Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks)); if (result == NULL) goto fail; diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 84f469dee99840..a5595eb10c8f10 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1812,7 +1812,7 @@ get_dotted_path(PyObject *obj, PyObject *name) { PyObject *dotted_path; Py_ssize_t i, n; - + _Py_DECLARE_STR(dot, "."); dotted_path = PyUnicode_Split(name, &_Py_STR(dot), -1); if (dotted_path == NULL) return NULL; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ce3ebce1ff72d0..0ae03776447cde 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -257,6 +257,7 @@ static int unicode_is_singleton(PyObject *unicode); // Return a borrowed reference to the empty string singleton. static inline PyObject* unicode_get_empty(void) { + _Py_DECLARE_STR(empty, ""); return &_Py_STR(empty); } diff --git a/Python/compile.c b/Python/compile.c index e24f425229b6af..1c5cf4ad92ec50 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -720,6 +720,7 @@ compiler_set_qualname(struct compiler *c) } if (base != NULL) { + _Py_DECLARE_STR(empty, ""); name = PyUnicode_Concat(base, &_Py_STR(dot)); Py_DECREF(base); if (name == NULL) @@ -3841,6 +3842,7 @@ compiler_from_import(struct compiler *c, stmt_ty s) ADDOP_NAME(c, IMPORT_NAME, s->v.ImportFrom.module, names); } else { + _Py_DECLARE_STR(empty, ""); ADDOP_NAME(c, IMPORT_NAME, &_Py_STR(empty), names); } for (i = 0; i < n; i++) { @@ -4781,6 +4783,7 @@ compiler_joined_str(struct compiler *c, expr_ty e) Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values); if (value_count > STACK_USE_GUIDELINE) { + _Py_DECLARE_STR(empty, ""); ADDOP_LOAD_CONST_NEW(c, &_Py_STR(empty)); ADDOP_NAME(c, LOAD_METHOD, &_Py_ID(join), names); ADDOP_I(c, BUILD_LIST, 0); diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 1831c15784af77..698f90ed1492f9 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -18,7 +18,7 @@ from generate_global_objects import get_identifiers_and_strings verbose = False -identifiers = get_identifiers_and_strings()[0] +identifiers, strings = get_identifiers_and_strings() def isprintable(b: bytes) -> bool: return all(0x20 <= c < 0x7f for c in b) @@ -168,6 +168,8 @@ def generate_bytes(self, name: str, b: bytes) -> str: return f"& {name}.ob_base.ob_base" def generate_unicode(self, name: str, s: str) -> str: + if s in strings: + return f"&_Py_STR({strings[s]})" if s in identifiers: return f"&_Py_ID({s})" kind, ascii = analyze_character_width(s) diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index f7653604e822b2..20e36f771d6173 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -1,20 +1,13 @@ import contextlib -import glob import io import os.path import re -import sys - __file__ = os.path.abspath(__file__) ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) INTERNAL = os.path.join(ROOT, 'Include', 'internal') -STRING_LITERALS = { - 'empty': '', - 'dot': '.', -} IGNORED = { 'ACTION', # Python/_warnings.c 'ATTR', # Python/_warnings.c and Objects/funcobject.c @@ -206,7 +199,7 @@ def generate_global_strings(identifiers, strings): printer.write(START) with printer.block('struct _Py_global_strings', ';'): with printer.block('struct', ' literals;'): - for name, literal in sorted(strings.items()): + for literal, name in sorted(strings.items(), key=lambda x: x[1]): printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') outfile.write('\n') with printer.block('struct', ' identifiers;'): @@ -271,7 +264,7 @@ def generate_runtime_init(identifiers, strings): # Global strings. with printer.block('.strings =', ','): with printer.block('.literals =', ','): - for name, literal in sorted(strings.items()): + for literal, name in sorted(strings.items(), key=lambda x: x[1]): printer.write(f'INIT_STR({name}, "{literal}"),') with printer.block('.identifiers =', ','): for name in sorted(identifiers): @@ -292,15 +285,15 @@ def generate_runtime_init(identifiers, strings): def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]': identifiers = set(IDENTIFIERS) - strings = dict(STRING_LITERALS) + strings = {} for name, string, *_ in iter_global_strings(): if string is None: if name not in IGNORED: identifiers.add(name) else: - if name not in strings: - strings[name] = string - elif string != strings[name]: + if string not in strings: + strings[string] = name + elif name != strings[string]: raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}') return identifiers, strings From fc87870837761f56b4c76535d30ef492d309a672 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Mon, 28 Mar 2022 09:15:36 +0000 Subject: [PATCH 2/2] fix compile.c --- Python/compile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index 1c5cf4ad92ec50..2fe94822c723a6 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -720,7 +720,7 @@ compiler_set_qualname(struct compiler *c) } if (base != NULL) { - _Py_DECLARE_STR(empty, ""); + _Py_DECLARE_STR(dot, "."); name = PyUnicode_Concat(base, &_Py_STR(dot)); Py_DECREF(base); if (name == NULL)