Skip to content

bpo-46712: share more global strings in deepfreeze #32152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 3 commits on Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Modules/_io/textio.c
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
if (chunks != NULL) {
if (result != NULL && PyList_Append(chunks, result) < 0)
goto fail;
_Py_DECLARE_STR(empty, "");
Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
if (result == NULL)
goto fail;
Expand Down
2 changes: 1 addition & 1 deletion Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,7 @@ get_dotted_path(PyObject *obj, PyObject *name)
{
PyObject *dotted_path;
Py_ssize_t i, n;

_Py_DECLARE_STR(dot, ".");
dotted_path = PyUnicode_Split(name, &_Py_STR(dot), -1);
if (dotted_path == NULL)
return NULL;
Expand Down
1 change: 1 addition & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ static int unicode_is_singleton(PyObject *unicode);
// Return a borrowed reference to the empty string singleton.
//
// The singleton is the global string named `empty`, reached through
// _Py_STR(empty). Its address is returned as-is: this function performs no
// reference-count change, so callers must not release the result.
static inline PyObject* unicode_get_empty(void)
{
// NOTE(review): presumably a declaration marker that lets
// Tools/scripts/generate_global_objects.py discover the `empty` literal
// ("") in this file -- confirm against the macro's definition.
_Py_DECLARE_STR(empty, "");
return &_Py_STR(empty);
}

Expand Down
3 changes: 3 additions & 0 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ compiler_set_qualname(struct compiler *c)
}

if (base != NULL) {
_Py_DECLARE_STR(dot, ".");
name = PyUnicode_Concat(base, &_Py_STR(dot));
Py_DECREF(base);
if (name == NULL)
Expand Down Expand Up @@ -3945,6 +3946,7 @@ compiler_from_import(struct compiler *c, stmt_ty s)
ADDOP_NAME(c, IMPORT_NAME, s->v.ImportFrom.module, names);
}
else {
_Py_DECLARE_STR(empty, "");
ADDOP_NAME(c, IMPORT_NAME, &_Py_STR(empty), names);
}
for (i = 0; i < n; i++) {
Expand Down Expand Up @@ -4885,6 +4887,7 @@ compiler_joined_str(struct compiler *c, expr_ty e)

Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values);
if (value_count > STACK_USE_GUIDELINE) {
_Py_DECLARE_STR(empty, "");
ADDOP_LOAD_CONST_NEW(c, &_Py_STR(empty));
ADDOP_NAME(c, LOAD_METHOD, &_Py_ID(join), names);
ADDOP_I(c, BUILD_LIST, 0);
Expand Down
4 changes: 3 additions & 1 deletion Tools/scripts/deepfreeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from generate_global_objects import get_identifiers_and_strings

verbose = False
identifiers = get_identifiers_and_strings()[0]
identifiers, strings = get_identifiers_and_strings()

def isprintable(b: bytes) -> bool:
    """Return True if every byte of *b* is printable ASCII.

    A byte counts as printable when it lies in the range 0x20 (space)
    through 0x7e ('~') inclusive.  An empty bytes object is considered
    printable, since there is no offending byte.
    """
    return all(0x20 <= c < 0x7f for c in b)
Expand Down Expand Up @@ -168,6 +168,8 @@ def generate_bytes(self, name: str, b: bytes) -> str:
return f"& {name}.ob_base.ob_base"

def generate_unicode(self, name: str, s: str) -> str:
if s in strings:
return f"&_Py_STR({strings[s]})"
if s in identifiers:
return f"&_Py_ID({s})"
if re.match(r'\A[A-Za-z0-9_]+\Z', s):
Expand Down
19 changes: 6 additions & 13 deletions Tools/scripts/generate_global_objects.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
import contextlib
import glob
import io
import os.path
import re
import sys


__file__ = os.path.abspath(__file__)
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
INTERNAL = os.path.join(ROOT, 'Include', 'internal')


STRING_LITERALS = {
'empty': '',
'dot': '.',
}
IGNORED = {
'ACTION', # Python/_warnings.c
'ATTR', # Python/_warnings.c and Objects/funcobject.c
Expand Down Expand Up @@ -211,7 +204,7 @@ def generate_global_strings(identifiers, strings):
printer.write(START)
with printer.block('struct _Py_global_strings', ';'):
with printer.block('struct', ' literals;'):
for name, literal in sorted(strings.items()):
for literal, name in sorted(strings.items(), key=lambda x: x[1]):
printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
outfile.write('\n')
with printer.block('struct', ' identifiers;'):
Expand Down Expand Up @@ -276,7 +269,7 @@ def generate_runtime_init(identifiers, strings):
# Global strings.
with printer.block('.strings =', ','):
with printer.block('.literals =', ','):
for name, literal in sorted(strings.items()):
for literal, name in sorted(strings.items(), key=lambda x: x[1]):
printer.write(f'INIT_STR({name}, "{literal}"),')
with printer.block('.identifiers =', ','):
for name in sorted(identifiers):
Expand All @@ -297,15 +290,15 @@ def generate_runtime_init(identifiers, strings):

def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the global identifiers and string literals that are in use.

    Walks everything yielded by iter_global_strings() and sorts each entry
    into one of two collections.

    Returns:
        A pair ``(identifiers, strings)``:

        * ``identifiers`` -- a set of names, seeded from IDENTIFIERS, plus
          every yielded name that has no literal and is not in IGNORED.
        * ``strings`` -- a dict mapping each string literal to the name it
          was declared under (literal -> name, not name -> literal).

    Raises:
        ValueError: if the same literal is declared under two different
            names.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in iter_global_strings():
        if string is None:
            # No literal attached: this is a plain identifier.
            if name not in IGNORED:
                identifiers.add(name)
        elif string not in strings:
            strings[string] = name
        elif name != strings[string]:
            # The mapping is keyed by the literal, so the previously seen
            # name must be looked up with `string`; indexing with `name`
            # would raise KeyError and hide the real problem.
            raise ValueError(
                f'string mismatch for {name!r} '
                f'({string!r} is already declared as {strings[string]!r})'
            )
    return identifiers, strings

Expand Down