Skip to content

Commit 097fc40

Browse files
Add _PyMarshal_WriteForFreezing().
1 parent 82b4f85 commit 097fc40

File tree

140 files changed

+4687
-4575
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+4687
-4575
lines changed

Programs/_freeze_module.c

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
#include <Python.h>
1111
#include <marshal.h>
12+
#include <pycore_pystate.h> // _PyInterpreterState_GET()
13+
#include "pycore_interp.h" // PyInterpreterState
1214

1315
#include <stdio.h>
1416
#include <sys/types.h>
@@ -99,19 +101,110 @@ read_text(const char *inpath)
99101
return (const char *)text;
100102
}
101103

104+
static PyObject *
105+
get_ref_counts(void)
106+
{
107+
// The only objects where this currently matters are interned strings.
108+
// (See https://github.com/python/cpython/pull/28107#issuecomment-915627148.)
109+
// So for now we only get ref counts for those.
110+
PyObject *counts = PyDict_New();
111+
if (counts == NULL) {
112+
return NULL;
113+
}
114+
PyInterpreterState *interp = _PyInterpreterState_GET();
115+
if (interp->unicode.interned == NULL) {
116+
return counts;
117+
}
118+
// Remember the refcounts.
119+
PyObject *obj, *_value;
120+
Py_ssize_t pos = 0;
121+
while (PyDict_Next(interp->unicode.interned, &pos, &obj, &_value)) {
122+
PyObject *count = PyLong_FromLongLong(obj->ob_refcnt);
123+
if (count == NULL) {
124+
Py_DECREF(counts);
125+
return NULL;
126+
}
127+
int res = PyDict_SetItem(counts, obj, count);
128+
Py_DECREF(count);
129+
if (res != 0) {
130+
Py_DECREF(counts);
131+
return NULL;
132+
}
133+
}
134+
return counts;
135+
}
136+
137+
/* Build the set of objects whose refcount grew by exactly one since the
 * snapshot in `before` was taken.
 *
 * `before` is a dict mapping objects to ints holding their earlier
 * refcounts (as produced by get_ref_counts()).  Returns a new set, or NULL
 * on failure.
 */
static PyObject *
get_nonref_objects(PyObject *before)
{
    PyObject *nonref = PySet_New(NULL);
    if (nonref == NULL) {
        return NULL;
    }
    PyObject *obj, *count;
    Py_ssize_t pos = 0;
    while (PyDict_Next(before, &pos, &obj, &count)) {
        // The stored value is a refcount, i.e. a Py_ssize_t; convert it
        // with the matching API so no implicit narrowing takes place.
        Py_ssize_t old = PyLong_AsSsize_t(count);
        if (old == -1 && PyErr_Occurred()) {
            Py_DECREF(nonref);
            return NULL;
        }
        // We take into account the reference "before" holds: a growth of
        // two means one reference from "before" plus exactly one new one.
        if (Py_REFCNT(obj) - old == 2) {
            if (PySet_Add(nonref, obj) != 0) {
                Py_DECREF(nonref);
                return NULL;
            }
        }
    }
    return nonref;
}
162+
163+
// Declared locally instead of via an included header.  compile_and_marshal()
// calls it with the compiled code object and the set built by
// get_nonref_objects().
// NOTE(review): presumably defined in Python/marshal.c -- confirm.
extern PyObject* _PyMarshal_WriteForFreezing(PyObject *code, PyObject *nonref);
164+
102165
static PyObject *
103166
compile_and_marshal(const char *name, const char *text)
104167
{
168+
// To avoid unnecessary duplication during marshaling, all "complex"
169+
// objects get stored in a lookup table that allows them to be
170+
// referenced by later entries using an integer ID. However, this
171+
// optimization is not applied if the refcount is exactly 1.
172+
//
173+
// The problem is that any objects cached during runtime
174+
// initialization will get re-used in the code object during
175+
// compilation. This means some objects will have a refcount > 1,
176+
// even though there is only one instance within the code object.
177+
// This is problematic if this happens inconsistently, such as only
178+
// in non-debug builds (e.g. init_filters() in Python/_warnings.c);
179+
// the generated marshal data we are freezing will be different even
180+
// though the Python code hasn't changed.
181+
//
182+
// We address this by giving marshal the set of objects with
183+
// refcount > 1 that actually only get used once in the code object.
184+
// This mostly consists of interned strings and small integers.
185+
PyObject *refs_before = get_ref_counts();
186+
if (refs_before == NULL) {
187+
return NULL;
188+
}
189+
105190
char *filename = (char *) malloc(strlen(name) + 10);
106191
sprintf(filename, "<frozen %s>", name);
107192
PyObject *code = Py_CompileStringExFlags(text, filename,
108193
Py_file_input, NULL, 0);
109194
free(filename);
110195
if (code == NULL) {
196+
Py_DECREF(refs_before);
197+
return NULL;
198+
}
199+
200+
PyObject *nonref = get_nonref_objects(refs_before);
201+
Py_DECREF(refs_before);
202+
if (nonref == NULL) {
111203
return NULL;
112204
}
113205

114-
PyObject *marshalled = PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION);
206+
PyObject *marshalled = _PyMarshal_WriteForFreezing(code, nonref);
207+
Py_DECREF(nonref);
115208
Py_CLEAR(code);
116209
if (marshalled == NULL) {
117210
return NULL;

Python/frozen_modules/MANIFEST

Lines changed: 140 additions & 140 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)