Skip to content

gh-100403: Collect GC statistics via -X option #100958

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
1 change: 1 addition & 0 deletions Include/cpython/initconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ typedef struct PyConfig {
int use_frozen_modules;
int safe_path;
int int_max_str_digits;
wchar_t *gc_stats_file;

/* --- Path configuration inputs ------------ */
int pathconfig_warnings;
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ struct _gc_runtime_state {
collections, and are awaiting to undergo a full collection for
the first time. */
Py_ssize_t long_lived_pending;
int advanced_stats;
};


Expand Down
1 change: 0 additions & 1 deletion Include/pystats.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ typedef struct _stats {
ObjectStats object_stats;
} PyStats;


Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spurious edit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sir Bedevere obtained new powers?

PyAPI_DATA(PyStats) _py_stats_struct;
PyAPI_DATA(PyStats *) _py_stats;

Expand Down
145 changes: 144 additions & 1 deletion Modules/gcmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "pycore_pyerrors.h"
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pydtrace.h"
#include "pystats.h"

typedef struct _gc_runtime_state GCState;

Expand All @@ -39,6 +40,17 @@ module gc
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/

/* Process-wide record of garbage collections, filled in by
   gc_collect_main() when advanced GC statistics are enabled.

   Every PyObject member is a list created by _PyInitGCStats(); entry i of
   each list describes the i-th recorded collection. */
typedef struct _gc_stats {
    /* Incremented once for every collection that gets recorded. */
    size_t n_collections;
    /* Generation passed to gc_collect_main() (ints). */
    PyObject *generation_number;
    /* Size of the young list before unreachable objects are deduced (ints). */
    PyObject *total_objects;
    /* Uncollectable object counts (ints). */
    PyObject *uncollectable;
    /* Collected object counts (ints). */
    PyObject *collected_cycles;
    /* Duration of each collection, in seconds (floats). */
    PyObject *collection_time;
} PyGCStats;

/* All list pointers start out NULL until _PyInitGCStats() allocates them. */
PyGCStats _pygc_stats_struct = {0};


#ifdef Py_DEBUG
# define GC_DEBUG
Expand Down Expand Up @@ -135,6 +147,32 @@ get_gc_state(void)
return &interp->gc;
}

/* Return 1 when the current interpreter (PyInterpreterState_Get()) is the
   main interpreter (PyInterpreterState_Main()), and 0 otherwise. */
static inline int
is_main_interpreter(void)
{
    if (PyInterpreterState_Get() != PyInterpreterState_Main()) {
        return 0;
    }
    return 1;
}

/* Allocate the lists of _pygc_stats_struct that hold the per-collection
   GC statistics.

   Only the main interpreter owns the statistics; when called from any other
   interpreter this is a no-op that reports success.

   Returns 0 on success and -1 if a list could not be allocated.  On failure
   every list that had already been created is released again, so the global
   structure is never left half-initialized. */
static int
_PyInitGCStats(void)
{
    if (!is_main_interpreter()) {
        return 0;
    }

/* Create one empty list and untrack it from the GC, so that the collector
   does not visit the container it is currently appending to. */
#define INIT_FIELD(field) \
    do { \
        _pygc_stats_struct.field = PyList_New(0); \
        if (_pygc_stats_struct.field == NULL) { \
            goto error; \
        } \
        _PyObject_GC_UNTRACK(_pygc_stats_struct.field); \
    } while (0)

    INIT_FIELD(generation_number);
    INIT_FIELD(total_objects);
    INIT_FIELD(uncollectable);
    INIT_FIELD(collected_cycles);
    INIT_FIELD(collection_time);
#undef INIT_FIELD
    return 0;

error:
    /* Py_CLEAR() accepts NULL, so fields that were never reached are fine. */
    Py_CLEAR(_pygc_stats_struct.generation_number);
    Py_CLEAR(_pygc_stats_struct.total_objects);
    Py_CLEAR(_pygc_stats_struct.uncollectable);
    Py_CLEAR(_pygc_stats_struct.collected_cycles);
    Py_CLEAR(_pygc_stats_struct.collection_time);
    return -1;
}

void
_PyGC_InitState(GCState *gcstate)
Expand All @@ -151,8 +189,15 @@ _PyGC_InitState(GCState *gcstate)
};
gcstate->generation0 = GEN_HEAD(gcstate, 0);
INIT_HEAD(gcstate->permanent_generation);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spurious edit.

#undef INIT_HEAD

char *s = Py_GETENV("PYTHONGCTHRESHOLD");
if (s) {
long n = atol(s);
if (n > 0) {
gcstate->generations[0].threshold = n;
}
}
}


Expand All @@ -171,6 +216,13 @@ _PyGC_Init(PyInterpreterState *interp)
return _PyStatus_NO_MEMORY();
}

if (_Py_GetConfig()->gc_stats_file) {
if(_PyInitGCStats()) {
Py_FatalError("Could not initialize GC stats");
}
gcstate->advanced_stats = 1;
}

return _PyStatus_OK();
}

Expand Down Expand Up @@ -1192,6 +1244,13 @@ gc_collect_main(PyThreadState *tstate, int generation,
PyGC_Head *gc;
_PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
GCState *gcstate = &tstate->interp->gc;
_PyTime_t gc_t1 = 0;
_PyTime_t gc_t2 = 0;
Py_ssize_t t = 0;

if (gcstate->advanced_stats) {
gc_t1 = _PyTime_GetPerfCounter();
}

// gc_collect_main() must not be called before _PyGC_Init
// or after _PyGC_Fini()
Expand Down Expand Up @@ -1226,6 +1285,10 @@ gc_collect_main(PyThreadState *tstate, int generation,
old = young;
validate_list(old, collecting_clear_unreachable_clear);

if (gcstate->advanced_stats) {
t = gc_list_size(young);
}

deduce_unreachable(young, &unreachable);

untrack_tuples(young);
Expand Down Expand Up @@ -1324,6 +1387,31 @@ gc_collect_main(PyThreadState *tstate, int generation,
_PyErr_WriteUnraisableMsg("in garbage collection", NULL);
}
}
if (gcstate->advanced_stats) {
gc_t2 = _PyTime_GetPerfCounter();
/* Convert `elem` with `converter_fn` and append the resulting object to the
   statistics list `_pygc_stats_struct.field`.

   A failed conversion (NULL result) or a failed append (PyList_Append()
   returns a negative value) is reported as an unraisable error; nothing is
   reported when the element was stored successfully.  The temporary
   reference is always released. */
#define ADD_ELEMENT(field, elem, converter_fn) \
    do { \
        PyObject *item = converter_fn(elem); \
        if (item == NULL \
            || PyList_Append(_pygc_stats_struct.field, item) < 0) { \
            _PyErr_WriteUnraisableMsg("in garbage collection", NULL); \
        } \
        Py_XDECREF(item); \
    } while (0)

if (_pygc_stats_struct.generation_number) {
_pygc_stats_struct.n_collections++;
ADD_ELEMENT(generation_number, generation, PyLong_FromLong);
ADD_ELEMENT(total_objects, t, PyLong_FromSsize_t);
ADD_ELEMENT(uncollectable, n, PyLong_FromSsize_t);
ADD_ELEMENT(collected_cycles, m, PyLong_FromSsize_t);
double d = _PyTime_AsSecondsDouble(gc_t2 - gc_t1);
ADD_ELEMENT(collection_time, d, PyFloat_FromDouble);
}
#undef ADD_ELEMENT
}

/* Update stats */
if (n_collected) {
Expand Down Expand Up @@ -2168,11 +2256,66 @@ gc_fini_untrack(PyGC_Head *list)
}
}

static void
print_stats(FILE *out) {
#define WRITE_ITEM(collection, index, type, converter_fn, str_format) { \
PyObject* item = PyList_GET_ITEM(_pygc_stats_struct.collection, index); \
if (!item) { \
_PyErr_WriteUnraisableMsg(" when writing gc stats", NULL); \
} \
type num = converter_fn(item); \
if (!num && PyErr_Occurred()) { \
_PyErr_WriteUnraisableMsg(" when writing gc stats", NULL); \
} \
fprintf(out, str_format, num); } \

fprintf(out, "collection,generation_number,total_objects,uncollectable,collected_cycles,collection_time\n");
for (size_t i = 0; i < _pygc_stats_struct.n_collections; i++) {
fprintf(out, "%zd", i);
WRITE_ITEM(generation_number, i, long, PyLong_AsLong, "%zd,");
WRITE_ITEM(total_objects, i, long, PyLong_AsLong, "%zd,");
WRITE_ITEM(uncollectable, i, long, PyLong_AsLong, "%zd,");
WRITE_ITEM(collected_cycles, i, long, PyLong_AsLong, "%zd,");
WRITE_ITEM(collection_time, i, double, PyFloat_AsDouble, "%f");
fprintf(out, "\n");
}
}

/* Dump the collected GC statistics and release the storage that backs them.

   The destination is the file named by PyConfig.gc_stats_file; when that
   name is NULL or empty the statistics are written to stderr, preceded by a
   "GC stats:" banner.  If the file cannot be opened an unraisable error is
   reported and nothing is written.

   `to_file` is currently unused: the destination is derived from the
   configuration only.

   The statistics lists are released and the collection counter is reset on
   every path, including the failure to open the output file. */
void
_Py_PrintGCStats(int to_file)
{
    (void)to_file;

    FILE *out = stderr;
    wchar_t *filename = _Py_GetConfig()->gc_stats_file;
    if (filename != NULL && filename[0] != L'\0') {
        // Open a file from the gc_stats_file (wchar)
        out = _Py_wfopen(filename, L"wb");
        if (out == NULL) {
            _PyErr_WriteUnraisableMsg(" when writing gc stats", NULL);
        }
    }
    else {
        fprintf(out, "GC stats:\n");
    }

    if (out != NULL) {
        print_stats(out);
        if (out != stderr) {
            fclose(out);
        }
    }

    Py_CLEAR(_pygc_stats_struct.generation_number);
    Py_CLEAR(_pygc_stats_struct.total_objects);
    Py_CLEAR(_pygc_stats_struct.uncollectable);
    Py_CLEAR(_pygc_stats_struct.collected_cycles);
    Py_CLEAR(_pygc_stats_struct.collection_time);
    /* The lists are gone; keep the row counter in sync with them so a later
       dump or a re-initialization does not index past their end. */
    _pygc_stats_struct.n_collections = 0;
}

void
_PyGC_Fini(PyInterpreterState *interp)
{
GCState *gcstate = &interp->gc;
if (is_main_interpreter() && gcstate->advanced_stats) {
_Py_PrintGCStats(1);
}
Py_CLEAR(gcstate->garbage);
Py_CLEAR(gcstate->callbacks);

Expand Down
46 changes: 45 additions & 1 deletion Python/initconfig.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,11 @@ The following implementation-specific options are available:\n\
\n\
-X int_max_str_digits=number: limit the size of int<->str conversions.\n\
This helps avoid denial of service attacks when parsing untrusted data.\n\
The default is sys.int_info.default_max_str_digits. 0 disables."
The default is sys.int_info.default_max_str_digits. 0 disables.\n\
\n\
-X gc_stats_file=filename: activates advance statistics for the Python garbage collector\n\
and writes the results to the specified file. If no file is provided (by writting\n\
-X gc_stats_file), the results are written to stderr. The default value is \"off\"."

#ifdef Py_STATS
"\n\
Expand Down Expand Up @@ -176,6 +180,8 @@ static const char usage_envvars[] =
" and end column offset) to every instruction in code objects. This is useful \n"
" when smaller code objects and pyc files are desired as well as suppressing the \n"
" extra visual location indicators when the interpreter displays tracebacks.\n"
"PYTHONGCSTATSFILE: If this variable is set, it enables advance statistics for the \n"
" cycle garbage collector and writes them in file specified by the environment variable.\n"
"These variables have equivalent command-line parameters (see --help for details):\n"
"PYTHONDEBUG : enable parser debug mode (-d)\n"
"PYTHONDONTWRITEBYTECODE : don't write .pyc files (-B)\n"
Expand Down Expand Up @@ -703,6 +709,7 @@ config_check_consistency(const PyConfig *config)
assert(config->_is_python_build >= 0);
assert(config->safe_path >= 0);
assert(config->int_max_str_digits >= 0);
assert(config->gc_stats_file != NULL);
// config->use_frozen_modules is initialized later
// by _PyConfig_InitImportConfig().
return 1;
Expand Down Expand Up @@ -799,6 +806,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
config->int_max_str_digits = -1;
config->_is_python_build = 0;
config->code_debug_ranges = 1;
config->gc_stats_file = NULL;
}


Expand Down Expand Up @@ -1026,6 +1034,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2)
COPY_WSTRLIST(orig_argv);
COPY_ATTR(_is_python_build);
COPY_ATTR(int_max_str_digits);
COPY_WSTR_ATTR(gc_stats_file);

#undef COPY_ATTR
#undef COPY_WSTR_ATTR
Expand Down Expand Up @@ -1133,6 +1142,7 @@ _PyConfig_AsDict(const PyConfig *config)
SET_ITEM_INT(safe_path);
SET_ITEM_INT(_is_python_build);
SET_ITEM_INT(int_max_str_digits);
SET_ITEM_WSTR(gc_stats_file);

return dict;

Expand Down Expand Up @@ -1426,6 +1436,7 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict)
GET_UINT(safe_path);
GET_UINT(_is_python_build);
GET_INT(int_max_str_digits);
GET_WSTR_OPT(gc_stats_file);

#undef CHECK_VALUE
#undef GET_UINT
Expand Down Expand Up @@ -1861,6 +1872,31 @@ config_init_pycache_prefix(PyConfig *config)
"PYTHONPYCACHEPREFIX");
}

/* Initialize config->gc_stats_file, which must still be NULL on entry.

   Source of the value, in order of precedence:
     1. "-X gc_stats_file=NAME": a copy of NAME;
     2. "-X gc_stats_file" or "-X gc_stats_file=": a copy of the empty string;
     3. otherwise whatever CONFIG_GET_ENV_DUP() yields for the
        PYTHONGCSTATSFILE environment variable.

   Returns _PyStatus_NO_MEMORY() if the copy cannot be allocated, the status
   of CONFIG_GET_ENV_DUP() in case 3, and _PyStatus_OK() otherwise. */
static PyStatus
config_init_gc_stats_file(PyConfig *config)
{
    assert(config->gc_stats_file == NULL);

    const wchar_t *xoption = config_get_xoption(config, L"gc_stats_file");
    if (xoption == NULL) {
        return CONFIG_GET_ENV_DUP(config, &config->gc_stats_file,
                                  L"PYTHONGCSTATSFILE",
                                  "PYTHONGCSTATSFILE");
    }

    /* Everything after '=' is the file name; without '=' (or with nothing
       after it) the name is empty. */
    const wchar_t *sep = wcschr(xoption, L'=');
    const wchar_t *value = (sep != NULL) ? sep + 1 : L"";

    /* Always store a heap copy, never the literal itself, so the field has
       a single ownership model whichever branch set it.
       NOTE(review): presumably PyConfig string members are released with
       PyMem_RawFree() when the config is copied or cleared, in which case a
       string literal here would be passed to the allocator -- confirm. */
    config->gc_stats_file = _PyMem_RawWcsdup(value);
    if (config->gc_stats_file == NULL) {
        return _PyStatus_NO_MEMORY();
    }
    return _PyStatus_OK();
}


static PyStatus
config_read_complex_options(PyConfig *config)
Expand Down Expand Up @@ -1910,6 +1946,14 @@ config_read_complex_options(PyConfig *config)
return status;
}
}

if (config->gc_stats_file == NULL) {
status = config_init_gc_stats_file(config);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}

return _PyStatus_OK();
}

Expand Down
Loading