
gh-109329: Support for basic pystats for Tier 2 #109913


Merged: 17 commits, Oct 4, 2023

28 changes: 24 additions & 4 deletions Include/cpython/pystats.h
@@ -86,10 +86,6 @@ typedef struct _object_stats {
uint64_t type_cache_dunder_hits;
uint64_t type_cache_dunder_misses;
uint64_t type_cache_collisions;
- uint64_t optimization_attempts;
- uint64_t optimization_traces_created;
- uint64_t optimization_traces_executed;
- uint64_t optimization_uops_executed;
/* Temporary value used during GC */
uint64_t object_visits;
} ObjectStats;
@@ -100,10 +96,34 @@ typedef struct _gc_stats {
uint64_t objects_collected;
} GCStats;

typedef struct _uop_stats {
uint64_t execution_count;
} UOpStats;

#define _Py_UOP_HIST_SIZE 32

typedef struct _optimization_stats {
uint64_t attempts;
uint64_t traces_created;
uint64_t traces_executed;
uint64_t uops_executed;
uint64_t trace_stack_overflow;
uint64_t trace_stack_underflow;
uint64_t trace_too_long;
uint64_t inner_loop;
uint64_t recursive_call;
UOpStats opcode[512];
uint64_t unsupported_opcode[256];
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
} OptimizationStats;

typedef struct _stats {
OpcodeStats opcode_stats[256];
CallStats call_stats;
ObjectStats object_stats;
OptimizationStats optimization_stats;
GCStats *gc_stats;
} PyStats;

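
As a reading aid for the new struct, here is a small, self-contained sketch of how a consumer might walk the per-uop table. It assumes, as print_optimization_stats() later in this PR does, that slots 0-255 correspond to Tier 1 opcode numbers and slots 256-511 to Tier 2 uop numbers; OptimizationStatsView, lookup_name() and the values in main() are illustrative stand-ins, not CPython API.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Trimmed stand-in for OptimizationStats: only the per-uop table and the
 * unsupported-opcode table from the header above. */
typedef struct { uint64_t execution_count; } UOpStats;
typedef struct {
    UOpStats opcode[512];             /* 0-255: Tier 1 opcodes, 256-511: uops */
    uint64_t unsupported_opcode[256]; /* indexed by Tier 1 opcode number */
} OptimizationStatsView;

static const char *lookup_name(int i)
{
    /* Placeholder: CPython would consult _PyOpcode_OpName for i < 256 and
     * _PyOpcode_uop_name for larger indices. */
    return i < 256 ? "<tier-1 opcode>" : "<tier-2 uop>";
}

static void dump_uop_counts(const OptimizationStatsView *stats)
{
    for (int i = 0; i < 512; i++) {
        if (stats->opcode[i].execution_count) {
            printf("uops[%s (%d)]: %" PRIu64 "\n",
                   lookup_name(i), i, stats->opcode[i].execution_count);
        }
    }
}

int main(void)
{
    OptimizationStatsView stats = {0};
    stats.opcode[300].execution_count = 42;  /* made-up uop number and count */
    dump_uop_counts(&stats);
    return 0;
}
```
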
15 changes: 15 additions & 0 deletions Include/internal/pycore_code.h
@@ -282,6 +282,17 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
#define OPT_HIST(length, name) \
do { \
if (_Py_stats) { \
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
_Py_stats->optimization_stats.name[bucket]++; \
} \
} while (0)

// Export for '_opcode' shared extension
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -296,6 +307,10 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
#define EVAL_CALL_STAT_INC(name) ((void)0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
#define GC_STAT_ADD(gen, name, n) ((void)0)
#define OPT_STAT_INC(name) ((void)0)
#define UOP_EXE_INC(opname) ((void)0)
#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
#define OPT_HIST(length, name) ((void)0)
#endif // !Py_STATS

// Utility functions for reading/writing 32/64-bit values in the inline caches.
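
OPT_HIST buckets a length into a power-of-two histogram slot using _Py_bit_length(length - 1). The standalone sketch below illustrates just that bucketing rule, with a local bit_length() helper standing in for _Py_bit_length(); it is not the real macro.

```c
#include <stdint.h>
#include <stdio.h>

#define UOP_HIST_SIZE 32  /* mirrors _Py_UOP_HIST_SIZE */

/* Local stand-in for _Py_bit_length(): number of bits needed to represent
 * x, with bit_length(0) == 0. */
static int bit_length(uint64_t x)
{
    int bits = 0;
    while (x) {
        bits++;
        x >>= 1;
    }
    return bits;
}

/* Same bucketing rule as OPT_HIST: bucket 0 holds lengths 0 and 1, bucket i
 * (i > 0) holds lengths in (2**(i-1), 2**i], and anything past the last
 * bucket is clamped into it. */
static int hist_bucket(uint64_t length)
{
    int bucket = bit_length(length >= 1 ? length - 1 : 0);
    return bucket >= UOP_HIST_SIZE ? UOP_HIST_SIZE - 1 : bucket;
}

int main(void)
{
    uint64_t samples[] = {1, 2, 3, 4, 16, 17, 100};
    for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        printf("length %3llu -> bucket %d\n",
               (unsigned long long)samples[i], hist_bucket(samples[i]));
    }
    return 0;
}
```
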
3 changes: 2 additions & 1 deletion Python/bytecodes.c
@@ -2278,7 +2278,7 @@ dummy_func(
// Double-check that the opcode isn't instrumented or something:
here->op.code == JUMP_BACKWARD)
{
- OBJECT_STAT_INC(optimization_attempts);
OPT_STAT_INC(attempts);
int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
ERROR_IF(optimized < 0, error);
if (optimized) {
@@ -3965,6 +3965,7 @@ dummy_func(
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
return frame;
}

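
The counters touched here and in Python/optimizer.c below form a funnel: attempts is bumped whenever a hot JUMP_BACKWARD hands control to _PyOptimizer_BackEdge, traces_created only once bytecode has successfully been translated into a trace, and the executed counters accumulate at run time in Tier 2. A rough sketch of ratios a report could derive from them (the struct and the numbers are invented for illustration):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative mirror of four of the new counters; the values in main()
 * exist only to show the arithmetic. */
typedef struct {
    uint64_t attempts;        /* hot JUMP_BACKWARD reached the optimizer */
    uint64_t traces_created;  /* translation produced a trace            */
    uint64_t traces_executed; /* Tier 2 runs started                     */
    uint64_t uops_executed;   /* individual uops run in Tier 2           */
} OptFunnel;

int main(void)
{
    OptFunnel f = {.attempts = 120, .traces_created = 95,
                   .traces_executed = 40000, .uops_executed = 900000};
    if (f.attempts && f.traces_executed) {
        printf("traces created per attempt:  %.2f\n",
               (double)f.traces_created / (double)f.attempts);
        printf("uops executed per trace run: %.1f\n",
               (double)f.uops_executed / (double)f.traces_executed);
    }
    return 0;
}
```
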
25 changes: 23 additions & 2 deletions Python/executor.c
@@ -2,6 +2,7 @@

#include "opcode.h"

#include "pycore_bitutils.h"
#include "pycore_call.h"
#include "pycore_ceval.h"
#include "pycore_dict.h"
@@ -30,6 +31,16 @@
goto deoptimize; \
}

#ifdef Py_STATS
// Disable these macros that apply to Tier 1 stats when we are in Tier 2
#undef STAT_INC
#define STAT_INC(opname, name) ((void)0)
#undef STAT_DEC
#define STAT_DEC(opname, name) ((void)0)
#undef CALL_STAT_INC
#define CALL_STAT_INC(name) ((void)0)
#endif

#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION 0

@@ -62,12 +73,15 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject

CHECK_EVAL_BREAKER();

- OBJECT_STAT_INC(optimization_traces_executed);
OPT_STAT_INC(traces_executed);
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
int pc = 0;
int opcode;
int oparg;
uint64_t operand;
#ifdef Py_STATS
uint64_t trace_uop_execution_counter = 0;
#endif

for (;;) {
opcode = self->trace[pc].opcode;
@@ -81,7 +95,12 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
pc++;
- OBJECT_STAT_INC(optimization_uops_executed);
OPT_STAT_INC(uops_executed);
UOP_EXE_INC(opcode);
#ifdef Py_STATS
trace_uop_execution_counter++;
#endif

switch (opcode) {

#include "executor_cases.c.h"
@@ -114,6 +133,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
// On ERROR_IF we return NULL as the frame.
// The caller recovers the frame from tstate->current_frame.
DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
return NULL;
@@ -122,6 +142,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
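
Two things happen in this file: the Tier 1 macros (STAT_INC, STAT_DEC, CALL_STAT_INC) are redefined to no-ops so the instruction bodies shared with Tier 1 do not also feed the Tier 1 tables, and the Tier 2 counters are bumped around the dispatch loop. The toy model below shows only the latter; the bare int trace, the counters struct and record_run_length() are stand-ins, and histogram bucketing (see the OPT_HIST sketch above) is left out.

```c
#include <stdint.h>
#include <stdio.h>

/* A toy model of the accounting added to _PyUopExecute(): one bump when a
 * trace starts executing, one bump (total and per-uop) for every uop run,
 * and the run length reported on exit. */
static struct {
    uint64_t traces_executed;
    uint64_t uops_executed;
    uint64_t per_uop[512];
} counters;

static void record_run_length(uint64_t n)
{
    /* In CPython this is OPT_HIST(trace_uop_execution_counter,
     * trace_run_length_hist) and fires on every exit path: error,
     * deoptimization, and returning through _POP_FRAME. */
    printf("trace run length: %llu\n", (unsigned long long)n);
}

static void run_trace(const int *trace, int len)
{
    counters.traces_executed++;          /* OPT_STAT_INC(traces_executed) */
    uint64_t executed = 0;               /* trace_uop_execution_counter   */
    for (int pc = 0; pc < len; pc++) {
        counters.uops_executed++;        /* OPT_STAT_INC(uops_executed)   */
        counters.per_uop[trace[pc]]++;   /* UOP_EXE_INC(opcode)           */
        executed++;
        /* ... dispatch on trace[pc] here ... */
    }
    record_run_length(executed);
}

int main(void)
{
    int trace[] = {300, 301, 302};       /* made-up uop numbers */
    run_trace(trace, 3);
    return 0;
}
```
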
1 change: 1 addition & 0 deletions Python/executor_cases.c.h

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion Python/generated_cases.c.h

(Generated file; diff not rendered.)

13 changes: 11 additions & 2 deletions Python/optimizer.c
@@ -461,6 +461,7 @@ translate_bytecode_to_trace(
if (trace_length + (n) > max_length) { \
DPRINTF(2, "No room for %s (need %d, got %d)\n", \
(opname), (n), max_length - trace_length); \
OPT_STAT_INC(trace_too_long); \
goto done; \
} \
reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest
@@ -472,6 +473,7 @@
#define TRACE_STACK_PUSH() \
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
DPRINTF(2, "Trace stack overflow\n"); \
OPT_STAT_INC(trace_stack_overflow); \
ADD_TO_TRACE(_SET_IP, 0, 0); \
goto done; \
} \
@@ -572,6 +574,7 @@ translate_bytecode_to_trace(
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0);
}
else {
OPT_STAT_INC(inner_loop);
DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
}
goto done;
@@ -638,7 +641,9 @@ translate_bytecode_to_trace(
// LOAD_CONST + _POP_FRAME.
if (trace_stack_depth == 0) {
DPRINTF(2, "Trace stack underflow\n");
- goto done;}
OPT_STAT_INC(trace_stack_underflow);
goto done;
}
}
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
for (int i = 0; i < nuops; i++) {
@@ -713,6 +718,7 @@
PyUnicode_AsUTF8(new_code->co_qualname),
PyUnicode_AsUTF8(new_code->co_filename),
new_code->co_firstlineno);
OPT_STAT_INC(recursive_call);
ADD_TO_TRACE(_SET_IP, 0, 0);
goto done;
}
@@ -744,6 +750,7 @@
break;
}
DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
OPT_UNSUPPORTED_OPCODE(opcode);
goto done; // Break out of loop
} // End default

@@ -891,7 +898,8 @@ uop_optimize(
// Error or nothing translated
return trace_length;
}
- OBJECT_STAT_INC(optimization_traces_created);
OPT_HIST(trace_length, trace_length_hist);
OPT_STAT_INC(traces_created);
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
if (uop_optimize != NULL && *uop_optimize > '0') {
trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
@@ -901,6 +909,7 @@
if (executor == NULL) {
return -1;
}
OPT_HIST(trace_length, optimized_trace_length_hist);
executor->base.execute = _PyUopExecute;
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
*exec_ptr = (_PyExecutorObject *)executor;
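
For reference, the abort reasons that translate_bytecode_to_trace() can now attribute, gathered in one place. The TraceAbortCounters struct and note_unsupported() helper are hypothetical; in the real code each counter is bumped inline with OPT_STAT_INC(...) or OPT_UNSUPPORTED_OPCODE(...) right before the corresponding goto done, and the comments below only paraphrase the conditions visible in this diff.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical grouping of the per-reason counters added above. */
typedef struct {
    uint64_t trace_stack_overflow;    /* inlining depth reached TRACE_STACK_SIZE        */
    uint64_t trace_stack_underflow;   /* _POP_FRAME with nothing left on the trace stack */
    uint64_t trace_too_long;          /* ran out of room in the uop buffer               */
    uint64_t inner_loop;              /* JUMP_BACKWARD that does not target the loop top */
    uint64_t recursive_call;          /* projected call target is recursive              */
    uint64_t unsupported_opcode[256]; /* per Tier 1 opcode with no uop expansion         */
} TraceAbortCounters;

static void note_unsupported(TraceAbortCounters *c, int opcode)
{
    /* Mirrors OPT_UNSUPPORTED_OPCODE(opname): indexed by Tier 1 opcode. */
    if (0 <= opcode && opcode < 256) {
        c->unsupported_opcode[opcode]++;
    }
}

int main(void)
{
    TraceAbortCounters c = {0};
    c.trace_too_long++;          /* as if the uop buffer had filled up  */
    note_unsupported(&c, 123);   /* hypothetical Tier 1 opcode number   */
    printf("too long: %llu, unsupported[123]: %llu\n",
           (unsigned long long)c.trace_too_long,
           (unsigned long long)c.unsupported_opcode[123]);
    return 0;
}
```
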
54 changes: 50 additions & 4 deletions Python/specialize.c
@@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats)
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
}

static void
@@ -215,13 +211,63 @@ print_gc_stats(FILE *out, GCStats *stats)
}
}

static void
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
{
for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
}
}

static void
print_optimization_stats(FILE *out, OptimizationStats *stats)
{
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);

print_histogram(out, "Trace length", stats->trace_length_hist);
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);

const char* const* names;
for (int i = 0; i < 512; i++) {
if (i < 256) {
names = _PyOpcode_OpName;
} else {
names = _PyOpcode_uop_name;
}
if (stats->opcode[i].execution_count) {
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
}
}

for (int i = 0; i < 256; i++) {
if (stats->unsupported_opcode[i]) {
fprintf(
out,
"unsupported_opcode[%s].count : %" PRIu64 "\n",
_PyOpcode_OpName[i],
stats->unsupported_opcode[i]
);
}
}
}

static void
print_stats(FILE *out, PyStats *stats)
{
print_spec_stats(out, stats->opcode_stats);
print_call_stats(out, &stats->call_stats);
print_object_stats(out, &stats->object_stats);
print_gc_stats(out, stats->gc_stats);
print_optimization_stats(out, &stats->optimization_stats);
}

void
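
A note on reading the histogram output: print_histogram() labels bucket i with 1 << i, which, combined with the OPT_HIST bucketing rule, makes each label the inclusive upper bound of its bucket. The snippet below is a local copy of the printing loop with made-up counts; unlike the real function it skips empty buckets so the demo output stays short.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define UOP_HIST_SIZE 32  /* mirrors _Py_UOP_HIST_SIZE */

/* Same label convention as print_histogram() above: bucket i is printed as
 * name[1 << i].  A line like "Trace length[16]: 7" therefore means seven
 * traces whose length fell in (8, 16]. */
static void print_histogram_demo(FILE *out, const char *name,
                                 const uint64_t hist[UOP_HIST_SIZE])
{
    for (int i = 0; i < UOP_HIST_SIZE; i++) {
        if (hist[i] == 0) {
            continue;  /* the real version prints every bucket */
        }
        fprintf(out, "%s[%" PRIu64 "]: %" PRIu64 "\n",
                name, (uint64_t)1 << i, hist[i]);
    }
}

int main(void)
{
    uint64_t trace_length_hist[UOP_HIST_SIZE] = {0};
    trace_length_hist[3] = 12;   /* invented: traces of length 5..8  */
    trace_length_hist[4] = 7;    /* invented: traces of length 9..16 */
    print_histogram_demo(stdout, "Trace length", trace_length_hist);
    return 0;
}
```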