Skip to content

Commit e561e98

Browse files
authored
GH-109329: Add tier 2 stats (GH-109913)
1 parent f786029 commit e561e98

File tree

9 files changed

+483
-128
lines changed

9 files changed

+483
-128
lines changed

Include/cpython/pystats.h

+24-4
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,6 @@ typedef struct _object_stats {
8686
uint64_t type_cache_dunder_hits;
8787
uint64_t type_cache_dunder_misses;
8888
uint64_t type_cache_collisions;
89-
uint64_t optimization_attempts;
90-
uint64_t optimization_traces_created;
91-
uint64_t optimization_traces_executed;
92-
uint64_t optimization_uops_executed;
9389
/* Temporary value used during GC */
9490
uint64_t object_visits;
9591
} ObjectStats;
@@ -100,10 +96,34 @@ typedef struct _gc_stats {
10096
uint64_t objects_collected;
10197
} GCStats;
10298

99+
typedef struct _uop_stats {
100+
uint64_t execution_count;
101+
} UOpStats;
102+
103+
#define _Py_UOP_HIST_SIZE 32
104+
105+
typedef struct _optimization_stats {
106+
uint64_t attempts;
107+
uint64_t traces_created;
108+
uint64_t traces_executed;
109+
uint64_t uops_executed;
110+
uint64_t trace_stack_overflow;
111+
uint64_t trace_stack_underflow;
112+
uint64_t trace_too_long;
113+
uint64_t inner_loop;
114+
uint64_t recursive_call;
115+
UOpStats opcode[512];
116+
uint64_t unsupported_opcode[256];
117+
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
118+
uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
119+
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
120+
} OptimizationStats;
121+
103122
typedef struct _stats {
104123
OpcodeStats opcode_stats[256];
105124
CallStats call_stats;
106125
ObjectStats object_stats;
126+
OptimizationStats optimization_stats;
107127
GCStats *gc_stats;
108128
} PyStats;
109129

Include/internal/pycore_code.h

+15
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,17 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
282282
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
283283
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
284284
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
285+
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
286+
#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
287+
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
288+
#define OPT_HIST(length, name) \
289+
do { \
290+
if (_Py_stats) { \
291+
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
292+
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
293+
_Py_stats->optimization_stats.name[bucket]++; \
294+
} \
295+
} while (0)
285296

286297
// Export for '_opcode' shared extension
287298
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -296,6 +307,10 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
296307
#define EVAL_CALL_STAT_INC(name) ((void)0)
297308
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
298309
#define GC_STAT_ADD(gen, name, n) ((void)0)
310+
#define OPT_STAT_INC(name) ((void)0)
311+
#define UOP_EXE_INC(opname) ((void)0)
312+
#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
313+
#define OPT_HIST(length, name) ((void)0)
299314
#endif // !Py_STATS
300315

301316
// Utility functions for reading/writing 32/64-bit values in the inline caches.

Python/bytecodes.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -2278,7 +2278,7 @@ dummy_func(
22782278
// Double-check that the opcode isn't instrumented or something:
22792279
here->op.code == JUMP_BACKWARD)
22802280
{
2281-
OBJECT_STAT_INC(optimization_attempts);
2281+
OPT_STAT_INC(attempts);
22822282
int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
22832283
ERROR_IF(optimized < 0, error);
22842284
if (optimized) {
@@ -3965,6 +3965,7 @@ dummy_func(
39653965
frame->prev_instr--; // Back up to just before destination
39663966
_PyFrame_SetStackPointer(frame, stack_pointer);
39673967
Py_DECREF(self);
3968+
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
39683969
return frame;
39693970
}
39703971

Python/executor.c

+23-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "opcode.h"
44

5+
#include "pycore_bitutils.h"
56
#include "pycore_call.h"
67
#include "pycore_ceval.h"
78
#include "pycore_dict.h"
@@ -30,6 +31,16 @@
3031
goto deoptimize; \
3132
}
3233

34+
#ifdef Py_STATS
35+
// Disable these macros that apply to Tier 1 stats when we are in Tier 2
36+
#undef STAT_INC
37+
#define STAT_INC(opname, name) ((void)0)
38+
#undef STAT_DEC
39+
#define STAT_DEC(opname, name) ((void)0)
40+
#undef CALL_STAT_INC
41+
#define CALL_STAT_INC(name) ((void)0)
42+
#endif
43+
3344
#undef ENABLE_SPECIALIZATION
3445
#define ENABLE_SPECIALIZATION 0
3546

@@ -62,12 +73,15 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
6273

6374
CHECK_EVAL_BREAKER();
6475

65-
OBJECT_STAT_INC(optimization_traces_executed);
76+
OPT_STAT_INC(traces_executed);
6677
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
6778
int pc = 0;
6879
int opcode;
6980
int oparg;
7081
uint64_t operand;
82+
#ifdef Py_STATS
83+
uint64_t trace_uop_execution_counter = 0;
84+
#endif
7185

7286
for (;;) {
7387
opcode = self->trace[pc].opcode;
@@ -81,7 +95,12 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
8195
operand,
8296
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
8397
pc++;
84-
OBJECT_STAT_INC(optimization_uops_executed);
98+
OPT_STAT_INC(uops_executed);
99+
UOP_EXE_INC(opcode);
100+
#ifdef Py_STATS
101+
trace_uop_execution_counter++;
102+
#endif
103+
85104
switch (opcode) {
86105

87106
#include "executor_cases.c.h"
@@ -114,6 +133,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
114133
// On ERROR_IF we return NULL as the frame.
115134
// The caller recovers the frame from tstate->current_frame.
116135
DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
136+
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
117137
_PyFrame_SetStackPointer(frame, stack_pointer);
118138
Py_DECREF(self);
119139
return NULL;
@@ -122,6 +142,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
122142
// On DEOPT_IF we just repeat the last instruction.
123143
// This presumes nothing was popped from the stack (nor pushed).
124144
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
145+
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
125146
frame->prev_instr--; // Back up to just before destination
126147
_PyFrame_SetStackPointer(frame, stack_pointer);
127148
Py_DECREF(self);

Python/executor_cases.c.h

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/generated_cases.c.h

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

+11-2
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ translate_bytecode_to_trace(
461461
if (trace_length + (n) > max_length) { \
462462
DPRINTF(2, "No room for %s (need %d, got %d)\n", \
463463
(opname), (n), max_length - trace_length); \
464+
OPT_STAT_INC(trace_too_long); \
464465
goto done; \
465466
} \
466467
reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest
@@ -472,6 +473,7 @@ translate_bytecode_to_trace(
472473
#define TRACE_STACK_PUSH() \
473474
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
474475
DPRINTF(2, "Trace stack overflow\n"); \
476+
OPT_STAT_INC(trace_stack_overflow); \
475477
ADD_TO_TRACE(_SET_IP, 0, 0); \
476478
goto done; \
477479
} \
@@ -572,6 +574,7 @@ translate_bytecode_to_trace(
572574
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0);
573575
}
574576
else {
577+
OPT_STAT_INC(inner_loop);
575578
DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
576579
}
577580
goto done;
@@ -638,7 +641,9 @@ translate_bytecode_to_trace(
638641
// LOAD_CONST + _POP_FRAME.
639642
if (trace_stack_depth == 0) {
640643
DPRINTF(2, "Trace stack underflow\n");
641-
goto done;}
644+
OPT_STAT_INC(trace_stack_underflow);
645+
goto done;
646+
}
642647
}
643648
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
644649
for (int i = 0; i < nuops; i++) {
@@ -713,6 +718,7 @@ translate_bytecode_to_trace(
713718
PyUnicode_AsUTF8(new_code->co_qualname),
714719
PyUnicode_AsUTF8(new_code->co_filename),
715720
new_code->co_firstlineno);
721+
OPT_STAT_INC(recursive_call);
716722
ADD_TO_TRACE(_SET_IP, 0, 0);
717723
goto done;
718724
}
@@ -744,6 +750,7 @@ translate_bytecode_to_trace(
744750
break;
745751
}
746752
DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
753+
OPT_UNSUPPORTED_OPCODE(opcode);
747754
goto done; // Break out of loop
748755
} // End default
749756

@@ -891,7 +898,8 @@ uop_optimize(
891898
// Error or nothing translated
892899
return trace_length;
893900
}
894-
OBJECT_STAT_INC(optimization_traces_created);
901+
OPT_HIST(trace_length, trace_length_hist);
902+
OPT_STAT_INC(traces_created);
895903
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
896904
if (uop_optimize != NULL && *uop_optimize > '0') {
897905
trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
@@ -901,6 +909,7 @@ uop_optimize(
901909
if (executor == NULL) {
902910
return -1;
903911
}
912+
OPT_HIST(trace_length, optimized_trace_length_hist);
904913
executor->base.execute = _PyUopExecute;
905914
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
906915
*exec_ptr = (_PyExecutorObject *)executor;

Python/specialize.c

+50-4
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats)
199199
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
200200
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
201201
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
202-
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
203-
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
204-
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
205-
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
206202
}
207203

208204
static void
@@ -215,13 +211,63 @@ print_gc_stats(FILE *out, GCStats *stats)
215211
}
216212
}
217213

214+
static void
215+
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
216+
{
217+
for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
218+
fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
219+
}
220+
}
221+
222+
static void
223+
print_optimization_stats(FILE *out, OptimizationStats *stats)
224+
{
225+
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
226+
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
227+
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
228+
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
229+
fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
230+
fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
231+
fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
232+
fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
233+
fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
234+
235+
print_histogram(out, "Trace length", stats->trace_length_hist);
236+
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
237+
print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
238+
239+
const char* const* names;
240+
for (int i = 0; i < 512; i++) {
241+
if (i < 256) {
242+
names = _PyOpcode_OpName;
243+
} else {
244+
names = _PyOpcode_uop_name;
245+
}
246+
if (stats->opcode[i].execution_count) {
247+
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
248+
}
249+
}
250+
251+
for (int i = 0; i < 256; i++) {
252+
if (stats->unsupported_opcode[i]) {
253+
fprintf(
254+
out,
255+
"unsupported_opcode[%s].count : %" PRIu64 "\n",
256+
_PyOpcode_OpName[i],
257+
stats->unsupported_opcode[i]
258+
);
259+
}
260+
}
261+
}
262+
218263
static void
219264
print_stats(FILE *out, PyStats *stats)
220265
{
221266
print_spec_stats(out, stats->opcode_stats);
222267
print_call_stats(out, &stats->call_stats);
223268
print_object_stats(out, &stats->object_stats);
224269
print_gc_stats(out, stats->gc_stats);
270+
print_optimization_stats(out, &stats->optimization_stats);
225271
}
226272

227273
void

0 commit comments

Comments
 (0)