Skip to content

Commit 7ba5f55

Browse files
committed
gh-109329: Support for basic pystats for Tier 2
1 parent ecd813f commit 7ba5f55

File tree

8 files changed

+153
-44
lines changed

8 files changed

+153
-44
lines changed

Include/cpython/pystats.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,6 @@ typedef struct _object_stats {
8686
uint64_t type_cache_dunder_hits;
8787
uint64_t type_cache_dunder_misses;
8888
uint64_t type_cache_collisions;
89-
uint64_t optimization_attempts;
90-
uint64_t optimization_traces_created;
91-
uint64_t optimization_traces_executed;
92-
uint64_t optimization_uops_executed;
9389
/* Temporary value used during GC */
9490
uint64_t object_visits;
9591
} ObjectStats;
@@ -100,10 +96,24 @@ typedef struct _gc_stats {
10096
uint64_t objects_collected;
10197
} GCStats;
10298

99+
/* Counters kept for a single opcode slot of the Tier 2 (uop) executor.
 *
 * execution_count is bumped through UOP_EXE_INC(); any field can be bumped
 * by name through UOP_STAT_INC(opname, name).
 * NOTE(review): nothing visible here increments `miss` yet -- presumably it
 * is reserved for counting uops that bail out; confirm before relying on it.
 */
typedef struct _uop_stats {
100+
uint64_t execution_count;
101+
uint64_t miss;
102+
} UOpStats;
103+
104+
/* Counters for the Tier 2 optimizer, split out of ObjectStats.
 *
 * The scalar fields are incremented by name via OPTIMIZATION_STAT_INC() and
 * written out by print_optimization_stats().  `opcode` holds one UOpStats
 * per opcode number and is indexed directly by that number; the executor
 * asserts `opcode < 512` before indexing, so the array length and that
 * assertion must be kept in sync.
 */
typedef struct _optimization_stats {
105+
uint64_t attempts;
106+
uint64_t traces_created;
107+
uint64_t traces_executed;
108+
uint64_t uops_executed;
109+
UOpStats opcode[512];
110+
} OptimizationStats;
111+
103112
/* Top-level container for every statistics group.
 *
 * All groups are stored inline except gc_stats, which is a pointer that
 * GC_STAT_ADD() indexes by generation.
 */
typedef struct _stats {
104113
OpcodeStats opcode_stats[256];
105114
CallStats call_stats;
106115
ObjectStats object_stats;
116+
OptimizationStats optimization_stats;
107117
GCStats *gc_stats;
108118
} PyStats;
109119

Include/internal/pycore_code.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
282282
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
283283
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
284284
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
285+
/* Tier 2 optimizer counters.  Each macro does nothing when _Py_stats is NULL.
 *   OPTIMIZATION_STAT_INC(name)  - increment optimization_stats.<name>
 *   UOP_EXE_INC(opname)          - increment opcode[opname].execution_count
 *   UOP_STAT_INC(opname, name)   - increment opcode[opname].<name>
 * `opname` is used as a raw array index with no bounds check here; callers
 * must pass a value that is valid for optimization_stats.opcode[].
 */
#define OPTIMIZATION_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
286+
#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
287+
#define UOP_STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].name++; } while (0)
285288

286289
// Export for '_opcode' shared extension
287290
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -296,6 +299,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
296299
#define EVAL_CALL_STAT_INC(name) ((void)0)
297300
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
298301
#define GC_STAT_ADD(gen, name, n) ((void)0)
302+
/* Stats disabled (the !Py_STATS branch): the Tier 2 counters expand to a
 * no-op expression and their arguments are not evaluated. */
#define OPTIMIZATION_STAT_INC(name) ((void)0)
303+
#define UOP_EXE_INC(opname) ((void)0)
304+
#define UOP_STAT_INC(opname, name) ((void)0)
299305
#endif // !Py_STATS
300306

301307
// Utility functions for reading/writing 32/64-bit values in the inline caches.

Python/bytecodes.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2244,7 +2244,7 @@ dummy_func(
22442244
// Double-check that the opcode isn't instrumented or something:
22452245
here->op.code == JUMP_BACKWARD)
22462246
{
2247-
OBJECT_STAT_INC(optimization_attempts);
2247+
OPTIMIZATION_STAT_INC(attempts);
22482248
int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
22492249
ERROR_IF(optimized < 0, error);
22502250
if (optimized) {

Python/executor.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
6262

6363
CHECK_EVAL_BREAKER();
6464

65-
OBJECT_STAT_INC(optimization_traces_executed);
65+
OPTIMIZATION_STAT_INC(traces_executed);
6666
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
6767
int pc = 0;
6868
int opcode;
@@ -81,7 +81,9 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
8181
operand,
8282
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
8383
pc++;
84-
OBJECT_STAT_INC(optimization_uops_executed);
84+
OPTIMIZATION_STAT_INC(uops_executed);
85+
assert(opcode < 512);
86+
UOP_EXE_INC(opcode);
8587
switch (opcode) {
8688

8789
#include "executor_cases.c.h"

Python/generated_cases.c.h

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -891,7 +891,7 @@ uop_optimize(
891891
// Error or nothing translated
892892
return trace_length;
893893
}
894-
OBJECT_STAT_INC(optimization_traces_created);
894+
OPTIMIZATION_STAT_INC(traces_created);
895895
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
896896
if (uop_optimize != NULL && *uop_optimize > '0') {
897897
trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);

Python/specialize.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats)
199199
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
200200
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
201201
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
202-
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
203-
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
204-
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
205-
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
206202
}
207203

208204
static void
@@ -215,13 +211,35 @@ print_gc_stats(FILE *out, GCStats *stats)
215211
}
216212
}
217213

214+
static void
215+
print_optimization_stats(FILE *out, OptimizationStats *stats)
216+
{
217+
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
218+
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
219+
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
220+
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
221+
222+
char** names;
223+
for (int i = 0; i < 512; i++) {
224+
if (i < 256) {
225+
names = _PyOpcode_OpName;
226+
} else {
227+
names = _PyOpcode_uop_name;
228+
}
229+
if (stats->opcode[i].execution_count) {
230+
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
231+
}
232+
}
233+
}
234+
218235
/* Write every statistics group held in `stats` to `out`, one group after
 * another; the Tier 2 optimization stats are printed last. */
static void
219236
print_stats(FILE *out, PyStats *stats)
220237
{
221238
print_spec_stats(out, stats->opcode_stats);
222239
print_call_stats(out, &stats->call_stats);
223240
print_object_stats(out, &stats->object_stats);
224241
print_gc_stats(out, stats->gc_stats);
242+
print_optimization_stats(out, &stats->optimization_stats);
225243
}
226244

227245
void

Tools/scripts/summarize_stats.py

Lines changed: 104 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,12 @@ def gather_stats(input):
211211
else:
212212
raise ValueError(f"{input:r} is not a file or directory path")
213213

214-
def extract_opcode_stats(stats):
214+
def extract_opcode_stats(stats, prefix):
    """Group the per-opcode entries of ``stats`` by opcode name.

    ``stats`` maps flat keys such as ``"opcode[NAME].field"`` or
    ``"uops[NAME].field"`` to values.  ``prefix`` selects the family: the
    callers pass ``"opcode"`` (keys ``opcode[...]``) and ``"uop"`` (keys
    ``uops[...]``), so an optional plural ``s`` is accepted between the
    prefix and the opening bracket.

    Returns a ``defaultdict`` mapping each opcode name to a dict of
    ``{field: value}``.  Keys that do not belong to the family are ignored.
    """
    opcode_stats = collections.defaultdict(dict)
    for key, value in stats.items():
        if not key.startswith(prefix):
            continue
        # Locate the bracket instead of assuming a fixed offset: a fixed
        # "len(prefix) + 2" is right for "uops[" but cuts the first letter
        # of the name for "opcode[".
        head, bracket, tail = key[len(prefix):].partition("[")
        if not bracket or head not in ("", "s"):
            continue
        name, _, rest = tail.partition("]")
        opcode_stats[name][rest.strip(".")] = value
    return opcode_stats
222222

@@ -350,35 +350,38 @@ def emit_execution_counts(opcode_stats, total):
350350
rows
351351
)
352352

353+
# Emit a table comparing per-opcode execution counts between two runs.
#
# base_rows / head_rows: iterables of rows whose first element is the opcode
# name and whose second element is its execution count; any further columns
# are ignored.  An opcode present on only one side is treated as count 0 on
# the other.  Rows are ordered by the absolute size of the relative change,
# largest first, and handed to emit_table().
def _emit_comparative_execution_counts(base_rows, head_rows):
354+
base_data = dict((x[0], x[1:]) for x in base_rows)
355+
head_data = dict((x[0], x[1:]) for x in head_rows)
356+
opcodes = set(base_data.keys()) | set(head_data.keys())
357+
358+
rows = []
359+
# Stand-in for an opcode missing from one side; only element 0 (the count)
# is read below.
default = [0, "0.0%", "0.0%", 0]
360+
for opcode in opcodes:
361+
base_entry = base_data.get(opcode, default)
362+
head_entry = head_data.get(opcode, default)
363+
if base_entry[0] == 0:
364+
# No baseline to divide by: report the change as a flat 100%.
change = 1
365+
else:
366+
change = (head_entry[0] - base_entry[0]) / base_entry[0]
367+
rows.append(
368+
(opcode, base_entry[0], head_entry[0],
369+
f"{100*change:0.1f}%"))
370+
371+
# presumably percentage_to_float() parses the "N.N%" string built above
# back into a number -- confirm against its definition.
rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
372+
373+
emit_table(
374+
("Name", "Base Count:", "Head Count:", "Change:"),
375+
rows
376+
)
377+
353378
def emit_comparative_execution_counts(
354-
base_opcode_stats, base_total, head_opcode_stats, head_total
379+
base_opcode_stats, base_total, head_opcode_stats, head_total, level=2
355380
):
356-
with Section("Execution counts", summary="execution counts for all instructions"):
381+
with Section("Execution counts", summary="execution counts for all instructions", level=level):
357382
base_rows = calculate_execution_counts(base_opcode_stats, base_total)
358383
head_rows = calculate_execution_counts(head_opcode_stats, head_total)
359-
base_data = dict((x[0], x[1:]) for x in base_rows)
360-
head_data = dict((x[0], x[1:]) for x in head_rows)
361-
opcodes = set(base_data.keys()) | set(head_data.keys())
362-
363-
rows = []
364-
default = [0, "0.0%", "0.0%", 0]
365-
for opcode in opcodes:
366-
base_entry = base_data.get(opcode, default)
367-
head_entry = head_data.get(opcode, default)
368-
if base_entry[0] == 0:
369-
change = 1
370-
else:
371-
change = (head_entry[0] - base_entry[0]) / base_entry[0]
372-
rows.append(
373-
(opcode, base_entry[0], head_entry[0],
374-
f"{100*change:0.1f}%"))
375-
376-
rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
377-
378-
emit_table(
379-
("Name", "Base Count:", "Head Count:", "Change:"),
380-
rows
381-
)
384+
_emit_comparative_execution_counts(base_rows, head_rows)
382385

383386
def get_defines():
384387
spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
@@ -611,8 +614,76 @@ def emit_pair_counts(opcode_stats, total):
611614
succ_rows
612615
)
613616

617+
618+
# Build the rows of the "Overall stats" table for the Tier 2 optimizer.
#
# Reads the four "Optimization ..." counters from `stats` by subscript, so a
# mapping that lacks any of them raises (KeyError for a plain dict).
# Returns a list of (label, count, ratio) tuples: traces created are related
# to attempts and uops executed to traces executed; the other two rows have
# an empty ratio.
def calculate_optimization_stats(stats):
619+
attempts = stats["Optimization attempts"]
620+
created = stats["Optimization traces created"]
621+
executed = stats["Optimization traces executed"]
622+
uops = stats["Optimization uops executed"]
623+
624+
return [
625+
("Optimization attempts", attempts, ""),
626+
(
627+
"Traces created", created,
628+
format_ratio(created, attempts)
629+
),
630+
("Traces executed", executed, ""),
631+
("Uops executed", uops, format_ratio(uops, executed))
632+
]
633+
634+
635+
# Build the rows of the per-uop execution table.
#
# opcode_stats: mapping of uop name -> dict of counters; entries without an
# "execution_count" key are skipped.
# Returns a list of (name, count, self_ratio, cumulative_ratio) tuples sorted
# by count, highest first.  Both ratios are formatted by format_ratio()
# against the sum of all counts.
def calculate_uop_execution_counts(opcode_stats):
636+
total = 0
637+
counts = []
638+
for name, opcode_stat in opcode_stats.items():
639+
if "execution_count" in opcode_stat:
640+
count = opcode_stat['execution_count']
641+
counts.append((count, name))
642+
total += count
643+
# Tuples are (count, name), so ties on count fall back to reverse name order.
counts.sort(reverse=True)
644+
cumulative = 0
645+
rows = []
646+
for (count, name) in counts:
647+
cumulative += count
648+
rows.append((name, count, format_ratio(count, total),
649+
format_ratio(cumulative, total)))
650+
return rows
651+
652+
653+
def emit_optimization_stats(stats):
    """Emit the "Optimization (Tier 2) stats" section for a single run.

    The section contains an overall table (attempts, traces, uops) and a
    per-uop execution count table.  Nothing is emitted when ``stats`` does
    not contain the four "Optimization ..." counters, e.g. when the data
    was collected by a build that does not write them; without this check
    the lookup in calculate_optimization_stats() would raise KeyError.
    """
    required = (
        "Optimization attempts",
        "Optimization traces created",
        "Optimization traces executed",
        "Optimization uops executed",
    )
    if any(key not in stats for key in required):
        return

    uop_stats = extract_opcode_stats(stats, "uop")

    with Section("Optimization (Tier 2) stats", summary="statistics about the Tier 2 optimizer"):
        with Section("Overall stats", level=3):
            rows = calculate_optimization_stats(stats)
            emit_table(("", "Count:", "Ratio:"), rows)

        with Section("Uop stats", level=3):
            rows = calculate_uop_execution_counts(uop_stats)
            emit_table(
                ("Uop", "Count:", "Self:", "Cumulative:"),
                rows
            )
667+
668+
669+
def emit_comparative_optimization_stats(base_stats, head_stats):
    """Emit the "Optimization (Tier 2) stats" section comparing two runs.

    The section contains an overall table with base and head columns and a
    per-uop table of execution count changes.  Nothing is emitted unless
    both ``base_stats`` and ``head_stats`` contain the four
    "Optimization ..." counters; a base collected before those counters
    existed would otherwise raise KeyError in calculate_optimization_stats().
    """
    required = (
        "Optimization attempts",
        "Optimization traces created",
        "Optimization traces executed",
        "Optimization uops executed",
    )
    for side in (base_stats, head_stats):
        if any(key not in side for key in required):
            return

    base_uop_stats = extract_opcode_stats(base_stats, "uop")
    head_uop_stats = extract_opcode_stats(head_stats, "uop")

    with Section("Optimization (Tier 2) stats", summary="statistics about the Tier 2 optimizer"):
        with Section("Overall stats", level=3):
            base_rows = calculate_optimization_stats(base_stats)
            head_rows = calculate_optimization_stats(head_stats)
            emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), join_rows(base_rows, head_rows))

        with Section("Uop stats", level=3):
            base_rows = calculate_uop_execution_counts(base_uop_stats)
            head_rows = calculate_uop_execution_counts(head_uop_stats)
            _emit_comparative_execution_counts(base_rows, head_rows)
683+
684+
614685
def output_single_stats(stats):
615-
opcode_stats = extract_opcode_stats(stats)
686+
opcode_stats = extract_opcode_stats(stats, "opcode")
616687
total = get_total(opcode_stats)
617688
emit_execution_counts(opcode_stats, total)
618689
emit_pair_counts(opcode_stats, total)
@@ -621,15 +692,16 @@ def output_single_stats(stats):
621692
emit_call_stats(stats, stats["_stats_defines"])
622693
emit_object_stats(stats)
623694
emit_gc_stats(stats)
695+
emit_optimization_stats(stats)
624696
with Section("Meta stats", summary="Meta statistics"):
625697
emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])])
626698

627699

628700
def output_comparative_stats(base_stats, head_stats):
629-
base_opcode_stats = extract_opcode_stats(base_stats)
701+
base_opcode_stats = extract_opcode_stats(base_stats, "opcode")
630702
base_total = get_total(base_opcode_stats)
631703

632-
head_opcode_stats = extract_opcode_stats(head_stats)
704+
head_opcode_stats = extract_opcode_stats(head_stats, "opcode")
633705
head_total = get_total(head_opcode_stats)
634706

635707
emit_comparative_execution_counts(
@@ -645,6 +717,7 @@ def output_comparative_stats(base_stats, head_stats):
645717
emit_comparative_call_stats(base_stats, head_stats, head_stats["_stats_defines"])
646718
emit_comparative_object_stats(base_stats, head_stats)
647719
emit_comparative_gc_stats(base_stats, head_stats)
720+
emit_comparative_optimization_stats(base_stats, head_stats)
648721

649722
def output_stats(inputs, json_output=None):
650723
if len(inputs) == 1:

0 commit comments

Comments
 (0)