gh-109329: Support for basic pystats for Tier 2

mdboom · mdboom · commit 7ba5f5527dc4 · 2023-09-26T14:40:01.000-04:00
diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
@@ -86,10 +86,6 @@ typedef struct _object_stats {
     uint64_t type_cache_dunder_hits;
     uint64_t type_cache_dunder_misses;
     uint64_t type_cache_collisions;
-    uint64_t optimization_attempts;
-    uint64_t optimization_traces_created;
-    uint64_t optimization_traces_executed;
-    uint64_t optimization_uops_executed;
     /* Temporary value used during GC */
     uint64_t object_visits;
 } ObjectStats;
@@ -100,10 +96,24 @@ typedef struct _gc_stats {
     uint64_t objects_collected;
 } GCStats;
 
+typedef struct _uop_stats {
+    uint64_t execution_count;  /* Bumped by UOP_EXE_INC() for every uop dispatched */
+    uint64_t miss;  /* NOTE(review): no code in this change increments it -- confirm use */
+} UOpStats;
+
+typedef struct _optimization_stats {
+    uint64_t attempts;  /* Bumped just before each _PyOptimizer_BackEdge() call */
+    uint64_t traces_created;  /* Bumped in uop_optimize() once a trace was translated */
+    uint64_t traces_executed;  /* Bumped once per _PyUopExecute() run, after the eval-breaker check */
+    uint64_t uops_executed;  /* Bumped for every uop dispatched by _PyUopExecute() */
+    UOpStats opcode[512];  /* Per-opcode stats, indexed by opcode number */
+} OptimizationStats;
+
 typedef struct _stats {
     OpcodeStats opcode_stats[256];
     CallStats call_stats;
     ObjectStats object_stats;
+    OptimizationStats optimization_stats;
     GCStats *gc_stats;
 } PyStats;
 
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
@@ -282,6 +282,9 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
     do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
 #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
+#define OPTIMIZATION_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
+#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
+#define UOP_STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].name++; } while (0)
 
 // Export for '_opcode' shared extension
 PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -296,6 +299,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #define EVAL_CALL_STAT_INC(name) ((void)0)
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
 #define GC_STAT_ADD(gen, name, n) ((void)0)
+#define OPTIMIZATION_STAT_INC(name) ((void)0)
+#define UOP_EXE_INC(opname) ((void)0)
+#define UOP_STAT_INC(opname, name) ((void)0)
 #endif  // !Py_STATS
 
 // Utility functions for reading/writing 32/64-bit values in the inline caches.
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
@@ -2244,7 +2244,7 @@ dummy_func(
                 // Double-check that the opcode isn't instrumented or something:
                 here->op.code == JUMP_BACKWARD)
             {
-                OBJECT_STAT_INC(optimization_attempts);
+                OPTIMIZATION_STAT_INC(attempts);
                 int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
                 ERROR_IF(optimized < 0, error);
                 if (optimized) {
diff --git a/Python/executor.c b/Python/executor.c
@@ -62,7 +62,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
 
     CHECK_EVAL_BREAKER();
 
-    OBJECT_STAT_INC(optimization_traces_executed);
+    OPTIMIZATION_STAT_INC(traces_executed);
     _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
     int pc = 0;
     int opcode;
@@ -81,7 +81,9 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
                 operand,
                 (int)(stack_pointer - _PyFrame_Stackbase(frame)));
         pc++;
-        OBJECT_STAT_INC(optimization_uops_executed);
+        OPTIMIZATION_STAT_INC(uops_executed);
+        assert(opcode < 512);
+        UOP_EXE_INC(opcode);
         switch (opcode) {
 
 #include "executor_cases.c.h"
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
diff --git a/Python/optimizer.c b/Python/optimizer.c
@@ -891,7 +891,7 @@ uop_optimize(
         // Error or nothing translated
         return trace_length;
     }
-    OBJECT_STAT_INC(optimization_traces_created);
+    OPTIMIZATION_STAT_INC(traces_created);
     char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
     if (uop_optimize != NULL && *uop_optimize > '0') {
         trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
diff --git a/Python/specialize.c b/Python/specialize.c
@@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats)
     fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
     fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
     fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
-    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
-    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
-    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
-    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
 }
 
 static void
@@ -215,13 +211,35 @@ print_gc_stats(FILE *out, GCStats *stats)
     }
 }
 
+/* Print the Tier 2 optimizer counters, then one line per uop that executed. */
+static void
+print_optimization_stats(FILE *out, OptimizationStats *stats)
+{
+    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
+    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
+    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
+    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
+
+    for (int i = 0; i < 512; i++) {
+        if (stats->opcode[i].execution_count == 0) {
+            continue;  /* Uops that never ran are left out of the dump */
+        }
+        /* Slots below 256 are named by the bytecode table, the rest by the uop
+           table; both are indexed by i, so only a read-only pointer is needed. */
+        const char *const *names = (i < 256) ? _PyOpcode_OpName : _PyOpcode_uop_name;
+        fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n",
+                names[i], stats->opcode[i].execution_count);
+    }
+}
+
 static void
 print_stats(FILE *out, PyStats *stats)
 {
     print_spec_stats(out, stats->opcode_stats);
     print_call_stats(out, &stats->call_stats);
     print_object_stats(out, &stats->object_stats);
     print_gc_stats(out, stats->gc_stats);
+    print_optimization_stats(out, &stats->optimization_stats);
 }
 
 void
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
@@ -211,12 +211,12 @@ def gather_stats(input):
     else:
         raise ValueError(f"{input:r} is not a file or directory path")
 
-def extract_opcode_stats(stats):
+def extract_opcode_stats(stats, prefix):
     opcode_stats = collections.defaultdict(dict)
     for key, value in stats.items():
-        if not key.startswith("opcode"):
+        if not key.startswith(prefix):
             continue
-        name, _, rest = key[7:].partition("]")
+        name, _, rest = key.partition("[")[2].partition("]")  # split at "[": right for "opcode[" and "uops["
         opcode_stats[name][rest.strip(".")] = value
     return opcode_stats
 
@@ -350,35 +350,38 @@ def emit_execution_counts(opcode_stats, total):
             rows
         )
 
+def _emit_comparative_execution_counts(base_rows, head_rows):
+    """Emit a base-vs-head count table, largest absolute change first.
+
+    Each row's first column is the name it is matched on and its second is the count.
+    """
+    base_data = {row[0]: row[1:] for row in base_rows}
+    head_data = {row[0]: row[1:] for row in head_rows}
+
+    # A name present on only one side gets a zero count on the other.
+    default = [0, "0.0%", "0.0%", 0]
+    rows = []
+    for opcode in set(base_data) | set(head_data):
+        base_count = base_data.get(opcode, default)[0]
+        head_count = head_data.get(opcode, default)[0]
+        # A zero base count has no relative change; it is reported as 100%.
+        if base_count != 0:
+            change = (head_count - base_count) / base_count
+        else:
+            change = 1
+        rows.append((opcode, base_count, head_count, f"{100*change:0.1f}%"))
+
+    rows.sort(key=lambda row: abs(percentage_to_float(row[-1])), reverse=True)
+
+    emit_table(("Name", "Base Count:", "Head Count:", "Change:"), rows)
+
 def emit_comparative_execution_counts(
-    base_opcode_stats, base_total, head_opcode_stats, head_total
+    base_opcode_stats, base_total, head_opcode_stats, head_total, level=2
 ):
-    with Section("Execution counts", summary="execution counts for all instructions"):
+    with Section("Execution counts", summary="execution counts for all instructions", level=level):
         base_rows = calculate_execution_counts(base_opcode_stats, base_total)
         head_rows = calculate_execution_counts(head_opcode_stats, head_total)
-        base_data = dict((x[0], x[1:]) for x in base_rows)
-        head_data = dict((x[0], x[1:]) for x in head_rows)
-        opcodes = set(base_data.keys()) | set(head_data.keys())
-
-        rows = []
-        default = [0, "0.0%", "0.0%", 0]
-        for opcode in opcodes:
-            base_entry = base_data.get(opcode, default)
-            head_entry = head_data.get(opcode, default)
-            if base_entry[0] == 0:
-                change = 1
-            else:
-                change = (head_entry[0] - base_entry[0]) / base_entry[0]
-            rows.append(
-                (opcode, base_entry[0], head_entry[0],
-                 f"{100*change:0.1f}%"))
-
-        rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
-
-        emit_table(
-            ("Name", "Base Count:", "Head Count:", "Change:"),
-            rows
-        )
+        _emit_comparative_execution_counts(base_rows, head_rows)
 
 def get_defines():
     spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
@@ -611,8 +614,76 @@ def emit_pair_counts(opcode_stats, total):
                     succ_rows
                 )
 
+
+def calculate_optimization_stats(stats):
+    """Build the (label, count, ratio) rows of the Tier 2 "Overall stats" table."""
+    # Stats gathered from a build that predates these counters lack the keys;
+    # count them as zero rather than failing with KeyError.
+    attempts = stats.get("Optimization attempts", 0)
+    created = stats.get("Optimization traces created", 0)
+    executed = stats.get("Optimization traces executed", 0)
+    uops = stats.get("Optimization uops executed", 0)
+
+    return [
+        ("Optimization attempts", attempts, ""),
+        ("Traces created", created, format_ratio(created, attempts)),
+        ("Traces executed", executed, ""),
+        ("Uops executed", uops, format_ratio(uops, executed)),
+    ]
+
+
+def calculate_uop_execution_counts(opcode_stats):
+    """Return (name, count, self ratio, cumulative ratio) rows, highest count first."""
+    ranked = sorted(
+        (
+            (stat["execution_count"], name)
+            for name, stat in opcode_stats.items()
+            if "execution_count" in stat
+        ),
+        reverse=True,
+    )
+    total = sum(count for count, _ in ranked)
+    rows, running = [], 0
+    for count, name in ranked:
+        running += count
+        rows.append((name, count, format_ratio(count, total), format_ratio(running, total)))
+    return rows
+
+
+def emit_optimization_stats(stats):
+    """Emit the Tier 2 section: overall optimizer counters, then per-uop counts."""
+    uop_stats = extract_opcode_stats(stats, "uop")
+
+    with Section(
+        "Optimization (Tier 2) stats",
+        summary="statistics about the Tier 2 optimizer",
+    ):
+        with Section("Overall stats", level=3):
+            emit_table(("", "Count:", "Ratio:"), calculate_optimization_stats(stats))
+
+        with Section("Uop stats", level=3):
+            uop_rows = calculate_uop_execution_counts(uop_stats)
+            emit_table(("Uop", "Count:", "Self:", "Cumulative:"), uop_rows)
+
+
+def emit_comparative_optimization_stats(base_stats, head_stats):
+    """Emit the Tier 2 section comparing base against head, overall then per uop."""
+    header = ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:")
+    base_uops = extract_opcode_stats(base_stats, "uop")
+    head_uops = extract_opcode_stats(head_stats, "uop")
+    with Section("Optimization (Tier 2) stats", summary="statistics about the Tier 2 optimizer"):
+        with Section("Overall stats", level=3):
+            base_overall = calculate_optimization_stats(base_stats)
+            head_overall = calculate_optimization_stats(head_stats)
+            emit_table(header, join_rows(base_overall, head_overall))
+
+        with Section("Uop stats", level=3):
+            uop_rows = [calculate_uop_execution_counts(s) for s in (base_uops, head_uops)]
+            _emit_comparative_execution_counts(*uop_rows)
+
+
 def output_single_stats(stats):
-    opcode_stats = extract_opcode_stats(stats)
+    opcode_stats = extract_opcode_stats(stats, "opcode")
     total = get_total(opcode_stats)
     emit_execution_counts(opcode_stats, total)
     emit_pair_counts(opcode_stats, total)
@@ -621,15 +692,16 @@ def output_single_stats(stats):
     emit_call_stats(stats, stats["_stats_defines"])
     emit_object_stats(stats)
     emit_gc_stats(stats)
+    emit_optimization_stats(stats)
     with Section("Meta stats", summary="Meta statistics"):
         emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])])
 
 
 def output_comparative_stats(base_stats, head_stats):
-    base_opcode_stats = extract_opcode_stats(base_stats)
+    base_opcode_stats = extract_opcode_stats(base_stats, "opcode")
     base_total = get_total(base_opcode_stats)
 
-    head_opcode_stats = extract_opcode_stats(head_stats)
+    head_opcode_stats = extract_opcode_stats(head_stats, "opcode")
     head_total = get_total(head_opcode_stats)
 
     emit_comparative_execution_counts(
@@ -645,6 +717,7 @@ def output_comparative_stats(base_stats, head_stats):
     emit_comparative_call_stats(base_stats, head_stats, head_stats["_stats_defines"])
     emit_comparative_object_stats(base_stats, head_stats)
     emit_comparative_gc_stats(base_stats, head_stats)
+    emit_comparative_optimization_stats(base_stats, head_stats)
 
 def output_stats(inputs, json_output=None):
     if len(inputs) == 1:

Original file line number	Diff line number	Diff line change
`@@ -2244,7 +2244,7 @@ dummy_func(`
`2244`	`2244`	`// Double-check that the opcode isn't instrumented or something:`
`2245`	`2245`	`here->op.code == JUMP_BACKWARD)`
`2246`	`2246`	`{`
`2247`		`- OBJECT_STAT_INC(optimization_attempts);`
	`2247`	`+ OPTIMIZATION_STAT_INC(attempts);`
`2248`	`2248`	`int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);`
`2249`	`2249`	`ERROR_IF(optimized < 0, error);`
`2250`	`2250`	`if (optimized) {`
Original file line number	Diff line number	Diff line change
`@@ -891,7 +891,7 @@ uop_optimize(`
`891`	`891`	`// Error or nothing translated`
`892`	`892`	`return trace_length;`
`893`	`893`	`}`
`894`		`- OBJECT_STAT_INC(optimization_traces_created);`
	`894`	`+ OPTIMIZATION_STAT_INC(traces_created);`
`895`	`895`	`char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");`
`896`	`896`	`if (uop_optimize != NULL && *uop_optimize > '0') {`
`897`	`897`	`trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);`