GH-109330: Dump and compare stats using opcode names, not numbers (GH-109335)

mdboom · web-flow · commit 5dcbbd8861e6 · 2023-09-12T14:12:57.000-07:00
diff --git a/Python/specialize.c b/Python/specialize.c
@@ -123,19 +123,19 @@ _Py_GetSpecializationStats(void) {
 
 #define PRINT_STAT(i, field) \
     if (stats[i].field) { \
-        fprintf(out, "    opcode[%d]." #field " : %" PRIu64 "\n", i, stats[i].field); \
+        fprintf(out, "    opcode[%s]." #field " : %" PRIu64 "\n", _PyOpcode_OpName[i], stats[i].field); \
     }
 
 static void
 print_spec_stats(FILE *out, OpcodeStats *stats)
 {
     /* Mark some opcodes as specializable for stats,
      * even though we don't specialize them yet. */
-    fprintf(out, "opcode[%d].specializable : 1\n", BINARY_SLICE);
-    fprintf(out, "opcode[%d].specializable : 1\n", STORE_SLICE);
+    fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
+    fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
     for (int i = 0; i < 256; i++) {
         if (_PyOpcode_Caches[i]) {
-            fprintf(out, "opcode[%d].specializable : 1\n", i);
+            fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[i]);
         }
         PRINT_STAT(i, specialization.success);
         PRINT_STAT(i, specialization.failure);
@@ -147,14 +147,14 @@ print_spec_stats(FILE *out, OpcodeStats *stats)
         for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {
             uint64_t val = stats[i].specialization.failure_kinds[j];
             if (val) {
-                fprintf(out, "    opcode[%d].specialization.failure_kinds[%d] : %"
-                    PRIu64 "\n", i, j, val);
+                fprintf(out, "    opcode[%s].specialization.failure_kinds[%d] : %"
+                    PRIu64 "\n", _PyOpcode_OpName[i], j, val);
             }
         }
         for (int j = 0; j < 256; j++) {
             if (stats[i].pair_count[j]) {
-                fprintf(out, "opcode[%d].pair_count[%d] : %" PRIu64 "\n",
-                        i, j, stats[i].pair_count[j]);
+                fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
+                        _PyOpcode_OpName[i], _PyOpcode_OpName[j], stats[i].pair_count[j]);
             }
         }
     }
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
@@ -16,22 +16,6 @@
 else:
     DEFAULT_DIR = "/tmp/py_stats/"
 
-#Create list of all instruction names
-specialized = iter(opcode._specialized_opmap.keys())
-opname = ["<0>"]
-for name in opcode.opname[1:]:
-    if name.startswith("<"):
-        try:
-            name = next(specialized)
-        except StopIteration:
-            pass
-    opname.append(name)
-
-# opcode_name --> opcode
-# Sort alphabetically.
-opmap = {name: i for i, name in enumerate(opname)}
-opmap = dict(sorted(opmap.items()))
-
 TOTAL = "specialization.hit", "specialization.miss", "execution_count"
 
 def format_ratio(num, den):
@@ -200,12 +184,12 @@ def gather_stats(input):
         raise ValueError(f"{input:r} is not a file or directory path")
 
 def extract_opcode_stats(stats):
-    opcode_stats = [ {} for _ in range(256) ]
+    opcode_stats = collections.defaultdict(dict)
     for key, value in stats.items():
         if not key.startswith("opcode"):
             continue
-        n, _, rest = key[7:].partition("]")
-        opcode_stats[int(n)][rest.strip(".")] = value
+        name, _, rest = key[7:].partition("]")
+        opcode_stats[name][rest.strip(".")] = value
     return opcode_stats
 
 def parse_kinds(spec_src, prefix="SPEC_FAIL"):
@@ -246,11 +230,10 @@ def categorized_counts(opcode_stats):
     specialized_instructions = {
         op for op in opcode._specialized_opmap.keys()
         if "__" not in op}
-    for i, opcode_stat in enumerate(opcode_stats):
+    for name, opcode_stat in opcode_stats.items():
         if "execution_count" not in opcode_stat:
             continue
         count = opcode_stat['execution_count']
-        name = opname[i]
         if "specializable" in opcode_stat:
             not_specialized += count
         elif name in specialized_instructions:
@@ -314,13 +297,13 @@ def emit_table(header, rows):
 
 def calculate_execution_counts(opcode_stats, total):
     counts = []
-    for i, opcode_stat in enumerate(opcode_stats):
+    for name, opcode_stat in opcode_stats.items():
         if "execution_count" in opcode_stat:
             count = opcode_stat['execution_count']
             miss = 0
             if "specializable" not in opcode_stat:
                 miss = opcode_stat.get("specialization.miss")
-            counts.append((count, opname[i], miss))
+            counts.append((count, name, miss))
     counts.sort(reverse=True)
     cumulative = 0
     rows = []
@@ -381,16 +364,17 @@ def get_defines():
 def emit_specialization_stats(opcode_stats):
     defines = get_defines()
     with Section("Specialization stats", summary="specialization stats by family"):
-        for i, opcode_stat in enumerate(opcode_stats):
-            name = opname[i]
+        for name, opcode_stat in opcode_stats.items():
             print_specialization_stats(name, opcode_stat, defines)
 
 def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
     defines = get_defines()
     with Section("Specialization stats", summary="specialization stats by family"):
-        for i, (base_opcode_stat, head_opcode_stat) in enumerate(zip(base_opcode_stats, head_opcode_stats)):
-            name = opname[i]
-            print_comparative_specialization_stats(name, base_opcode_stat, head_opcode_stat, defines)
+        opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys())
+        for opcode in opcodes:
+            print_comparative_specialization_stats(
+                opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines
+            )
 
 def calculate_specialization_effectiveness(opcode_stats, total):
     basic, not_specialized, specialized = categorized_counts(opcode_stats)
@@ -407,12 +391,12 @@ def emit_specialization_overview(opcode_stats, total):
         for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
             total = 0
             counts = []
-            for i, opcode_stat in enumerate(opcode_stats):
+            for name, opcode_stat in opcode_stats.items():
                 # Avoid double counting misses
                 if title == "Misses" and "specializable" in opcode_stat:
                     continue
                 value = opcode_stat.get(field, 0)
-                counts.append((value, opname[i]))
+                counts.append((value, name))
                 total += value
             counts.sort(reverse=True)
             if total:
@@ -539,29 +523,27 @@ def emit_comparative_gc_stats(base_stats, head_stats):
 
 def get_total(opcode_stats):
     total = 0
-    for opcode_stat in opcode_stats:
+    for opcode_stat in opcode_stats.values():
         if "execution_count" in opcode_stat:
             total += opcode_stat['execution_count']
     return total
 
 def emit_pair_counts(opcode_stats, total):
     pair_counts = []
-    for i, opcode_stat in enumerate(opcode_stats):
-        if i == 0:
-            continue
+    for name_i, opcode_stat in opcode_stats.items():
         for key, value in opcode_stat.items():
             if key.startswith("pair_count"):
-                x, _, _ = key[11:].partition("]")
+                name_j, _, _ = key[11:].partition("]")
                 if value:
-                    pair_counts.append((value, (i, int(x))))
+                    pair_counts.append((value, (name_i, name_j)))
     with Section("Pair counts", summary="Pair counts for top 100 pairs"):
         pair_counts.sort(reverse=True)
         cumulative = 0
         rows = []
         for (count, pair) in itertools.islice(pair_counts, 100):
-            i, j = pair
+            name_i, name_j = pair
             cumulative += count
-            rows.append((opname[i] + " " + opname[j], count, format_ratio(count, total),
+            rows.append((f"{name_i} {name_j}", count, format_ratio(count, total),
                          format_ratio(cumulative, total)))
         emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
             rows
@@ -577,18 +559,18 @@ def emit_pair_counts(opcode_stats, total):
                 successors[first][second] = count
                 total_predecessors[second] += count
                 total_successors[first] += count
-        for name, i in opmap.items():
-            total1 = total_predecessors[i]
-            total2 = total_successors[i]
+        for name in opcode_stats.keys():
+            total1 = total_predecessors[name]
+            total2 = total_successors[name]
             if total1 == 0 and total2 == 0:
                 continue
             pred_rows = succ_rows = ()
             if total1:
-                pred_rows = [(opname[pred], count, f"{count/total1:.1%}")
-                             for (pred, count) in predecessors[i].most_common(5)]
+                pred_rows = [(pred, count, f"{count/total1:.1%}")
+                             for (pred, count) in predecessors[name].most_common(5)]
             if total2:
-                succ_rows = [(opname[succ], count, f"{count/total2:.1%}")
-                             for (succ, count) in successors[i].most_common(5)]
+                succ_rows = [(succ, count, f"{count/total2:.1%}")
+                             for (succ, count) in successors[name].most_common(5)]
             with Section(name, 3, f"Successors and predecessors for {name}"):
                 emit_table(("Predecessors", "Count:", "Percentage:"),
                     pred_rows

Original file line number	Diff line number	Diff line change
`@@ -123,19 +123,19 @@ _Py_GetSpecializationStats(void) {`
`123`	`123`
`124`	`124`	`#define PRINT_STAT(i, field) \`
`125`	`125`	`if (stats[i].field) { \`
`126`		`- fprintf(out, "    opcode[%d]." #field " : %" PRIu64 "\n", i, stats[i].field); \`
	`126`	`+ fprintf(out, "    opcode[%s]." #field " : %" PRIu64 "\n", _PyOpcode_OpName[i], stats[i].field); \`
`127`	`127`	`}`
`128`	`128`
`129`	`129`	`static void`
`130`	`130`	`print_spec_stats(FILE *out, OpcodeStats *stats)`
`131`	`131`	`{`
`132`	`132`	`/* Mark some opcodes as specializable for stats,`
`133`	`133`	`* even though we don't specialize them yet. */`
`134`		`- fprintf(out, "opcode[%d].specializable : 1\n", BINARY_SLICE);`
`135`		`- fprintf(out, "opcode[%d].specializable : 1\n", STORE_SLICE);`
	`134`	`+ fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");`
	`135`	`+ fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");`
`136`	`136`	`for (int i = 0; i < 256; i++) {`
`137`	`137`	`if (_PyOpcode_Caches[i]) {`
`138`		`- fprintf(out, "opcode[%d].specializable : 1\n", i);`
	`138`	`+ fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[i]);`
`139`	`139`	`}`
`140`	`140`	`PRINT_STAT(i, specialization.success);`
`141`	`141`	`PRINT_STAT(i, specialization.failure);`
`@@ -147,14 +147,14 @@ print_spec_stats(FILE *out, OpcodeStats *stats)`
`147`	`147`	`for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {`
`148`	`148`	`uint64_t val = stats[i].specialization.failure_kinds[j];`
`149`	`149`	`if (val) {`
`150`		`- fprintf(out, "    opcode[%d].specialization.failure_kinds[%d] : %"`
`151`		`- PRIu64 "\n", i, j, val);`
	`150`	`+ fprintf(out, "    opcode[%s].specialization.failure_kinds[%d] : %"`
	`151`	`+ PRIu64 "\n", _PyOpcode_OpName[i], j, val);`
`152`	`152`	`}`
`153`	`153`	`}`
`154`	`154`	`for (int j = 0; j < 256; j++) {`
`155`	`155`	`if (stats[i].pair_count[j]) {`
`156`		`- fprintf(out, "opcode[%d].pair_count[%d] : %" PRIu64 "\n",`
`157`		`- i, j, stats[i].pair_count[j]);`
	`156`	`+ fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",`
	`157`	`+ _PyOpcode_OpName[i], _PyOpcode_OpName[j], stats[i].pair_count[j]);`
`158`	`158`	`}`
`159`	`159`	`}`
`160`	`160`	`}`