Skip to content

Commit 19f5eff

Browse files
authored
GH-109373: Store metadata required for pystats comparison in the JSON (GH-109374)
1 parent 3d88145 commit 19f5eff

File tree

1 file changed

+65
-32
lines changed

1 file changed

+65
-32
lines changed

Tools/scripts/summarize_stats.py

Lines changed: 65 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22
default stats folders.
33
"""
44

5+
# NOTE: Bytecode introspection modules (opcode, dis, etc.) should only
6+
# be used when loading a single dataset. When comparing datasets, they
7+
# could give the wrong answer, leading to subtle errors.
8+
59
import argparse
610
import collections
711
import json
812
import os.path
9-
import opcode
1013
from datetime import date
1114
import itertools
1215
import sys
@@ -28,6 +31,16 @@ def format_ratio(num, den):
2831
else:
2932
return f"{num/den:.01%}"
3033

34+
def percentage_to_float(s):
    """
    Converts a percentage string to a float.  The empty string is returned as 0.0

    The string must end in "%" (for example "12.5%" gives 12.5).  The value
    is not divided by 100; only the trailing "%" is removed before
    conversion.

    Raises ValueError if a non-empty string does not end in "%", or if the
    text before the "%" is not a valid float.
    """
    if s == "":
        return 0.0
    # Validate explicitly instead of using `assert`: assertions are removed
    # under `python -O`, which would silently drop the last character of a
    # malformed value and return a wrong number.
    if not s.endswith("%"):
        raise ValueError(f"expected a percentage string ending in '%', got {s!r}")
    return float(s[:-1])
43+
3144
def join_rows(a_rows, b_rows):
3245
"""
3346
Joins two tables together, side-by-side, where the first column in each is a
@@ -164,7 +177,12 @@ def gather_stats(input):
164177

165178
if os.path.isfile(input):
166179
with open(input, "r") as fd:
167-
return json.load(fd)
180+
stats = json.load(fd)
181+
182+
stats["_stats_defines"] = {int(k): v for k, v in stats["_stats_defines"].items()}
183+
stats["_defines"] = {int(k): v for k, v in stats["_defines"].items()}
184+
return stats
185+
168186
elif os.path.isdir(input):
169187
stats = collections.Counter()
170188
for filename in os.listdir(input):
@@ -179,6 +197,16 @@ def gather_stats(input):
179197
value = int(value)
180198
stats[key] += value
181199
stats['__nfiles__'] += 1
200+
201+
import opcode
202+
203+
stats["_specialized_instructions"] = [
204+
op for op in opcode._specialized_opmap.keys()
205+
if "__" not in op
206+
]
207+
stats["_stats_defines"] = get_stats_defines()
208+
stats["_defines"] = get_defines()
209+
182210
return stats
183211
else:
184212
raise ValueError(f"{input:r} is not a file or directory path")
@@ -223,13 +251,10 @@ def kind_to_text(kind, defines, opname):
223251
return pretty(name[len(opname)+1:])
224252
return "kind " + str(kind)
225253

226-
def categorized_counts(opcode_stats):
254+
def categorized_counts(opcode_stats, specialized_instructions):
227255
basic = 0
228256
specialized = 0
229257
not_specialized = 0
230-
specialized_instructions = {
231-
op for op in opcode._specialized_opmap.keys()
232-
if "__" not in op}
233258
for name, opcode_stat in opcode_stats.items():
234259
if "execution_count" not in opcode_stat:
235260
continue
@@ -348,7 +373,7 @@ def emit_comparative_execution_counts(
348373
(opcode, base_entry[0], head_entry[0],
349374
f"{100*change:0.1f}%"))
350375

351-
rows.sort(key=lambda x: -abs(float(x[-1][:-1])))
376+
rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
352377

353378
emit_table(
354379
("Name", "Base Count:", "Head Count:", "Change:"),
@@ -361,32 +386,34 @@ def get_defines():
361386
defines = parse_kinds(spec_src)
362387
return defines
363388

364-
def emit_specialization_stats(opcode_stats):
365-
defines = get_defines()
389+
def emit_specialization_stats(opcode_stats, defines):
366390
with Section("Specialization stats", summary="specialization stats by family"):
367391
for name, opcode_stat in opcode_stats.items():
368392
print_specialization_stats(name, opcode_stat, defines)
369393

370-
def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
371-
defines = get_defines()
394+
def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats, defines):
372395
with Section("Specialization stats", summary="specialization stats by family"):
373396
opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys())
374397
for opcode in opcodes:
375398
print_comparative_specialization_stats(
376399
opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines
377400
)
378401

379-
def calculate_specialization_effectiveness(opcode_stats, total):
380-
basic, not_specialized, specialized = categorized_counts(opcode_stats)
402+
def calculate_specialization_effectiveness(
403+
opcode_stats, total, specialized_instructions
404+
):
405+
basic, not_specialized, specialized = categorized_counts(
406+
opcode_stats, specialized_instructions
407+
)
381408
return [
382409
("Basic", basic, format_ratio(basic, total)),
383410
("Not specialized", not_specialized, format_ratio(not_specialized, total)),
384411
("Specialized", specialized, format_ratio(specialized, total)),
385412
]
386413

387-
def emit_specialization_overview(opcode_stats, total):
414+
def emit_specialization_overview(opcode_stats, total, specialized_instructions):
388415
with Section("Specialization effectiveness"):
389-
rows = calculate_specialization_effectiveness(opcode_stats, total)
416+
rows = calculate_specialization_effectiveness(opcode_stats, total, specialized_instructions)
390417
emit_table(("Instructions", "Count:", "Ratio:"), rows)
391418
for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
392419
total = 0
@@ -404,10 +431,16 @@ def emit_specialization_overview(opcode_stats, total):
404431
rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
405432
emit_table(("Name", "Count:", "Ratio:"), rows)
406433

407-
def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total):
434+
def emit_comparative_specialization_overview(
435+
base_opcode_stats, base_total, head_opcode_stats, head_total, specialized_instructions
436+
):
408437
with Section("Specialization effectiveness"):
409-
base_rows = calculate_specialization_effectiveness(base_opcode_stats, base_total)
410-
head_rows = calculate_specialization_effectiveness(head_opcode_stats, head_total)
438+
base_rows = calculate_specialization_effectiveness(
439+
base_opcode_stats, base_total, specialized_instructions
440+
)
441+
head_rows = calculate_specialization_effectiveness(
442+
head_opcode_stats, head_total, specialized_instructions
443+
)
411444
emit_table(
412445
("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
413446
join_rows(base_rows, head_rows)
@@ -419,8 +452,7 @@ def get_stats_defines():
419452
defines = parse_kinds(stats_src, prefix="EVAL_CALL")
420453
return defines
421454

422-
def calculate_call_stats(stats):
423-
defines = get_stats_defines()
455+
def calculate_call_stats(stats, defines):
424456
total = 0
425457
for key, value in stats.items():
426458
if "Calls to" in key:
@@ -439,17 +471,17 @@ def calculate_call_stats(stats):
439471
rows.append((key, value, format_ratio(value, total)))
440472
return rows
441473

442-
def emit_call_stats(stats):
474+
def emit_call_stats(stats, defines):
443475
with Section("Call stats", summary="Inlined calls and frame stats"):
444-
rows = calculate_call_stats(stats)
476+
rows = calculate_call_stats(stats, defines)
445477
emit_table(("", "Count:", "Ratio:"), rows)
446478

447-
def emit_comparative_call_stats(base_stats, head_stats):
479+
def emit_comparative_call_stats(base_stats, head_stats, defines):
448480
with Section("Call stats", summary="Inlined calls and frame stats"):
449-
base_rows = calculate_call_stats(base_stats)
450-
head_rows = calculate_call_stats(head_stats)
481+
base_rows = calculate_call_stats(base_stats, defines)
482+
head_rows = calculate_call_stats(head_stats, defines)
451483
rows = join_rows(base_rows, head_rows)
452-
rows.sort(key=lambda x: -float(x[-1][:-1]))
484+
rows.sort(key=lambda x: -percentage_to_float(x[-1]))
453485
emit_table(
454486
("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
455487
rows
@@ -584,9 +616,9 @@ def output_single_stats(stats):
584616
total = get_total(opcode_stats)
585617
emit_execution_counts(opcode_stats, total)
586618
emit_pair_counts(opcode_stats, total)
587-
emit_specialization_stats(opcode_stats)
588-
emit_specialization_overview(opcode_stats, total)
589-
emit_call_stats(stats)
619+
emit_specialization_stats(opcode_stats, stats["_defines"])
620+
emit_specialization_overview(opcode_stats, total, stats["_specialized_instructions"])
621+
emit_call_stats(stats, stats["_stats_defines"])
590622
emit_object_stats(stats)
591623
emit_gc_stats(stats)
592624
with Section("Meta stats", summary="Meta statistics"):
@@ -604,12 +636,13 @@ def output_comparative_stats(base_stats, head_stats):
604636
base_opcode_stats, base_total, head_opcode_stats, head_total
605637
)
606638
emit_comparative_specialization_stats(
607-
base_opcode_stats, head_opcode_stats
639+
base_opcode_stats, head_opcode_stats, head_stats["_defines"]
608640
)
609641
emit_comparative_specialization_overview(
610-
base_opcode_stats, base_total, head_opcode_stats, head_total
642+
base_opcode_stats, base_total, head_opcode_stats, head_total,
643+
head_stats["_specialized_instructions"]
611644
)
612-
emit_comparative_call_stats(base_stats, head_stats)
645+
emit_comparative_call_stats(base_stats, head_stats, head_stats["_stats_defines"])
613646
emit_comparative_object_stats(base_stats, head_stats)
614647
emit_comparative_gc_stats(base_stats, head_stats)
615648

0 commit comments

Comments
 (0)