Skip to content

Commit 20ae461

Browse files
authored
Fix bloat-gathering script (#29822)
1 parent fc12fe8 commit 20ae461

File tree

2 files changed

+57
-35
lines changed

2 files changed

+57
-35
lines changed

.github/actions/build_analytics/action.yml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ runs:
3333
export build_preset="${{ inputs.build_preset }}"
3434
export build_target="${{ inputs.build_target }}"
3535
./ya tool bloat --linker-map ydb/apps/ydbd/ydbd.map.lld --input ydb/apps/ydbd/ydbd --save-html ya_bloat_html --save-json bloat.json
36-
./ydb/ci/build_bloat/template_bloat.py -j bloat.json -o template_bloat -t template_bloat_html
36+
# disabled until we learn to parse the new version of bloat.json
37+
# ./ydb/ci/build_bloat/template_bloat.py -j bloat.json -o template_bloat -t template_bloat_html
3738
./ydb/ci/build_bloat/main.py --build-dir . --html-dir-cpp html_cpp_impact --html-dir-headers html_headers_impact
3839
./ydb/ci/build_bloat/ydb_upload.py --html-dir-cpp html_cpp_impact --html-dir-headers html_headers_impact
3940
@@ -44,16 +45,17 @@ runs:
4445
s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "ya_bloat_html/" "$S3_BUCKET_PATH/ya_bloat_html/"
4546
echo "[ya bloat tool]($S3_URL_PREFIX/ya_bloat_html/tree_map.html) (better use Safari, because it is too large for Chrome)" >> $GITHUB_STEP_SUMMARY
4647
47-
s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_size.txt" "$S3_BUCKET_PATH/template_bloat.by_size.txt"
48-
TEMPLATE_BLOAT_BY_SIZE_URL=$S3_URL_PREFIX/template_bloat.by_size.txt
48+
# disabled until we learn to parse the new version of bloat.json
49+
# s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_size.txt" "$S3_BUCKET_PATH/template_bloat.by_size.txt"
50+
# TEMPLATE_BLOAT_BY_SIZE_URL=$S3_URL_PREFIX/template_bloat.by_size.txt
4951
50-
s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_count.txt" "$S3_BUCKET_PATH/template_bloat.by_count.txt"
51-
TEMPLATE_BLOAT_BY_COUNT_URL=$S3_URL_PREFIX/template_bloat.by_count.txt
52+
# s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_count.txt" "$S3_BUCKET_PATH/template_bloat.by_count.txt"
53+
# TEMPLATE_BLOAT_BY_COUNT_URL=$S3_URL_PREFIX/template_bloat.by_count.txt
5254
53-
s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat_html/" "$S3_BUCKET_PATH/template_bloat_html/"
54-
TEMPLATE_BLOAT_TREEMAP_URL=$S3_URL_PREFIX/template_bloat_html/tree_map.html
55+
# s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat_html/" "$S3_BUCKET_PATH/template_bloat_html/"
56+
# TEMPLATE_BLOAT_TREEMAP_URL=$S3_URL_PREFIX/template_bloat_html/tree_map.html
5557
56-
echo "[template bloat]($TEMPLATE_BLOAT_TREEMAP_URL) ([sorted by size]($TEMPLATE_BLOAT_BY_SIZE_URL), [sorted by count]($TEMPLATE_BLOAT_BY_COUNT_URL))" >> $GITHUB_STEP_SUMMARY
58+
# echo "[template bloat]($TEMPLATE_BLOAT_TREEMAP_URL) ([sorted by size]($TEMPLATE_BLOAT_BY_SIZE_URL), [sorted by count]($TEMPLATE_BLOAT_BY_COUNT_URL))" >> $GITHUB_STEP_SUMMARY
5759
5860
s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "html_cpp_impact/" "$S3_BUCKET_PATH/html_cpp_impact/"
5961
echo "[cpp compilation time]($S3_URL_PREFIX/html_cpp_impact/tree_map.html)" >> $GITHUB_STEP_SUMMARY

ydb/ci/build_bloat/main.py

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -62,30 +62,27 @@ def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> l
6262
with open(path) as f:
6363
obj = json.load(f)
6464

65-
include_events = [] # (time, +-1, path)
65+
current_includes_stack = []
66+
current_ts_stack = []
67+
tree_path_to_sum_duration = {}
6668

6769
for event in obj["traceEvents"]:
68-
if event["name"] == "Source":
69-
path = event["args"]["detail"]
70-
time_stamp = event["ts"]
71-
duration = event["dur"]
72-
include_events.append((time_stamp, +1, path, duration))
73-
include_events.append((time_stamp + duration, -1, path, duration))
74-
75-
include_events.sort(key=lambda event: (event[0], -event[1]))
70+
if event["name"] != "Source":
71+
continue
7672

77-
tree_path_to_sum_duration = {}
78-
current_includes_stack = []
79-
80-
for time_stamp, ev, path, duration in include_events:
81-
if ev == 1:
73+
ts = event["ts"]
74+
ph = event.get("ph")
75+
if ph == "b": # begin event
76+
path = event["args"]["detail"]
8277
current_includes_stack.append(sanitize_path(path, base_src_dir))
78+
current_ts_stack.append(ts)
79+
elif ph == "e": # end event
8380
tree_path = tuple(current_includes_stack)
8481
prev = tree_path_to_sum_duration.get(tree_path, 0)
82+
duration = ts - current_ts_stack[-1]
8583
tree_path_to_sum_duration[tree_path] = prev + duration
86-
else:
87-
assert current_includes_stack[-1] == sanitize_path(path, base_src_dir)
8884
current_includes_stack.pop()
85+
current_ts_stack.pop()
8986

9087
# filter small entities
9188
tree_paths_to_include = set()
@@ -231,19 +228,42 @@ def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int,
231228
obj = json.load(f)
232229

233230
cpp_file = None
234-
include_events = [] # (time, +-1, path)
231+
include_events = [] # (timestamp, +1/-1, path)
235232

236-
for event in obj["traceEvents"]:
237-
if event["name"] == "Source":
238-
path = event["args"]["detail"]
239-
path = sanitize_path(path, base_src_dir)
240-
time_stamp = event["ts"]
241-
duration = event["dur"]
242-
include_events.append((time_stamp, +1, path))
243-
include_events.append((time_stamp + duration, -1, path))
233+
# store each "begin" event until we see its matching "end" event
234+
source_begin = {} # path -> timestamp
235+
source_stack = [] # stack of paths in open order
244236

245-
if event["name"] == "OptModule":
246-
cpp_file = event["args"]["detail"]
237+
for event in obj["traceEvents"]:
238+
name = event["name"]
239+
args = event.get("args", {})
240+
ph = event["ph"]
241+
ts = event["ts"]
242+
243+
if name == "Source" and ph == "b": # begin event
244+
path = sanitize_path(args["detail"], base_src_dir)
245+
source_begin[path] = ts
246+
source_stack.append(path)
247+
248+
elif name == "Source" and ph == "e": # end event
249+
detail = args.get("detail")
250+
if detail is not None:
251+
path = sanitize_path(detail, base_src_dir)
252+
else:
253+
assert source_stack, "Source end without any open begin"
254+
path = source_stack[-1]
255+
256+
ts_begin = source_begin.pop(path, None)
257+
assert ts_begin is not None
258+
259+
if source_stack and source_stack[-1] == path:
260+
source_stack.pop()
261+
262+
include_events.append((ts_begin, +1, path))
263+
include_events.append((ts, -1, path))
264+
265+
elif name == "OptModule":
266+
cpp_file = args["detail"]
247267

248268
path_to_time = {}
249269
last_time_stamp = 0

0 commit comments

Comments
 (0)