diff --git a/.github/actions/build_analytics/action.yml b/.github/actions/build_analytics/action.yml index a9f219ed40a0..2f50d88840a1 100644 --- a/.github/actions/build_analytics/action.yml +++ b/.github/actions/build_analytics/action.yml @@ -33,7 +33,8 @@ runs: export build_preset="${{ inputs.build_preset }}" export build_target="${{ inputs.build_target }}" ./ya tool bloat --linker-map ydb/apps/ydbd/ydbd.map.lld --input ydb/apps/ydbd/ydbd --save-html ya_bloat_html --save-json bloat.json - ./ydb/ci/build_bloat/template_bloat.py -j bloat.json -o template_bloat -t template_bloat_html + # disabled until we learn to parse new version of bloat.json + # ./ydb/ci/build_bloat/template_bloat.py -j bloat.json -o template_bloat -t template_bloat_html ./ydb/ci/build_bloat/main.py --build-dir . --html-dir-cpp html_cpp_impact --html-dir-headers html_headers_impact ./ydb/ci/build_bloat/ydb_upload.py --html-dir-cpp html_cpp_impact --html-dir-headers html_headers_impact @@ -44,16 +45,17 @@ runs: s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "ya_bloat_html/" "$S3_BUCKET_PATH/ya_bloat_html/" echo "[ya bloat tool]($S3_URL_PREFIX/ya_bloat_html/tree_map.html) (better use Safari, because it is too large for Chrome)" >> $GITHUB_STEP_SUMMARY - s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_size.txt" "$S3_BUCKET_PATH/template_bloat.by_size.txt" - TEMPLATE_BLOAT_BY_SIZE_URL=$S3_URL_PREFIX/template_bloat.by_size.txt + # disabled until we learn to parse new version of bloat.json + # s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_size.txt" "$S3_BUCKET_PATH/template_bloat.by_size.txt" + # TEMPLATE_BLOAT_BY_SIZE_URL=$S3_URL_PREFIX/template_bloat.by_size.txt - s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_count.txt" "$S3_BUCKET_PATH/template_bloat.by_count.txt" - TEMPLATE_BLOAT_BY_COUNT_URL=$S3_URL_PREFIX/template_bloat.by_count.txt + # s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat.by_count.txt" "$S3_BUCKET_PATH/template_bloat.by_count.txt" + # TEMPLATE_BLOAT_BY_COUNT_URL=$S3_URL_PREFIX/template_bloat.by_count.txt - s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat_html/" "$S3_BUCKET_PATH/template_bloat_html/" - TEMPLATE_BLOAT_TREEMAP_URL=$S3_URL_PREFIX/template_bloat_html/tree_map.html + # s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "template_bloat_html/" "$S3_BUCKET_PATH/template_bloat_html/" + # TEMPLATE_BLOAT_TREEMAP_URL=$S3_URL_PREFIX/template_bloat_html/tree_map.html - echo "[template bloat]($TEMPLATE_BLOAT_TREEMAP_URL) ([sorted by size]($TEMPLATE_BLOAT_BY_SIZE_URL), [sorted by count]($TEMPLATE_BLOAT_BY_COUNT_URL))" >> $GITHUB_STEP_SUMMARY + # echo "[template bloat]($TEMPLATE_BLOAT_TREEMAP_URL) ([sorted by size]($TEMPLATE_BLOAT_BY_SIZE_URL), [sorted by count]($TEMPLATE_BLOAT_BY_COUNT_URL))" >> $GITHUB_STEP_SUMMARY s3cmd sync -r --acl-public --stats --no-progress --no-mime-magic --guess-mime-type --no-check-md5 "html_cpp_impact/" "$S3_BUCKET_PATH/html_cpp_impact/" echo "[cpp compilation time]($S3_URL_PREFIX/html_cpp_impact/tree_map.html)" >> $GITHUB_STEP_SUMMARY diff --git a/ydb/ci/build_bloat/main.py b/ydb/ci/build_bloat/main.py index e8a110065d2f..e3b7ec467e2e 100755 --- a/ydb/ci/build_bloat/main.py +++ b/ydb/ci/build_bloat/main.py @@ -62,30 +62,27 @@ def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> l with open(path) as f: obj = json.load(f) - include_events = [] # (time, +-1, path) + current_includes_stack = [] + current_ts_stack = [] + tree_path_to_sum_duration = {} for event in obj["traceEvents"]: - if event["name"] == "Source": - path = event["args"]["detail"] - time_stamp = event["ts"] - duration = event["dur"] - include_events.append((time_stamp, +1, path, duration)) - include_events.append((time_stamp + duration, -1, path, duration)) - - include_events.sort(key=lambda event: (event[0], -event[1])) + if event["name"] != "Source": + continue - tree_path_to_sum_duration = {} - current_includes_stack = [] - - for time_stamp, ev, path, duration in include_events: - if ev == 1: + ts = event["ts"] + ph = event.get("ph") + if ph == "b": # begin event + path = event["args"]["detail"] current_includes_stack.append(sanitize_path(path, base_src_dir)) + current_ts_stack.append(ts) + elif ph == "e": # end event tree_path = tuple(current_includes_stack) prev = tree_path_to_sum_duration.get(tree_path, 0) + duration = ts - current_ts_stack[-1] tree_path_to_sum_duration[tree_path] = prev + duration - else: - assert current_includes_stack[-1] == sanitize_path(path, base_src_dir) current_includes_stack.pop() + current_ts_stack.pop() # filter small entities tree_paths_to_include = set() @@ -231,19 +228,42 @@ def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int, obj = json.load(f) cpp_file = None - include_events = [] # (time, +-1, path) + include_events = [] # (timestamp, +1/-1, path) - for event in obj["traceEvents"]: - if event["name"] == "Source": - path = event["args"]["detail"] - path = sanitize_path(path, base_src_dir) - time_stamp = event["ts"] - duration = event["dur"] - include_events.append((time_stamp, +1, path)) - include_events.append((time_stamp + duration, -1, path)) + # we will store "begin events" until we see matching "end" + source_begin = {} # path -> timestamp + source_stack = [] # stack of paths in open order - if event["name"] == "OptModule": - cpp_file = event["args"]["detail"] + for event in obj["traceEvents"]: + name = event["name"] + args = event.get("args", {}) + ph = event["ph"] + ts = event["ts"] + + if name == "Source" and ph == "b": # begin event + path = sanitize_path(args["detail"], base_src_dir) + source_begin[path] = ts + source_stack.append(path) + + elif name == "Source" and ph == "e": # end event + detail = args.get("detail") + if detail is not None: + path = sanitize_path(detail, base_src_dir) + else: + assert source_stack, "Source end without any open begin" + path = source_stack[-1] + + ts_begin = source_begin.pop(path, None) + assert ts_begin is not None + + if source_stack and source_stack[-1] == path: + source_stack.pop() + + include_events.append((ts_begin, +1, path)) + include_events.append((ts, -1, path)) + + elif name == "OptModule": + cpp_file = args["detail"] path_to_time = {} last_time_stamp = 0