Skip to content

Commit 4fb634e

Browse files
refactor: defer zip manifest building to execution phase to improve analysis phase performance (#3381)
When py_binary/py_test were being built, they were flattening the runfiles depsets at analysis time in order to create the zip file mapping manifest for their implicit zipapp outputs. This flattening was necessary because they had to filter out the original main executable from the runfiles that didn't belong in the zipapp. This flattening is expensive for large builds, in some cases adding over 400 seconds of time and significant memory overhead. To fix, have the zip file manifest use the `runfiles_with_exe` object, which is the runfiles, but pre-filtered for the files zip building doesn't want. This then allows passing the depsets directly to `Args.add_all` and using map_each to transform them. Additionally, pass `runfiles.empty_filenames` using a lambda. Accessing that attribute implicitly flattens the runfiles. Finally, because the original profiles indicated `str.format()` was a non-trivial amount of time (46 seconds / 15% of build time), switch to using `+` instead. This is a more incremental alternative to #3380 which achieves _most_ of the same optimization with only Starlark changes, as opposed to introducing an external script written in C++. [Profile of a large build](https://github.com/user-attachments/assets/e90ae699-a04d-44df-b53c-1156aa890af5), which shows a Starlark CPU profile. It shows an overall build time of 305 seconds. 46 seconds (15%) are spent in `map_zip_runfiles`, half of which is in `str.startswith()` and the other half in `str.format()`. --------- Co-authored-by: Richard Levasseur <[email protected]>
1 parent c037d83 commit 4fb634e

File tree

2 files changed

+35
-24
lines changed

2 files changed

+35
-24
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ END_UNRELEASED_TEMPLATE
7373
([#3085](https://github.com/bazel-contrib/rules_python/issues/3085)).
7474
* (toolchains) local toolchains now tell the `sys.abiflags` value of the
7575
underlying runtime.
76+
* (performance) 90% reduction in py_binary/py_test analysis phase cost.
77+
([#3381](https://github.com/bazel-contrib/rules_python/pull/3381)).
7678

7779
{#v0-0-0-added}
7880
### Added

python/private/py_executable.bzl

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -380,9 +380,8 @@ def _create_executable(
380380
_create_zip_file(
381381
ctx,
382382
output = zip_file,
383-
original_nonzip_executable = executable,
384383
zip_main = zip_main,
385-
runfiles = runfiles_details.default_runfiles.merge(extra_runfiles),
384+
runfiles = runfiles_details.runfiles_without_exe.merge(extra_runfiles),
386385
)
387386

388387
extra_files_to_build = []
@@ -803,7 +802,7 @@ def _create_windows_exe_launcher(
803802
use_default_shell_env = True,
804803
)
805804

806-
def _create_zip_file(ctx, *, output, original_nonzip_executable, zip_main, runfiles):
805+
def _create_zip_file(ctx, *, output, zip_main, runfiles):
807806
"""Create a Python zipapp (zip with __main__.py entry point)."""
808807
workspace_name = ctx.workspace_name
809808
legacy_external_runfiles = _py_builtins.get_legacy_external_runfiles(ctx)
@@ -819,17 +818,27 @@ def _create_zip_file(ctx, *, output, original_nonzip_executable, zip_main, runfi
819818
_get_zip_runfiles_path("__init__.py", workspace_name, legacy_external_runfiles),
820819
),
821820
)
822-
for path in runfiles.empty_filenames.to_list():
823-
manifest.add("{}=".format(_get_zip_runfiles_path(path, workspace_name, legacy_external_runfiles)))
821+
822+
def map_zip_empty_filenames(list_paths_cb):
823+
return [
824+
_get_zip_runfiles_path(path, workspace_name, legacy_external_runfiles) + "="
825+
for path in list_paths_cb().to_list()
826+
]
827+
828+
manifest.add_all(
829+
# NOTE: Accessing runfiles.empty_filenames implicitly flattens the runfiles.
830+
# Smuggle a lambda in via a list to defer that flattening.
831+
[lambda: runfiles.empty_filenames],
832+
map_each = map_zip_empty_filenames,
833+
allow_closure = True,
834+
)
824835

825836
def map_zip_runfiles(file):
826-
if file != original_nonzip_executable and file != output:
827-
return "{}={}".format(
828-
_get_zip_runfiles_path(file.short_path, workspace_name, legacy_external_runfiles),
829-
file.path,
830-
)
831-
else:
832-
return None
837+
return (
838+
# NOTE: Use "+" for performance
839+
_get_zip_runfiles_path(file.short_path, workspace_name, legacy_external_runfiles) +
840+
"=" + file.path
841+
)
833842

834843
manifest.add_all(runfiles.files, map_each = map_zip_runfiles, allow_closure = True)
835844

@@ -850,37 +859,37 @@ def _create_zip_file(ctx, *, output, original_nonzip_executable, zip_main, runfi
850859
))
851860
inputs.append(zip_repo_mapping_manifest)
852861

853-
for artifact in runfiles.files.to_list():
854-
# Don't include the original executable because it isn't used by the
855-
# zip file, so no need to build it for the action.
856-
# Don't include the zipfile itself because it's an output.
857-
if artifact != original_nonzip_executable and artifact != output:
858-
inputs.append(artifact)
859-
860862
zip_cli_args = ctx.actions.args()
861863
zip_cli_args.add("cC")
862864
zip_cli_args.add(output)
863865

864866
ctx.actions.run(
865867
executable = ctx.executable._zipper,
866868
arguments = [zip_cli_args, manifest],
867-
inputs = depset(inputs),
869+
inputs = depset(inputs, transitive = [runfiles.files]),
868870
outputs = [output],
869871
use_default_shell_env = True,
870872
mnemonic = "PythonZipper",
871873
progress_message = "Building Python zip: %{label}",
872874
)
873875

874876
def _get_zip_runfiles_path(path, workspace_name, legacy_external_runfiles):
877+
maybe_workspace = ""
875878
if legacy_external_runfiles and path.startswith(_EXTERNAL_PATH_PREFIX):
876-
zip_runfiles_path = paths.relativize(path, _EXTERNAL_PATH_PREFIX)
879+
zip_runfiles_path = path.removeprefix(_EXTERNAL_PATH_PREFIX)
877880
else:
878881
# NOTE: External runfiles (artifacts in other repos) will have a leading
879882
# path component of "../" so that they refer outside the main workspace
880-
# directory and into the runfiles root. By normalizing, we simplify e.g.
883+
# directory and into the runfiles root. So we simplify it, e.g.
881884
# "workspace/../foo/bar" to simply "foo/bar".
882-
zip_runfiles_path = paths.normalize("{}/{}".format(workspace_name, path))
883-
return "{}/{}".format(_ZIP_RUNFILES_DIRECTORY_NAME, zip_runfiles_path)
885+
if path.startswith("../"):
886+
zip_runfiles_path = path[3:]
887+
else:
888+
zip_runfiles_path = path
889+
maybe_workspace = workspace_name + "/"
890+
891+
# NOTE: Use "+" for performance
892+
return _ZIP_RUNFILES_DIRECTORY_NAME + "/" + maybe_workspace + zip_runfiles_path
884893

885894
def _create_executable_zip_file(
886895
ctx,

0 commit comments

Comments
 (0)