From 9dd7589e4df6b8fccf7d147886ad7a0b77de6566 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 27 May 2025 23:09:54 +0900 Subject: [PATCH 1/7] wip: add symlinks for .dist-info folders in the venv This adds the necessary `.dist-info` files into the mix and should get us reasonably close to handling 99% of the cases. The expected differences from this and a `venv` built by `uv` would be: * Shared libraries are stored in `.libs` in `uv` venvs. This can be achieved in `rules_python` by changing the `installer` settings in the `wheel_installer/wheel.py#unzip` function. * The `RECORD` files are excluded from the `venv`s for better cache hit rate in `bazel`, however I am not sure if we should do that for actual wheels that are downloaded from the internet. Tested: - [x] Building the `//docs` and manually checking the symlinks. - [ ] Unit tests Work towards #2156 --- python/private/BUILD.bazel | 1 + python/private/common.bzl | 4 ++-- python/private/py_executable.bzl | 12 ++++++++-- python/private/py_info.bzl | 9 +++++-- python/private/py_library.bzl | 40 ++++++++++++++++++++++++++++++-- 5 files changed, 58 insertions(+), 8 deletions(-) diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index ce22421300..20c8a9ba53 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -444,6 +444,7 @@ bzl_library( ":attributes_bzl", ":common_bzl", ":flags_bzl", + ":normalize_name_bzl", ":precompile_bzl", ":py_cc_link_params_info_bzl", ":py_internal_bzl", diff --git a/python/private/common.bzl b/python/private/common.bzl index a58a9c00a4..95774b8222 100644 --- a/python/private/common.bzl +++ b/python/private/common.bzl @@ -396,8 +396,8 @@ def create_py_info( implicit_pyc_files: {type}`depset[File]` Implicitly generated pyc files that a binary can choose to include. imports: depset of strings; the import path values to propagate. - site_packages_symlinks: {type}`list[tuple[str, str]]` tuples of - `(runfiles_path, site_packages_path)` for symlinks to create + site_packages_symlinks: {type}`list[tuple[str, str, str]]` tuples of + `(normalized_package_name, runfiles_path, site_packages_path)` for symlinks to create in the consuming binary's venv site packages. Returns: diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 24be8dd2ad..f35ca8a50a 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -671,11 +671,19 @@ def _create_site_packages_symlinks(ctx, site_packages): def _build_link_map(entries): link_map = {} - for link_to_runfiles_path, site_packages_path in entries: - if site_packages_path in link_map: + package_map = {} + for link_to_runfiles_path, site_packages_path, package in entries: + if package and package in package_map: # We ignore duplicates by design. The dependency closer to the # binary gets precedence due to the topological ordering. continue + elif site_packages_path in link_map: + # The packages are not duplicate, but have overlapping paths, for + # now we ignore these. + continue + elif package: + package_map[package] = site_packages_path + link_map[site_packages_path] = link_to_runfiles_path else: link_map[site_packages_path] = link_to_runfiles_path diff --git a/python/private/py_info.bzl b/python/private/py_info.bzl index d175eefb69..5ca4bf4173 100644 --- a/python/private/py_info.bzl +++ b/python/private/py_info.bzl @@ -148,11 +148,11 @@ The order of the depset is not guaranteed and may be changed in the future. It is recommended to use `default` order (the default). """, "site_packages_symlinks": """ -:type: depset[tuple[str | None, str]] +:type: depset[tuple[str | None, str, str | None]] A depset with `topological` ordering. -Tuples of `(runfiles_path, site_packages_path)`. Where +Tuples of `(runfiles_path, site_packages_path, symlink_key)`. Where * `runfiles_path` is a runfiles-root relative path. It is the path that has the code to make importable. If `None` or empty string, then it means to not create a site packages directory with the `site_packages_path` @@ -161,6 +161,11 @@ Tuples of `(runfiles_path, site_packages_path)`. Where the venv for whatever creates the venv (typically py_binary). It makes the code in `runfiles_path` available for import. Note that this is created as a "raw" symlink (via `declare_symlink`). +* `symlink_key` is a path which should be checked instead of the + `site_packages_path`. This is typically used for dist-info directories as + they include version in the filenames and we want to ensure that we can + replace the whole package and not have orphan .dist-info packages in the + virtual environment. :::{include} /_includes/experimental_api.md ::: diff --git a/python/private/py_library.bzl b/python/private/py_library.bzl index fd9dad9f20..e2b1168a5c 100644 --- a/python/private/py_library.bzl +++ b/python/private/py_library.bzl @@ -41,6 +41,7 @@ load( "runfiles_root_path", ) load(":flags.bzl", "AddSrcsToRunfilesFlag", "PrecompileFlag", "VenvsSitePackages") +load(":normalize_name.bzl", "normalize_name") load(":precompile.bzl", "maybe_precompile") load(":py_cc_link_params_info.bzl", "PyCcLinkParamsInfo") load(":py_info.bzl", "PyInfo") @@ -206,16 +207,32 @@ Source files are no longer added to the runfiles directly. ::: """ +def _get_distinfo_metadata(ctx): + data = ctx.files.data or [] + for d in data: + # work on case insensitive FSes + if d.basename.lower() != "metadata": + continue + + if d.dirname.endswith(".dist-info"): + return d + + return None + def _get_imports_and_site_packages_symlinks(ctx, semantics): imports = depset() site_packages_symlinks = depset() if VenvsSitePackages.is_enabled(ctx): - site_packages_symlinks = _get_site_packages_symlinks(ctx) + dist_info_metadata = _get_distinfo_metadata(ctx) + site_packages_symlinks = _get_site_packages_symlinks( + ctx, + dist_info_metadata, + ) else: imports = collect_imports(ctx, semantics) return imports, site_packages_symlinks -def _get_site_packages_symlinks(ctx): +def _get_site_packages_symlinks(ctx, dist_info_metadata): imports = ctx.attr.imports if len(imports) == 0: fail("When venvs_site_packages is enabled, exactly one `imports` " + @@ -254,6 +271,23 @@ def _get_site_packages_symlinks(ctx): repo_runfiles_dirname = None dirs_with_init = {} # dirname -> runfile path site_packages_symlinks = [] + if dist_info_metadata: + # in order to be able to have replacements in the venv, we have to add a + # third value into the site_packages_symlinks, which would be the normalized + # package name. This allows us to ensure that we can replace the `dist-info` + # directories by checking if the package key is there. + dist_info_dir = paths.basename(dist_info_metadata.dirname) + package, _, _suffix = dist_info_dir.rpartition(".dist-info") + package, _, _version = package.rpartition("-") + symlink_key = "{}.dist-info".format(normalize_name(package)) + + repo_runfiles_dirname = runfiles_root_path(ctx, dist_info_metadata.short_path).partition("/")[0] + site_packages_symlinks.append(( + paths.join(repo_runfiles_dirname, site_packages_root, dist_info_dir), + dist_info_dir, + symlink_key, + )) + for src in ctx.files.srcs: if src.extension not in PYTHON_FILE_EXTENSIONS: continue @@ -274,6 +308,7 @@ def _get_site_packages_symlinks(ctx): site_packages_symlinks.append(( paths.join(repo_runfiles_dirname, site_packages_root, filename), filename, + None, )) # Sort so that we encounter `foo` before `foo/bar`. This ensures we @@ -294,6 +329,7 @@ def _get_site_packages_symlinks(ctx): site_packages_symlinks.append(( paths.join(repo_runfiles_dirname, site_packages_root, dirname), dirname, + None, )) return site_packages_symlinks From 42e5059e72049085cfb4efe74222463d46134068 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Thu, 29 May 2025 22:46:29 +0900 Subject: [PATCH 2/7] fixup --- python/private/py_executable.bzl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index aef02e59d5..7c3e0cb757 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -689,13 +689,6 @@ def _build_link_map(entries): # We ignore duplicates by design. The dependency closer to the # binary gets precedence due to the topological ordering. continue - elif site_packages_path in link_map: - # The packages are not duplicate, but have overlapping paths, for - # now we ignore these. - continue - elif package: - package_map[package] = site_packages_path - link_map[site_packages_path] = link_to_runfiles_path else: kind_map[entry.venv_path] = entry.link_to_path From d59f85a7ce88523c3f61ccef530afdf9264f3313 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Thu, 29 May 2025 23:07:07 +0900 Subject: [PATCH 3/7] add a failing test --- .bazelrc | 4 ++-- python/private/py_info.bzl | 1 + python/private/py_library.bzl | 2 +- tests/modules/other/simple_v1/BUILD.bazel | 14 ++++++++++++++ .../site-packages/simple-1.0.0.dist-info/METADATA | 1 + .../simple_v1/site-packages/simple/__init__.py | 1 + tests/modules/other/simple_v2/BUILD.bazel | 14 ++++++++++++++ .../site-packages/simple-2.0.0.dist-info/METADATA | 1 + .../simple_v2/site-packages/simple/__init__.py | 1 + tests/venv_site_packages_libs/BUILD.bazel | 15 +++++++++++++++ tests/venv_site_packages_libs/bin.py | 15 +++++++++++++++ 11 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 tests/modules/other/simple_v1/BUILD.bazel create mode 100644 tests/modules/other/simple_v1/site-packages/simple-1.0.0.dist-info/METADATA create mode 100644 tests/modules/other/simple_v1/site-packages/simple/__init__.py create mode 100644 tests/modules/other/simple_v2/BUILD.bazel create mode 100644 tests/modules/other/simple_v2/site-packages/simple-2.0.0.dist-info/METADATA create mode 100644 tests/modules/other/simple_v2/site-packages/simple/__init__.py diff --git a/.bazelrc b/.bazelrc index 7e744fb67a..aada656fbc 100644 --- a/.bazelrc +++ b/.bazelrc @@ -4,8 +4,8 @@ # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, execute # `bazel run @rules_bazel_integration_test//tools:update_deleted_packages` -build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single -query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single +build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single,tests/modules/other/simple_v1,tests/modules/other/simple_v2 +query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single,tests/modules/other/simple_v1,tests/modules/other/simple_v2 test --test_output=errors diff --git a/python/private/py_info.bzl b/python/private/py_info.bzl index 0aa9c966cb..bf71ee557a 100644 --- a/python/private/py_info.bzl +++ b/python/private/py_info.bzl @@ -45,6 +45,7 @@ def _VenvSymlinkKind_typedef(): VenvSymlinkKind = struct( TYPEDEF = _VenvSymlinkKind_typedef, BIN = "BIN", + DISTINFO = "DISTINFO", LIB = "LIB", INCLUDE = "INCLUDE", ) diff --git a/python/private/py_library.bzl b/python/private/py_library.bzl index acb15f21f6..3e237f571e 100644 --- a/python/private/py_library.bzl +++ b/python/private/py_library.bzl @@ -283,7 +283,7 @@ def _get_venv_symlinks(ctx, dist_info_metadata): repo_runfiles_dirname = runfiles_root_path(ctx, dist_info_metadata.short_path).partition("/")[0] venv_symlinks.append(VenvSymlinkEntry( - kind = VenvSymlinkKind.LIB, + kind = VenvSymlinkKind.DISTINFO, link_to_path = paths.join(repo_runfiles_dirname, site_packages_root, dist_info_dir), venv_path = dist_info_dir, )) diff --git a/tests/modules/other/simple_v1/BUILD.bazel b/tests/modules/other/simple_v1/BUILD.bazel new file mode 100644 index 0000000000..da5db8164a --- /dev/null +++ b/tests/modules/other/simple_v1/BUILD.bazel @@ -0,0 +1,14 @@ +load("@rules_python//python:py_library.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "simple_v1", + srcs = glob(["site-packages/**/*.py"]), + data = glob( + ["**/*"], + exclude = ["site-packages/**/*.py"], + ), + experimental_venvs_site_packages = "@rules_python//python/config_settings:venvs_site_packages", + imports = [package_name() + "/site-packages"], +) diff --git a/tests/modules/other/simple_v1/site-packages/simple-1.0.0.dist-info/METADATA b/tests/modules/other/simple_v1/site-packages/simple-1.0.0.dist-info/METADATA new file mode 100644 index 0000000000..ee76ec48a4 --- /dev/null +++ b/tests/modules/other/simple_v1/site-packages/simple-1.0.0.dist-info/METADATA @@ -0,0 +1 @@ +inside is v1 diff --git a/tests/modules/other/simple_v1/site-packages/simple/__init__.py b/tests/modules/other/simple_v1/site-packages/simple/__init__.py new file mode 100644 index 0000000000..bb7b160deb --- /dev/null +++ b/tests/modules/other/simple_v1/site-packages/simple/__init__.py @@ -0,0 +1 @@ +# Intentionally empty diff --git a/tests/modules/other/simple_v2/BUILD.bazel b/tests/modules/other/simple_v2/BUILD.bazel new file mode 100644 index 0000000000..7648827383 --- /dev/null +++ b/tests/modules/other/simple_v2/BUILD.bazel @@ -0,0 +1,14 @@ +load("@rules_python//python:py_library.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "simple_v2", + srcs = glob(["site-packages/**/*.py"]), + data = glob( + ["**/*"], + exclude = ["site-packages/**/*.py"], + ), + experimental_venvs_site_packages = "@rules_python//python/config_settings:venvs_site_packages", + imports = [package_name() + "/site-packages"], +) diff --git a/tests/modules/other/simple_v2/site-packages/simple-2.0.0.dist-info/METADATA b/tests/modules/other/simple_v2/site-packages/simple-2.0.0.dist-info/METADATA new file mode 100644 index 0000000000..ee76ec48a4 --- /dev/null +++ b/tests/modules/other/simple_v2/site-packages/simple-2.0.0.dist-info/METADATA @@ -0,0 +1 @@ +inside is v1 diff --git a/tests/modules/other/simple_v2/site-packages/simple/__init__.py b/tests/modules/other/simple_v2/site-packages/simple/__init__.py new file mode 100644 index 0000000000..bb7b160deb --- /dev/null +++ b/tests/modules/other/simple_v2/site-packages/simple/__init__.py @@ -0,0 +1 @@ +# Intentionally empty diff --git a/tests/venv_site_packages_libs/BUILD.bazel b/tests/venv_site_packages_libs/BUILD.bazel index d5a4fe6750..d68436c54d 100644 --- a/tests/venv_site_packages_libs/BUILD.bazel +++ b/tests/venv_site_packages_libs/BUILD.bazel @@ -1,6 +1,20 @@ +load("//python:py_library.bzl", "py_library") load("//tests/support:py_reconfig.bzl", "py_reconfig_test") load("//tests/support:support.bzl", "SUPPORTS_BOOTSTRAP_SCRIPT") +py_library( + name = "user_lib", + deps = ["@other//simple_v1"], +) + +py_library( + name = "closer_lib", + deps = [ + ":user_lib", + "@other//simple_v2", + ], +) + py_reconfig_test( name = "venvs_site_packages_libs_test", srcs = ["bin.py"], @@ -9,6 +23,7 @@ py_reconfig_test( target_compatible_with = SUPPORTS_BOOTSTRAP_SCRIPT, venvs_site_packages = "yes", deps = [ + ":closer_lib", "//tests/venv_site_packages_libs/nspkg_alpha", "//tests/venv_site_packages_libs/nspkg_beta", "@other//nspkg_delta", diff --git a/tests/venv_site_packages_libs/bin.py b/tests/venv_site_packages_libs/bin.py index 58572a2a1e..b2e1177ba0 100644 --- a/tests/venv_site_packages_libs/bin.py +++ b/tests/venv_site_packages_libs/bin.py @@ -2,6 +2,7 @@ import os import sys import unittest +from pathlib import Path class VenvSitePackagesLibraryTest(unittest.TestCase): @@ -27,6 +28,20 @@ def test_imported_from_venv(self): self.assert_imported_from_venv("nspkg.subnspkg.gamma") self.assert_imported_from_venv("nspkg.subnspkg.delta") self.assert_imported_from_venv("single_file") + self.assert_imported_from_venv("simple") + + def test_distinfo_is_overriden(self): + self.assert_imported_from_venv("simple") + module = importlib.import_module("simple") + module_path = Path(module.__file__) + site_packages = module_path.parent.parent + + dist_info_dirs = [p.name for p in site_packages.glob("*.dist-info")] + + self.assertEqual( + ["simple-2.0.0.dist-info"], + dist_info_dirs, + ) if __name__ == "__main__": From 714b245a8e106131433c509ad610ec5c48da4735 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 30 May 2025 17:11:43 +0900 Subject: [PATCH 4/7] add a test for topological ordering preference --- python/private/py_executable.bzl | 16 ++++++++-------- python/private/py_info.bzl | 6 +++++- python/private/py_library.bzl | 14 +++++++++----- .../simple_v1/site-packages/simple/__init__.py | 2 ++ .../simple_v2/site-packages/simple/__init__.py | 2 ++ tests/venv_site_packages_libs/bin.py | 4 ++++ 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 7c3e0cb757..13235b356a 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -680,17 +680,17 @@ def _create_venv_symlinks(ctx, venv_dir_map): return venv_files def _build_link_map(entries): - # dict[str kind, dict[str rel_path, str link_to_path]] + # dict[str kind, dict[str key or rel_path, tuple[str rel_path, str link_to_path]]] link_map = {} for entry in entries: kind = entry.kind kind_map = link_map.setdefault(kind, {}) - if entry.venv_path in kind_map: - # We ignore duplicates by design. The dependency closer to the - # binary gets precedence due to the topological ordering. - continue - else: - kind_map[entry.venv_path] = entry.link_to_path + + # We overwrite duplicates by design. The dependency closer to the + # binary gets precedence due to the topological ordering. + # + # This allows us to store only one version of the dist-info that is needed + kind_map[entry.key or entry.venv_path] = (entry.venv_path, entry.link_to_path) # An empty link_to value means to not create the site package symlink. # Because of the topological ordering, this allows binaries to remove @@ -710,7 +710,7 @@ def _build_link_map(entries): for _ in range(len(kind_map)): if not kind_map: break - dirname, value = kind_map.popitem() + _, (dirname, value) = kind_map.popitem() keep_kind_map[dirname] = value prefix = dirname + "/" # Add slash to prevent /X matching /XY for maybe_suffix in kind_map.keys(): diff --git a/python/private/py_info.bzl b/python/private/py_info.bzl index bf71ee557a..e3899d5835 100644 --- a/python/private/py_info.bzl +++ b/python/private/py_info.bzl @@ -45,7 +45,6 @@ def _VenvSymlinkKind_typedef(): VenvSymlinkKind = struct( TYPEDEF = _VenvSymlinkKind_typedef, BIN = "BIN", - DISTINFO = "DISTINFO", LIB = "LIB", INCLUDE = "INCLUDE", ) @@ -57,6 +56,11 @@ VenvSymlinkEntry = provider( An entry in `PyInfo.venv_symlinks` """, fields = { + "key": """ +:type: str | None + +Represents the value that gets used to key entries so that each package is represented only once. +""", "kind": """ :type: str diff --git a/python/private/py_library.bzl b/python/private/py_library.bzl index 3e237f571e..89c794f52b 100644 --- a/python/private/py_library.bzl +++ b/python/private/py_library.bzl @@ -41,6 +41,7 @@ load( "runfiles_root_path", ) load(":flags.bzl", "AddSrcsToRunfilesFlag", "PrecompileFlag", "VenvsSitePackages") +load(":normalize_name.bzl", "normalize_name") load(":precompile.bzl", "maybe_precompile") load(":py_cc_link_params_info.bzl", "PyCcLinkParamsInfo") load(":py_info.bzl", "PyInfo", "VenvSymlinkEntry", "VenvSymlinkKind") @@ -270,20 +271,21 @@ def _get_venv_symlinks(ctx, dist_info_metadata): repo_runfiles_dirname = None dirs_with_init = {} # dirname -> runfile path venv_symlinks = [] + package = "" if dist_info_metadata: # in order to be able to have replacements in the venv, we have to add a # third value into the venv_symlinks, which would be the normalized # package name. This allows us to ensure that we can replace the `dist-info` # directories by checking if the package key is there. dist_info_dir = paths.basename(dist_info_metadata.dirname) - #TODO remove - #package, _, _suffix = dist_info_dir.rpartition(".dist-info") - #package, _, _version = package.rpartition("-") - #symlink_key = "{}.dist-info".format(normalize_name(package)) + package, _, _suffix = dist_info_dir.rpartition(".dist-info") + package, _, _version = package.rpartition("-") + package = normalize_name(package) repo_runfiles_dirname = runfiles_root_path(ctx, dist_info_metadata.short_path).partition("/")[0] venv_symlinks.append(VenvSymlinkEntry( - kind = VenvSymlinkKind.DISTINFO, + key = "{}.dist-info".format(package), + kind = VenvSymlinkKind.LIB, link_to_path = paths.join(repo_runfiles_dirname, site_packages_root, dist_info_dir), venv_path = dist_info_dir, )) @@ -306,6 +308,7 @@ def _get_venv_symlinks(ctx, dist_info_metadata): # This would be files that do not have directories and we just need to add # direct symlinks to them as is: venv_symlinks.append(VenvSymlinkEntry( + key = None, kind = VenvSymlinkKind.LIB, link_to_path = paths.join(repo_runfiles_dirname, site_packages_root, filename), venv_path = filename, @@ -327,6 +330,7 @@ def _get_venv_symlinks(ctx, dist_info_metadata): for dirname in first_level_explicit_packages: venv_symlinks.append(VenvSymlinkEntry( + key = None, kind = VenvSymlinkKind.LIB, link_to_path = paths.join(repo_runfiles_dirname, site_packages_root, dirname), venv_path = dirname, diff --git a/tests/modules/other/simple_v1/site-packages/simple/__init__.py b/tests/modules/other/simple_v1/site-packages/simple/__init__.py index bb7b160deb..40d30392a8 100644 --- a/tests/modules/other/simple_v1/site-packages/simple/__init__.py +++ b/tests/modules/other/simple_v1/site-packages/simple/__init__.py @@ -1 +1,3 @@ # Intentionally empty + +__version__ = "1.0.0" diff --git a/tests/modules/other/simple_v2/site-packages/simple/__init__.py b/tests/modules/other/simple_v2/site-packages/simple/__init__.py index bb7b160deb..757693f1d4 100644 --- a/tests/modules/other/simple_v2/site-packages/simple/__init__.py +++ b/tests/modules/other/simple_v2/site-packages/simple/__init__.py @@ -1 +1,3 @@ # Intentionally empty + +__version__ = "2.0.0" diff --git a/tests/venv_site_packages_libs/bin.py b/tests/venv_site_packages_libs/bin.py index b2e1177ba0..24492f0e5e 100644 --- a/tests/venv_site_packages_libs/bin.py +++ b/tests/venv_site_packages_libs/bin.py @@ -38,6 +38,10 @@ def test_distinfo_is_overriden(self): dist_info_dirs = [p.name for p in site_packages.glob("*.dist-info")] + self.assertEqual( + "2.0.0", + module.__version__, + ) self.assertEqual( ["simple-2.0.0.dist-info"], dist_info_dirs, From 31e16ee9e373095b5560f437e9ac51b89bf42bfb Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 30 May 2025 17:39:09 +0900 Subject: [PATCH 5/7] correctly pop existing links for package overrides --- python/private/py_executable.bzl | 21 ++++++++++++++++--- python/private/py_info.bzl | 10 ++++----- python/private/py_library.bzl | 8 +++---- .../site-packages/simple/__init__.py | 2 -- .../site-packages/simple_extras/__init__.py | 1 + .../site-packages/simple/__init__.py | 2 -- tests/venv_site_packages_libs/bin.py | 16 +++++++++----- 7 files changed, 39 insertions(+), 21 deletions(-) create mode 100644 tests/modules/other/simple_v1/site-packages/simple_extras/__init__.py diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 13235b356a..461833584e 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -680,17 +680,32 @@ def _create_venv_symlinks(ctx, venv_dir_map): return venv_files def _build_link_map(entries): - # dict[str kind, dict[str key or rel_path, tuple[str rel_path, str link_to_path]]] + # dict[str kind, dict[str rel_path, str link_to_path]] link_map = {} + + # Here we store venv paths by package + # dict[str package, dict[str kind, list[str venv_path]]] + pkg_map = {} for entry in entries: kind = entry.kind kind_map = link_map.setdefault(kind, {}) + # If we detect that we are adding a dist-info for an already existing package + # we need to pop all of the previous symlinks from the link_map + if entry.venv_path.endswith(".dist-info") and entry.src in pkg_map: + # dist-info will come always first + for kind, dir_paths in pkg_map.pop(entry.src).items(): + for dir_path in dir_paths: + link_map[kind].pop(dir_path) + + pkg_venv_paths = pkg_map.setdefault(entry.src, {}).setdefault(entry.kind, []) + pkg_venv_paths.append(entry.venv_path) + # We overwrite duplicates by design. The dependency closer to the # binary gets precedence due to the topological ordering. # # This allows us to store only one version of the dist-info that is needed - kind_map[entry.key or entry.venv_path] = (entry.venv_path, entry.link_to_path) + kind_map[entry.venv_path] = entry.link_to_path # An empty link_to value means to not create the site package symlink. # Because of the topological ordering, this allows binaries to remove @@ -710,7 +725,7 @@ def _build_link_map(entries): for _ in range(len(kind_map)): if not kind_map: break - _, (dirname, value) = kind_map.popitem() + dirname, value = kind_map.popitem() keep_kind_map[dirname] = value prefix = dirname + "/" # Add slash to prevent /X matching /XY for maybe_suffix in kind_map.keys(): diff --git a/python/private/py_info.bzl b/python/private/py_info.bzl index e3899d5835..eecfe96e95 100644 --- a/python/private/py_info.bzl +++ b/python/private/py_info.bzl @@ -56,11 +56,6 @@ VenvSymlinkEntry = provider( An entry in `PyInfo.venv_symlinks` """, fields = { - "key": """ -:type: str | None - -Represents the value that gets used to key entries so that each package is represented only once. -""", "kind": """ :type: str @@ -72,6 +67,11 @@ the venv to create the path under. A runfiles-root relative path that `venv_path` will symlink to. If `None`, it means to not create a symlink. +""", + "src": """ +:type: str | None + +Represents the PyPI package that the code originates from. """, "venv_path": """ :type: str diff --git a/python/private/py_library.bzl b/python/private/py_library.bzl index 89c794f52b..0b6eed8a1b 100644 --- a/python/private/py_library.bzl +++ b/python/private/py_library.bzl @@ -271,7 +271,7 @@ def _get_venv_symlinks(ctx, dist_info_metadata): repo_runfiles_dirname = None dirs_with_init = {} # dirname -> runfile path venv_symlinks = [] - package = "" + package = None if dist_info_metadata: # in order to be able to have replacements in the venv, we have to add a # third value into the venv_symlinks, which would be the normalized @@ -284,9 +284,9 @@ def _get_venv_symlinks(ctx, dist_info_metadata): repo_runfiles_dirname = runfiles_root_path(ctx, dist_info_metadata.short_path).partition("/")[0] venv_symlinks.append(VenvSymlinkEntry( - key = "{}.dist-info".format(package), kind = VenvSymlinkKind.LIB, link_to_path = paths.join(repo_runfiles_dirname, site_packages_root, dist_info_dir), + src = package, venv_path = dist_info_dir, )) @@ -308,9 +308,9 @@ def _get_venv_symlinks(ctx, dist_info_metadata): # This would be files that do not have directories and we just need to add # direct symlinks to them as is: venv_symlinks.append(VenvSymlinkEntry( - key = None, kind = VenvSymlinkKind.LIB, link_to_path = paths.join(repo_runfiles_dirname, site_packages_root, filename), + src = package, venv_path = filename, )) @@ -330,9 +330,9 @@ def _get_venv_symlinks(ctx, dist_info_metadata): for dirname in first_level_explicit_packages: venv_symlinks.append(VenvSymlinkEntry( - key = None, kind = VenvSymlinkKind.LIB, link_to_path = paths.join(repo_runfiles_dirname, site_packages_root, dirname), + src = package, venv_path = dirname, )) return venv_symlinks diff --git a/tests/modules/other/simple_v1/site-packages/simple/__init__.py b/tests/modules/other/simple_v1/site-packages/simple/__init__.py index 40d30392a8..5becc17c04 100644 --- a/tests/modules/other/simple_v1/site-packages/simple/__init__.py +++ b/tests/modules/other/simple_v1/site-packages/simple/__init__.py @@ -1,3 +1 @@ -# Intentionally empty - __version__ = "1.0.0" diff --git a/tests/modules/other/simple_v1/site-packages/simple_extras/__init__.py b/tests/modules/other/simple_v1/site-packages/simple_extras/__init__.py new file mode 100644 index 0000000000..bb7b160deb --- /dev/null +++ b/tests/modules/other/simple_v1/site-packages/simple_extras/__init__.py @@ -0,0 +1 @@ +# Intentionally empty diff --git a/tests/modules/other/simple_v2/site-packages/simple/__init__.py b/tests/modules/other/simple_v2/site-packages/simple/__init__.py index 757693f1d4..8c0d5d5bb2 100644 --- a/tests/modules/other/simple_v2/site-packages/simple/__init__.py +++ b/tests/modules/other/simple_v2/site-packages/simple/__init__.py @@ -1,3 +1 @@ -# Intentionally empty - __version__ = "2.0.0" diff --git a/tests/venv_site_packages_libs/bin.py b/tests/venv_site_packages_libs/bin.py index 24492f0e5e..18a2fb76c5 100644 --- a/tests/venv_site_packages_libs/bin.py +++ b/tests/venv_site_packages_libs/bin.py @@ -33,20 +33,26 @@ def test_imported_from_venv(self): def test_distinfo_is_overriden(self): self.assert_imported_from_venv("simple") module = importlib.import_module("simple") - module_path = Path(module.__file__) - site_packages = module_path.parent.parent - - dist_info_dirs = [p.name for p in site_packages.glob("*.dist-info")] - self.assertEqual( "2.0.0", module.__version__, ) + module_path = Path(module.__file__) + + site_packages = module_path.parent.parent + dist_info_dirs = [p.name for p in site_packages.glob("*.dist-info")] self.assertEqual( ["simple-2.0.0.dist-info"], dist_info_dirs, ) + # Ensure that packages from simple v1 are not present + files = [p.name for p in site_packages.glob("*")] + self.assertNotIn( + "simple_extras", + files, + ) + if __name__ == "__main__": unittest.main() From e0affecdb46a952bfd759ba68b6ec139c41bb74c Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 30 May 2025 17:57:20 +0900 Subject: [PATCH 6/7] finish impl --- python/private/py_library.bzl | 14 +++++--- tests/modules/other/simple_v2/BUILD.bazel | 1 + .../site-packages/simple.libs/data.txt | 1 + .../site-packages/simple/__init__.pyi | 1 + tests/venv_site_packages_libs/bin.py | 32 +++++++++++++++---- 5 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 tests/modules/other/simple_v2/site-packages/simple.libs/data.txt create mode 100644 tests/modules/other/simple_v2/site-packages/simple/__init__.pyi diff --git a/python/private/py_library.bzl b/python/private/py_library.bzl index 0b6eed8a1b..58ac9cdd86 100644 --- a/python/private/py_library.bzl +++ b/python/private/py_library.bzl @@ -290,16 +290,22 @@ def _get_venv_symlinks(ctx, dist_info_metadata): venv_path = dist_info_dir, )) - for src in ctx.files.srcs: - if src.extension not in PYTHON_FILE_EXTENSIONS: - continue + for src in ctx.files.srcs + ctx.files.data: path = _repo_relative_short_path(src.short_path) if not path.startswith(site_packages_root): continue path = path.removeprefix(site_packages_root) dir_name, _, filename = path.rpartition("/") - if dir_name and filename.startswith("__init__."): + if src.extension not in PYTHON_FILE_EXTENSIONS: + if dir_name.endswith(".dist-info"): + # we have already handled the stuff + pass + elif dir_name: + # TODO @aignas 2025-05-30: is this the right way? + dirs_with_init[dir_name] = None + repo_runfiles_dirname = runfiles_root_path(ctx, src.short_path).partition("/")[0] + elif dir_name and filename.startswith("__init__."): dirs_with_init[dir_name] = None repo_runfiles_dirname = runfiles_root_path(ctx, src.short_path).partition("/")[0] elif not dir_name: diff --git a/tests/modules/other/simple_v2/BUILD.bazel b/tests/modules/other/simple_v2/BUILD.bazel index 7648827383..45f83a5a88 100644 --- a/tests/modules/other/simple_v2/BUILD.bazel +++ b/tests/modules/other/simple_v2/BUILD.bazel @@ -11,4 +11,5 @@ py_library( ), experimental_venvs_site_packages = "@rules_python//python/config_settings:venvs_site_packages", imports = [package_name() + "/site-packages"], + pyi_srcs = glob(["**/*.pyi"]), ) diff --git a/tests/modules/other/simple_v2/site-packages/simple.libs/data.txt b/tests/modules/other/simple_v2/site-packages/simple.libs/data.txt new file mode 100644 index 0000000000..f8a95b35d3 --- /dev/null +++ b/tests/modules/other/simple_v2/site-packages/simple.libs/data.txt @@ -0,0 +1 @@ +Intentionally empty diff --git a/tests/modules/other/simple_v2/site-packages/simple/__init__.pyi b/tests/modules/other/simple_v2/site-packages/simple/__init__.pyi new file mode 100644 index 0000000000..bb7b160deb --- /dev/null +++ b/tests/modules/other/simple_v2/site-packages/simple/__init__.pyi @@ -0,0 +1 @@ +# Intentionally empty diff --git a/tests/venv_site_packages_libs/bin.py b/tests/venv_site_packages_libs/bin.py index 18a2fb76c5..5afeaadc71 100644 --- a/tests/venv_site_packages_libs/bin.py +++ b/tests/venv_site_packages_libs/bin.py @@ -1,5 +1,4 @@ import importlib -import os import sys import unittest from pathlib import Path @@ -30,13 +29,37 @@ def test_imported_from_venv(self): self.assert_imported_from_venv("single_file") self.assert_imported_from_venv("simple") - def test_distinfo_is_overriden(self): + def test_pyi_is_included(self): + self.assert_imported_from_venv("simple") + module = importlib.import_module("simple") + module_path = Path(module.__file__) + + # this has been not included through data but through `pyi_srcs` + pyi_files = [p.name for p in module_path.parent.glob("*.pyi")] + self.assertIn("__init__.pyi", pyi_files) + + def test_data_is_included(self): + self.assert_imported_from_venv("simple") + module = importlib.import_module("simple") + module_path = Path(module.__file__) + + site_packages = module_path.parent.parent + + # Ensure that packages from simple v1 are not present + files = [p.name for p in site_packages.glob("*")] + self.assertIn("simple.libs", files) + + def test_override_pkg(self): self.assert_imported_from_venv("simple") module = importlib.import_module("simple") self.assertEqual( "2.0.0", module.__version__, ) + + def test_dirs_from_replaced_package_are_not_present(self): + self.assert_imported_from_venv("simple") + module = importlib.import_module("simple") module_path = Path(module.__file__) site_packages = module_path.parent.parent @@ -48,10 +71,7 @@ def test_distinfo_is_overriden(self): # Ensure that packages from simple v1 are not present files = [p.name for p in site_packages.glob("*")] - self.assertNotIn( - "simple_extras", - files, - ) + self.assertNotIn("simple_extras", files) if __name__ == "__main__": From c844aa07fcb474811f81c2731da04b5b5e5d3f64 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 30 May 2025 18:00:49 +0900 Subject: [PATCH 7/7] cleanup stale docs --- python/private/common.bzl | 3 +-- python/private/py_info.bzl | 16 ---------------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/python/private/common.bzl b/python/private/common.bzl index 87a8d0bea0..020e445a5b 100644 --- a/python/private/common.bzl +++ b/python/private/common.bzl @@ -396,8 +396,7 @@ def create_py_info( implicit_pyc_files: {type}`depset[File]` Implicitly generated pyc files that a binary can choose to include. imports: depset of strings; the import path values to propagate. - venv_symlinks: {type}`list[tuple[str, str, str]]` tuples of - `(normalized_package_name, runfiles_path, site_packages_path)` for + venv_symlinks: {type}`list[VenvSymlinkEntry]` instances for symlinks to create in the consuming binary's venv. Returns: diff --git a/python/private/py_info.bzl b/python/private/py_info.bzl index eecfe96e95..065f876973 100644 --- a/python/private/py_info.bzl +++ b/python/private/py_info.bzl @@ -303,22 +303,6 @@ This field is currently unused in Bazel and may go away in the future. A depset with `topological` ordering. - -Tuples of `(runfiles_path, site_packages_path)`. Where -* `runfiles_path` is a runfiles-root relative path. It is the path that - has the code to make importable. If `None` or empty string, then it means - to not create a site packages directory with the `site_packages_path` - name. -* `site_packages_path` is a path relative to the site-packages directory of - the venv for whatever creates the venv (typically py_binary). It makes - the code in `runfiles_path` available for import. Note that this - is created as a "raw" symlink (via `declare_symlink`). -* `symlink_key` is a path which should be checked instead of the - `site_packages_path`. This is typically used for dist-info directories as - they include version in the filenames and we want to ensure that we can - replace the whole package and not have orphan .dist-info packages in the - virtual environment. - :::{include} /_includes/experimental_api.md :::