Skip to content

Commit 9dd7589

Browse files
committed
wip: add symlinks for .dist-info folders in the venv
This adds the necessary `.dist-info` files into the mix and should get us reasonably close to handling 99% of the cases. The expected differences from this and a `venv` built by `uv` would be: * Shared libraries are stored in `<package>.libs` in `uv` venvs. This can be achieved in `rules_python` by changing the `installer` settings in the `wheel_installer/wheel.py#unzip` function. * The `RECORD` files are excluded from the `venv`s for better cache hit rate in `bazel`, however I am not sure if we should do that for actual wheels that are downloaded from the internet. Tested: - [x] Building the `//docs` and manually checking the symlinks. - [ ] Unit tests Work towards #2156
1 parent 3464c14 commit 9dd7589

File tree

5 files changed

+58
-8
lines changed

5 files changed

+58
-8
lines changed

python/private/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ bzl_library(
444444
":attributes_bzl",
445445
":common_bzl",
446446
":flags_bzl",
447+
":normalize_name_bzl",
447448
":precompile_bzl",
448449
":py_cc_link_params_info_bzl",
449450
":py_internal_bzl",

python/private/common.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,8 +396,8 @@ def create_py_info(
396396
implicit_pyc_files: {type}`depset[File]` Implicitly generated pyc files
397397
that a binary can choose to include.
398398
imports: depset of strings; the import path values to propagate.
399-
site_packages_symlinks: {type}`list[tuple[str, str]]` tuples of
400-
`(runfiles_path, site_packages_path)` for symlinks to create
399+
site_packages_symlinks: {type}`list[tuple[str, str, str]]` tuples of
400+
`(normalized_package_name, runfiles_path, site_packages_path)` for symlinks to create
401401
in the consuming binary's venv site packages.
402402
403403
Returns:

python/private/py_executable.bzl

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -671,11 +671,19 @@ def _create_site_packages_symlinks(ctx, site_packages):
671671

672672
def _build_link_map(entries):
673673
link_map = {}
674-
for link_to_runfiles_path, site_packages_path in entries:
675-
if site_packages_path in link_map:
674+
package_map = {}
675+
for link_to_runfiles_path, site_packages_path, package in entries:
676+
if package and package in package_map:
676677
# We ignore duplicates by design. The dependency closer to the
677678
# binary gets precedence due to the topological ordering.
678679
continue
680+
elif site_packages_path in link_map:
681+
# The packages are not duplicate, but have overlapping paths, for
682+
# now we ignore these.
683+
continue
684+
elif package:
685+
package_map[package] = site_packages_path
686+
link_map[site_packages_path] = link_to_runfiles_path
679687
else:
680688
link_map[site_packages_path] = link_to_runfiles_path
681689

python/private/py_info.bzl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,11 @@ The order of the depset is not guaranteed and may be changed in the future. It
148148
is recommended to use `default` order (the default).
149149
""",
150150
"site_packages_symlinks": """
151-
:type: depset[tuple[str | None, str]]
151+
:type: depset[tuple[str | None, str, str | None]]
152152
153153
A depset with `topological` ordering.
154154
155-
Tuples of `(runfiles_path, site_packages_path)`. Where
155+
Tuples of `(runfiles_path, site_packages_path, symlink_key)`. Where
156156
* `runfiles_path` is a runfiles-root relative path. It is the path that
157157
has the code to make importable. If `None` or empty string, then it means
158158
to not create a site packages directory with the `site_packages_path`
@@ -161,6 +161,11 @@ Tuples of `(runfiles_path, site_packages_path)`. Where
161161
the venv for whatever creates the venv (typically py_binary). It makes
162162
the code in `runfiles_path` available for import. Note that this
163163
is created as a "raw" symlink (via `declare_symlink`).
164+
* `symlink_key` is a path which should be checked instead of the
165+
`site_packages_path`. This is typically used for dist-info directories as
166+
they include version in the filenames and we want to ensure that we can
167+
replace the whole package and not have orphan .dist-info packages in the
168+
virtual environment.
164169
165170
:::{include} /_includes/experimental_api.md
166171
:::

python/private/py_library.bzl

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ load(
4141
"runfiles_root_path",
4242
)
4343
load(":flags.bzl", "AddSrcsToRunfilesFlag", "PrecompileFlag", "VenvsSitePackages")
44+
load(":normalize_name.bzl", "normalize_name")
4445
load(":precompile.bzl", "maybe_precompile")
4546
load(":py_cc_link_params_info.bzl", "PyCcLinkParamsInfo")
4647
load(":py_info.bzl", "PyInfo")
@@ -206,16 +207,32 @@ Source files are no longer added to the runfiles directly.
206207
:::
207208
"""
208209

210+
def _get_distinfo_metadata(ctx):
211+
data = ctx.files.data or []
212+
for d in data:
213+
# work on case insensitive FSes
214+
if d.basename.lower() != "metadata":
215+
continue
216+
217+
if d.dirname.endswith(".dist-info"):
218+
return d
219+
220+
return None
221+
209222
def _get_imports_and_site_packages_symlinks(ctx, semantics):
210223
imports = depset()
211224
site_packages_symlinks = depset()
212225
if VenvsSitePackages.is_enabled(ctx):
213-
site_packages_symlinks = _get_site_packages_symlinks(ctx)
226+
dist_info_metadata = _get_distinfo_metadata(ctx)
227+
site_packages_symlinks = _get_site_packages_symlinks(
228+
ctx,
229+
dist_info_metadata,
230+
)
214231
else:
215232
imports = collect_imports(ctx, semantics)
216233
return imports, site_packages_symlinks
217234

218-
def _get_site_packages_symlinks(ctx):
235+
def _get_site_packages_symlinks(ctx, dist_info_metadata):
219236
imports = ctx.attr.imports
220237
if len(imports) == 0:
221238
fail("When venvs_site_packages is enabled, exactly one `imports` " +
@@ -254,6 +271,23 @@ def _get_site_packages_symlinks(ctx):
254271
repo_runfiles_dirname = None
255272
dirs_with_init = {} # dirname -> runfile path
256273
site_packages_symlinks = []
274+
if dist_info_metadata:
275+
# in order to be able to have replacements in the venv, we have to add a
276+
# third value into the site_packages_symlinks, which would be the normalized
277+
# package name. This allows us to ensure that we can replace the `dist-info`
278+
# directories by checking if the package key is there.
279+
dist_info_dir = paths.basename(dist_info_metadata.dirname)
280+
package, _, _suffix = dist_info_dir.rpartition(".dist-info")
281+
package, _, _version = package.rpartition("-")
282+
symlink_key = "{}.dist-info".format(normalize_name(package))
283+
284+
repo_runfiles_dirname = runfiles_root_path(ctx, dist_info_metadata.short_path).partition("/")[0]
285+
site_packages_symlinks.append((
286+
paths.join(repo_runfiles_dirname, site_packages_root, dist_info_dir),
287+
dist_info_dir,
288+
symlink_key,
289+
))
290+
257291
for src in ctx.files.srcs:
258292
if src.extension not in PYTHON_FILE_EXTENSIONS:
259293
continue
@@ -274,6 +308,7 @@ def _get_site_packages_symlinks(ctx):
274308
site_packages_symlinks.append((
275309
paths.join(repo_runfiles_dirname, site_packages_root, filename),
276310
filename,
311+
None,
277312
))
278313

279314
# Sort so that we encounter `foo` before `foo/bar`. This ensures we
@@ -294,6 +329,7 @@ def _get_site_packages_symlinks(ctx):
294329
site_packages_symlinks.append((
295330
paths.join(repo_runfiles_dirname, site_packages_root, dirname),
296331
dirname,
332+
None,
297333
))
298334
return site_packages_symlinks
299335

0 commit comments

Comments
 (0)