Skip to content

Commit b50fe65

Browse files
committed
Add relationships for pip's vendored dependencies
1 parent 884b9b0 commit b50fe65

File tree

2 files changed

+141
-21
lines changed

2 files changed

+141
-21
lines changed

Misc/sbom.spdx.json

Lines changed: 120 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Tools/build/generate_sbom.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]:
124124
return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")])
125125

126126

127-
def fetch_package_metadata_from_pypi(project: str, version: str, filename=None) -> tuple[str, str]|None:
127+
def fetch_package_metadata_from_pypi(project: str, version: str, filename: str | None = None) -> tuple[str, str] | None:
128128
"""
129129
Fetches the SHA256 checksum and download location from PyPI.
130130
If we're given a filename then we match with that, otherwise we use wheels.
@@ -138,7 +138,12 @@ def fetch_package_metadata_from_pypi(project: str, version: str, filename=None)
138138
# Look for a matching artifact filename and then check
139139
# its remote checksum to the local one.
140140
for url in release_metadata["urls"]:
141-
if filename is None or (filename is not None and url["filename"] == filename):
141+
# Pip can only use Python-only dependencies, so there's
142+
# no risk of picking the 'incorrect' wheel here.
143+
if (
144+
(filename is None and url["packagetype"] == "bdist_wheel")
145+
or (filename is not None and url["filename"] == filename)
146+
):
142147
break
143148
else:
144149
raise ValueError(f"No matching filename on PyPI for '{filename}'")
@@ -221,6 +226,7 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None:
221226
error_if(match is None, f"Couldn't parse line from pip vendor.txt: '{line}'")
222227
assert match is not None # Make mypy happy.
223228

229+
# Parse out and normalize the project name.
224230
project_name, project_version = match.groups()
225231
project_name = project_name.lower()
226232

@@ -239,6 +245,9 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None:
239245
for package in sbom_data["packages"]:
240246
if package["SPDXID"] != sbom_project_spdx_id:
241247
continue
248+
249+
# The only thing missing from this blob is `licenseConcluded`,
250+
# which needs to be triaged by human maintainers if the list changes.
242251
package.update({
243252
"SPDXID": sbom_project_spdx_id,
244253
"name": project_name,
@@ -268,15 +277,6 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None:
268277
for sbom_package in sbom_data["packages"]
269278
if sbom_package["name"] != "pip"
270279
]
271-
sbom_data["relationships"] = [
272-
sbom_relationship
273-
for sbom_relationship in sbom_data["relationships"]
274-
if (
275-
sbom_relationship["spdxElementId"] == sbom_pip_spdx_id
276-
and sbom_relationship["relationshipType"] == "DEPENDS_ON"
277-
)
278-
]
279-
280280
sbom_data["packages"].append(
281281
{
282282
"SPDXID": sbom_pip_spdx_id,
@@ -315,6 +315,11 @@ def main() -> None:
315315
sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json"
316316
sbom_data = json.loads(sbom_path.read_bytes())
317317

318+
# We regenerate all of this information. Package information
319+
# should be preserved, though, since it is edited by humans.
320+
sbom_data["files"] = []
321+
sbom_data["relationships"] = []
322+
318323
# Insert pip's SBOM metadata from the wheel.
319324
discover_pip_sbom_package(sbom_data)
320325

@@ -331,9 +336,10 @@ def main() -> None:
331336
"name" not in package,
332337
"Package is missing the 'name' field"
333338
)
339+
missing_required_keys = REQUIRED_PROPERTIES_PACKAGE - set(package.keys())
334340
error_if(
335-
not set(package.keys()).issuperset(REQUIRED_PROPERTIES_PACKAGE),
336-
f"Package '{package['name']}' is missing required fields",
341+
bool(missing_required_keys),
342+
f"Package '{package['name']}' is missing required fields: {missing_required_keys}",
337343
)
338344
error_if(
339345
package["SPDXID"] != spdx_id(f"SPDXRef-PACKAGE-{package['name']}"),
@@ -361,10 +367,6 @@ def main() -> None:
361367
f"License identifier '{license_concluded}' not in SBOM tool allowlist"
362368
)
363369

364-
# Regenerate file information from current data.
365-
sbom_files = []
366-
sbom_relationships = []
367-
368370
# We call 'sorted()' here a lot to avoid filesystem scan order issues.
369371
for name, files in sorted(PACKAGE_TO_FILES.items()):
370372
package_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{name}")
@@ -389,7 +391,7 @@ def main() -> None:
389391
checksum_sha256 = hashlib.sha256(data).hexdigest()
390392

391393
file_spdx_id = spdx_id(f"SPDXRef-FILE-{path}")
392-
sbom_files.append({
394+
sbom_data["files"].append({
393395
"SPDXID": file_spdx_id,
394396
"fileName": path,
395397
"checksums": [
@@ -399,15 +401,13 @@ def main() -> None:
399401
})
400402

401403
# Tie each file back to its respective package.
402-
sbom_relationships.append({
404+
sbom_data["relationships"].append({
403405
"spdxElementId": package_spdx_id,
404406
"relatedSpdxElement": file_spdx_id,
405407
"relationshipType": "CONTAINS",
406408
})
407409

408410
# Update the SBOM on disk
409-
sbom_data["files"] = sbom_files
410-
sbom_data["relationships"] = sbom_relationships
411411
sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True))
412412

413413

0 commit comments

Comments
 (0)