@@ -124,7 +124,7 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]:
124
124
return sorted ([line .split ()[- 1 ] for line in git_check_ignore_lines if line .startswith ("::" )])
125
125
126
126
127
- def fetch_package_metadata_from_pypi (project : str , version : str , filename = None ) -> tuple [str , str ]| None :
127
+ def fetch_package_metadata_from_pypi (project : str , version : str , filename : str | None = None ) -> tuple [str , str ] | None :
128
128
"""
129
129
Fetches the SHA256 checksum and download location from PyPI.
130
130
If we're given a filename then we match with that, otherwise we use wheels.
@@ -138,7 +138,12 @@ def fetch_package_metadata_from_pypi(project: str, version: str, filename=None)
138
138
# Look for a matching artifact filename and then check
139
139
# its remote checksum to the local one.
140
140
for url in release_metadata ["urls" ]:
141
- if filename is None or (filename is not None and url ["filename" ] == filename ):
141
+ # Pip can only use Python-only dependencies, so there's
142
+ # no risk of picking the 'incorrect' wheel here.
143
+ if (
144
+ (filename is None and url ["packagetype" ] == "bdist_wheel" )
145
+ or (filename is not None and url ["filename" ] == filename )
146
+ ):
142
147
break
143
148
else :
144
149
raise ValueError (f"No matching filename on PyPI for '{ filename } '" )
@@ -221,6 +226,7 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None:
221
226
error_if (match is None , f"Couldn't parse line from pip vendor.txt: '{ line } '" )
222
227
assert match is not None # Make mypy happy.
223
228
229
+ # Parse out and normalize the project name.
224
230
project_name , project_version = match .groups ()
225
231
project_name = project_name .lower ()
226
232
@@ -239,6 +245,9 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None:
239
245
for package in sbom_data ["packages" ]:
240
246
if package ["SPDXID" ] != sbom_project_spdx_id :
241
247
continue
248
+
249
+ # The only thing missing from this blob is the `licenseConcluded`,
250
+ # which needs to be triaged by human maintainers if the list changes.
242
251
package .update ({
243
252
"SPDXID" : sbom_project_spdx_id ,
244
253
"name" : project_name ,
@@ -268,15 +277,6 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None:
268
277
for sbom_package in sbom_data ["packages" ]
269
278
if sbom_package ["name" ] != "pip"
270
279
]
271
- sbom_data ["relationships" ] = [
272
- sbom_relationship
273
- for sbom_relationship in sbom_data ["relationships" ]
274
- if (
275
- sbom_relationship ["spdxElementId" ] == sbom_pip_spdx_id
276
- and sbom_relationship ["relationshipType" ] == "DEPENDS_ON"
277
- )
278
- ]
279
-
280
280
sbom_data ["packages" ].append (
281
281
{
282
282
"SPDXID" : sbom_pip_spdx_id ,
@@ -315,6 +315,11 @@ def main() -> None:
315
315
sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json"
316
316
sbom_data = json .loads (sbom_path .read_bytes ())
317
317
318
+ # We regenerate all of this information. Package information
319
+ # should be preserved, though, since it is edited by humans.
320
+ sbom_data ["files" ] = []
321
+ sbom_data ["relationships" ] = []
322
+
318
323
# Insert pip's SBOM metadata from the wheel.
319
324
discover_pip_sbom_package (sbom_data )
320
325
@@ -331,9 +336,10 @@ def main() -> None:
331
336
"name" not in package ,
332
337
"Package is missing the 'name' field"
333
338
)
339
+ missing_required_keys = REQUIRED_PROPERTIES_PACKAGE - set (package .keys ())
334
340
error_if (
335
- not set ( package . keys ()). issuperset ( REQUIRED_PROPERTIES_PACKAGE ),
336
- f"Package '{ package ['name' ]} ' is missing required fields" ,
341
+ bool ( missing_required_keys ),
342
+ f"Package '{ package ['name' ]} ' is missing required fields: { missing_required_keys } " ,
337
343
)
338
344
error_if (
339
345
package ["SPDXID" ] != spdx_id (f"SPDXRef-PACKAGE-{ package ['name' ]} " ),
@@ -361,10 +367,6 @@ def main() -> None:
361
367
f"License identifier '{ license_concluded } ' not in SBOM tool allowlist"
362
368
)
363
369
364
- # Regenerate file information from current data.
365
- sbom_files = []
366
- sbom_relationships = []
367
-
368
370
# We call 'sorted()' here a lot to avoid filesystem scan order issues.
369
371
for name , files in sorted (PACKAGE_TO_FILES .items ()):
370
372
package_spdx_id = spdx_id (f"SPDXRef-PACKAGE-{ name } " )
@@ -389,7 +391,7 @@ def main() -> None:
389
391
checksum_sha256 = hashlib .sha256 (data ).hexdigest ()
390
392
391
393
file_spdx_id = spdx_id (f"SPDXRef-FILE-{ path } " )
392
- sbom_files .append ({
394
+ sbom_data [ "files" ] .append ({
393
395
"SPDXID" : file_spdx_id ,
394
396
"fileName" : path ,
395
397
"checksums" : [
@@ -399,15 +401,13 @@ def main() -> None:
399
401
})
400
402
401
403
# Tie each file back to its respective package.
402
- sbom_relationships .append ({
404
+ sbom_data [ "relationships" ] .append ({
403
405
"spdxElementId" : package_spdx_id ,
404
406
"relatedSpdxElement" : file_spdx_id ,
405
407
"relationshipType" : "CONTAINS" ,
406
408
})
407
409
408
410
# Update the SBOM on disk
409
- sbom_data ["files" ] = sbom_files
410
- sbom_data ["relationships" ] = sbom_relationships
411
411
sbom_path .write_text (json .dumps (sbom_data , indent = 2 , sort_keys = True ))
412
412
413
413
0 commit comments