99import os
1010import sys
1111import uuid
12+ from datetime import datetime
1213from io import BytesIO
1314from io import StringIO
1415
15- from spdx .checksum import Checksum
16- from spdx .checksum import ChecksumAlgorithm
17- from spdx .creationinfo import Tool
18- from spdx .document import ExtractedLicense
19- from spdx .document import Document
20- from spdx .license import License
21- from spdx .file import File
22- from spdx .package import Package
23- from spdx .relationship import Relationship
24- from spdx .utils import calc_verif_code
25- from spdx .utils import NoAssert
26- from spdx .utils import SPDXNone
27- from spdx .version import Version
2816
2917from license_expression import Licensing
3018from commoncode .cliutils import OUTPUT_GROUP
3119from commoncode .cliutils import PluggableCommandLineOption
3220from commoncode .fileutils import file_name
3321from commoncode .fileutils import parent_directory
3422from commoncode .text import python_safe_name
23+ from spdx_tools .spdx .model import SpdxNoAssertion
24+ from spdx_tools .spdx .model import Version
25+ from spdx_tools .spdx .model import CreationInfo
26+ from spdx_tools .spdx .model import Actor
27+ from spdx_tools .spdx .model import ActorType
28+ from spdx_tools .spdx .model import Document
29+ from spdx_tools .spdx .model import Package
30+ from spdx_tools .spdx .model import File
31+ from spdx_tools .spdx .model import Checksum
32+ from spdx_tools .spdx .model import ChecksumAlgorithm
33+ from spdx_tools .spdx .model import ExtractedLicensingInfo
34+ from spdx_tools .spdx .model import SpdxNone
35+ from spdx_tools .spdx .model import Relationship
36+ from spdx_tools .spdx .model import RelationshipType
37+ from spdx_tools .spdx .spdx_element_utils import calculate_package_verification_code
38+
3539from formattedcode import FileOptionType
3640from licensedcode .detection import get_matches_from_detection_mappings
3741from plugincode .output import output_impl
@@ -63,50 +67,6 @@ def logger_debug(*args):
6367Output plugins to write scan results in SPDX format.
6468"""
6569
66- _spdx_list_is_patched = False
67-
68-
def _patch_license_list():
    """
    Extend the SPDX Python library license map with the SPDX licenses known
    to ScanCode (deprecated licenses included).

    The work is done at most once: the module-level ``_spdx_list_is_patched``
    flag is set after the first successful run and later calls return early.
    """
    global _spdx_list_is_patched
    if _spdx_list_is_patched:
        # Already patched earlier: nothing left to do.
        return

    # Imports are kept local to this function, as in the original code.
    from spdx.config import LICENSE_MAP
    from licensedcode.models import load_licenses

    known_licenses = load_licenses(with_deprecated=True)
    LICENSE_MAP.update(get_licenses_by_spdx_key(known_licenses.values()))
    _spdx_list_is_patched = True
82-
83-
def get_licenses_by_spdx_key(licenses):
    """
    Return a two-way mapping of {spdx_key: license name} and
    {license name: spdx_key} given a ``licenses`` iterable of License objects.

    Each license object is read through its ``name``, ``spdx_license_key``
    and ``other_spdx_license_keys`` attributes. A license that has neither a
    primary SPDX key nor any other SPDX key is skipped. Empty or
    whitespace-only entries in ``other_spdx_license_keys`` are ignored.
    """
    spdx_licenses = {}
    for lic in licenses:
        primary_key = lic.spdx_license_key
        # Tolerate a missing (None) collection of alternate keys.
        other_keys = lic.other_spdx_license_keys or ()
        if not (primary_key or other_keys):
            continue

        # Bind the name before either branch uses it. It used to be assigned
        # only when a primary key was present, so a license with only
        # alternate keys either raised UnboundLocalError or was recorded
        # under the name of the previously processed license.
        name = lic.name

        if primary_key:
            spdx_licenses[primary_key] = name
            spdx_licenses[name] = primary_key

        for other_key in other_keys:
            if not (other_key and other_key.strip()):
                continue
            spdx_licenses[other_key] = name
            # NOTE(review): unchanged behavior, the name entry ends up
            # pointing at the last alternate key rather than the primary
            # key; confirm this is intended.
            spdx_licenses[name] = other_key

    return spdx_licenses
108-
109-
11070@output_impl
11171class SpdxTvOutput (OutputPlugin ):
11272
@@ -220,7 +180,7 @@ def write_spdx(
220180 tool_version ,
221181 notice ,
222182 package_name = '' ,
223- download_location = NoAssert (),
183+ download_location = SpdxNoAssertion (),
224184 as_tagvalue = True ,
225185 spdx_version = (2 , 2 ),
226186 with_notice_text = False ,
@@ -240,37 +200,43 @@ def write_spdx(
240200 licensing = Licensing ()
241201
242202 as_rdf = not as_tagvalue
243- _patch_license_list ()
244203
245204 ns_prefix = '_' .join (package_name .lower ().split ())
246205 comment = notice + f'\n SPDX License List: { scancode_config .spdx_license_list_version } '
247206
248207 version_major , version_minor = scancode_config .spdx_license_list_version .split ("." )
249208 spdx_license_list_version = Version (major = version_major , minor = version_minor )
250209
251- doc = Document (
252- version = Version (* spdx_version ),
253- data_license = License .from_identifier ('CC0-1.0' ),
254- comment = notice ,
255- namespace = f'http://spdx.org/spdxdocs/{ ns_prefix } -{ uuid .uuid4 ()} ' ,
256- license_list_version = scancode_config .spdx_license_list_version ,
257- name = 'SPDX Document created by ScanCode Toolkit'
210+ tool_name = tool_name or 'ScanCode'
211+ creator = Actor (ActorType .TOOL , f'{ tool_name } { tool_version } ' )
212+
213+ creation_info = CreationInfo (
214+ spdx_id = "SPDXRef-DOCUMENT" ,
215+ spdx_version = f"SPDX-{ spdx_version [0 ]} .{ spdx_version [1 ]} " ,
216+ data_license = 'CC0-1.0' ,
217+ document_comment = comment ,
218+ document_namespace = f'http://spdx.org/spdxdocs/{ ns_prefix } -{ uuid .uuid4 ()} ' ,
219+ license_list_version = spdx_license_list_version ,
220+ name = 'SPDX Document created by ScanCode Toolkit' ,
221+ creators = [creator ],
222+ created = datetime .now (),
258223 )
259224
260- tool_name = tool_name or 'ScanCode'
261- doc .creation_info .add_creator (Tool (f'{ tool_name } { tool_version } ' ))
262- doc .creation_info .set_created_now ()
263- doc .creation_info .license_list_version = spdx_license_list_version
264225
265226 package_id = '001'
266- package = doc . package = Package (
227+ package = Package (
267228 name = package_name ,
268229 download_location = download_location ,
269230 spdx_id = f'SPDXRef-{ package_id } ' ,
270231 )
271232
233+ doc = Document (
234+ creation_info = creation_info ,
235+ packages = [package ],
236+ )
237+
272238 # Use a set of unique copyrights for the package.
273- package . cr_text = set ()
239+ package_copyright_texts = set ()
274240
275241 all_files_have_no_license = True
276242 all_files_have_no_copyright = True
@@ -285,13 +251,18 @@ def write_spdx(
285251 # Set a relative file name as that is what we want in
286252 # SPDX output (with explicit leading './').
287253 name = './' + file_data .get ('path' )
288- file_entry = File (
289- spdx_id = f'SPDXRef-{ sid } ' ,
290- name = name )
254+
291255 if file_data .get ('file_type' ) == 'empty' :
292- file_entry . set_checksum ( Checksum (ChecksumAlgorithm .SHA1 , "da39a3ee5e6b4b0d3255bfef95601890afd80709" ) )
256+ checksum = Checksum (ChecksumAlgorithm .SHA1 , "da39a3ee5e6b4b0d3255bfef95601890afd80709" )
293257 else :
294- file_entry .set_checksum (Checksum (ChecksumAlgorithm .SHA1 , file_data .get ('sha1' ) or '' ))
258+ # FIXME: this sets the checksum of a file to the empty string hash if unknown; tracked in https://github.com/nexB/scancode-toolkit/issues/3453
259+ checksum = Checksum (ChecksumAlgorithm .SHA1 , file_data .get ('sha1' ) or 'da39a3ee5e6b4b0d3255bfef95601890afd80709' )
260+
261+ file_entry = File (
262+ spdx_id = f'SPDXRef-{ sid } ' ,
263+ name = name ,
264+ checksums = [checksum ]
265+ )
295266
296267 file_license_detections = file_data .get ('license_detections' )
297268 license_matches = get_matches_from_detection_mappings (file_license_detections )
@@ -312,63 +283,63 @@ def write_spdx(
312283 spdx_id = f'LicenseRef-scancode-{ license_key } '
313284 is_license_ref = spdx_id .lower ().startswith ('licenseref-' )
314285
315- if not is_license_ref :
316- spdx_license = License .from_identifier (spdx_id )
317- else :
318- spdx_license = ExtractedLicense (spdx_id )
319- spdx_license .name = file_license .short_name
286+ spdx_license = licensing .parse (spdx_id )
287+
288+ if is_license_ref :
289+ text = match .get ('matched_text' )
320290 # FIXME: replace this with the licensedb URL
321291 comment = (
322292 f'See details at https://github.com/nexB/scancode-toolkit'
323- f'/blob/develop/src/licensedcode/data/licenses/{ license_key } .yml \n '
293+ f'/blob/develop/src/licensedcode/data/licenses/{ license_key } .LICENSE \n '
324294 )
325- spdx_license .comment = comment
326- text = match .get ('matched_text' )
327- # always set some text, even if we did not extract the
328- # matched text
329- if not text :
330- text = comment
331- spdx_license .text = text
332- doc .add_extr_lic (spdx_license )
295+ extracted_license = ExtractedLicensingInfo (
296+ license_id = spdx_id ,
297+ # always set some text, even if we did not extract the
298+ # matched text
299+ extracted_text = text if text else comment ,
300+ license_name = file_license .short_name ,
301+ comment = comment ,
302+ )
303+ doc .extracted_licensing_info .append (extracted_license )
333304
334305 # Add licenses in the order they appear in the file. Maintaining
335306 # the order might be useful for provenance purposes.
336- file_entry .add_lics (spdx_license )
337- package .add_lics_from_file (spdx_license )
307+ file_entry .license_info_in_file . append (spdx_license )
308+ package .license_info_from_files . append (spdx_license )
338309
339310 elif license_matches is None :
340311 all_files_have_no_license = False
341- file_entry .add_lics ( NoAssert ())
312+ file_entry .license_info_in_file . append ( SpdxNoAssertion ())
342313
343314 else :
344- file_entry .add_lics ( SPDXNone ())
315+ file_entry .license_info_in_file . append ( SpdxNone ())
345316
346- file_entry .conc_lics = NoAssert ()
317+ file_entry .license_concluded = SpdxNoAssertion ()
347318
348319 file_copyrights = file_data .get ('copyrights' )
349320 if file_copyrights :
350321 all_files_have_no_copyright = False
351- file_entry . copyright = []
322+ copyrights = []
352323 for file_copyright in file_copyrights :
353- file_entry . copyright .append (file_copyright .get ('copyright' ))
324+ copyrights .append (file_copyright .get ('copyright' ))
354325
355- package . cr_text . update (file_entry . copyright )
326+ package_copyright_texts . update (copyrights )
356327
357328 # Create a text of copyright statements in the order they appear in
358329 # the file. Maintaining the order might be useful for provenance
359330 # purposes.
360- file_entry .copyright = '\n ' .join (file_entry . copyright ) + '\n '
331+ file_entry .copyright_text = '\n ' .join (copyrights ) + '\n '
361332
362333 elif file_copyrights is None :
363334 all_files_have_no_copyright = False
364- file_entry .copyright = NoAssert ()
335+ file_entry .copyright_text = SpdxNoAssertion ()
365336
366337 else :
367- file_entry .copyright = SPDXNone ()
338+ file_entry .copyright_text = SpdxNone ()
368339
369- doc .add_file (file_entry )
370- relationship = Relationship (f' { package .spdx_id } CONTAINS { file_entry .spdx_id } ' )
371- doc .add_relationship (relationship )
340+ doc .files . append (file_entry )
341+ relationship = Relationship (package .spdx_id , RelationshipType . CONTAINS , file_entry .spdx_id )
342+ doc .relationships . append (relationship )
372343
373344 if not doc .files :
374345 if as_tagvalue :
@@ -379,33 +350,29 @@ def write_spdx(
379350 output_file .write (msg )
380351
381352 # Remove duplicate licenses from the list for the package.
382- unique_licenses = {l .identifier : l for l in package .licenses_from_files }
383- unique_licenses = list (unique_licenses .values ())
384- if not len (package .licenses_from_files ):
353+ package .license_info_from_files = list (set (package .license_info_from_files ))
354+ if not package .license_info_from_files :
385355 if all_files_have_no_license :
386- package .licenses_from_files = [SPDXNone ()]
356+ package .license_info_from_files = [SpdxNone ()]
387357 else :
388- package .licenses_from_files = [NoAssert ()]
358+ package .license_info_from_files = [SpdxNoAssertion ()]
389359 else :
390360 # List license identifiers alphabetically for the package.
391- package .licenses_from_files = sorted (
392- unique_licenses ,
393- key = lambda x : x .identifier ,
394- )
361+ package .license_info_from_files = sorted (package .license_info_from_files )
395362
396- if len ( package . cr_text ) == 0 :
363+ if not package_copyright_texts :
397364 if all_files_have_no_copyright :
398- package .cr_text = SPDXNone ()
365+ package .copyright_text = SpdxNone ()
399366 else :
400- package .cr_text = NoAssert ()
367+ package .copyright_text = SpdxNoAssertion ()
401368 else :
402369 # Create a text of alphabetically sorted copyright
403370 # statements for the package.
404- package .cr_text = '\n ' .join (sorted (package . cr_text )) + '\n '
371+ package .copyright_text = '\n ' .join (sorted (package_copyright_texts )) + '\n '
405372
406- package .verif_code = calc_verif_code (doc .files )
407- package .license_declared = NoAssert ()
408- package .conc_lics = NoAssert ()
373+ package .verification_code = calculate_package_verification_code (doc .files )
374+ package .license_declared = SpdxNoAssertion ()
375+ package .license_concluded = SpdxNoAssertion ()
409376
410377 # The spdx-tools write_document returns either:
411378 # - unicode for tag values
@@ -416,19 +383,15 @@ def write_spdx(
416383 # in the other case we deal with text all the way.
417384
418385 if doc .files :
419-
420- if as_tagvalue :
421- from spdx .writers .tagvalue import write_document # NOQA
422- elif as_rdf :
423- from spdx .writers .rdf import write_document # NOQA
424-
425386 if as_tagvalue :
387+ from spdx_tools .spdx .writer .tagvalue .tagvalue_writer import write_document_to_stream # NOQA
426388 spdx_output = StringIO ()
427389 elif as_rdf :
390+ from spdx_tools .spdx .writer .rdf .rdf_writer import write_document_to_stream # NOQA
428391 # rdf is utf-encoded bytes
429392 spdx_output = BytesIO ()
430393
431- write_document (doc , spdx_output , validate = False )
394+ write_document_to_stream (doc , spdx_output , validate = False )
432395 result = spdx_output .getvalue ()
433396
434397 if as_rdf :
0 commit comments