Skip to content

Commit 56cedc0

Browse files
Damian E and DmitriyLewen authored
fix(license): reorder logic of how python package licenses are acquired (aquasecurity#6220)
Co-authored-by: DmitriyLewen <[email protected]>
1 parent d7d7265 commit 56cedc0

File tree

7 files changed

+208
-26
lines changed

7 files changed

+208
-26
lines changed

pkg/dependency/parser/python/packaging/parse.go

+25-8
Original file line numberDiff line numberDiff line change
@@ -40,22 +40,39 @@ func (*Parser) Parse(r xio.ReadSeekerAt) ([]types.Library, []types.Dependency, e
4040
return nil, nil, xerrors.New("name or version is empty")
4141
}
4242

43-
// "License-Expression" takes precedence as "License" is deprecated.
44-
// cf. https://peps.python.org/pep-0639/#deprecate-license-field
43+
// "License-Expression" takes precedence in accordance with https://peps.python.org/pep-0639/#deprecate-license-field
44+
// Note, however, that PEP 639 is still in draft.
4545
var license string
46-
if l := h.Get("License-Expression"); l != "" {
47-
license = l
48-
} else if l := h.Get("License"); l != "" {
49-
license = l
46+
if le := h.Get("License-Expression"); le != "" {
47+
license = le
5048
} else {
49+
// Get possible multiple occurrences of licenses from "Classifier: License" field
50+
// When present, it should define the license, whereas "License" would define any additional exceptions or modifications.
51+
// ref. https://packaging.python.org/en/latest/specifications/core-metadata/#license
52+
var licenses []string
5153
for _, classifier := range h.Values("Classifier") {
5254
if strings.HasPrefix(classifier, "License :: ") {
5355
values := strings.Split(classifier, " :: ")
54-
license = values[len(values)-1]
55-
break
56+
licenseName := values[len(values)-1]
57+
// According to the classifier list https://pypi.org/classifiers/ there is one classifier which seems more like a grouping
58+
// It has no specific license definition (Classifier: License :: OSI Approved) - it is skipped
59+
if licenseName != "OSI Approved" {
60+
licenses = append(licenses, licenseName)
61+
}
5662
}
5763
}
64+
license = strings.Join(licenses, ", ")
65+
66+
if l := h.Get("License"); l != "" {
67+
if len(licenses) != 0 {
68+
log.Logger.Infof("License acquired from METADATA classifiers may be subject to additional terms for [%s:%s]", name, version)
69+
} else {
70+
license = l
71+
}
72+
}
73+
5874
}
75+
5976
if license == "" && h.Get("License-File") != "" {
6077
license = "file://" + h.Get("License-File")
6178
}

pkg/dependency/parser/python/packaging/parse_test.go

+17-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,23 @@ func TestParse(t *testing.T) {
7676
// for single METADATA file with known name
7777
// cat "{{ libname }}.METADATA" | grep -e "^Name:" -e "^Version:" -e "^License:" | cut -d" " -f2- | tr "\n" "\t" | awk -F "\t" '{printf("\{\""$1"\", \""$2"\", \""$3"\"\}\n")}'
7878
input: "testdata/distlib-0.3.1.METADATA",
79-
want: []types.Library{{Name: "distlib", Version: "0.3.1", License: "Python license"}},
79+
want: []types.Library{{Name: "distlib", Version: "0.3.1", License: "Python Software Foundation License"}},
80+
},
81+
{
82+
name: "wheel METADATA",
83+
// Input defines "Classifier: License" but it ends at "OSI Approved" which doesn't define any specific license, thus "License" field is added to results
84+
input: "testdata/asyncssh-2.14.2.METADATA",
85+
86+
want: []types.Library{{Name: "asyncssh", Version: "2.14.2", License: "Eclipse Public License v2.0"}},
87+
},
88+
{
89+
name: "wheel METADATA",
90+
// Input defines multiple "Classifier: License"
91+
input: "testdata/pyphen-0.14.0.METADATA",
92+
93+
want: []types.Library{
94+
{Name: "pyphen", Version: "0.14.0", License: "GNU General Public License v2 or later (GPLv2+), GNU Lesser General Public License v2 or later (LGPLv2+), Mozilla Public License 1.1 (MPL 1.1)"},
95+
},
8096
},
8197
{
8298
name: "invalid",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
Metadata-Version: 2.1
2+
Name: asyncssh
3+
Version: 2.14.2
4+
Summary: AsyncSSH: Asynchronous SSHv2 client and server library
5+
Home-page: http://asyncssh.timeheart.net
6+
Author: Ron Frederick
7+
Author-email: [email protected]
8+
License: Eclipse Public License v2.0
9+
Project-URL: Documentation, https://asyncssh.readthedocs.io
10+
Project-URL: Source, https://github.com/ronf/asyncssh
11+
Project-URL: Tracker, https://github.com/ronf/asyncssh/issues
12+
Platform: Any
13+
Classifier: Development Status :: 5 - Production/Stable
14+
Classifier: Environment :: Console
15+
Classifier: Intended Audience :: Developers
16+
Classifier: License :: OSI Approved
17+
Classifier: Operating System :: MacOS :: MacOS X
18+
Classifier: Operating System :: POSIX
19+
Classifier: Programming Language :: Python :: 3.7
20+
Classifier: Programming Language :: Python :: 3.8
21+
Classifier: Programming Language :: Python :: 3.9
22+
Classifier: Programming Language :: Python :: 3.10
23+
Classifier: Programming Language :: Python :: 3.11
24+
Classifier: Programming Language :: Python :: 3.12
25+
Classifier: Topic :: Internet
26+
Classifier: Topic :: Security :: Cryptography
27+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
28+
Classifier: Topic :: System :: Networking
29+
Requires-Python: >= 3.6
30+
License-File: LICENSE
31+
Requires-Dist: cryptography (>=39.0)
32+
Requires-Dist: typing-extensions (>=3.6)
33+
Provides-Extra: bcrypt
34+
Requires-Dist: bcrypt (>=3.1.3) ; extra == 'bcrypt'
35+
Provides-Extra: fido2
36+
Requires-Dist: fido2 (>=0.9.2) ; extra == 'fido2'
37+
Provides-Extra: gssapi
38+
Requires-Dist: gssapi (>=1.2.0) ; extra == 'gssapi'
39+
Provides-Extra: libnacl
40+
Requires-Dist: libnacl (>=1.4.2) ; extra == 'libnacl'
41+
Provides-Extra: pkcs11
42+
Requires-Dist: python-pkcs11 (>=0.7.0) ; extra == 'pkcs11'
43+
Provides-Extra: pyopenssl
44+
Requires-Dist: pyOpenSSL (>=23.0.0) ; extra == 'pyopenssl'
45+
Provides-Extra: pywin32
46+
Requires-Dist: pywin32 (>=227) ; extra == 'pywin32'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
Metadata-Version: 2.1
2+
Name: pyphen
3+
Version: 0.14.0
4+
Summary: Pure Python module to hyphenate text
5+
Keywords: hyphenation
6+
Author-email: Guillaume Ayoub <[email protected]>
7+
Maintainer-email: CourtBouillon <[email protected]>
8+
Requires-Python: >=3.7
9+
Description-Content-Type: text/x-rst
10+
Classifier: Development Status :: 4 - Beta
11+
Classifier: Intended Audience :: Developers
12+
Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)
13+
Classifier: License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)
14+
Classifier: License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)
15+
Classifier: Programming Language :: Python
16+
Classifier: Programming Language :: Python :: 3
17+
Classifier: Programming Language :: Python :: 3.7
18+
Classifier: Programming Language :: Python :: 3.8
19+
Classifier: Programming Language :: Python :: 3.9
20+
Classifier: Programming Language :: Python :: 3.10
21+
Classifier: Programming Language :: Python :: 3.11
22+
Classifier: Programming Language :: Python :: Implementation :: CPython
23+
Classifier: Programming Language :: Python :: Implementation :: PyPy
24+
Classifier: Topic :: Text Processing
25+
Classifier: Topic :: Text Processing :: Linguistic
26+
Requires-Dist: sphinx ; extra == "doc"
27+
Requires-Dist: sphinx_rtd_theme ; extra == "doc"
28+
Requires-Dist: pytest ; extra == "test"
29+
Requires-Dist: isort ; extra == "test"
30+
Requires-Dist: flake8 ; extra == "test"
31+
Project-URL: Changelog, https://github.com/Kozea/Pyphen/releases
32+
Project-URL: Code, https://github.com/Kozea/Pyphen
33+
Project-URL: Documentation, https://pyphen.org/
34+
Project-URL: Donation, https://opencollective.com/courtbouillon
35+
Project-URL: Homepage, https://www.courtbouillon.org/pyphen
36+
Project-URL: Issues, https://github.com/Kozea/Pyphen/issues
37+
Provides-Extra: doc
38+
Provides-Extra: test

pkg/fanal/analyzer/language/python/packaging/packaging_test.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ func Test_packagingAnalyzer_Analyze(t *testing.T) {
3030
FilePath: "kitchen-1.2.6-py2.7.egg",
3131
Libraries: types.Packages{
3232
{
33-
Name: "kitchen",
34-
Version: "1.2.6",
35-
Licenses: []string{"LGPLv2+"},
33+
Name: "kitchen",
34+
Version: "1.2.6",
35+
Licenses: []string{
36+
"GNU Library or Lesser General Public License (LGPL)",
37+
},
3638
FilePath: "kitchen-1.2.6-py2.7.egg",
3739
},
3840
},

pkg/licensing/normalize.go

+26-4
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ var mapping = map[string]string{
8181
"PUBLIC DOMAIN": Unlicense,
8282
}
8383

84+
// pythonLicenseExceptions contains licenses that we cannot separate correctly using our logic.
85+
// Key: the lowercased first word that follows an `or`/`and` separator; value: the full license name.
86+
var pythonLicenseExceptions = map[string]string{
87+
"lesser": "GNU Library or Lesser General Public License (LGPL)",
88+
"distribution": "Common Development and Distribution License 1.0 (CDDL-1.0)",
89+
"disclaimer": "Historical Permission Notice and Disclaimer (HPND)",
90+
}
91+
8492
// Split licenses without considering "and"/"or"
8593
// examples:
8694
// 'GPL-1+,GPL-2' => {"GPL-1+", "GPL-2"}
@@ -104,11 +112,25 @@ func SplitLicenses(str string) []string {
104112
var licenses []string
105113
for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
106114
lower := strings.ToLower(maybeLic)
107-
if (strings.HasPrefix(lower, "ver ") || strings.HasPrefix(lower, "version ")) && len(licenses) > 0 {
108-
licenses[len(licenses)-1] += ", " + maybeLic
109-
} else {
110-
licenses = append(licenses, maybeLic)
115+
firstWord, _, _ := strings.Cut(lower, " ")
116+
if len(licenses) > 0 {
117+
// e.g. `Apache License, Version 2.0`
118+
if firstWord == "ver" || firstWord == "version" {
119+
licenses[len(licenses)-1] += ", " + maybeLic
120+
continue
121+
// e.g. `GNU Lesser General Public License v2 or later (LGPLv2+)`
122+
} else if firstWord == "later" {
123+
licenses[len(licenses)-1] += " or " + maybeLic
124+
continue
125+
} else if lic, ok := pythonLicenseExceptions[firstWord]; ok {
126+
// Check `or` and `and` separators
127+
if lic == licenses[len(licenses)-1]+" or "+maybeLic || lic == licenses[len(licenses)-1]+" and "+maybeLic {
128+
licenses[len(licenses)-1] = lic
129+
}
130+
continue
131+
}
111132
}
133+
licenses = append(licenses, maybeLic)
112134
}
113135
return licenses
114136
}

pkg/licensing/normalize_test.go

+51-10
Original file line numberDiff line numberDiff line change
@@ -17,44 +17,85 @@ func TestSplitLicenses(t *testing.T) {
1717
{
1818
"simple list comma-separated",
1919
"GPL-1+,GPL-2",
20-
[]string{"GPL-1+", "GPL-2"},
20+
[]string{
21+
"GPL-1+",
22+
"GPL-2",
23+
},
2124
},
2225
{
2326
"simple list comma-separated",
2427
"GPL-1+,GPL-2,GPL-3",
25-
[]string{"GPL-1+", "GPL-2", "GPL-3"},
28+
[]string{
29+
"GPL-1+",
30+
"GPL-2",
31+
"GPL-3",
32+
},
2633
},
2734
{
2835
"3 licenses 'or'-separated",
2936
"GPL-1+ or Artistic or Artistic-dist",
30-
[]string{"GPL-1+", "Artistic", "Artistic-dist"},
37+
[]string{
38+
"GPL-1+",
39+
"Artistic",
40+
"Artistic-dist",
41+
},
3142
},
32-
// '
3343
{
3444
"two licenses _or_ separated",
3545
"LGPLv3+_or_GPLv2+",
36-
[]string{"LGPLv3+", "GPLv2+"},
46+
[]string{
47+
"LGPLv3+",
48+
"GPLv2+",
49+
},
3750
},
38-
// '
3951
{
4052
"licenses `and`-separated",
4153
"BSD-3-CLAUSE and GPL-2",
42-
[]string{"BSD-3-CLAUSE", "GPL-2"},
54+
[]string{
55+
"BSD-3-CLAUSE",
56+
"GPL-2",
57+
},
4358
},
4459
{
4560
"three licenses and/or separated",
4661
"GPL-1+ or Artistic, and BSD-4-clause-POWERDOG",
47-
[]string{"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"},
62+
[]string{
63+
"GPL-1+",
64+
"Artistic",
65+
"BSD-4-clause-POWERDOG",
66+
},
4867
},
4968
{
5069
"two licenses with version",
5170
"Apache License,Version 2.0, OSET Public License version 2.1",
52-
[]string{"Apache License, Version 2.0", "OSET Public License version 2.1"},
71+
[]string{
72+
"Apache License, Version 2.0",
73+
"OSET Public License version 2.1",
74+
},
5375
},
5476
{
5577
"the license starts with `ver`",
5678
"verbatim and BSD-4-clause",
57-
[]string{"verbatim", "BSD-4-clause"},
79+
[]string{
80+
"verbatim",
81+
"BSD-4-clause",
82+
},
83+
},
84+
{
85+
"the license with `or later`",
86+
"GNU Affero General Public License v3 or later (AGPLv3+)",
87+
[]string{
88+
"GNU Affero General Public License v3 or later (AGPLv3+)",
89+
},
90+
},
91+
{
92+
"Python license exceptions",
93+
"GNU Library or Lesser General Public License (LGPL), Common Development and Distribution License 1.0 (CDDL-1.0), Historical Permission Notice and Disclaimer (HPND)",
94+
[]string{
95+
"GNU Library or Lesser General Public License (LGPL)",
96+
"Common Development and Distribution License 1.0 (CDDL-1.0)",
97+
"Historical Permission Notice and Disclaimer (HPND)",
98+
},
5899
},
59100
}
60101

0 commit comments

Comments
 (0)