Skip to content

Commit d70020c

Browse files
committed
Improve NPM package handling
* do not map the legacy "url" to anything #755 * allow to build a Package from data and not only from a file * do not return full location for the metafile but only filename #782 * ensure only one download_urls is created #779 * improve the mapping of "parties" * other misc code improvement and TODOs * add new tests for #755 and other samples Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 25e3826 commit d70020c

File tree

13 files changed

+5501
-355
lines changed

13 files changed

+5501
-355
lines changed

src/packagedcode/npm.py

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
2+
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
33
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
44
# The ScanCode software is licensed under the Apache License version 2.0.
55
# Data generated with ScanCode require an acknowledgment.
@@ -86,6 +86,20 @@ def parse(location):
8686
if not is_package_json(location):
8787
return
8888

89+
with codecs.open(location, encoding='utf-8') as loc:
90+
package_data = json.load(loc, object_pairs_hook=OrderedDict)
91+
92+
# a package.json is at the root of an NPM package
93+
base_dir = fileutils.parent_directory(location)
94+
metafile_name = fileutils.file_base_name(location)
95+
return build_package(package_data, base_dir, metafile_name)
96+
97+
98+
def build_package(package_data, base_dir=None, metafile_name='package.json'):
99+
"""
100+
Return a Package object from a package_data mapping (from a
101+
package.json or similar) or None.
102+
"""
89103
# mapping of top level package.json items to the Package object field name
90104
plain_fields = OrderedDict([
91105
('name', 'name'),
@@ -101,33 +115,35 @@ def parse(location):
101115
('bugs', bugs_mapper),
102116
('contributors', contributors_mapper),
103117
('maintainers', maintainers_mapper),
118+
# current form
104119
('license', licensing_mapper),
120+
# old, deprecated form
105121
('licenses', licensing_mapper),
106122
('dependencies', dependencies_mapper),
107123
('devDependencies', dev_dependencies_mapper),
108124
('peerDependencies', peer_dependencies_mapper),
109125
('optionalDependencies', optional_dependencies_mapper),
110-
('url', url_mapper),
126+
# legacy, ignored
127+
# ('url', url_mapper),
111128
('dist', dist_mapper),
112129
('repository', repository_mapper),
113130
])
114131

115-
with codecs.open(location, encoding='utf-8') as loc:
116-
data = json.load(loc, object_pairs_hook=OrderedDict)
117132

118-
if not data.get('name') or not data.get('version'):
133+
if not package_data.get('name') or not package_data.get('version'):
119134
# a package.json without name and version is not a usable NPM package
120135
return
121136

122137
package = NpmPackage()
123138
# a package.json is at the root of an NPM package
124-
base_dir = fileutils.parent_directory(location)
125139
package.location = base_dir
126140
# for now we only recognize a package.json, not a node_modules directory yet
127-
package.metafile_locations = [location]
128-
package.version = data.get('version')
141+
if metafile_name:
142+
package.metafile_locations = [metafile_name]
143+
144+
package.version = package_data.get('version') or None
129145
for source, target in plain_fields.items():
130-
value = data.get(source)
146+
value = package_data.get(source) or None
131147
if value:
132148
if isinstance(value, basestring):
133149
value = value.strip()
@@ -136,14 +152,21 @@ def parse(location):
136152

137153
for source, func in field_mappers.items():
138154
logger.debug('parse: %(source)r, %(func)r' % locals())
139-
value = data.get(source)
155+
value = package_data.get(source) or None
140156
if value:
141157
if isinstance(value, basestring):
142158
value = value.strip()
143159
if value:
144160
func(value, package)
145-
# this should be a mapper function but requires two args
146-
package.download_urls.append(public_download_url(package.name, package.version))
161+
162+
# this should be a mapper function but requires two args.
163+
# Note: we only add a synthetic download URL if there is none from
164+
# the dist mapping.
165+
if not package.download_urls:
166+
tarball = public_download_url(package.name, package.version)
167+
if tarball:
168+
package.download_urls.append(tarball)
169+
147170
return package
148171

149172

@@ -152,7 +175,7 @@ def licensing_mapper(licenses, package):
152175
Update package licensing and return package.
153176
Licensing data structure has evolved over time and is a tad messy.
154177
https://docs.npmjs.com/files/package.json#license
155-
licenses is either:
178+
license(s) is either:
156179
- a string with:
157180
- an SPDX id or expression { "license" : "(ISC OR GPL-3.0)" }
158181
- some license name or id
@@ -163,9 +186,13 @@ def licensing_mapper(licenses, package):
163186
return package
164187

165188
if isinstance(licenses, basestring):
189+
# current form
190+
# TODO: handle "SEE LICENSE IN <filename>"
191+
# TODO: parse expression with license_expression library
166192
package.asserted_licenses.append(models.AssertedLicense(license=licenses))
167193

168194
elif isinstance(licenses, dict):
195+
# old, deprecated form
169196
"""
170197
"license": {
171198
"type": "MIT",
@@ -176,6 +203,7 @@ def licensing_mapper(licenses, package):
176203
url=licenses.get('url')))
177204

178205
elif isinstance(licenses, list):
206+
# old, deprecated form
179207
"""
180208
"licenses": ["type": "Apache License, Version 2.0",
181209
"url": "http://www.apache.org/licenses/LICENSE-2.0" } ]
@@ -302,12 +330,19 @@ def repository_mapper(repo, package):
302330

303331
def url_mapper(url, package):
304332
"""
305-
In a package.json, the "url" field is a redirection to a package download
306-
URL published somewhere else than on the public npm registry.
307-
We map it to a download url.
333+
In a package.json, the "url" field is a legacy field that contained
334+
various URLs either as a string or as a mapping of type->url
308335
"""
309-
if url:
310-
package.download_urls.append(url)
336+
if not url:
337+
return package
338+
339+
if isinstance(url, basestring):
340+
# TOOD: map to a miscellaneous urls dict
341+
pass
342+
elif isinstance(url, dict):
343+
# typical key is "web"
344+
# TOOD: map to a miscellaneous urls dict
345+
pass
311346
return package
312347

313348

@@ -395,6 +430,11 @@ def parse_person(person):
395430
Both forms are equivalent.
396431
"""
397432
# TODO: detect if this is a person name or a company name
433+
434+
name = None
435+
email = None
436+
url = None
437+
398438
if isinstance(person, basestring):
399439
parsed = person_parser(person)
400440
if not parsed:
@@ -409,10 +449,28 @@ def parse_person(person):
409449
name = person.get('name')
410450
email = person.get('email')
411451
url = person.get('url')
452+
412453
else:
413454
raise Exception('Incorrect NPM package.json person: %(person)r' % locals())
414455

415-
return name and name.strip(), email and email.strip('<> '), url and url.strip('() ')
456+
if name:
457+
name = name.strip()
458+
if name.lower() == 'none':
459+
name = None
460+
name = name or None
461+
462+
if email:
463+
email = email.strip('<> ')
464+
if email.lower() == 'none':
465+
email = None
466+
email = email or None
467+
468+
if url:
469+
url = url.strip('() ')
470+
if url.lower() == 'none':
471+
url = None
472+
url = url or None
473+
return name, email, url
416474

417475

418476
def public_download_url(name, version, registry='https://registry.npmjs.org'):

0 commit comments

Comments
 (0)