Skip to content

Commit df6ad28

Browse files
Use a regex pattern to check for missing licenses
Co-Authored-By: Catherine <[email protected]>
1 parent 3c30cf2 commit df6ad28

File tree

1 file changed

+58
-35
lines changed

1 file changed

+58
-35
lines changed

make_wheels.py

Lines changed: 58 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
import logging
33
import io
44
import os
5+
import re
56
import json
67
import hashlib
78
import tarfile
9+
from warnings import warn
810
import urllib.request
9-
from pathlib import Path
11+
from pathlib import Path, PurePath
1012
from email.message import EmailMessage
1113
from wheel.wheelfile import WheelFile
1214
from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
@@ -70,19 +72,12 @@ def write_wheel_file(filename, contents):
7072
def write_wheel(out_dir, *, name, version, tag, metadata, description, contents):
7173
wheel_name = f'{name}-{version}-{tag}.whl'
7274
dist_info = f'{name}-{version}.dist-info'
73-
license_files = {}
7475
filtered_metadata = []
7576
for header, value in metadata:
76-
if header == 'License-File':
77-
license_dest = f'{dist_info}/licenses/{value}'
78-
if value in contents:
79-
license_files[license_dest] = contents[value]
80-
filtered_metadata.append((header, value))
8177
filtered_metadata.append((header, value))
8278

8379
return write_wheel_file(os.path.join(out_dir, wheel_name), {
8480
**contents,
85-
**license_files,
8681
f'{dist_info}/METADATA': make_message([
8782
('Metadata-Version', '2.4'),
8883
('Name', name),
@@ -119,12 +114,35 @@ def write_ziglang_wheel(out_dir, *, version, platform, archive):
119114
contents['ziglang/__init__.py'] = b''
120115

121116
license_files = {}
117+
found_license_files = set()
118+
potential_extra_licenses = set()
119+
120+
# A bunch of standard license file patterns. If a file matches any of
121+
# these, we need to add it to required_license_paths and metadata.
122+
license_patterns = [
123+
r'COPYING.*',
124+
r'COPYRIGHT.*',
125+
r'COPYLEFT.*',
126+
r'LICEN[CS]E.*',
127+
r'LICEN[CS]E-.*',
128+
r'LICEN[CS]E\..*',
129+
r'PATENTS.*',
130+
r'NOTICE.*',
131+
r'LEGAL.*',
132+
r'AUTHORS.*',
133+
r'RIGHT*',
134+
r'PERMISSION*',
135+
r'THIRD[-_]PARTY[-_]LICENSES?.*',
136+
r'EULA*',
137+
r'MIT*',
138+
r'GPL*',
139+
r'AGPL*',
140+
r'LGPL*',
141+
r'APACHE*',
142+
]
143+
license_regex = re.compile('|'.join(f'^{pattern}$' for pattern in license_patterns), re.IGNORECASE)
122144

123-
# The paths to these licenses MUST match both the actual files
124-
# in the Zig source tarballs and the License-File entries listed
125-
# below in the metadata. These are not prefixed with "ziglang/"
126-
# since these are the actual paths in the Zig source tarballs.
127-
license_paths = [
145+
required_license_paths = [
128146
'LICENSE',
129147
'lib/libc/glibc/LICENSES',
130148
'lib/libc/mingw/COPYING',
@@ -133,42 +151,33 @@ def write_ziglang_wheel(out_dir, *, version, platform, archive):
133151
'lib/libc/wasi/LICENSE-APACHE',
134152
'lib/libc/wasi/LICENSE-APACHE-LLVM',
135153
'lib/libc/wasi/LICENSE-MIT',
154+
'lib/libc/wasi/libc-bottom-half/cloudlibc/LICENSE',
155+
'lib/libc/wasi/libc-top-half/musl/COPYRIGHT',
136156
'lib/libcxx/LICENSE.TXT',
137157
'lib/libcxxabi/LICENSE.TXT',
138-
'lib/libunwind/LICENSE.TXT'
158+
'lib/libunwind/LICENSE.TXT',
139159
]
140160

141-
found_license_files = set()
142-
143161
for entry_name, entry_mode, entry_data in iter_archive_contents(archive):
144162
entry_name = '/'.join(entry_name.split('/')[1:])
145163
if not entry_name:
146164
continue
147165
if entry_name.startswith('doc/'):
148166
continue
149167

150-
# The license files and their paths MUST remain in sync with
151-
# the paths in the official Zig tarballs and with the ones
152-
# defined below in the metadata.
153-
if any(entry_name == license_path for license_path in [
154-
'LICENSE',
155-
'lib/libc/glibc/LICENSES',
156-
'lib/libc/mingw/COPYING',
157-
'lib/libc/musl/COPYRIGHT',
158-
'lib/libc/wasi/LICENSE',
159-
'lib/libc/wasi/LICENSE-APACHE',
160-
'lib/libc/wasi/LICENSE-APACHE-LLVM',
161-
'lib/libc/wasi/LICENSE-MIT',
162-
'lib/libcxx/LICENSE.TXT',
163-
'lib/libcxxabi/LICENSE.TXT',
164-
'lib/libunwind/LICENSE.TXT'
165-
]):
166-
license_contents[entry_name] = entry_data
168+
# Check for additional license-like files
169+
potential_license_filename = PurePath(entry_name).name
170+
if license_regex.match(potential_license_filename):
171+
potential_extra_licenses.add(entry_name)
167172

168173
zip_info = ZipInfo(f'ziglang/{entry_name}')
169174
zip_info.external_attr = (entry_mode & 0xFFFF) << 16
170175
contents[zip_info] = entry_data
171176

177+
if entry_name in required_license_paths:
178+
license_files[entry_name] = entry_data
179+
found_license_files.add(entry_name)
180+
172181
if entry_name.startswith('zig'):
173182
contents['ziglang/__main__.py'] = f'''\
174183
import os, sys
@@ -179,12 +188,24 @@ def write_ziglang_wheel(out_dir, *, version, platform, archive):
179188
import subprocess; sys.exit(subprocess.call(argv))
180189
'''.encode('ascii')
181190

191+
# 1. Check for missing required license paths
192+
missing_licenses = set(required_license_paths) - found_license_files
193+
if missing_licenses:
194+
print(f"\033[93mWarning: the following required license files were not found in the Zig archive: {', '.join(sorted(missing_licenses))} "
195+
"\nThis may indicate a change in Zig's license file structure or an error in the listing of license files and/or paths.\033[0m")
196+
197+
# 2. Check for potentially missing license files
198+
extra_licenses = potential_extra_licenses - set(required_license_paths)
199+
if extra_licenses:
200+
print(f"\033[93mWarning: found additional potential license files in the Zig archive but not included in the metadata: {', '.join(sorted(extra_licenses))} "
201+
"\nPlease consider adding these to the license paths if they should be included.\033[0m")
202+
182203
with open('README.pypi.md') as f:
183204
description = f.read()
184205

185206
dist_info = f'ziglang-{version}.dist-info'
186207
for license_path, license_data in license_files.items():
187-
contents[f"{dist_info}/licenses/ziglang/{license_path}"] = license_data
208+
contents[f'{dist_info}/licenses/ziglang/{license_path}'] = license_data
188209

189210
return write_wheel(out_dir,
190211
name='ziglang',
@@ -199,14 +220,16 @@ def write_ziglang_wheel(out_dir, *, version, platform, archive):
199220
# are prefixed with "ziglang/" to match the paths in the wheel
200221
# for metadata compliance.
201222
('License-Expression', 'MIT'),
202-
('License-File', 'LICENSE'),
223+
('License-File', 'ziglang/LICENSE'),
203224
('License-File', 'ziglang/lib/libc/glibc/LICENSES'),
204225
('License-File', 'ziglang/lib/libc/mingw/COPYING'),
205226
('License-File', 'ziglang/lib/libc/musl/COPYRIGHT'),
206227
('License-File', 'ziglang/lib/libc/wasi/LICENSE'),
207228
('License-File', 'ziglang/lib/libc/wasi/LICENSE-APACHE'),
208229
('License-File', 'ziglang/lib/libc/wasi/LICENSE-APACHE-LLVM'),
209230
('License-File', 'ziglang/lib/libc/wasi/LICENSE-MIT'),
231+
('License-File', 'ziglang/lib/libc/wasi/libc-bottom-half/cloudlibc/LICENSE'),
232+
('License-File', 'ziglang/lib/libc/wasi/libc-top-half/musl/COPYRIGHT'),
210233
('License-File', 'ziglang/lib/libcxx/LICENSE.TXT'),
211234
('License-File', 'ziglang/lib/libcxxabi/LICENSE.TXT'),
212235
('License-File', 'ziglang/lib/libunwind/LICENSE.TXT'),

0 commit comments

Comments
 (0)