Skip to content

Commit f2ae506

Browse files
committed
Add download script.
1 parent bce92d5 commit f2ae506

File tree

1 file changed

+146
-0
lines changed

1 file changed

+146
-0
lines changed

download_artefacts.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#!/usr/bin/python3
2+
3+
import itertools
4+
import json
5+
import logging
6+
import re
7+
import shutil
8+
import datetime
9+
10+
from concurrent.futures import ProcessPoolExecutor as Pool, as_completed
11+
from pathlib import Path
12+
from urllib.request import urlopen
13+
from urllib.parse import urljoin
14+
15+
# Module-level logger; configured by the __main__ block when run as a script.
logger = logging.getLogger()

# Number of concurrent download workers.
PARALLEL_DOWNLOADS = 6
# Project page on GitHub whose release tags carry the wheel artefacts.
GITHUB_PACKAGE_URL = "https://github.com/scoder/fastrlock"
# AppVeyor REST endpoints for the project's CI builds and build artefacts.
APPVEYOR_PACKAGE_URL = "https://ci.appveyor.com/api/projects/scoder/fastrlock"
APPVEYOR_BUILDJOBS_URL = "https://ci.appveyor.com/api/buildjobs"


def find_github_files(version, base_package_url=GITHUB_PACKAGE_URL):
    """Yield the distinct wheel URLs linked from the GitHub release page of *version*, in sorted order."""
    release_url = f"{base_package_url}/releases/tag/{version}"
    with urlopen(release_url) as response:
        page = response.read().decode()

    # Deduplicate the hrefs and emit them in sorted order.
    for wheel_href in sorted(set(re.findall(r'href="([^"]+\.whl)"', page))):
        yield urljoin(base_package_url, wheel_href)
30+
31+
32+
def find_appveyor_files(version, base_package_url=APPVEYOR_PACKAGE_URL, base_job_url=APPVEYOR_BUILDJOBS_URL):
    """Yield the artefact URLs of the AppVeyor CI build that ran for the tag *version*."""
    history_url = f"{base_package_url}/history?recordsNumber=20"
    with urlopen(history_url) as response:
        recent_builds = json.load(response)["builds"]

    tag = f"{version}"
    # Take the first (most recent) tagged build matching our version tag.
    matches = (build['buildId'] for build in recent_builds
               if build['isTag'] and build['tag'] == tag)
    build_id = next(matches, None)
    if build_id is None:
        logger.warning(f"No appveyor build found for tag '{tag}'")
        return

    build_url = f"{base_package_url}/builds/{build_id}"
    with urlopen(build_url) as response:
        build_jobs = json.load(response)["build"]["jobs"]

    # Each job of the build exposes its own artefact listing.
    for job in build_jobs:
        artifacts_url = f"{base_job_url}/{job['jobId']}/artifacts/"
        with urlopen(artifacts_url) as response:
            for artifact in json.load(response):
                yield urljoin(artifacts_url, artifact['fileName'])
56+
57+
58+
def download1(wheel_url, dest_dir):
    """Download one wheel from *wheel_url* into *dest_dir* and return its file name.

    An already-present file whose size matches the server's Content-Length is
    kept as-is.  The payload is first written to a ".tmp" file and only
    renamed into place on success, so an interrupted transfer never leaves a
    truncated wheel behind.
    """
    wheel_name = wheel_url.rsplit("/", 1)[1]
    logger.info(f"Downloading {wheel_url} ...")
    with urlopen(wheel_url) as w:
        file_path = dest_dir / wheel_name
        if (file_path.exists()
                and "Content-Length" in w.headers
                and file_path.stat().st_size == int(w.headers["Content-Length"])):
            logger.info(f"Already have {wheel_name}")
        else:
            temp_file_path = file_path.with_suffix(".tmp")
            try:
                with open(temp_file_path, "wb") as f:
                    shutil.copyfileobj(w, f)
            except BaseException:
                # Deliberately also catches KeyboardInterrupt/SystemExit
                # (the original used a bare "except:"): remove the partial
                # ".tmp" file, then let the exception propagate.
                if temp_file_path.exists():
                    temp_file_path.unlink()
                raise
            else:
                # Atomic rename: the final file only ever appears complete.
                temp_file_path.replace(file_path)
            logger.info(f"Finished downloading {wheel_name}")
    return wheel_name
80+
81+
82+
def download(urls, dest_dir, jobs=PARALLEL_DOWNLOADS):
    """Download all *urls* into *dest_dir* using *jobs* parallel workers, yielding each wheel name as its download completes."""
    with Pool(max_workers=jobs) as pool:
        pending = [pool.submit(download1, url, dest_dir) for url in urls]
        try:
            for finished in as_completed(pending):
                yield finished.result()
        except KeyboardInterrupt:
            # Cancel still-queued downloads before propagating Ctrl-C.
            for future in pending:
                future.cancel()
            raise
93+
94+
95+
def dedup(it):
    """Yield each value of *it* exactly once, preserving first-seen order."""
    emitted = set()
    for item in it:
        if item in emitted:
            continue
        emitted.add(item)
        yield item
101+
102+
103+
def roundrobin(*iterables):
    """roundrobin('ABC', 'D', 'EF') --> A D E B F C

    Yield one item from each iterable in turn, dropping iterables as they
    are exhausted, until all of them are done.  (Replaces the classic
    cycle/islice recipe, whose loop variable shadowed the builtin ``next``.)
    """
    iterators = [iter(it) for it in iterables]
    while iterators:
        still_active = []
        for iterator in iterators:
            try:
                item = next(iterator)
            except StopIteration:
                # Exhausted: drop this iterator from the rotation.
                continue
            still_active.append(iterator)
            yield item
        iterators = still_active
117+
118+
119+
def main(*args):
    """Download all release artefacts for the version in args[0] into dist/<version>/."""
    if not args:
        print("Please pass the version to download")
        return

    version = args[0]
    dest_dir = Path("dist") / version
    # parents=True also creates "dist" on first use (plain mkdir() would
    # raise FileNotFoundError); exist_ok=True replaces the racy is_dir() check.
    dest_dir.mkdir(parents=True, exist_ok=True)

    start_time = datetime.datetime.now().replace(microsecond=0)
    # Interleave the two sources so both servers are hit evenly.
    urls = roundrobin(*map(dedup, [
        find_github_files(version),
        find_appveyor_files(version),
    ]))
    count = sum(1 for _ in download(urls, dest_dir))
    duration = datetime.datetime.now().replace(microsecond=0) - start_time
    logger.info(f"Downloaded {count} files in {duration}.")
137+
138+
139+
if __name__ == "__main__":
    import sys
    # Log to stderr so stdout stays clean; INFO level shows per-file progress.
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(asctime)-15s %(message)s",
    )
    # Pass the command-line arguments (expected: the version tag) to main().
    main(*sys.argv[1:])

0 commit comments

Comments
 (0)