Skip to content

Commit 17faac3

Browse files
authored
Merge pull request #276 from adafruit/file-compare-script
Added script to compare files across adafruit repos
2 parents 33d32df + b7f82aa commit 17faac3

File tree

2 files changed

+144
-0
lines changed

2 files changed

+144
-0
lines changed

tools/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,10 @@ Must be run from top-level directory (i.e. one up from this one).
5858
Run with:
5959
`python3 runner.py`
6060
and then type in the number you want to run.
61+
62+
63+
#### file_compare.py
64+
65+
Functionality to compare a file across all Adafruit CircuitPython repos
66+
and output the text of the files along with which and how many repos use that
67+
exact file text.

tools/file_compare.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# SPDX-FileCopyrightText: 2022 Eva Herrada
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
"""
6+
7+
file_compare.py
8+
===============
9+
10+
Functionality to compare a file across all Adafruit CircuitPython repos
11+
and output the text of the files along with which and how many repos use that
12+
exact file text.
13+
14+
* Author(s): Eva Herrada
15+
16+
"""
17+
import argparse
18+
from typing import Optional
19+
20+
import requests
21+
from requests.structures import CaseInsensitiveDict
22+
23+
from adabot.lib.common_funcs import list_repos
24+
25+
26+
def compare(git_file: str, token: Optional[str] = None) -> list:
    """Uses requests to compare files across the adafruit org

    The file is requested from the ``main`` branch of every repo returned by
    ``list_repos()`` and the repos are grouped by the exact response text.

    .. note::

        The GitHub API token is not necessary as long as all repos
        being accessed are public. However: it does make things easier
        as you won't get rate-limited quite as often

    .. note::

        A non-200 response is reported on stdout (repo name and status code)
        but its body is still grouped like any other text, so repos that do
        not serve the file end up together under the returned error text.

    :param str git_file: The file to compare
    :param str|None token: The (optional but recommended) github API token
    :return: A list of ``[count, text, repo_urls]`` entries, one for each
        unique file text, sorted from most to least common. Entries with the
        same count keep the order in which their text was first seen.
    :rtype: list
    """

    all_repos = list_repos()
    print("Got Repos List")
    print(f"Repos found: {len(all_repos)}")

    # The headers are identical for every request, so build them once.
    # ``headers=None`` makes requests fall back to its defaults.
    headers = None
    if token:
        # If repo is private - we need to add a token in header:
        headers = CaseInsensitiveDict()
        headers["Authorization"] = f"token {token}"

    # Maps each distinct response text to the URLs of the repos returning it.
    files = {}
    for repo in all_repos:
        name = repo["name"]
        url = f"https://raw.githubusercontent.com/adafruit/{name}/main/{git_file}"

        resp = requests.get(url, headers=headers)

        if resp.status_code != 200:
            print(name)
            print(resp.status_code)

        files.setdefault(resp.text, []).append(repo["html_url"])

    # A real sort keeps every entry. The previous hand-written insertion loop
    # dropped any text whose count was lower than all entries already placed.
    return sorted(
        ([len(urls), text, urls] for text, urls in files.items()),
        key=lambda entry: entry[0],
        reverse=True,
    )
83+
84+
85+
if __name__ == "__main__":
    # Command-line entry point: compare one file across the repos, print the
    # grouped results and optionally write them to a file as well.

    parser = argparse.ArgumentParser(
        description="Compare files across the adafruit CircuitPython repos",
    )
    # compare() treats the token as optional, so the CLI does not force one;
    # passing it positionally keeps working exactly as before.
    parser.add_argument(
        "gh_token",
        metavar="GH_TOKEN",
        type=str,
        nargs="?",
        default=None,
        help="GitHub token with proper scopes",
    )

    parser.add_argument(
        "--file",
        metavar="<FILE>",
        type=str,
        dest="file",
        required=True,
        help="File to compare",
    )

    parser.add_argument(
        "-o",
        metavar="<OUTFILE>",
        type=str,
        dest="outfile",
        default=None,
        help="File to send output to",
    )

    args = parser.parse_args()

    results = compare(args.file, args.gh_token)

    # Each result is [count, file text, list of repo URLs].
    for index, value in enumerate(results):
        print(f"##### {index+1}/{len(results)} #####")
        print(value[0])
        print("START OF FILE")
        print(value[1])
        print("END OF FILE")
        print(value[2])
        print()
    if args.outfile:
        # Fetched file text can contain non-ASCII characters; an explicit
        # encoding avoids UnicodeEncodeError on non-UTF-8 default locales.
        with open(args.outfile, "w", encoding="utf-8") as out_file:
            for index, value in enumerate(results):
                out_file.write(f"##### {index+1}/{len(results)} #####\n")
                out_file.write(f"{value[0]}\n")
                out_file.write("START OF FILE\n")
                out_file.write(f"{value[1]}\n")
                out_file.write("END OF FILE\n")
                # Unlike the console output, the file lists one URL per line.
                out_file.writelines(f"{repo_url}\n" for repo_url in value[2])
                out_file.write("\n")

0 commit comments

Comments
 (0)