Skip to content

Commit c84e2b7

Browse files
committed
Lintcheck: Refactor structs and only take one version per crate
1 parent dfb9253 commit c84e2b7

File tree

6 files changed

+567
-541
lines changed

6 files changed

+567
-541
lines changed

lintcheck/lintcheck_crates.toml

+26-26
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,38 @@
11
[crates]
22
# some of these are from cargotest
3-
cargo = {name = "cargo", versions = ['0.64.0']}
4-
iron = {name = "iron", versions = ['0.6.1']}
5-
ripgrep = {name = "ripgrep", versions = ['12.1.1']}
6-
xsv = {name = "xsv", versions = ['0.13.0']}
3+
cargo = {name = "cargo", version = '0.64.0'}
4+
iron = {name = "iron", version = '0.6.1'}
5+
ripgrep = {name = "ripgrep", version = '12.1.1'}
6+
xsv = {name = "xsv", version = '0.13.0'}
77
# commented out because of 173K clippy::match_same_arms msgs in language_type.rs
8-
#tokei = { name = "tokei", versions = ['12.0.4']}
9-
rayon = {name = "rayon", versions = ['1.5.0']}
10-
serde = {name = "serde", versions = ['1.0.118']}
8+
#tokei = { name = "tokei", version = '12.0.4'}
9+
rayon = {name = "rayon", version = '1.5.0'}
10+
serde = {name = "serde", version = '1.0.118'}
1111
# top 10 crates.io dls
12-
bitflags = {name = "bitflags", versions = ['1.2.1']}
12+
bitflags = {name = "bitflags", version = '1.2.1'}
1313
# crash = {name = "clippy_crash", path = "/tmp/clippy_crash"}
14-
libc = {name = "libc", versions = ['0.2.81']}
15-
log = {name = "log", versions = ['0.4.11']}
16-
proc-macro2 = {name = "proc-macro2", versions = ['1.0.24']}
17-
quote = {name = "quote", versions = ['1.0.7']}
18-
rand = {name = "rand", versions = ['0.7.3']}
19-
rand_core = {name = "rand_core", versions = ['0.6.0']}
20-
regex = {name = "regex", versions = ['1.3.2']}
21-
syn = {name = "syn", versions = ['1.0.54']}
22-
unicode-xid = {name = "unicode-xid", versions = ['0.2.1']}
14+
libc = {name = "libc", version = '0.2.81'}
15+
log = {name = "log", version = '0.4.11'}
16+
proc-macro2 = {name = "proc-macro2", version = '1.0.24'}
17+
quote = {name = "quote", version = '1.0.7'}
18+
rand = {name = "rand", version = '0.7.3'}
19+
rand_core = {name = "rand_core", version = '0.6.0'}
20+
regex = {name = "regex", version = '1.3.2'}
21+
syn = {name = "syn", version = '1.0.54'}
22+
unicode-xid = {name = "unicode-xid", version = '0.2.1'}
2323
# some more of dtolnay's crates
24-
anyhow = {name = "anyhow", versions = ['1.0.38']}
25-
async-trait = {name = "async-trait", versions = ['0.1.42']}
26-
cxx = {name = "cxx", versions = ['1.0.32']}
27-
ryu = {name = "ryu", versions = ['1.0.5']}
28-
serde_yaml = {name = "serde_yaml", versions = ['0.8.17']}
29-
thiserror = {name = "thiserror", versions = ['1.0.24']}
24+
anyhow = {name = "anyhow", version = '1.0.38'}
25+
async-trait = {name = "async-trait", version = '0.1.42'}
26+
cxx = {name = "cxx", version = '1.0.32'}
27+
ryu = {name = "ryu", version = '1.0.5'}
28+
serde_yaml = {name = "serde_yaml", version = '0.8.17'}
29+
thiserror = {name = "thiserror", version = '1.0.24'}
3030
# some embark crates, there are other interesting crates but
3131
# unfortunately adding them increases lintcheck runtime drastically
32-
cfg-expr = {name = "cfg-expr", versions = ['0.7.1']}
32+
cfg-expr = {name = "cfg-expr", version = '0.7.1'}
3333
puffin = {name = "puffin", git_url = "https://github.com/EmbarkStudios/puffin", git_hash = "02dd4a3"}
34-
rpmalloc = {name = "rpmalloc", versions = ['0.2.0']}
35-
tame-oidc = {name = "tame-oidc", versions = ['0.1.0']}
34+
rpmalloc = {name = "rpmalloc", version = '0.2.0'}
35+
tame-oidc = {name = "tame-oidc", version = '0.1.0'}
3636

3737
[recursive]
3838
ignore = [

lintcheck/src/input.rs

+288
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
use std::collections::{HashMap, HashSet};
2+
use std::fs::{self};
3+
use std::io::{self, ErrorKind};
4+
use std::path::{Path, PathBuf};
5+
use std::process::Command;
6+
use std::time::Duration;
7+
8+
use serde::Deserialize;
9+
use walkdir::{DirEntry, WalkDir};
10+
11+
use crate::{Crate, LINTCHECK_DOWNLOADS, LINTCHECK_SOURCES};
12+
13+
/// List of sources to check, loaded from a .toml file
///
/// The field names double as the names of the top-level tables that serde reads:
/// `[crates]` and `[recursive]`.
14+
#[derive(Debug, Deserialize)]
15+
pub struct SourceList {
16+
/// Every entry of the `[crates]` table, keyed by the entry's table key
crates: HashMap<String, TomlCrate>,
17+
/// Contents of the `[recursive]` table; `RecursiveOptions::default()` is used when the table
/// is missing (that is what `#[serde(default)]` does)
#[serde(default)]
18+
recursive: RecursiveOptions,
19+
}
20+
21+
/// Options read from the `[recursive]` table of the sources .toml file
///
/// `Default` yields an empty `ignore` set.
#[derive(Debug, Deserialize, Default)]
22+
pub struct RecursiveOptions {
23+
/// Strings listed under the `ignore` key.
/// NOTE(review): presumably crate names to leave out in recursive mode; the consumer is not
/// part of this file, so confirm against the caller.
pub ignore: HashSet<String>,
24+
}
25+
26+
/// A crate source stored inside the .toml
27+
/// will be translated into one of the `CrateSource` variants
28+
#[derive(Debug, Deserialize)]
29+
struct TomlCrate {
30+
/// Name of the crate, copied into the resulting `CrateSource`
pub name: String,
31+
/// Version to fetch from crates.io; selects `CrateSource::CratesIo`
pub version: Option<String>,
32+
/// URL of a git repository; only meaningful together with `git_hash`
pub git_url: Option<String>,
33+
/// Revision to check out of `git_url`; only meaningful together with `git_url`
pub git_hash: Option<String>,
34+
/// Local directory containing the crate; selects `CrateSource::Path`
pub path: Option<String>,
35+
/// Optional list of strings that is carried over unchanged into the `CrateSource`
pub options: Option<Vec<String>>,
36+
}
37+
38+
/// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
39+
/// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
///
/// The derived `Ord` compares the variants in declaration order first and the fields in
/// declaration order second. `read_crates` sorts its result with this ordering, so reordering
/// variants or fields changes the order in which sources are returned.
40+
#[derive(Debug, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
41+
pub enum CrateSource {
42+
/// A published crate, downloaded as an archive from crates.io
CratesIo {
43+
name: String,
44+
version: String,
45+
// handed over unchanged to the resulting `Crate`
options: Option<Vec<String>>,
46+
},
47+
/// A git repository that is cloned from `url`; `commit` is what gets passed to `git checkout`
Git {
48+
name: String,
49+
url: String,
50+
commit: String,
51+
// handed over unchanged to the resulting `Crate`
options: Option<Vec<String>>,
52+
},
53+
/// A local directory whose contents are copied from `path`
Path {
54+
name: String,
55+
path: PathBuf,
56+
// handed over unchanged to the resulting `Crate`
options: Option<Vec<String>>,
57+
},
58+
}
59+
60+
/// Read a `lintcheck_crates.toml` file
61+
pub fn read_crates(toml_path: &Path) -> (Vec<CrateSource>, RecursiveOptions) {
62+
let toml_content: String =
63+
fs::read_to_string(toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
64+
let crate_list: SourceList =
65+
toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{e}", toml_path.display()));
66+
// parse the hashmap of the toml file into a list of crates
67+
let tomlcrates: Vec<TomlCrate> = crate_list.crates.into_values().collect();
68+
69+
// flatten TomlCrates into CrateSources (one TomlCrates may represent several versions of a crate =>
70+
// multiple Cratesources)
71+
let mut crate_sources = Vec::new();
72+
for tk in tomlcrates {
73+
if let Some(ref path) = tk.path {
74+
crate_sources.push(CrateSource::Path {
75+
name: tk.name.clone(),
76+
path: PathBuf::from(path),
77+
options: tk.options.clone(),
78+
});
79+
} else if let Some(ref version) = tk.version {
80+
crate_sources.push(CrateSource::CratesIo {
81+
name: tk.name.clone(),
82+
version: version.to_string(),
83+
options: tk.options.clone(),
84+
});
85+
} else if tk.git_url.is_some() && tk.git_hash.is_some() {
86+
// otherwise, we should have a git source
87+
crate_sources.push(CrateSource::Git {
88+
name: tk.name.clone(),
89+
url: tk.git_url.clone().unwrap(),
90+
commit: tk.git_hash.clone().unwrap(),
91+
options: tk.options.clone(),
92+
});
93+
} else {
94+
panic!("Invalid crate source: {tk:?}");
95+
}
96+
97+
// if we have a version as well as a git data OR only one git data, something is funky
98+
if tk.version.is_some() && (tk.git_url.is_some() || tk.git_hash.is_some())
99+
|| tk.git_hash.is_some() != tk.git_url.is_some()
100+
{
101+
eprintln!("tomlkrate: {tk:?}");
102+
assert_eq!(
103+
tk.git_hash.is_some(),
104+
tk.git_url.is_some(),
105+
"Error: Encountered TomlCrate with only one of git_hash and git_url!"
106+
);
107+
assert!(
108+
tk.path.is_none() || (tk.git_hash.is_none() && tk.version.is_none()),
109+
"Error: TomlCrate can only have one of 'git_.*', 'version' or 'path' fields"
110+
);
111+
unreachable!("Failed to translate TomlCrate into CrateSource!");
112+
}
113+
}
114+
// sort the crates
115+
crate_sources.sort();
116+
117+
(crate_sources, crate_list.recursive)
118+
}
119+
120+
impl CrateSource {
121+
/// Makes the sources available on the disk for clippy to check.
122+
/// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
123+
/// copies a local folder
124+
#[expect(clippy::too_many_lines)]
125+
pub fn download_and_extract(&self) -> Crate {
126+
#[allow(clippy::result_large_err)]
127+
fn get(path: &str) -> Result<ureq::Response, ureq::Error> {
128+
const MAX_RETRIES: u8 = 4;
129+
let mut retries = 0;
130+
loop {
131+
match ureq::get(path).call() {
132+
Ok(res) => return Ok(res),
133+
Err(e) if retries >= MAX_RETRIES => return Err(e),
134+
Err(ureq::Error::Transport(e)) => eprintln!("Error: {e}"),
135+
Err(e) => return Err(e),
136+
}
137+
eprintln!("retrying in {retries} seconds...");
138+
std::thread::sleep(Duration::from_secs(u64::from(retries)));
139+
retries += 1;
140+
}
141+
}
142+
match self {
143+
CrateSource::CratesIo { name, version, options } => {
144+
let extract_dir = PathBuf::from(LINTCHECK_SOURCES);
145+
let krate_download_dir = PathBuf::from(LINTCHECK_DOWNLOADS);
146+
147+
// url to download the crate from crates.io
148+
let url = format!("https://crates.io/api/v1/crates/{name}/{version}/download");
149+
println!("Downloading and extracting {name} {version} from {url}");
150+
create_dirs(&krate_download_dir, &extract_dir);
151+
152+
let krate_file_path = krate_download_dir.join(format!("{name}-{version}.crate.tar.gz"));
153+
// don't download/extract if we already have done so
154+
if !krate_file_path.is_file() {
155+
// create a file path to download and write the crate data into
156+
let mut krate_dest = fs::File::create(&krate_file_path).unwrap();
157+
let mut krate_req = get(&url).unwrap().into_reader();
158+
// copy the crate into the file
159+
io::copy(&mut krate_req, &mut krate_dest).unwrap();
160+
161+
// unzip the tarball
162+
let ungz_tar = flate2::read::GzDecoder::new(fs::File::open(&krate_file_path).unwrap());
163+
// extract the tar archive
164+
let mut archive = tar::Archive::new(ungz_tar);
165+
archive.unpack(&extract_dir).expect("Failed to extract!");
166+
}
167+
// crate is extracted, return a new Krate object which contains the path to the extracted
168+
// sources that clippy can check
169+
Crate {
170+
version: version.clone(),
171+
name: name.clone(),
172+
path: extract_dir.join(format!("{name}-{version}/")),
173+
options: options.clone(),
174+
}
175+
},
176+
CrateSource::Git {
177+
name,
178+
url,
179+
commit,
180+
options,
181+
} => {
182+
let repo_path = {
183+
let mut repo_path = PathBuf::from(LINTCHECK_SOURCES);
184+
// add a -git suffix in case we have the same crate from crates.io and a git repo
185+
repo_path.push(format!("{name}-git"));
186+
repo_path
187+
};
188+
// clone the repo if we have not done so
189+
if !repo_path.is_dir() {
190+
println!("Cloning {url} and checking out {commit}");
191+
if !Command::new("git")
192+
.arg("clone")
193+
.arg(url)
194+
.arg(&repo_path)
195+
.status()
196+
.expect("Failed to clone git repo!")
197+
.success()
198+
{
199+
eprintln!("Failed to clone {url} into {}", repo_path.display());
200+
}
201+
}
202+
// check out the commit/branch/whatever
203+
if !Command::new("git")
204+
.args(["-c", "advice.detachedHead=false"])
205+
.arg("checkout")
206+
.arg(commit)
207+
.current_dir(&repo_path)
208+
.status()
209+
.expect("Failed to check out commit")
210+
.success()
211+
{
212+
eprintln!("Failed to checkout {commit} of repo at {}", repo_path.display());
213+
}
214+
215+
Crate {
216+
version: commit.clone(),
217+
name: name.clone(),
218+
path: repo_path,
219+
options: options.clone(),
220+
}
221+
},
222+
CrateSource::Path { name, path, options } => {
223+
fn is_cache_dir(entry: &DirEntry) -> bool {
224+
fs::read(entry.path().join("CACHEDIR.TAG"))
225+
.map(|x| x.starts_with(b"Signature: 8a477f597d28d172789f06886806bc55"))
226+
.unwrap_or(false)
227+
}
228+
229+
// copy path into the dest_crate_root but skip directories that contain a CACHEDIR.TAG file.
230+
// The target/ directory contains a CACHEDIR.TAG file so it is the most commonly skipped directory
231+
// as a result of this filter.
232+
let dest_crate_root = PathBuf::from(LINTCHECK_SOURCES).join(name);
233+
if dest_crate_root.exists() {
234+
println!("Deleting existing directory at {dest_crate_root:?}");
235+
fs::remove_dir_all(&dest_crate_root).unwrap();
236+
}
237+
238+
println!("Copying {path:?} to {dest_crate_root:?}");
239+
240+
for entry in WalkDir::new(path).into_iter().filter_entry(|e| !is_cache_dir(e)) {
241+
let entry = entry.unwrap();
242+
let entry_path = entry.path();
243+
let relative_entry_path = entry_path.strip_prefix(path).unwrap();
244+
let dest_path = dest_crate_root.join(relative_entry_path);
245+
let metadata = entry_path.symlink_metadata().unwrap();
246+
247+
if metadata.is_dir() {
248+
fs::create_dir(dest_path).unwrap();
249+
} else if metadata.is_file() {
250+
fs::copy(entry_path, dest_path).unwrap();
251+
}
252+
}
253+
254+
Crate {
255+
version: String::from("local"),
256+
name: name.clone(),
257+
path: dest_crate_root,
258+
options: options.clone(),
259+
}
260+
},
261+
}
262+
}
263+
}
264+
265+
/// Create necessary directories to run the lintcheck tool.
///
/// Makes sure that `target/lintcheck/`, `krate_download_dir` and `extract_dir` exist. Missing
/// parent directories are created as well, and directories that already exist are left alone.
///
/// # Panics
///
/// This function panics if creating one of the dirs fails, which includes the case that a
/// path is already taken by something that is not a directory.
fn create_dirs(krate_download_dir: &Path, extract_dir: &Path) {
    /// Creates `dir` and its missing parents; panics with a message starting with `context`.
    fn ensure_dir(dir: &Path, context: &str) {
        if let Err(err) = fs::create_dir_all(dir) {
            // `create_dir_all` reports success for an existing directory, so `AlreadyExists`
            // is only left over when the path is occupied by a non-directory.
            let reason = match err.kind() {
                ErrorKind::AlreadyExists => String::from("path exists but is not a directory"),
                _ => err.to_string(),
            };
            panic!("{context} {}: {reason}", dir.display());
        }
    }

    ensure_dir(Path::new("target/lintcheck/"), "cannot create lintcheck target dir");
    ensure_dir(krate_download_dir, "cannot create crate download dir");
    ensure_dir(extract_dir, "cannot create crate extraction dir");
}

0 commit comments

Comments
 (0)