From ccfec46ac58ba72cccc6806d92b0adf22ea425d1 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Fri, 26 Jun 2020 17:16:17 +0200 Subject: [PATCH 1/7] Add basic db <-> index consistency check Checks just whether any crates/versions don't exist, but can easily be expanded with validation of other details. --- Cargo.lock | 53 ++++++++++++++ Cargo.toml | 1 + src/bin/cratesfyi.rs | 15 ++++ src/index/crates.rs | 55 +++++++++++++++ src/index/mod.rs | 14 +++- src/lib.rs | 2 + src/utils/consistency/data.rs | 32 +++++++++ src/utils/consistency/db.rs | 59 ++++++++++++++++ src/utils/consistency/diff.rs | 122 +++++++++++++++++++++++++++++++++ src/utils/consistency/index.rs | 23 +++++++ src/utils/consistency/mod.rs | 58 ++++++++++++++++ src/utils/mod.rs | 1 + 12 files changed, 434 insertions(+), 1 deletion(-) create mode 100644 src/index/crates.rs create mode 100644 src/utils/consistency/data.rs create mode 100644 src/utils/consistency/db.rs create mode 100644 src/utils/consistency/diff.rs create mode 100644 src/utils/consistency/index.rs create mode 100644 src/utils/consistency/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 0515be845..e4834f46a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,6 +320,21 @@ name = "cpuid-bool" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "crates-index" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "git2 0.13.6 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "home 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)", + "smol_str 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crates-index-diff" version = "7.0.1" @@ -341,6 +356,7 @@ dependencies = [ "base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "comrak 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crates-index 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", "crates-index-diff 7.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "dotenv 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1010,6 +1026,9 @@ dependencies = [ name = "hex" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "hmac" @@ -1020,6 +1039,14 @@ dependencies = [ "digest 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "home" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "html5ever" version = "0.25.1" @@ -1310,12 +1337,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" name = "libgit2-sys" version = "0.12.6+1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.74 (registry+https://github.com/rust-lang/crates.io-index)", + "libssh2-sys 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", + "libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-sys 0.9.56 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libssh2-sys" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cc 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.74 (registry+https://github.com/rust-lang/crates.io-index)", "libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "openssl-sys 0.9.56 (registry+https://github.com/rust-lang/crates.io-index)", "pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)", + "vcpkg 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2831,6 +2872,14 @@ name = "smallvec" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "smol_str" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "socket2" version = "0.3.12" @@ -3741,6 +3790,7 @@ dependencies = [ "checksum core-foundation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "57d24c7a13c43e870e37c1556b74555437870a04514f7685f5b354e090567171" "checksum core-foundation-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac" "checksum cpuid-bool 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" +"checksum crates-index 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "15467291e8911aa3e73b0e77d988362da1df7ac974c7189ab38b94b6f7edfa7e" "checksum crates-index-diff 7.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2e6bb290b5bb11353fbb46ca4c68ad2e8f54ab6674e4ee6a94c102054fdaf00f" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" "checksum criterion 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "70daa7ceec6cf143990669a04c7df13391d55fb27bd4079d252fca774ba244d8" @@ -3811,6 +3861,7 @@ dependencies = [ "checksum hermit-abi 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71" "checksum hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" "checksum hmac 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "126888268dcc288495a26bf004b38c5fdbb31682f992c84ceb046a1f0fe38840" +"checksum home 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2456aef2e6b6a9784192ae780c0f15bc57df0e918585282325e8c8ac27737654" "checksum html5ever 0.25.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" "checksum http 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "28d569972648b2c512421b5f2a405ad6ac9666547189d0c5477a3f200f3e02f9" "checksum http-body 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "13d5ff830006f7646652e057693569bfe0d51760c0085a071769d142a205111b" @@ -3843,6 +3894,7 @@ dependencies = [ "checksum libflate 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a1fbe6b967a94346446d37ace319ae85be7eca261bb8149325811ac435d35d64" "checksum libflate_lz77 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3286f09f7d4926fc486334f28d8d2e6ebe4f7f9994494b6dab27ddfad2c9b11b" "checksum libgit2-sys 0.12.6+1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bf81b43f9b45ab07897a780c9b7b26b1504497e469c7a78162fc29e3b8b1c1b3" +"checksum libssh2-sys 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "eafa907407504b0e683786d4aba47acf250f114d37357d56608333fd167dd0fc" "checksum libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "2eb5e43362e38e2bca2fd5f5134c4d4564a23a5c28e9b95411652021a8675ebe" "checksum lock_api 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" "checksum lock_api 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "28247cc5a5be2f05fbcd76dd0cf2c7d3b5400cb978a28042abcd4fa0b3f8261c" @@ -4006,6 +4058,7 @@ dependencies = [ "checksum slug 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39af1ce888a1253c8b9fcfa36626557650fb487c013620a743262d2769a3e9f3" "checksum smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "f7b0758c52e15a8b5e3691eae6cc559f08eee9406e548a4477ba4e67770a82b6" "checksum smallvec 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" +"checksum smol_str 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "2f7909a1d8bc166a862124d84fdc11bda0ea4ed3157ccca662296919c2972db1" "checksum socket2 0.3.12 (registry+https://github.com/rust-lang/crates.io-index)" = "03088793f677dce356f3ccc2edb1b314ad191ab702a5de3faf49304f7e104918" "checksum spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" diff --git a/Cargo.toml b/Cargo.toml index d62891cfb..3d548d53e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ edition = "2018" log = "0.4" regex = "1" structopt = "0.3" +crates-index = "0.15.0" crates-index-diff = "7" reqwest = { version = "0.10.6", features = ["blocking", "json"] } # TODO: Remove blocking when async is ready semver = { version = "0.9", features = ["serde"] } diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index bc6201bc7..e7d1799c1 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -443,6 +443,13 @@ enum DatabaseSubcommand { #[structopt(subcommand)] command: BlacklistSubcommand, }, + + /// Compares the database with the index and resolves inconsistencies + Synchronize { + /// Don't actually resolve the inconsistencies, just log them + #[structopt(long)] + dry_run: bool, + }, } impl DatabaseSubcommand { @@ -488,6 +495,14 @@ impl DatabaseSubcommand { } => db::delete_crate(&mut *ctx.conn()?, &*ctx.storage()?, &name) .context("failed to delete the crate")?, Self::Blacklist { command } => command.handle_args(ctx)?, + + Self::Synchronize { dry_run } => { + cratesfyi::utils::consistency::run_check( + &*ctx.config()?, + &mut *ctx.conn()?, + dry_run, + )?; + } } Ok(()) } diff --git a/src/index/crates.rs b/src/index/crates.rs new file mode 100644 index 000000000..bbdfab377 --- /dev/null +++ b/src/index/crates.rs @@ -0,0 +1,55 @@ +use crates_index::Crate; +use failure::ResultExt; +use std::io::{Seek, SeekFrom, Write}; + +pub(crate) struct Crates { + repo: git2::Repository, +} + +impl Crates { + pub(super) fn new(repo: git2::Repository) -> Self { + Self { repo } + } + + pub(crate) fn walk(&self, mut f: impl FnMut(Crate)) -> Result<(), failure::Error> { + log::debug!("Walking crates in index"); + let tree = self + .repo + .find_commit(self.repo.refname_to_id("refs/remotes/origin/master")?)? + .tree()?; + + // crates_index doesn't publicly expose their slice constructor, so need to write each blob + // to a file before loading it as a `Crate`. + let mut tmp = tempfile::NamedTempFile::new()?; + + let mut result = Ok(()); + + tree.walk(git2::TreeWalkMode::PreOrder, |_, entry| { + result = (|| { + if let Some(blob) = entry.to_object(&self.repo)?.as_blob() { + tmp.write_all(blob.content())?; + if let Ok(krate) = Crate::new(tmp.path()) { + f(krate); + } else { + log::warn!("Not a crate {}", entry.name().unwrap()); + } + tmp.as_file().set_len(0)?; + tmp.seek(SeekFrom::Start(0))?; + } + Result::<(), failure::Error>::Ok(()) + })() + .with_context(|_| { + format!( + "Loading crate details from {}", + entry.name().unwrap_or_default() + ) + }); + match result { + Ok(_) => git2::TreeWalkResult::Ok, + Err(_) => git2::TreeWalkResult::Abort, + } + })?; + + Ok(result?) + } +} diff --git a/src/index/mod.rs b/src/index/mod.rs index acb86a8fc..68e806bcd 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -2,11 +2,12 @@ use std::path::{Path, PathBuf}; use url::Url; -use self::api::Api; +use self::{api::Api, crates::Crates}; use crate::error::Result; use failure::ResultExt; pub(crate) mod api; +mod crates; pub struct Index { path: PathBuf, @@ -56,6 +57,17 @@ impl Index { Ok(diff) } + pub(crate) fn crates(&self) -> Result { + // First ensure the index is up to date, peeking will pull the latest changes without + // affecting anything else. + log::debug!("Updating index"); + self.diff()?.peek_changes()?; + // It'd be nice to use `crates_index` directly for interacting with the index, but it + // doesn't support bare repositories. So we use its `Crate` type but walk the index + // ourselves. + Ok(Crates::new(git2::Repository::open(&self.path)?)) + } + pub fn api(&self) -> &Api { &self.api } diff --git a/src/lib.rs b/src/lib.rs index d25e79c5a..fe3a1495b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,10 @@ //! [Docs.rs](https://docs.rs) (formerly cratesfyi) is an open source project to host //! documentation of crates for the Rust Programming Language. #![allow(clippy::cognitive_complexity)] +#![feature(type_alias_impl_trait)] pub use self::build_queue::BuildQueue; + pub use self::config::Config; pub use self::docbuilder::options::DocBuilderOptions; pub use self::docbuilder::DocBuilder; diff --git a/src/utils/consistency/data.rs b/src/utils/consistency/data.rs new file mode 100644 index 000000000..593bee3c0 --- /dev/null +++ b/src/utils/consistency/data.rs @@ -0,0 +1,32 @@ +use std::{collections::BTreeMap, fmt::Debug}; + +#[derive(Default, Debug)] +pub(crate) struct Data { + pub(crate) crates: BTreeMap, +} + +#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)] +pub(crate) struct CrateId(pub(crate) String); + +#[derive(Default, Debug)] +pub(crate) struct Crate { + pub(crate) releases: BTreeMap, +} + +#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)] +pub(crate) struct Version(pub(crate) String); + +#[derive(Default, Debug)] +pub(crate) struct Release {} + +impl std::fmt::Display for CrateId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.0, f) + } +} + +impl std::fmt::Display for Version { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.0, f) + } +} diff --git a/src/utils/consistency/db.rs b/src/utils/consistency/db.rs new file mode 100644 index 000000000..525b902f4 --- /dev/null +++ b/src/utils/consistency/db.rs @@ -0,0 +1,59 @@ +use super::data::{Crate, CrateId, Data, Release, Version}; +use std::collections::BTreeMap; + +pub(crate) fn load(conn: &mut postgres::Client) -> Result { + let rows = conn.query( + " + SELECT + crates.name, + releases.version + FROM crates + INNER JOIN releases ON releases.crate_id = crates.id + ORDER BY crates.id, releases.id + ", + &[], + )?; + + let mut data = Data { + crates: BTreeMap::new(), + }; + + let mut rows = rows.iter(); + + struct Current { + id: CrateId, + krate: Crate, + } + + let mut current = if let Some(row) = rows.next() { + Current { + id: CrateId(row.get("name")), + krate: Crate { + releases: { + let mut releases = BTreeMap::new(); + releases.insert(Version(row.get("version")), Release {}); + releases + }, + }, + } + } else { + return Ok(data); + }; + + for row in rows { + if row.get::<_, String>("name") != current.id.0 { + data.crates.insert( + std::mem::replace(&mut current.id, CrateId(row.get("name"))), + std::mem::take(&mut current.krate), + ); + } + current + .krate + .releases + .insert(Version(row.get("version")), Release::default()); + } + + data.crates.insert(current.id, current.krate); + + Ok(data) +} diff --git a/src/utils/consistency/diff.rs b/src/utils/consistency/diff.rs new file mode 100644 index 000000000..297520258 --- /dev/null +++ b/src/utils/consistency/diff.rs @@ -0,0 +1,122 @@ +use super::data::{Crate, CrateId, Data, Release, Version}; +use std::{collections::BTreeMap, fmt::Debug}; + +#[derive(Debug)] +pub(crate) struct DataDiff { + pub(crate) crates: CratesDiff, +} + +pub(crate) type CratesDiff = impl Iterator> + Debug; + +#[derive(Debug)] +pub(crate) struct CrateDiff { + pub(crate) releases: ReleasesDiff, +} + +pub(crate) type ReleasesDiff = impl Iterator> + Debug; + +#[derive(Debug)] +pub(crate) struct ReleaseDiff {} + +pub(crate) enum Diff { + Both(Key, Value::Diff), + Left(Key, Value), + Right(Key, Value), +} + +pub(crate) trait Diffable { + type Diff; + + fn diff(self, other: Self) -> Self::Diff; +} + +fn diff_map( + left: BTreeMap, + right: BTreeMap, +) -> impl Iterator> + Debug { + use std::{cmp::Ordering, collections::btree_map::IntoIter, iter::Peekable}; + + #[derive(Debug)] + struct DiffMap { + left: Peekable>, + right: Peekable>, + } + + impl Iterator for DiffMap { + type Item = Diff; + + fn next(&mut self) -> Option { + match (self.left.peek(), self.right.peek()) { + (Some((left, _)), Some((right, _))) => match left.cmp(right) { + Ordering::Less => { + let (key, value) = self.left.next().unwrap(); + Some(Diff::Left(key, value)) + } + Ordering::Equal => { + let (key, left) = self.left.next().unwrap(); + let (_, right) = self.right.next().unwrap(); + Some(Diff::Both(key, left.diff(right))) + } + Ordering::Greater => { + let (key, value) = self.right.next().unwrap(); + Some(Diff::Right(key, value)) + } + }, + (Some((_, _)), None) => { + let (key, value) = self.left.next().unwrap(); + Some(Diff::Left(key, value)) + } + (None, Some((_, _))) => { + let (key, value) = self.right.next().unwrap(); + Some(Diff::Right(key, value)) + } + (None, None) => None, + } + } + } + + DiffMap { + left: left.into_iter().peekable(), + right: right.into_iter().peekable(), + } +} + +impl Diffable for Data { + type Diff = DataDiff; + + fn diff(self, other: Self) -> Self::Diff { + fn diff_crates( + left: BTreeMap, + right: BTreeMap, + ) -> CratesDiff { + diff_map(left, right) + } + DataDiff { + crates: diff_crates(self.crates, other.crates), + } + } +} + +impl Diffable for Crate { + type Diff = CrateDiff; + + fn diff(self, other: Self) -> Self::Diff { + fn diff_releases( + left: BTreeMap, + right: BTreeMap, + ) -> ReleasesDiff { + diff_map(left, right) + } + CrateDiff { + releases: diff_releases(self.releases, other.releases), + } + } +} + +impl Diffable for Release { + type Diff = ReleaseDiff; + + fn diff(self, _other: Self) -> Self::Diff { + ReleaseDiff {} + } +} diff --git a/src/utils/consistency/index.rs b/src/utils/consistency/index.rs new file mode 100644 index 000000000..fed380622 --- /dev/null +++ b/src/utils/consistency/index.rs @@ -0,0 +1,23 @@ +use super::data::{Crate, CrateId, Data, Release, Version}; +use crate::{config::Config, index::Index}; + +pub(crate) fn load(config: &Config) -> Result { + let index = Index::new(&config.registry_index_path)?; + + let mut data = Data::default(); + + index.crates()?.walk(|krate| { + data.crates.insert( + CrateId(krate.name().into()), + Crate { + releases: krate + .versions() + .iter() + .map(|version| (Version(version.version().into()), Release::default())) + .collect(), + }, + ); + })?; + + Ok(data) +} diff --git a/src/utils/consistency/mod.rs b/src/utils/consistency/mod.rs new file mode 100644 index 000000000..077ad94a2 --- /dev/null +++ b/src/utils/consistency/mod.rs @@ -0,0 +1,58 @@ +use self::diff::{Diff, Diffable}; +use crate::config::Config; +use failure::ResultExt; + +mod data; +mod db; +mod diff; +mod index; + +pub fn run_check( + config: &Config, + conn: &mut postgres::Client, + dry_run: bool, +) -> Result<(), failure::Error> { + if !dry_run { + failure::bail!("TODO: only a --dry-run synchronization is supported currently"); + } + + log::info!("Loading data from database..."); + let timer = std::time::Instant::now(); + let db_data = + self::db::load(conn).context("Loading crate data from database for consistency check")?; + log::info!("...loaded in {:?}", timer.elapsed()); + + log::info!("Loading data from index..."); + let timer = std::time::Instant::now(); + let index_data = + self::index::load(config).context("Loading crate data from index for consistency check")?; + log::info!("...loaded in {:?}", timer.elapsed()); + + let diff = db_data.diff(index_data); + + for krate in diff.crates { + match krate { + Diff::Both(id, diff) => { + for release in diff.releases { + match release { + Diff::Both(_, _) => {} + Diff::Left(version, _) => { + log::info!("Release in db not in index: {} {}", id, version); + } + Diff::Right(version, _) => { + log::info!("Release in index not in db: {} {}", id, version); + } + } + } + } + Diff::Left(id, _) => { + log::info!("Crate in db not in index: {}", id); + } + Diff::Right(id, _) => { + log::info!("Crate in index not in db: {}", id); + } + } + } + + Ok(()) +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 69935dac4..609a1a544 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -14,6 +14,7 @@ pub(crate) use self::rustc_version::parse_rustc_version; pub(crate) use self::cargo_metadata::{Dependency, Target}; mod cargo_metadata; +pub mod consistency; mod copy; mod daemon; mod github_updater; From 88188b4cbbd77006b39642e5be799f4c113f791b Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Sat, 18 Jul 2020 12:58:51 +0200 Subject: [PATCH 2/7] Improve debugging while walking crates --- src/index/crates.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/index/crates.rs b/src/index/crates.rs index bbdfab377..7ebc89cb5 100644 --- a/src/index/crates.rs +++ b/src/index/crates.rs @@ -31,7 +31,7 @@ impl Crates { if let Ok(krate) = Crate::new(tmp.path()) { f(krate); } else { - log::warn!("Not a crate {}", entry.name().unwrap()); + log::warn!("Not a crate '{}'", entry.name().unwrap()); } tmp.as_file().set_len(0)?; tmp.seek(SeekFrom::Start(0))?; @@ -40,8 +40,8 @@ impl Crates { })() .with_context(|_| { format!( - "Loading crate details from {}", - entry.name().unwrap_or_default() + "Loading crate details from '{}'", + entry.name().unwrap_or("") ) }); match result { From 9bb4cfb78d101e6dc904a2e125fe4cf7ff307fd2 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Sat, 18 Jul 2020 12:59:53 +0200 Subject: [PATCH 3/7] Rename CrateId to CrateName --- src/utils/consistency/data.rs | 6 +++--- src/utils/consistency/db.rs | 12 ++++++------ src/utils/consistency/diff.rs | 8 ++++---- src/utils/consistency/index.rs | 4 ++-- src/utils/consistency/mod.rs | 14 +++++++------- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/utils/consistency/data.rs b/src/utils/consistency/data.rs index 593bee3c0..1571fde2b 100644 --- a/src/utils/consistency/data.rs +++ b/src/utils/consistency/data.rs @@ -2,11 +2,11 @@ use std::{collections::BTreeMap, fmt::Debug}; #[derive(Default, Debug)] pub(crate) struct Data { - pub(crate) crates: BTreeMap, + pub(crate) crates: BTreeMap, } #[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)] -pub(crate) struct CrateId(pub(crate) String); +pub(crate) struct CrateName(pub(crate) String); #[derive(Default, Debug)] pub(crate) struct Crate { @@ -19,7 +19,7 @@ pub(crate) struct Version(pub(crate) String); #[derive(Default, Debug)] pub(crate) struct Release {} -impl std::fmt::Display for CrateId { +impl std::fmt::Display for CrateName { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(&self.0, f) } diff --git a/src/utils/consistency/db.rs b/src/utils/consistency/db.rs index 525b902f4..ab4cdc24f 100644 --- a/src/utils/consistency/db.rs +++ b/src/utils/consistency/db.rs @@ -1,4 +1,4 @@ -use super::data::{Crate, CrateId, Data, Release, Version}; +use super::data::{Crate, CrateName, Data, Release, Version}; use std::collections::BTreeMap; pub(crate) fn load(conn: &mut postgres::Client) -> Result { @@ -21,13 +21,13 @@ pub(crate) fn load(conn: &mut postgres::Client) -> Result let mut rows = rows.iter(); struct Current { - id: CrateId, + name: CrateName, krate: Crate, } let mut current = if let Some(row) = rows.next() { Current { - id: CrateId(row.get("name")), + name: CrateName(row.get("name")), krate: Crate { releases: { let mut releases = BTreeMap::new(); @@ -41,9 +41,9 @@ pub(crate) fn load(conn: &mut postgres::Client) -> Result }; for row in rows { - if row.get::<_, String>("name") != current.id.0 { + if row.get::<_, String>("name") != current.name.0 { data.crates.insert( - std::mem::replace(&mut current.id, CrateId(row.get("name"))), + std::mem::replace(&mut current.name, CrateName(row.get("name"))), std::mem::take(&mut current.krate), ); } @@ -53,7 +53,7 @@ pub(crate) fn load(conn: &mut postgres::Client) -> Result .insert(Version(row.get("version")), Release::default()); } - data.crates.insert(current.id, current.krate); + data.crates.insert(current.name, current.krate); Ok(data) } diff --git a/src/utils/consistency/diff.rs b/src/utils/consistency/diff.rs index 297520258..656fbf62d 100644 --- a/src/utils/consistency/diff.rs +++ b/src/utils/consistency/diff.rs @@ -1,4 +1,4 @@ -use super::data::{Crate, CrateId, Data, Release, Version}; +use super::data::{Crate, CrateName, Data, Release, Version}; use std::{collections::BTreeMap, fmt::Debug}; #[derive(Debug)] @@ -6,7 +6,7 @@ pub(crate) struct DataDiff { pub(crate) crates: CratesDiff, } -pub(crate) type CratesDiff = impl Iterator> + Debug; +pub(crate) type CratesDiff = impl Iterator> + Debug; #[derive(Debug)] pub(crate) struct CrateDiff { @@ -86,8 +86,8 @@ impl Diffable for Data { fn diff(self, other: Self) -> Self::Diff { fn diff_crates( - left: BTreeMap, - right: BTreeMap, + left: BTreeMap, + right: BTreeMap, ) -> CratesDiff { diff_map(left, right) } diff --git a/src/utils/consistency/index.rs b/src/utils/consistency/index.rs index fed380622..0517c21c0 100644 --- a/src/utils/consistency/index.rs +++ b/src/utils/consistency/index.rs @@ -1,4 +1,4 @@ -use super::data::{Crate, CrateId, Data, Release, Version}; +use super::data::{Crate, CrateName, Data, Release, Version}; use crate::{config::Config, index::Index}; pub(crate) fn load(config: &Config) -> Result { @@ -8,7 +8,7 @@ pub(crate) fn load(config: &Config) -> Result { index.crates()?.walk(|krate| { data.crates.insert( - CrateId(krate.name().into()), + CrateName(krate.name().into()), Crate { releases: krate .versions() diff --git a/src/utils/consistency/mod.rs b/src/utils/consistency/mod.rs index 077ad94a2..1c8f128e8 100644 --- a/src/utils/consistency/mod.rs +++ b/src/utils/consistency/mod.rs @@ -32,24 +32,24 @@ pub fn run_check( for krate in diff.crates { match krate { - Diff::Both(id, diff) => { + Diff::Both(name, diff) => { for release in diff.releases { match release { Diff::Both(_, _) => {} Diff::Left(version, _) => { - log::info!("Release in db not in index: {} {}", id, version); + log::info!("Release in db not in index: {} {}", name, version); } Diff::Right(version, _) => { - log::info!("Release in index not in db: {} {}", id, version); + log::info!("Release in index not in db: {} {}", name, version); } } } } - Diff::Left(id, _) => { - log::info!("Crate in db not in index: {}", id); + Diff::Left(name, _) => { + log::info!("Crate in db not in index: {}", name); } - Diff::Right(id, _) => { - log::info!("Crate in index not in db: {}", id); + Diff::Right(name, _) => { + log::info!("Crate in index not in db: {}", name); } } } From d8fb3fafbca7c2fd3bdea10969d614aed00b2e51 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Sat, 18 Jul 2020 13:06:44 +0200 Subject: [PATCH 4/7] Improve name comparison while loading db data --- src/utils/consistency/data.rs | 30 +++++++++++++++++++++++------- src/utils/consistency/db.rs | 5 +++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/utils/consistency/data.rs b/src/utils/consistency/data.rs index 1571fde2b..30dcb48c8 100644 --- a/src/utils/consistency/data.rs +++ b/src/utils/consistency/data.rs @@ -1,4 +1,8 @@ -use std::{collections::BTreeMap, fmt::Debug}; +use std::{ + cmp::PartialEq, + collections::BTreeMap, + fmt::{self, Debug, Display, Formatter}, +}; #[derive(Default, Debug)] pub(crate) struct Data { @@ -19,14 +23,26 @@ pub(crate) struct Version(pub(crate) String); #[derive(Default, Debug)] pub(crate) struct Release {} -impl std::fmt::Display for CrateName { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(&self.0, f) +impl PartialEq for CrateName { + fn eq(&self, other: &String) -> bool { + self.0 == *other } } -impl std::fmt::Display for Version { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(&self.0, f) +impl PartialEq for Version { + fn eq(&self, other: &String) -> bool { + self.0 == *other + } +} + +impl Display for CrateName { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.0, f) + } +} + +impl Display for Version { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.0, f) } } diff --git a/src/utils/consistency/db.rs b/src/utils/consistency/db.rs index ab4cdc24f..52cc4df91 100644 --- a/src/utils/consistency/db.rs +++ b/src/utils/consistency/db.rs @@ -41,9 +41,10 @@ pub(crate) fn load(conn: &mut postgres::Client) -> Result }; for row in rows { - if row.get::<_, String>("name") != current.name.0 { + let name = row.get("name"); + if current.name != name { data.crates.insert( - std::mem::replace(&mut current.name, CrateName(row.get("name"))), + std::mem::replace(&mut current.name, CrateName(name)), std::mem::take(&mut current.krate), ); } From a056129076b55fdc4444c2f8a11ee28fae4a7700 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Sat, 18 Jul 2020 13:23:30 +0200 Subject: [PATCH 5/7] Concretely type the iterable diffs --- src/lib.rs | 1 - src/utils/consistency/diff.rs | 102 +++++++++++++++------------------- 2 files changed, 44 insertions(+), 59 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fe3a1495b..8c6198085 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ //! [Docs.rs](https://docs.rs) (formerly cratesfyi) is an open source project to host //! documentation of crates for the Rust Programming Language. #![allow(clippy::cognitive_complexity)] -#![feature(type_alias_impl_trait)] pub use self::build_queue::BuildQueue; diff --git a/src/utils/consistency/diff.rs b/src/utils/consistency/diff.rs index 656fbf62d..7ba1b9fae 100644 --- a/src/utils/consistency/diff.rs +++ b/src/utils/consistency/diff.rs @@ -1,20 +1,21 @@ use super::data::{Crate, CrateName, Data, Release, Version}; -use std::{collections::BTreeMap, fmt::Debug}; +use std::{ + cmp::Ordering, + collections::{btree_map::IntoIter, BTreeMap}, + fmt::Debug, + iter::Peekable, +}; #[derive(Debug)] pub(crate) struct DataDiff { - pub(crate) crates: CratesDiff, + pub(crate) crates: DiffMap, } -pub(crate) type CratesDiff = impl Iterator> + Debug; - #[derive(Debug)] pub(crate) struct CrateDiff { - pub(crate) releases: ReleasesDiff, + pub(crate) releases: DiffMap, } -pub(crate) type ReleasesDiff = impl Iterator> + Debug; - #[derive(Debug)] pub(crate) struct ReleaseDiff {} @@ -30,69 +31,60 @@ pub(crate) trait Diffable { fn diff(self, other: Self) -> Self::Diff; } -fn diff_map( - left: BTreeMap, - right: BTreeMap, -) -> impl Iterator> + Debug { - use std::{cmp::Ordering, collections::btree_map::IntoIter, iter::Peekable}; +#[derive(Debug)] +pub(crate) struct DiffMap { + left: Peekable>, + right: Peekable>, +} - #[derive(Debug)] - struct DiffMap { - left: Peekable>, - right: Peekable>, +impl DiffMap { + fn new(left: BTreeMap, right: BTreeMap) -> Self { + Self { + left: left.into_iter().peekable(), + right: right.into_iter().peekable(), + } } +} + +impl Iterator for DiffMap { + type Item = Diff; - impl Iterator for DiffMap { - type Item = Diff; - - fn next(&mut self) -> Option { - match (self.left.peek(), self.right.peek()) { - (Some((left, _)), Some((right, _))) => match left.cmp(right) { - Ordering::Less => { - let (key, value) = self.left.next().unwrap(); - Some(Diff::Left(key, value)) - } - Ordering::Equal => { - let (key, left) = self.left.next().unwrap(); - let (_, right) = self.right.next().unwrap(); - Some(Diff::Both(key, left.diff(right))) - } - Ordering::Greater => { - let (key, value) = self.right.next().unwrap(); - Some(Diff::Right(key, value)) - } - }, - (Some((_, _)), None) => { + fn next(&mut self) -> Option { + match (self.left.peek(), self.right.peek()) { + (Some((left, _)), Some((right, _))) => match left.cmp(right) { + Ordering::Less => { let (key, value) = self.left.next().unwrap(); Some(Diff::Left(key, value)) } - (None, Some((_, _))) => { + Ordering::Equal => { + let (key, left) = self.left.next().unwrap(); + let (_, right) = self.right.next().unwrap(); + Some(Diff::Both(key, left.diff(right))) + } + Ordering::Greater => { let (key, value) = self.right.next().unwrap(); Some(Diff::Right(key, value)) } - (None, None) => None, + }, + (Some((_, _)), None) => { + let (key, value) = self.left.next().unwrap(); + Some(Diff::Left(key, value)) + } + (None, Some((_, _))) => { + let (key, value) = self.right.next().unwrap(); + Some(Diff::Right(key, value)) } + (None, None) => None, } } - - DiffMap { - left: left.into_iter().peekable(), - right: right.into_iter().peekable(), - } } impl Diffable for Data { type Diff = DataDiff; fn diff(self, other: Self) -> Self::Diff { - fn diff_crates( - left: BTreeMap, - right: BTreeMap, - ) -> CratesDiff { - diff_map(left, right) - } DataDiff { - crates: diff_crates(self.crates, other.crates), + crates: DiffMap::new(self.crates, other.crates), } } } @@ -101,14 +93,8 @@ impl Diffable for Crate { type Diff = CrateDiff; fn diff(self, other: Self) -> Self::Diff { - fn diff_releases( - left: BTreeMap, - right: BTreeMap, - ) -> ReleasesDiff { - diff_map(left, right) - } CrateDiff { - releases: diff_releases(self.releases, other.releases), + releases: DiffMap::new(self.releases, other.releases), } } } From 306d1ed5be2a41130ddd80532d20936eebd72bad Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Sat, 8 Aug 2020 11:40:52 +0200 Subject: [PATCH 6/7] Use new Crate::from_slice constructor --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- src/index/crates.rs | 10 +--------- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e4834f46a..8e3820719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -322,7 +322,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "crates-index" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "git2 0.13.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -356,7 +356,7 @@ dependencies = [ "base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "comrak 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", - "crates-index 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crates-index 0.15.1 (registry+https://github.com/rust-lang/crates.io-index)", "crates-index-diff 7.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "dotenv 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3790,7 +3790,7 @@ dependencies = [ "checksum core-foundation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "57d24c7a13c43e870e37c1556b74555437870a04514f7685f5b354e090567171" "checksum core-foundation-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac" "checksum cpuid-bool 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" -"checksum crates-index 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "15467291e8911aa3e73b0e77d988362da1df7ac974c7189ab38b94b6f7edfa7e" +"checksum crates-index 0.15.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1df1045d54201422cb3a9910da25de7d59fbdad0d03cabd10e33ef592e12ae6d" "checksum crates-index-diff 7.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2e6bb290b5bb11353fbb46ca4c68ad2e8f54ab6674e4ee6a94c102054fdaf00f" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" "checksum criterion 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "70daa7ceec6cf143990669a04c7df13391d55fb27bd4079d252fca774ba244d8" diff --git a/Cargo.toml b/Cargo.toml index 3d548d53e..aca43d3e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" log = "0.4" regex = "1" structopt = "0.3" -crates-index = "0.15.0" +crates-index = "0.15.1" crates-index-diff = "7" reqwest = { version = "0.10.6", features = ["blocking", "json"] } # TODO: Remove blocking when async is ready semver = { version = "0.9", features = ["serde"] } diff --git a/src/index/crates.rs b/src/index/crates.rs index 7ebc89cb5..3fafd166a 100644 --- a/src/index/crates.rs +++ b/src/index/crates.rs @@ -1,6 +1,5 @@ use crates_index::Crate; use failure::ResultExt; -use std::io::{Seek, SeekFrom, Write}; pub(crate) struct Crates { repo: git2::Repository, @@ -18,23 +17,16 @@ impl Crates { .find_commit(self.repo.refname_to_id("refs/remotes/origin/master")?)? .tree()?; - // crates_index doesn't publicly expose their slice constructor, so need to write each blob - // to a file before loading it as a `Crate`. - let mut tmp = tempfile::NamedTempFile::new()?; - let mut result = Ok(()); tree.walk(git2::TreeWalkMode::PreOrder, |_, entry| { result = (|| { if let Some(blob) = entry.to_object(&self.repo)?.as_blob() { - tmp.write_all(blob.content())?; - if let Ok(krate) = Crate::new(tmp.path()) { + if let Ok(krate) = Crate::from_slice(blob.content()) { f(krate); } else { log::warn!("Not a crate '{}'", entry.name().unwrap()); } - tmp.as_file().set_len(0)?; - tmp.seek(SeekFrom::Start(0))?; } Result::<(), failure::Error>::Ok(()) })() From d6501558eccb3b4a870e915e42863506569ed442 Mon Sep 17 00:00:00 2001 From: Nemo157 Date: Fri, 14 Aug 2020 19:31:16 +0200 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Joshua Nelson --- src/index/crates.rs | 4 ++-- src/lib.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/index/crates.rs b/src/index/crates.rs index 3fafd166a..f2d7f120e 100644 --- a/src/index/crates.rs +++ b/src/index/crates.rs @@ -25,14 +25,14 @@ impl Crates { if let Ok(krate) = Crate::from_slice(blob.content()) { f(krate); } else { - log::warn!("Not a crate '{}'", entry.name().unwrap()); + log::warn!("not a crate '{}'", entry.name().unwrap()); } } Result::<(), failure::Error>::Ok(()) })() .with_context(|_| { format!( - "Loading crate details from '{}'", + "loading crate details from '{}'", entry.name().unwrap_or("") ) }); diff --git a/src/lib.rs b/src/lib.rs index 8c6198085..d25e79c5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,6 @@ #![allow(clippy::cognitive_complexity)] pub use self::build_queue::BuildQueue; - pub use self::config::Config; pub use self::docbuilder::options::DocBuilderOptions; pub use self::docbuilder::DocBuilder;