From de38887296bd4e6799ce98cf1093aa195aee67c9 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Mon, 27 Jul 2020 19:52:36 +0200 Subject: [PATCH 1/9] Refactor owner updating to update changed data and remove old owners --- src/db/add_package.rs | 109 ++++++++++++++++++++++++----- src/db/mod.rs | 5 +- src/docbuilder/rustwide_builder.rs | 14 +++- src/index/api.rs | 30 +++++--- src/test/fakes.rs | 22 +++--- 5 files changed, 139 insertions(+), 41 deletions(-) diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 985dcf265..b85b732ab 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -7,11 +7,11 @@ use std::{ use crate::{ docbuilder::BuildResult, error::Result, - index::api::{CrateOwner, RegistryCrateData}, + index::api::{CrateData, CrateOwner, ReleaseData}, storage::CompressionAlgorithm, utils::MetadataPackage, }; -use log::debug; +use log::{debug, info}; use postgres::Connection; use regex::Regex; use serde_json::Value; @@ -32,7 +32,7 @@ pub(crate) fn add_package_into_database( default_target: &str, source_files: Option, doc_targets: Vec, - registry_data: &RegistryCrateData, + registry_data: &ReleaseData, has_docs: bool, has_examples: bool, compression_algorithms: std::collections::HashSet, @@ -117,7 +117,6 @@ pub(crate) fn add_package_into_database( add_keywords_into_database(&conn, &metadata_pkg, release_id)?; add_authors_into_database(&conn, &metadata_pkg, release_id)?; - add_owners_into_database(&conn, ®istry_data.owners, crate_id)?; add_compression_into_database(&conn, compression_algorithms.into_iter(), release_id)?; // Update the crates table with the new release @@ -328,31 +327,103 @@ fn add_authors_into_database( Ok(()) } +pub(crate) fn update_crate_data_in_database( + conn: &Connection, + name: &str, + registry_data: &CrateData, +) -> Result<()> { + info!("Updating crate data for {}", name); + let crate_id = conn + .query("SELECT id FROM crates WHERE crates.name = $1", &[&name])? + .get(0) + .get(0); + + update_owners_in_database(conn, ®istry_data.owners, crate_id)?; + + Ok(()) +} + /// Adds owners into database -fn add_owners_into_database(conn: &Connection, owners: &[CrateOwner], crate_id: i32) -> Result<()> { +fn update_owners_in_database( + conn: &Connection, + owners: &[CrateOwner], + crate_id: i32, +) -> Result<()> { + let existing_owners: Vec = conn + .query( + " + SELECT login + FROM owners + INNER JOIN owner_rels + ON owner_rels.oid = owners.id + WHERE owner_rels.cid = $1 + ", + &[&crate_id], + )? + .into_iter() + .map(|row| row.get(0)) + .collect(); + for owner in owners { + debug!("Updating owner data for {}: {:?}", owner.login, owner); + + // Update any existing owner data since it is mutable and could have changed since last + // time we pulled it let owner_id: i32 = { - let rows = conn.query("SELECT id FROM owners WHERE login = $1", &[&owner.login])?; - if !rows.is_empty() { - rows.get(0).get(0) - } else { - conn.query( - "INSERT INTO owners (login, avatar, name, email) - VALUES ($1, $2, $3, $4) - RETURNING id", - &[&owner.login, &owner.avatar, &owner.name, &owner.email], - )? - .get(0) - .get(0) - } + conn.query( + " + INSERT INTO owners (login, avatar, name, email) + VALUES ($1, $2, $3, $4) + ON CONFLICT (login) DO UPDATE + SET + avatar = $2, + name = $3, + email = $4 + RETURNING id + ", + &[&owner.login, &owner.avatar, &owner.name, &owner.email], + )? + .get(0) + .get(0) }; // add relationship - let _ = conn.query( + let updated = conn.query( "INSERT INTO owner_rels (cid, oid) VALUES ($1, $2)", &[&crate_id, &owner_id], ); + + match updated { + Ok(_) => debug!("Added new owner relationship"), + Err(e) + if e.as_db().and_then(|e| e.constraint.as_deref()) + == Some("owner_rels_cid_oid_key") => + { + debug!("Existing owner relationship"); + } + Err(e) => return Err(e.into()), + } } + + let to_remove = existing_owners + .iter() + .filter(|login| !owners.iter().any(|owner| &&owner.login == login)); + + for login in to_remove { + debug!("Removing owner relationship {}", login); + // remove relationship + conn.query( + " + DELETE FROM owner_rels + USING owners + WHERE owner_rels.cid = $1 + AND owner_rels.oid = owners.id + AND owners.login = $2 + ", + &[&crate_id, &login], + )?; + } + Ok(()) } diff --git a/src/db/mod.rs b/src/db/mod.rs index 732f1cbdf..8d9772708 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,7 +1,8 @@ //! Database operations -pub(crate) use self::add_package::add_build_into_database; -pub(crate) use self::add_package::add_package_into_database; +pub(crate) use self::add_package::{ + add_build_into_database, add_package_into_database, update_crate_data_in_database, +}; pub use self::delete::{delete_crate, delete_version}; pub use self::file::add_path_into_database; pub use self::migrate::migrate; diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 85bb2c2f3..e5d7bb86a 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -2,7 +2,9 @@ use super::DocBuilder; use super::Metadata; use crate::db::blacklist::is_blacklisted; use crate::db::file::add_path_into_database; -use crate::db::{add_build_into_database, add_package_into_database, Pool}; +use crate::db::{ + add_build_into_database, add_package_into_database, update_crate_data_in_database, Pool, +}; use crate::docbuilder::{crates::crates_from_path, Limits}; use crate::error::Result; use crate::storage::CompressionAlgorithms; @@ -405,13 +407,21 @@ impl RustwideBuilder { &res.target, files_list, successful_targets, - &doc_builder.index.api().get_crate_data(name, version), + &doc_builder.index.api().get_release_data(name, version), has_docs, has_examples, algs, )?; + add_build_into_database(&conn, release_id, &res.result)?; + // Some crates.io crate data is mutable, so we proactively update it during a release + update_crate_data_in_database( + &conn, + name, + &doc_builder.index.api().get_crate_data(name), + )?; + doc_builder.add_to_cache(name, version); Ok(res) })?; diff --git a/src/index/api.rs b/src/index/api.rs index 7dbbe5c58..445b14298 100644 --- a/src/index/api.rs +++ b/src/index/api.rs @@ -14,18 +14,25 @@ const APP_USER_AGENT: &str = concat!( include_str!(concat!(env!("OUT_DIR"), "/git_version")) ); +#[derive(Debug)] pub(crate) struct Api { api_base: Option, client: reqwest::blocking::Client, } -pub(crate) struct RegistryCrateData { +#[derive(Debug)] +pub(crate) struct CrateData { + pub(crate) owners: Vec, +} + +#[derive(Debug)] +pub(crate) struct ReleaseData { pub(crate) release_time: DateTime, pub(crate) yanked: bool, pub(crate) downloads: i32, - pub(crate) owners: Vec, } +#[derive(Debug)] pub(crate) struct CrateOwner { pub(crate) avatar: String, pub(crate) email: String, @@ -55,7 +62,16 @@ impl Api { .ok_or_else(|| err_msg("index is missing an api base url")) } - pub(crate) fn get_crate_data(&self, name: &str, version: &str) -> RegistryCrateData { + pub(crate) fn get_crate_data(&self, name: &str) -> CrateData { + let owners = self.get_owners(name).unwrap_or_else(|err| { + warn!("Failed to get owners for {}: {}", name, err); + Vec::new() + }); + + CrateData { owners } + } + + pub(crate) fn get_release_data(&self, name: &str, version: &str) -> ReleaseData { let (release_time, yanked, downloads) = self .get_release_time_yanked_downloads(name, version) .unwrap_or_else(|err| { @@ -63,16 +79,10 @@ impl Api { (Utc::now(), false, 0) }); - let owners = self.get_owners(name).unwrap_or_else(|err| { - warn!("Failed to get owners for {}-{}: {}", name, version, err); - Vec::new() - }); - - RegistryCrateData { + ReleaseData { release_time, yanked, downloads, - owners, } } diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 4d30760ed..14e98144e 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -1,6 +1,6 @@ use super::TestDatabase; use crate::docbuilder::BuildResult; -use crate::index::api::RegistryCrateData; +use crate::index::api::{CrateData, ReleaseData}; use crate::storage::Storage; use crate::utils::{Dependency, MetadataPackage, Target}; use chrono::{DateTime, Utc}; @@ -19,7 +19,8 @@ pub(crate) struct FakeRelease<'a> { rustdoc_files: Vec<(&'a str, &'a [u8])>, doc_targets: Vec, default_target: Option<&'a str>, - registry_crate_data: RegistryCrateData, + registry_crate_data: CrateData, + registry_release_data: ReleaseData, has_docs: bool, has_examples: bool, /// This stores the content, while `package.readme` stores the filename @@ -60,11 +61,11 @@ impl<'a> FakeRelease<'a> { rustdoc_files: Vec::new(), doc_targets: Vec::new(), default_target: None, - registry_crate_data: RegistryCrateData { + registry_crate_data: CrateData { owners: Vec::new() }, + registry_release_data: ReleaseData { release_time: Utc::now(), yanked: false, downloads: 0, - owners: Vec::new(), }, has_docs: true, has_examples: false, @@ -73,7 +74,7 @@ impl<'a> FakeRelease<'a> { } pub(crate) fn downloads(mut self, downloads: i32) -> Self { - self.registry_crate_data.downloads = downloads; + self.registry_release_data.downloads = downloads; self } @@ -83,7 +84,7 @@ impl<'a> FakeRelease<'a> { } pub(crate) fn release_time(mut self, new: DateTime) -> Self { - self.registry_crate_data.release_time = new; + self.registry_release_data.release_time = new; self } @@ -116,7 +117,7 @@ impl<'a> FakeRelease<'a> { } pub(crate) fn yanked(mut self, new: bool) -> Self { - self.registry_crate_data.yanked = new; + self.registry_release_data.yanked = new; self } @@ -249,11 +250,16 @@ impl<'a> FakeRelease<'a> { self.default_target.unwrap_or("x86_64-unknown-linux-gnu"), source_meta, self.doc_targets, - &self.registry_crate_data, + &self.registry_release_data, self.has_docs, self.has_examples, algs, )?; + crate::db::update_crate_data_in_database( + &db.conn(), + &package.name, + &self.registry_crate_data, + )?; crate::db::add_build_into_database(&db.conn(), release_id, &self.build_result)?; Ok(release_id) From 087d982ddf4df9f9b1dbfc7f90f424940c5a7b7f Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Tue, 28 Jul 2020 19:00:17 +0200 Subject: [PATCH 2/9] Add test for updating owner details --- src/test/fakes.rs | 7 ++- src/web/crate_details.rs | 92 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 14e98144e..c9ecd9f7a 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -1,6 +1,6 @@ use super::TestDatabase; use crate::docbuilder::BuildResult; -use crate::index::api::{CrateData, ReleaseData}; +use crate::index::api::{CrateData, CrateOwner, ReleaseData}; use crate::storage::Storage; use crate::utils::{Dependency, MetadataPackage, Target}; use chrono::{DateTime, Utc}; @@ -167,6 +167,11 @@ impl<'a> FakeRelease<'a> { self.source_file("README.md", content.as_bytes()) } + pub(crate) fn add_owner(mut self, owner: CrateOwner) -> Self { + self.registry_crate_data.owners.push(owner); + self + } + /// Returns the release_id pub(crate) fn create(self) -> Result { use std::collections::HashSet; diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index fc0adad1b..769c00a52 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -315,6 +315,7 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { #[cfg(test)] mod tests { use super::*; + use crate::index::api::CrateOwner; use crate::test::{wrapper, TestDatabase}; use failure::Error; use kuchiki::traits::TendrilSink; @@ -610,4 +611,95 @@ mod tests { Ok(()) }); } + + #[test] + fn test_updating_owners() { + wrapper(|env| { + let db = env.db(); + + env.fake_release() + .name("foo") + .version("0.0.1") + .add_owner(CrateOwner { + login: "foobar".into(), + avatar: "https://example.org/foobar".into(), + name: "Foo Bar".into(), + email: "foobar@example.org".into(), + }) + .create()?; + + let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap(); + assert_eq!( + details.owners, + vec![("foobar".into(), "https://example.org/foobar".into())] + ); + + // Adding a new owner, and changing details on an existing owner + env.fake_release() + .name("foo") + .version("0.0.2") + .add_owner(CrateOwner { + login: "foobar".into(), + avatar: "https://example.org/foobarv2".into(), + name: "Foo Bar".into(), + email: "foobar@example.org".into(), + }) + .add_owner(CrateOwner { + login: "barfoo".into(), + avatar: "https://example.org/barfoo".into(), + name: "Bar Foo".into(), + email: "foobar@example.org".into(), + }) + .create()?; + + let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap(); + let mut owners = details.owners.clone(); + owners.sort(); + assert_eq!( + owners, + vec![ + ("barfoo".into(), "https://example.org/barfoo".into()), + ("foobar".into(), "https://example.org/foobarv2".into()) + ] + ); + + // Removing an existing owner + env.fake_release() + .name("foo") + .version("0.0.3") + .add_owner(CrateOwner { + login: "barfoo".into(), + avatar: "https://example.org/barfoo".into(), + name: "Bar Foo".into(), + email: "foobar@example.org".into(), + }) + .create()?; + + let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap(); + assert_eq!( + details.owners, + vec![("barfoo".into(), "https://example.org/barfoo".into())] + ); + + // Changing owner details on another of there crates applies too + env.fake_release() + .name("bar") + .version("0.0.1") + .add_owner(CrateOwner { + login: "barfoo".into(), + avatar: "https://example.org/barfoov2".into(), + name: "Bar Foo".into(), + email: "foobar@example.org".into(), + }) + .create()?; + + let details = CrateDetails::new(&db.conn(), "foo", "0.0.1").unwrap(); + assert_eq!( + details.owners, + vec![("barfoo".into(), "https://example.org/barfoov2".into())] + ); + + Ok(()) + }); + } } From 48d41b313abb11d14726cea26f7b2e00d2138d0f Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Mon, 29 Jun 2020 14:57:31 +0200 Subject: [PATCH 3/9] Move CRATESFYI_PREFIX handling to Config --- src/bin/cratesfyi.rs | 4 ++- src/config.rs | 9 +++++++ src/docbuilder/options.rs | 53 +++++---------------------------------- src/utils/daemon.rs | 40 +++++++++++------------------ 4 files changed, 33 insertions(+), 73 deletions(-) diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index dff84007e..b2d83413c 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -249,6 +249,7 @@ struct Build { )] prefix: PathBuf, + /// DEPRECATED /// Sets the registry index path, where on disk the registry index will be cloned to #[structopt( name = "REGISTRY_INDEX_PATH", @@ -280,9 +281,10 @@ struct Build { impl Build { pub fn handle_args(self, ctx: Context) -> Result<(), Error> { let docbuilder = { - let mut doc_options = DocBuilderOptions::from_prefix(self.prefix); + let mut doc_options = DocBuilderOptions::new(&*ctx.config()?); if let Some(registry_index_path) = self.registry_index_path { + log::warn!("Use of deprecated cli flag --registry-index-path"); doc_options.registry_index_path = registry_index_path; } diff --git a/src/config.rs b/src/config.rs index 0054f353e..503889445 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,6 @@ use failure::{bail, format_err, Error, Fail, ResultExt}; use std::env::VarError; +use std::path::PathBuf; use std::str::FromStr; #[derive(Debug)] @@ -7,6 +8,9 @@ pub struct Config { // Build params pub(crate) build_attempts: u16, + pub(crate) prefix: PathBuf, + pub(crate) registry_index_path: PathBuf, + // Database connection params pub(crate) database_url: String, pub(crate) max_pool_size: u32, @@ -23,9 +27,14 @@ pub struct Config { impl Config { pub fn from_env() -> Result { + let prefix: PathBuf = require_env("CRATESFYI_PREFIX")?; + Ok(Self { build_attempts: env("DOCSRS_BUILD_ATTEMPTS", 5)?, + prefix: prefix.clone(), + registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?, + database_url: require_env("CRATESFYI_DATABASE_URL")?, max_pool_size: env("DOCSRS_MAX_POOL_SIZE", 90)?, min_pool_idle: env("DOCSRS_MIN_POOL_IDLE", 10)?, diff --git a/src/docbuilder/options.rs b/src/docbuilder/options.rs index 693b2c929..b0fbe3d10 100644 --- a/src/docbuilder/options.rs +++ b/src/docbuilder/options.rs @@ -1,8 +1,7 @@ -use crate::error::Result; +use crate::{config::Config, error::Result}; use std::path::PathBuf; -use std::{env, fmt}; -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct DocBuilderOptions { pub keep_build_directory: bool, pub prefix: PathBuf, @@ -14,15 +13,11 @@ pub struct DocBuilderOptions { pub debug: bool, } -impl Default for DocBuilderOptions { - fn default() -> DocBuilderOptions { - let cwd = env::current_dir().unwrap(); - - let (prefix, registry_index_path) = generate_paths(cwd); - +impl DocBuilderOptions { + pub fn new(config: &Config) -> DocBuilderOptions { DocBuilderOptions { - prefix, - registry_index_path, + prefix: config.prefix.clone(), + registry_index_path: config.registry_index_path.clone(), keep_build_directory: false, skip_if_exists: false, @@ -32,36 +27,6 @@ impl Default for DocBuilderOptions { debug: false, } } -} - -impl fmt::Debug for DocBuilderOptions { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "DocBuilderOptions {{ \ - registry_index_path: {:?}, \ - keep_build_directory: {:?}, skip_if_exists: {:?}, \ - skip_if_log_exists: {:?}, debug: {:?} }}", - self.registry_index_path, - self.keep_build_directory, - self.skip_if_exists, - self.skip_if_log_exists, - self.debug - ) - } -} - -impl DocBuilderOptions { - /// Creates new DocBuilderOptions from prefix - pub fn from_prefix(prefix: PathBuf) -> DocBuilderOptions { - let (prefix, registry_index_path) = generate_paths(prefix); - - DocBuilderOptions { - prefix, - registry_index_path, - ..Default::default() - } - } pub fn check_paths(&self) -> Result<()> { if !self.registry_index_path.exists() { @@ -74,9 +39,3 @@ impl DocBuilderOptions { Ok(()) } } - -fn generate_paths(prefix: PathBuf) -> (PathBuf, PathBuf) { - let registry_index_path = PathBuf::from(&prefix).join("crates.io-index"); - - (prefix, registry_index_path) -} diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index 1e146c591..b1ba481a7 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -11,20 +11,23 @@ use crate::{ use chrono::{Timelike, Utc}; use failure::Error; use log::{debug, error, info}; -use std::path::PathBuf; use std::sync::Arc; +use std::thread; use std::time::Duration; -use std::{env, thread}; -fn start_registry_watcher(pool: Pool, build_queue: Arc) -> Result<(), Error> { +fn start_registry_watcher( + opts: DocBuilderOptions, + pool: Pool, + build_queue: Arc, +) -> Result<(), Error> { thread::Builder::new() .name("registry index reader".to_string()) .spawn(move || { // space this out to prevent it from clashing against the queue-builder thread on launch thread::sleep(Duration::from_secs(30)); loop { - let opts = opts(); - let mut doc_builder = DocBuilder::new(opts, pool.clone(), build_queue.clone()); + let mut doc_builder = + DocBuilder::new(opts.clone(), pool.clone(), build_queue.clone()); if doc_builder.is_locked() { debug!("Lock file exists, skipping checking new crates"); @@ -50,23 +53,14 @@ pub fn start_daemon( storage: Arc, enable_registry_watcher: bool, ) -> Result<(), Error> { - const CRATE_VARIABLES: &[&str] = &["CRATESFYI_PREFIX"]; - - // first check required environment variables - for v in CRATE_VARIABLES.iter() { - if env::var(v).is_err() { - panic!("Environment variable {} not found", v) - } - } - - let dbopts = opts(); + let dbopts = DocBuilderOptions::new(&config); // check paths once dbopts.check_paths().unwrap(); if enable_registry_watcher { // check new crates every minute - start_registry_watcher(db.clone(), build_queue.clone())?; + start_registry_watcher(dbopts.clone(), db.clone(), build_queue.clone())?; } // build new crates every minute @@ -76,8 +70,11 @@ pub fn start_daemon( thread::Builder::new() .name("build queue reader".to_string()) .spawn(move || { - let doc_builder = - DocBuilder::new(opts(), cloned_db.clone(), cloned_build_queue.clone()); + let doc_builder = DocBuilder::new( + dbopts.clone(), + cloned_db.clone(), + cloned_build_queue.clone(), + ); queue_builder(doc_builder, cloned_db, cloned_build_queue, cloned_storage).unwrap(); }) .unwrap(); @@ -131,10 +128,3 @@ where })?; Ok(()) } - -fn opts() -> DocBuilderOptions { - let prefix = PathBuf::from( - env::var("CRATESFYI_PREFIX").expect("CRATESFYI_PREFIX environment variable not found"), - ); - DocBuilderOptions::from_prefix(prefix) -} From 8fb7ad60237f3203aaae5a953cbe98526159c7d6 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Sat, 18 Jul 2020 13:16:23 +0200 Subject: [PATCH 4/9] Pass just the paths into DocBuilderOptions --- src/bin/cratesfyi.rs | 4 +++- src/config.rs | 4 ++-- src/docbuilder/options.rs | 8 ++++---- src/utils/daemon.rs | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index b2d83413c..2580adaeb 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -281,7 +281,9 @@ struct Build { impl Build { pub fn handle_args(self, ctx: Context) -> Result<(), Error> { let docbuilder = { - let mut doc_options = DocBuilderOptions::new(&*ctx.config()?); + let config = ctx.config()?; + let mut doc_options = + DocBuilderOptions::new(config.prefix.clone(), config.registry_index_path.clone()); if let Some(registry_index_path) = self.registry_index_path { log::warn!("Use of deprecated cli flag --registry-index-path"); diff --git a/src/config.rs b/src/config.rs index 503889445..cb7518fcc 100644 --- a/src/config.rs +++ b/src/config.rs @@ -8,8 +8,8 @@ pub struct Config { // Build params pub(crate) build_attempts: u16, - pub(crate) prefix: PathBuf, - pub(crate) registry_index_path: PathBuf, + pub prefix: PathBuf, + pub registry_index_path: PathBuf, // Database connection params pub(crate) database_url: String, diff --git a/src/docbuilder/options.rs b/src/docbuilder/options.rs index b0fbe3d10..a9cf399c8 100644 --- a/src/docbuilder/options.rs +++ b/src/docbuilder/options.rs @@ -1,4 +1,4 @@ -use crate::{config::Config, error::Result}; +use crate::error::Result; use std::path::PathBuf; #[derive(Clone, Debug)] @@ -14,10 +14,10 @@ pub struct DocBuilderOptions { } impl DocBuilderOptions { - pub fn new(config: &Config) -> DocBuilderOptions { + pub fn new(prefix: PathBuf, registry_index_path: PathBuf) -> DocBuilderOptions { DocBuilderOptions { - prefix: config.prefix.clone(), - registry_index_path: config.registry_index_path.clone(), + prefix, + registry_index_path, keep_build_directory: false, skip_if_exists: false, diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index b1ba481a7..50e9ba759 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -53,7 +53,7 @@ pub fn start_daemon( storage: Arc, enable_registry_watcher: bool, ) -> Result<(), Error> { - let dbopts = DocBuilderOptions::new(&config); + let dbopts = DocBuilderOptions::new(config.prefix.clone(), config.registry_index_path.clone()); // check paths once dbopts.check_paths().unwrap(); From c8af9184e460bbce5afb6135362c2e14c8148882 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Sat, 18 Jul 2020 13:18:47 +0200 Subject: [PATCH 5/9] Remove --registry-index-path and --prefix flags --- src/bin/cratesfyi.rs | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index 2580adaeb..f8006423b 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -241,23 +241,6 @@ impl PrioritySubcommand { #[derive(Debug, Clone, PartialEq, Eq, StructOpt)] #[structopt(rename_all = "kebab-case")] struct Build { - #[structopt( - name = "PREFIX", - short = "P", - long = "prefix", - env = "CRATESFYI_PREFIX" - )] - prefix: PathBuf, - - /// DEPRECATED - /// Sets the registry index path, where on disk the registry index will be cloned to - #[structopt( - name = "REGISTRY_INDEX_PATH", - long = "registry-index-path", - alias = "crates-io-index-path" - )] - registry_index_path: Option, - /// Skips building documentation if documentation exists #[structopt(name = "SKIP_IF_EXISTS", short = "s", long = "skip")] skip_if_exists: bool, @@ -285,11 +268,6 @@ impl Build { let mut doc_options = DocBuilderOptions::new(config.prefix.clone(), config.registry_index_path.clone()); - if let Some(registry_index_path) = self.registry_index_path { - log::warn!("Use of deprecated cli flag --registry-index-path"); - doc_options.registry_index_path = registry_index_path; - } - doc_options.skip_if_exists = self.skip_if_exists; doc_options.skip_if_log_exists = self.skip_if_log_exists; doc_options.keep_build_directory = self.keep_build_directory; From c5be00cdf596eb80cf592a8e29d4787ce87e6482 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Mon, 27 Jul 2020 20:18:18 +0200 Subject: [PATCH 6/9] Add CLI command to update registry data for a crate --- src/bin/cratesfyi.rs | 17 +++++++++++++++++ src/db/add_package.rs | 2 +- src/db/mod.rs | 5 ++--- src/index/api.rs | 8 ++++---- src/index/mod.rs | 6 +++--- src/lib.rs | 2 +- 6 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index f8006423b..e3b938ad7 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use std::sync::Arc; use cratesfyi::db::{self, add_path_into_database, Pool}; +use cratesfyi::index::Index; use cratesfyi::utils::{remove_crate_priority, set_crate_priority}; use cratesfyi::{ BuildQueue, Config, DocBuilder, DocBuilderOptions, RustwideBuilder, Server, Storage, @@ -417,6 +418,12 @@ enum DatabaseSubcommand { /// Updates github stats for crates. UpdateGithubFields, + /// Updates info for a crate from the registry's API + UpdateCrateRegistryFields { + #[structopt(name = "CRATE")] + name: String, + }, + AddDirectory { /// Path of file or directory #[structopt(name = "DIRECTORY")] @@ -454,6 +461,16 @@ impl DatabaseSubcommand { .update_all_crates()?; } + Self::UpdateCrateRegistryFields { name } => { + let index = Index::new(&ctx.config()?.registry_index_path)?; + + db::update_crate_data_in_database( + &*ctx.conn()?, + &name, + &index.api().get_crate_data(&name), + )?; + } + Self::AddDirectory { directory, prefix } => { add_path_into_database(&*ctx.storage()?, &prefix, directory) .context("Failed to add directory into database")?; diff --git a/src/db/add_package.rs b/src/db/add_package.rs index b85b732ab..43821a65e 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -327,7 +327,7 @@ fn add_authors_into_database( Ok(()) } -pub(crate) fn update_crate_data_in_database( +pub fn update_crate_data_in_database( conn: &Connection, name: &str, registry_data: &CrateData, diff --git a/src/db/mod.rs b/src/db/mod.rs index 8d9772708..9783cea23 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,8 +1,7 @@ //! Database operations -pub(crate) use self::add_package::{ - add_build_into_database, add_package_into_database, update_crate_data_in_database, -}; +pub use self::add_package::update_crate_data_in_database; +pub(crate) use self::add_package::{add_build_into_database, add_package_into_database}; pub use self::delete::{delete_crate, delete_version}; pub use self::file::add_path_into_database; pub use self::migrate::migrate; diff --git a/src/index/api.rs b/src/index/api.rs index 445b14298..5784e745c 100644 --- a/src/index/api.rs +++ b/src/index/api.rs @@ -15,13 +15,13 @@ const APP_USER_AGENT: &str = concat!( ); #[derive(Debug)] -pub(crate) struct Api { +pub struct Api { api_base: Option, client: reqwest::blocking::Client, } #[derive(Debug)] -pub(crate) struct CrateData { +pub struct CrateData { pub(crate) owners: Vec, } @@ -33,7 +33,7 @@ pub(crate) struct ReleaseData { } #[derive(Debug)] -pub(crate) struct CrateOwner { +pub struct CrateOwner { pub(crate) avatar: String, pub(crate) email: String, pub(crate) login: String, @@ -62,7 +62,7 @@ impl Api { .ok_or_else(|| err_msg("index is missing an api base url")) } - pub(crate) fn get_crate_data(&self, name: &str) -> CrateData { + pub fn get_crate_data(&self, name: &str) -> CrateData { let owners = self.get_owners(name).unwrap_or_else(|err| { warn!("Failed to get owners for {}: {}", name, err); Vec::new() diff --git a/src/index/mod.rs b/src/index/mod.rs index 0953f3668..acb86a8fc 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -8,7 +8,7 @@ use failure::ResultExt; pub(crate) mod api; -pub(crate) struct Index { +pub struct Index { path: PathBuf, api: Api, } @@ -39,7 +39,7 @@ fn load_config(repo: &git2::Repository) -> Result { } impl Index { - pub(crate) fn new(path: impl AsRef) -> Result { + pub fn new(path: impl AsRef) -> Result { let path = path.as_ref().to_owned(); // This initializes the repository, then closes it afterwards to avoid leaking file descriptors. // See https://github.com/rust-lang/docs.rs/pull/847 @@ -56,7 +56,7 @@ impl Index { Ok(diff) } - pub(crate) fn api(&self) -> &Api { + pub fn api(&self) -> &Api { &self.api } } diff --git a/src/lib.rs b/src/lib.rs index d4276dc9f..d25e79c5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ mod config; pub mod db; mod docbuilder; mod error; -mod index; +pub mod index; pub mod storage; #[cfg(test)] mod test; From 9d81de996c00e700719fdf9ba17b4f05e4794b18 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Fri, 31 Jul 2020 17:03:16 +0200 Subject: [PATCH 7/9] Cleanup inserting owners into database --- src/db/add_package.rs | 77 +++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 43821a65e..6eab47aaf 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -349,20 +349,17 @@ fn update_owners_in_database( owners: &[CrateOwner], crate_id: i32, ) -> Result<()> { - let existing_owners: Vec = conn - .query( - " - SELECT login - FROM owners - INNER JOIN owner_rels - ON owner_rels.oid = owners.id - WHERE owner_rels.cid = $1 - ", - &[&crate_id], - )? - .into_iter() - .map(|row| row.get(0)) - .collect(); + let rows = conn.query( + " + SELECT login + FROM owners + INNER JOIN owner_rels + ON owner_rels.oid = owners.id + WHERE owner_rels.cid = $1 + ", + &[&crate_id], + )?; + let existing_owners = rows.into_iter().map(|row| -> String { row.get(0) }); for owner in owners { debug!("Updating owner data for {}: {:?}", owner.login, owner); @@ -372,15 +369,15 @@ fn update_owners_in_database( let owner_id: i32 = { conn.query( " - INSERT INTO owners (login, avatar, name, email) - VALUES ($1, $2, $3, $4) - ON CONFLICT (login) DO UPDATE - SET - avatar = $2, - name = $3, - email = $4 - RETURNING id - ", + INSERT INTO owners (login, avatar, name, email) + VALUES ($1, $2, $3, $4) + ON CONFLICT (login) DO UPDATE + SET + avatar = $2, + name = $3, + email = $4 + RETURNING id + ", &[&owner.login, &owner.avatar, &owner.name, &owner.email], )? .get(0) @@ -388,38 +385,26 @@ fn update_owners_in_database( }; // add relationship - let updated = conn.query( - "INSERT INTO owner_rels (cid, oid) VALUES ($1, $2)", + conn.query( + "INSERT INTO owner_rels (cid, oid) VALUES ($1, $2) ON CONFLICT DO NOTHING", &[&crate_id, &owner_id], - ); - - match updated { - Ok(_) => debug!("Added new owner relationship"), - Err(e) - if e.as_db().and_then(|e| e.constraint.as_deref()) - == Some("owner_rels_cid_oid_key") => - { - debug!("Existing owner relationship"); - } - Err(e) => return Err(e.into()), - } + )?; } - let to_remove = existing_owners - .iter() - .filter(|login| !owners.iter().any(|owner| &&owner.login == login)); + let to_remove = + existing_owners.filter(|login| !owners.iter().any(|owner| &owner.login == login)); for login in to_remove { debug!("Removing owner relationship {}", login); // remove relationship conn.query( " - DELETE FROM owner_rels - USING owners - WHERE owner_rels.cid = $1 - AND owner_rels.oid = owners.id - AND owners.login = $2 - ", + DELETE FROM owner_rels + USING owners + WHERE owner_rels.cid = $1 + AND owner_rels.oid = owners.id + AND owners.login = $2 + ", &[&crate_id, &login], )?; } From 0560662eddf4c0e4a6ee2fc2bc3b741182adc286 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Fri, 31 Jul 2020 17:05:00 +0200 Subject: [PATCH 8/9] Fix test comment --- src/web/crate_details.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 769c00a52..cc608bd10 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -681,7 +681,7 @@ mod tests { vec![("barfoo".into(), "https://example.org/barfoo".into())] ); - // Changing owner details on another of there crates applies too + // Changing owner details on another of their crates applies the change to both env.fake_release() .name("bar") .version("0.0.1") From 865c310336536e04659dd66ae3282efc6a2b1416 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Fri, 31 Jul 2020 17:13:45 +0200 Subject: [PATCH 9/9] Move index API failure handling to docbuilder --- src/bin/cratesfyi.rs | 2 +- src/docbuilder/rustwide_builder.rs | 21 +++++++++++++----- src/index/api.rs | 35 +++++++++++++++++------------- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index e3b938ad7..b078390e1 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -467,7 +467,7 @@ impl DatabaseSubcommand { db::update_crate_data_in_database( &*ctx.conn()?, &name, - &index.api().get_crate_data(&name), + &index.api().get_crate_data(&name)?, )?; } diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index e5d7bb86a..4ef38303c 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -7,6 +7,7 @@ use crate::db::{ }; use crate::docbuilder::{crates::crates_from_path, Limits}; use crate::error::Result; +use crate::index::api::ReleaseData; use crate::storage::CompressionAlgorithms; use crate::storage::Storage; use crate::utils::{copy_doc_dir, parse_rustc_version, CargoMetadata}; @@ -399,6 +400,15 @@ impl RustwideBuilder { } else { crate::web::metrics::NON_LIBRARY_BUILDS.inc(); } + + let release_data = match doc_builder.index.api().get_release_data(name, version) { + Ok(data) => data, + Err(err) => { + warn!("{:#?}", err); + ReleaseData::default() + } + }; + let release_id = add_package_into_database( &conn, res.cargo_metadata.root(), @@ -407,7 +417,7 @@ impl RustwideBuilder { &res.target, files_list, successful_targets, - &doc_builder.index.api().get_release_data(name, version), + &release_data, has_docs, has_examples, algs, @@ -416,11 +426,10 @@ impl RustwideBuilder { add_build_into_database(&conn, release_id, &res.result)?; // Some crates.io crate data is mutable, so we proactively update it during a release - update_crate_data_in_database( - &conn, - name, - &doc_builder.index.api().get_crate_data(name), - )?; + match doc_builder.index.api().get_crate_data(name) { + Ok(crate_data) => update_crate_data_in_database(&conn, name, &crate_data)?, + Err(err) => warn!("{:#?}", err), + } doc_builder.add_to_cache(name, version); Ok(res) diff --git a/src/index/api.rs b/src/index/api.rs index 5784e745c..ce9a3c6c0 100644 --- a/src/index/api.rs +++ b/src/index/api.rs @@ -1,6 +1,5 @@ use chrono::{DateTime, Utc}; -use failure::err_msg; -use log::warn; +use failure::{err_msg, ResultExt}; use reqwest::header::{HeaderValue, ACCEPT, USER_AGENT}; use semver::Version; use serde::Deserialize; @@ -32,6 +31,16 @@ pub(crate) struct ReleaseData { pub(crate) downloads: i32, } +impl Default for ReleaseData { + fn default() -> ReleaseData { + ReleaseData { + release_time: Utc::now(), + yanked: false, + downloads: 0, + } + } +} + #[derive(Debug)] pub struct CrateOwner { pub(crate) avatar: String, @@ -62,28 +71,24 @@ impl Api { .ok_or_else(|| err_msg("index is missing an api base url")) } - pub fn get_crate_data(&self, name: &str) -> CrateData { - let owners = self.get_owners(name).unwrap_or_else(|err| { - warn!("Failed to get owners for {}: {}", name, err); - Vec::new() - }); + pub fn get_crate_data(&self, name: &str) -> Result { + let owners = self + .get_owners(name) + .context(format!("Failed to get owners for {}", name))?; - CrateData { owners } + Ok(CrateData { owners }) } - pub(crate) fn get_release_data(&self, name: &str, version: &str) -> ReleaseData { + pub(crate) fn get_release_data(&self, name: &str, version: &str) -> Result { let (release_time, yanked, downloads) = self .get_release_time_yanked_downloads(name, version) - .unwrap_or_else(|err| { - warn!("Failed to get crate data for {}-{}: {}", name, version, err); - (Utc::now(), false, 0) - }); + .context(format!("Failed to get crate data for {}-{}", name, version))?; - ReleaseData { + Ok(ReleaseData { release_time, yanked, downloads, - } + }) } /// Get release_time, yanked and downloads from the registry's API