From e4990694d96a458c9f14659f7d8dfb5ec903e165 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Sun, 17 Jan 2021 00:05:04 +0100 Subject: [PATCH] Add gitlab repository information retrieval support: * Create RepositoryStatsUpdater type to use as interface over updaters * Add RepositoryStatsUpdater instance to global contexts so they can be used in CrateDetails * Add Pool field into RepositoryStatsUpdater * Return a default icon * Add mock tests for github and gitlab updaters --- Cargo.lock | 163 ++++++++++-- Cargo.toml | 1 + README.md | 4 +- src/bin/cratesfyi.rs | 34 ++- src/config.rs | 5 + src/context.rs | 2 + src/db/add_package.rs | 9 +- src/db/migrate.rs | 88 +++++++ src/docbuilder/rustwide_builder.rs | 37 +-- src/lib.rs | 1 + src/repositories/github.rs | 332 ++++++++++++++++++++++++ src/repositories/gitlab.rs | 320 +++++++++++++++++++++++ src/repositories/mod.rs | 20 ++ src/repositories/updater.rs | 402 +++++++++++++++++++++++++++++ src/test/fakes.rs | 21 +- src/test/mod.rs | 31 ++- src/utils/daemon.rs | 36 ++- src/utils/github_updater.rs | 400 ---------------------------- src/utils/mod.rs | 4 +- src/web/crate_details.rs | 132 +++++++--- src/web/extensions.rs | 9 +- src/web/releases.rs | 26 +- src/web/rustdoc.rs | 8 +- templates/crate/details.html | 18 +- templates/rustdoc/topbar.html | 12 +- 25 files changed, 1542 insertions(+), 573 deletions(-) create mode 100644 src/repositories/github.rs create mode 100644 src/repositories/gitlab.rs create mode 100644 src/repositories/mod.rs create mode 100644 src/repositories/updater.rs delete mode 100644 src/utils/github_updater.rs diff --git a/Cargo.lock b/Cargo.lock index 90c45ad40..529b2c2a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,7 +38,7 @@ version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" dependencies = [ - "memchr 2.3.3", + "memchr", ] [[package]] @@ -56,6 +56,23 @@ version = "0.4.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "dabe5a181f83789739c194cbe5a897dde195078fac08568d09221fd6137a7ba8" +[[package]] +name = "arrayvec" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" + +[[package]] +name = "assert-json-diff" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4259cbe96513d2f1073027a259fc2ca917feb3026a5a8d984e3628e490255cc0" +dependencies = [ + "extend", + "serde", + "serde_json", +] + [[package]] name = "async-trait" version = "0.1.36" @@ -161,6 +178,18 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "bitvec" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.7.3" @@ -198,7 +227,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931" dependencies = [ "lazy_static", - "memchr 2.3.3", + "memchr", "regex-automata", "serde", ] @@ -326,6 +355,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "colored" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd" +dependencies = [ + "atty", + "lazy_static", + "winapi 0.3.8", +] + [[package]] name = "comrak" version = "0.9.1" @@ -600,7 +640,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" dependencies = [ - "memchr 2.3.3", + "memchr", ] 
[[package]] @@ -625,6 +665,12 @@ dependencies = [ "syn", ] +[[package]] +name = "difference" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" + [[package]] name = "digest" version = "0.8.1" @@ -697,6 +743,7 @@ dependencies = [ "log 0.4.8", "lol_html", "mime_guess 2.0.3", + "mockito", "notify", "once_cell", "path-slash", @@ -801,6 +848,18 @@ dependencies = [ "termcolor", ] +[[package]] +name = "extend" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f47da3a72ec598d9c8937a7ebca8962a5c7a1f28444e38c2b33c771ba3f55f05" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "failure" version = "0.1.8" @@ -945,6 +1004,12 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + [[package]] name = "futf" version = "0.1.4" @@ -1039,7 +1104,7 @@ dependencies = [ "futures-macro", "futures-sink", "futures-task", - "memchr 2.3.3", + "memchr", "pin-project-lite", "pin-utils", "proc-macro-hack", @@ -1376,7 +1441,7 @@ dependencies = [ "globset", "lazy_static", "log 0.4.8", - "memchr 2.3.3", + "memchr", "regex", "same-file", "thread_local", @@ -1524,6 +1589,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b294d6fa9ee409a054354afc4352b0b9ef7ca222c69b8812cbea9e7d2bf3783f" +[[package]] +name = "lexical-core" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21f866863575d0e1d654fbeeabdc927292fdf862873dc3c96c6f753357e13374" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if 1.0.0", 
+ "ryu", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.91" @@ -1636,7 +1714,7 @@ dependencies = [ "encoding_rs", "lazy_static", "lazycell", - "memchr 2.3.3", + "memchr", "safemem", "selectors 0.21.0", "thiserror", @@ -1689,15 +1767,6 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" -[[package]] -name = "memchr" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a" -dependencies = [ - "libc", -] - [[package]] name = "memchr" version = "2.3.3" @@ -1824,6 +1893,24 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "mockito" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "102f0986ade96028c3227fc14fcbbbee0358ca33b3fedc9a400a97a6f5ad4a6e" +dependencies = [ + "assert-json-diff", + "colored", + "difference", + "httparse", + "lazy_static", + "log 0.4.8", + "rand 0.8.3", + "regex", + "serde_json", + "serde_urlencoded", +] + [[package]] name = "modifier" version = "0.1.0" @@ -1886,11 +1973,15 @@ checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" [[package]] name = "nom" -version = "3.2.1" +version = "6.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05aec50c70fd288702bcd93284a8444607f3292dbdf2a30de5ea5dcdbe72287b" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" dependencies = [ - "memchr 1.0.2", + "bitvec", + "funty", + "lexical-core", + "memchr", + "version_check 0.9.2", ] [[package]] @@ -2333,7 +2424,7 @@ dependencies = [ "fallible-iterator", "hmac", "md5", - "memchr 2.3.3", + "memchr", "rand 0.8.3", "sha2", "stringprep", @@ -2484,6 +2575,12 @@ dependencies = [ "r2d2", ] +[[package]] +name = "radium" +version = "0.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + [[package]] name = "rand" version = "0.6.5" @@ -2746,7 +2843,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692" dependencies = [ "aho-corasick", - "memchr 2.3.3", + "memchr", "regex-syntax", "thread_local", ] @@ -3327,6 +3424,12 @@ version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e4b8c631c998468961a9ea159f064c5c8499b95b5e4a34b77849d45949d540" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stdweb" version = "0.4.20" @@ -3504,9 +3607,9 @@ dependencies = [ [[package]] name = "systemstat" -version = "0.1.5" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2078da8d09c6202bffd5e075946e65bfad5ce2cfa161edb15c5f014a8440adee" +checksum = "c31c241679f72241744c20d064a4db7feeb2caa214a8d6e2d4243b8c674a29a5" dependencies = [ "bytesize", "chrono", @@ -3517,6 +3620,12 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tar" version = "0.4.26" @@ -3696,7 +3805,7 @@ dependencies = [ "autocfg 1.0.0", "bytes 1.0.1", "libc", - "memchr 2.3.3", + "memchr", "mio 0.7.11", "num_cpus", "once_cell", @@ -3834,7 +3943,7 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b40075910de3a912adbd80b5d8bad6ad10a23eeb1f5bf9d4006839e899ba5bc" dependencies = [ - "memchr 2.3.3", + "memchr", "unchecked-index", ] @@ -4241,6 +4350,12 @@ dependencies = [ 
"winapi-build", ] +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" + [[package]] name = "xattr" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 7bbd35d57..17bfa8749 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,7 @@ procfs = "0.7" criterion = "0.3" kuchiki = "0.8" rand = "0.8" +mockito = "0.29" [build-dependencies] time = "0.1" diff --git a/README.md b/README.md index 28506f004..3441872e1 100644 --- a/README.md +++ b/README.md @@ -190,8 +190,10 @@ docker-compose run -- database add-directory [PREFIX] # Updates github stats for crates. # You need to set CRATESFYI_GITHUB_USERNAME, CRATESFYI_GITHUB_ACCESSTOKEN # environment variables in order to run this command. +# Set DOCSRS_GITLAB_ACCESSTOKEN to raise the rate limit, +# or leave it blank to fetch repositories at a slower rate. # You can set this environment variables in ~/.cratesfyi.env file. -docker-compose run -- database update-github-fields +cargo run -- database update-repository-fields ``` If you want to explore or edit database manually, you can connect to the database diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index f3fedb8a9..800e37943 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use std::sync::Arc; use docs_rs::db::{self, add_path_into_database, Pool, PoolClient}; +use docs_rs::repositories::RepositoryStatsUpdater; use docs_rs::utils::{remove_crate_priority, set_crate_priority}; use docs_rs::{ BuildQueue, Config, Context, DocBuilder, Index, Metrics, PackageKind, RustwideBuilder, Server, @@ -371,11 +372,11 @@ enum DatabaseSubcommand { version: Option, }, - /// Updates github stats for crates. - UpdateGithubFields, + /// Updates Github/Gitlab stats for crates. + UpdateRepositoryFields, - /// Backfill GitHub stats for crates. 
- BackfillGithubStats, + /// Backfill GitHub/Gitlab stats for crates. + BackfillRepositoryStats, /// Updates info for a crate from the registry's API UpdateCrateRegistryFields { @@ -421,16 +422,12 @@ impl DatabaseSubcommand { .context("Failed to run database migrations")?; } - Self::UpdateGithubFields => { - docs_rs::utils::GithubUpdater::new(ctx.config()?, ctx.pool()?)? - .ok_or_else(|| failure::format_err!("missing GitHub token"))? - .update_all_crates()?; + Self::UpdateRepositoryFields => { + ctx.repository_stats_updater()?.update_all_crates()?; } - Self::BackfillGithubStats => { - docs_rs::utils::GithubUpdater::new(ctx.config()?, ctx.pool()?)? - .ok_or_else(|| failure::format_err!("missing GitHub token"))? - .backfill_repositories()?; + Self::BackfillRepositoryStats => { + ctx.repository_stats_updater()?.backfill_repositories()?; } Self::UpdateCrateRegistryFields { name } => { @@ -535,6 +532,7 @@ struct BinContext { pool: OnceCell, metrics: OnceCell>, index: OnceCell>, + repository_stats_updater: OnceCell>, } impl BinContext { @@ -546,6 +544,7 @@ impl BinContext { pool: OnceCell::new(), metrics: OnceCell::new(), index: OnceCell::new(), + repository_stats_updater: OnceCell::new(), } } @@ -617,4 +616,15 @@ impl Context for BinContext { })? .clone()) } + + fn repository_stats_updater(&self) -> Result, Error> { + Ok(self + .repository_stats_updater + .get_or_try_init::<_, Error>(|| { + let config = self.config()?; + let pool = self.pool()?; + Ok(Arc::new(RepositoryStatsUpdater::new(&config, pool))) + })? 
+ .clone()) + } } diff --git a/src/config.rs b/src/config.rs index 1ef97417a..a58550f95 100644 --- a/src/config.rs +++ b/src/config.rs @@ -30,6 +30,9 @@ pub struct Config { pub(crate) github_accesstoken: Option, pub(crate) github_updater_min_rate_limit: u32, + // Gitlab authentication + pub(crate) gitlab_accesstoken: Option, + // Max size of the files served by the docs.rs frontend pub(crate) max_file_size: usize, pub(crate) max_file_size_html: usize, @@ -90,6 +93,8 @@ impl Config { github_accesstoken: maybe_env("CRATESFYI_GITHUB_ACCESSTOKEN")?, github_updater_min_rate_limit: env("DOCSRS_GITHUB_UPDATER_MIN_RATE_LIMIT", 2500)?, + gitlab_accesstoken: maybe_env("DOCSRS_GITLAB_ACCESSTOKEN")?, + max_file_size: env("DOCSRS_MAX_FILE_SIZE", 50 * 1024 * 1024)?, max_file_size_html: env("DOCSRS_MAX_FILE_SIZE_HTML", 50 * 1024 * 1024)?, // LOL HTML only uses as much memory as the size of the start tag! diff --git a/src/context.rs b/src/context.rs index f0bfa1aff..104182bf2 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,4 +1,5 @@ use crate::db::Pool; +use crate::repositories::RepositoryStatsUpdater; use crate::{BuildQueue, Config, Index, Metrics, Storage}; use failure::Error; use std::sync::Arc; @@ -10,4 +11,5 @@ pub trait Context { fn pool(&self) -> Result; fn metrics(&self) -> Result, Error>; fn index(&self) -> Result, Error>; + fn repository_stats_updater(&self) -> Result, Error>; } diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 6db81c1f7..0a25da145 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -36,7 +36,7 @@ pub(crate) fn add_package_into_database( has_docs: bool, has_examples: bool, compression_algorithms: std::collections::HashSet, - github_repo: Option, + repository_id: Option, ) -> Result { debug!("Adding package into database"); let crate_id = initialize_package_in_database(conn, metadata_pkg)?; @@ -54,7 +54,8 @@ pub(crate) fn add_package_into_database( homepage_url, description, description_long, readme, keywords, 
have_examples, downloads, files, doc_targets, is_library, doc_rustc_version, - documentation_url, default_target, features, github_repo + documentation_url, default_target, features, + repository_id ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, @@ -85,7 +86,7 @@ pub(crate) fn add_package_into_database( documentation_url = $23, default_target = $24, features = $25, - github_repo = $26 + repository_id = $26 RETURNING id", &[ &crate_id, @@ -113,7 +114,7 @@ pub(crate) fn add_package_into_database( &metadata_pkg.documentation, &default_target, &features, - &github_repo, + &repository_id, ], )?; diff --git a/src/db/migrate.rs b/src/db/migrate.rs index 068ad51c5..42e4c572e 100644 --- a/src/db/migrate.rs +++ b/src/db/migrate.rs @@ -644,6 +644,94 @@ pub fn migrate(version: Option, conn: &mut Client) -> CratesfyiResult<( ALTER TABLE releases ADD COLUMN authors JSON; ", ), + migration!( + context, + 28, + // description + "Add gitlab handling: creation of the new repositories table which replaces and extend \ + github_repos", + // upgrade query + " + CREATE TABLE repositories ( + id SERIAL PRIMARY KEY, + host VARCHAR NOT NULL, + host_id VARCHAR NOT NULL, + name VARCHAR NOT NULL, + description VARCHAR, + last_commit TIMESTAMPTZ, + stars INT NOT NULL, + forks INT NOT NULL, + issues INT NOT NULL, + updated_at TIMESTAMPTZ NOT NULL, + UNIQUE (host, host_id) + ); + + ALTER TABLE releases ADD COLUMN repository_id INTEGER + REFERENCES repositories(id) ON DELETE SET NULL; + + INSERT INTO repositories(host, host_id, name, description, last_commit, stars, forks, issues, updated_at) + SELECT 'github.com', id, name, description, last_commit, stars, forks, issues, updated_at + FROM github_repos; + + UPDATE releases + SET repository_id = repositories.id + FROM repositories + WHERE releases.github_repo IS NOT NULL AND repositories.host_id = releases.github_repo; + + DROP INDEX releases_github_repo_idx; + DROP INDEX github_repos_stars_idx; + + CREATE INDEX releases_repo_idx ON 
releases(repository_id); + CREATE INDEX repos_stars_idx ON repositories(stars DESC); + + ALTER TABLE releases + DROP COLUMN github_repo; + + DROP TABLE github_repos; + ", + // downgrade query + " + CREATE TABLE github_repos ( + id VARCHAR PRIMARY KEY NOT NULL, + name VARCHAR NOT NULL, + description VARCHAR, + last_commit TIMESTAMPTZ, + stars INT NOT NULL, + forks INT NOT NULL, + issues INT NOT NULL, + updated_at TIMESTAMPTZ NOT NULL + ); + + ALTER TABLE releases ADD COLUMN github_repo VARCHAR + REFERENCES github_repos(id) ON DELETE SET NULL; + + INSERT INTO github_repos(id, name, description, last_commit, stars, forks, issues, updated_at) + SELECT host_id, name, description, last_commit, stars, forks, issues, updated_at + FROM repositories WHERE repositories.host = 'github.com'; + + UPDATE releases + SET github_repo = ( + SELECT host_id + FROM repositories + WHERE + repositories.host = 'github.com' AND + repositories.id = releases.repository_id + ) + WHERE + repository_id IS NOT NULL; + + DROP INDEX releases_repo_idx; + DROP INDEX repos_stars_idx; + + CREATE INDEX releases_github_repo_idx ON releases (github_repo); + CREATE INDEX github_repos_stars_idx ON github_repos(stars DESC); + + ALTER TABLE releases + DROP COLUMN repository_id; + + DROP TABLE repositories; + " + ), ]; for migration in migrations { diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index e155b2694..3f8a5d1a6 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -6,8 +6,9 @@ use crate::db::{ use crate::docbuilder::{crates::crates_from_path, Limits}; use crate::error::Result; use crate::index::api::ReleaseData; +use crate::repositories::RepositoryStatsUpdater; use crate::storage::CompressionAlgorithms; -use crate::utils::{copy_dir_all, parse_rustc_version, CargoMetadata, GithubUpdater}; +use crate::utils::{copy_dir_all, parse_rustc_version, CargoMetadata}; use crate::{db::blacklist::is_blacklisted, utils::MetadataPackage}; 
use crate::{Config, Context, Index, Metrics, Storage}; use docsrs_metadata::{Metadata, DEFAULT_TARGETS, HOST_TARGET}; @@ -42,6 +43,7 @@ pub struct RustwideBuilder { metrics: Arc, index: Arc, rustc_version: String, + repository_stats_updater: Arc, skip_build_if_exists: bool, } @@ -72,6 +74,7 @@ impl RustwideBuilder { metrics: context.metrics()?, index: context.index()?, rustc_version: String::new(), + repository_stats_updater: context.repository_stats_updater()?, skip_build_if_exists: false, }) } @@ -364,7 +367,7 @@ impl RustwideBuilder { }; let cargo_metadata = res.cargo_metadata.root(); - let github_repo = self.get_github_repo(&mut conn, cargo_metadata)?; + let repository = self.get_repo(cargo_metadata)?; let release_id = add_package_into_database( &mut conn, @@ -378,7 +381,7 @@ impl RustwideBuilder { has_docs, has_examples, algs, - github_repo, + repository, )?; if let Some(doc_coverage) = res.doc_coverage { @@ -651,32 +654,8 @@ impl RustwideBuilder { } } - fn get_github_repo( - &self, - conn: &mut Client, - metadata: &MetadataPackage, - ) -> Result> { - let updater = match GithubUpdater::new(self.config.clone(), self.db.clone())? 
{ - Some(updater) => updater, - None => { - warn!("did not collect GitHub stats as no token was provided"); - return Ok(None); - } - }; - let repo = match &metadata.repository { - Some(url) => url, - None => { - debug!("did not collect GitHub stats as no repository URL was present"); - return Ok(None); - } - }; - match updater.load_repository(conn, repo) { - Ok(repo) => Ok(repo), - Err(err) => { - warn!("failed to collect GitHub stats: {}", err); - Ok(None) - } - } + fn get_repo(&self, metadata: &MetadataPackage) -> Result> { + self.repository_stats_updater.load_repository(metadata) } } diff --git a/src/lib.rs b/src/lib.rs index 41adf3e3f..827e45a12 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ mod docbuilder; mod error; pub mod index; mod metrics; +pub mod repositories; pub mod storage; #[cfg(test)] mod test; diff --git a/src/repositories/github.rs b/src/repositories/github.rs new file mode 100644 index 000000000..1ecbf0e2c --- /dev/null +++ b/src/repositories/github.rs @@ -0,0 +1,332 @@ +use crate::error::Result; +use crate::Config; +use chrono::{DateTime, Utc}; +use log::{trace, warn}; +use reqwest::{ + blocking::Client as HttpClient, + header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT}, +}; +use serde::Deserialize; + +use crate::repositories::{ + FetchRepositoriesResult, RateLimitReached, Repository, RepositoryForge, RepositoryName, + APP_USER_AGENT, +}; + +const GRAPHQL_UPDATE: &str = "query($ids: [ID!]!) { + nodes(ids: $ids) { + ... on Repository { + id + nameWithOwner + pushedAt + description + stargazerCount + forkCount + issues(states: [OPEN]) { totalCount } + } + } + rateLimit { + remaining + } +}"; + +const GRAPHQL_SINGLE: &str = "query($owner: String!, $repo: String!) 
{ + repository(owner: $owner, name: $repo) { + id + nameWithOwner + pushedAt + description + stargazerCount + forkCount + issues(states: [OPEN]) { totalCount } + } +}"; + +pub struct GitHub { + client: HttpClient, + github_updater_min_rate_limit: u32, +} + +impl GitHub { + /// Returns `Err` if the access token has invalid syntax (but *not* if it isn't authorized). + /// Returns `Ok(None)` if there is no access token. + pub fn new(config: &Config) -> Result> { + let mut headers = HeaderMap::new(); + headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)); + headers.insert(ACCEPT, HeaderValue::from_static("application/json")); + + if let Some(ref token) = config.github_accesstoken { + headers.insert( + AUTHORIZATION, + HeaderValue::from_str(&format!("token {}", token))?, + ); + } else { + warn!("did not collect `github.com` stats as no token was provided"); + return Ok(None); + } + + let client = HttpClient::builder().default_headers(headers).build()?; + + Ok(Some(GitHub { + client, + github_updater_min_rate_limit: config.github_updater_min_rate_limit, + })) + } +} + +impl RepositoryForge for GitHub { + fn host(&self) -> &'static str { + "github.com" + } + + fn icon(&self) -> &'static str { + "github" + } + + /// How many repositories to update in a single chunk. Values over 100 are probably going to be + /// rejected by the GraphQL API. + fn chunk_size(&self) -> usize { + 100 + } + + fn fetch_repository(&self, name: &RepositoryName) -> Result> { + // Fetch the latest information from the GitHub API. 
+ let response: GraphResponse = self.graphql( + GRAPHQL_SINGLE, + serde_json::json!({ + "owner": name.owner, + "repo": name.repo, + }), + )?; + if let Some(repo) = response.data.repository { + Ok(Some(Repository { + id: repo.id, + name_with_owner: repo.name_with_owner, + description: repo.description, + last_activity_at: repo.pushed_at, + stars: repo.stargazer_count, + forks: repo.fork_count, + issues: repo.issues.total_count, + })) + } else { + Ok(None) + } + } + + fn fetch_repositories(&self, node_ids: &[String]) -> Result { + let response: GraphResponse>> = self.graphql( + GRAPHQL_UPDATE, + serde_json::json!({ + "ids": node_ids, + }), + )?; + + // The error is returned *before* we reach the rate limit, to ensure we always have an + // amount of API calls we can make at any time. + trace!( + "GitHub GraphQL rate limit remaining: {}", + response.data.rate_limit.remaining + ); + if response.data.rate_limit.remaining < self.github_updater_min_rate_limit { + return Err(RateLimitReached.into()); + } + + let mut ret = FetchRepositoriesResult::default(); + + for error in &response.errors { + use GraphErrorPath::*; + match (error.error_type.as_str(), error.path.as_slice()) { + ("NOT_FOUND", [Segment(nodes), Index(idx)]) if nodes == "nodes" => { + ret.missing.push(node_ids[*idx as usize].clone()); + } + _ => failure::bail!("error updating repositories: {}", error.message), + } + } + // When a node is missing (for example if the repository was deleted or made private) the + // GraphQL API will return *both* a `null` instead of the data in the nodes list and a + // `NOT_FOUND` error in the errors list. 
+ for node in response.data.nodes.into_iter().flatten() { + let repo = Repository { + id: node.id, + name_with_owner: node.name_with_owner, + description: node.description, + last_activity_at: node.pushed_at, + stars: node.stargazer_count, + forks: node.fork_count, + issues: node.issues.total_count, + }; + ret.present.insert(repo.id.clone(), repo); + } + + Ok(ret) + } +} + +impl GitHub { + fn graphql( + &self, + query: &str, + variables: impl serde::Serialize, + ) -> Result> { + #[cfg(not(test))] + let host = "https://api.github.com/graphql"; + #[cfg(test)] + let host = format!("{}/graphql", mockito::server_url()); + #[cfg(test)] + let host = &host; + + Ok(self + .client + .post(host) + .json(&serde_json::json!({ + "query": query, + "variables": variables, + })) + .send()? + .error_for_status()? + .json()?) + } +} + +#[derive(Debug, Deserialize)] +struct GraphResponse { + data: T, + #[serde(default)] + errors: Vec, +} + +#[derive(Debug, Deserialize)] +struct GraphError { + #[serde(rename = "type")] + error_type: String, + path: Vec, + message: String, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum GraphErrorPath { + Segment(String), + Index(i64), +} + +#[derive(Debug, Deserialize)] +struct GraphRateLimit { + remaining: u32, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct GraphNodes { + nodes: Vec, + rate_limit: GraphRateLimit, +} + +#[derive(Debug, Deserialize)] +struct GraphRepositoryNode { + repository: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct GraphRepository { + id: String, + name_with_owner: String, + pushed_at: Option>, + description: Option, + stargazer_count: i64, + fork_count: i64, + issues: GraphIssues, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct GraphIssues { + total_count: i64, +} + +#[cfg(test)] +mod tests { + use super::GitHub; + use crate::repositories::updater::{repository_name, RepositoryForge}; + use mockito::mock; + + 
#[test] + fn test_rate_limit() { + crate::test::wrapper(|env| { + let mut config = env.base_config(); + config.github_accesstoken = Some("qsjdnfqdq".to_owned()); + let updater = GitHub::new(&config).expect("GitHub::new failed").unwrap(); + + let _m1 = mock("POST", "/graphql") + .with_header("content-type", "application/json") + .with_body(r#"{"data": {"nodes": [], "rateLimit": {"remaining": 0}}}"#) + .create(); + + match updater.fetch_repositories(&[String::new()]) { + Err(e) if format!("{:?}", e).contains("RateLimitReached") => {} + x => panic!("Expected Err(RateLimitReached), found: {:?}", x), + } + Ok(()) + }); + } + + #[test] + fn not_found() { + crate::test::wrapper(|env| { + let mut config = env.base_config(); + config.github_accesstoken = Some("qsjdnfqdq".to_owned()); + let updater = GitHub::new(&config).expect("GitHub::new failed").unwrap(); + + let _m1 = mock("POST", "/graphql") + .with_header("content-type", "application/json") + .with_body( + r#"{"data": {"nodes": [], "rateLimit": {"remaining": 100000}}, "errors": + [{"type": "NOT_FOUND", "path": ["nodes", 0], "message": "none"}]}"#, + ) + .create(); + + match updater.fetch_repositories(&[String::new()]) { + Ok(res) => { + assert_eq!(res.missing, vec![String::new()]); + assert_eq!(res.present.len(), 0); + } + x => panic!("Failed: {:?}", x), + } + Ok(()) + }); + } + + #[test] + fn get_repository_info() { + crate::test::wrapper(|env| { + let mut config = env.base_config(); + config.github_accesstoken = Some("qsjdnfqdq".to_owned()); + let updater = GitHub::new(&config).expect("GitHub::new failed").unwrap(); + + let _m1 = mock("POST", "/graphql") + .with_header("content-type", "application/json") + .with_body( + r#"{"data": {"repository": {"id": "hello", "nameWithOwner": "foo/bar", + "description": "this is", "stargazerCount": 10, "forkCount": 11, + "issues": {"totalCount": 12}}}}"#, + ) + .create(); + + let repo = updater + .fetch_repository( + 
&repository_name("https://gitlab.com/foo/bar").expect("repository_name failed"), + ) + .expect("fetch_repository failed") + .unwrap(); + + assert_eq!(repo.id, "hello"); + assert_eq!(repo.name_with_owner, "foo/bar"); + assert_eq!(repo.description, Some("this is".to_owned())); + assert_eq!(repo.stars, 10); + assert_eq!(repo.forks, 11); + assert_eq!(repo.issues, 12); + Ok(()) + }); + } +} diff --git a/src/repositories/gitlab.rs b/src/repositories/gitlab.rs new file mode 100644 index 000000000..46c912bc9 --- /dev/null +++ b/src/repositories/gitlab.rs @@ -0,0 +1,320 @@ +use crate::error::Result; +use chrono::{DateTime, Utc}; +use log::warn; +use reqwest::{ + blocking::Client as HttpClient, + header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT}, +}; +use serde::Deserialize; +use std::collections::HashSet; +use std::str::FromStr; + +use crate::repositories::{ + FetchRepositoriesResult, RateLimitReached, Repository, RepositoryForge, RepositoryName, + APP_USER_AGENT, +}; + +const GRAPHQL_UPDATE: &str = "query($ids: [ID!]!) { + projects(ids: $ids) { + nodes { + id + fullPath + lastActivityAt + description + starCount + forksCount + openIssuesCount + } + } +}"; + +const GRAPHQL_SINGLE: &str = "query($fullPath: ID!) 
{ + project(fullPath: $fullPath) { + id + fullPath + lastActivityAt + description + starCount + forksCount + openIssuesCount + } +}"; + +pub struct GitLab { + client: HttpClient, + host: &'static str, +} + +impl GitLab { + pub fn new(host: &'static str, access_token: &Option) -> Result { + let mut headers = HeaderMap::new(); + headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)); + headers.insert(ACCEPT, HeaderValue::from_static("application/json")); + + if let Some(token) = access_token { + headers.insert( + AUTHORIZATION, + HeaderValue::from_str(&format!("Bearer {}", token))?, + ); + } else { + warn!( + "will try to retrieve `{}` stats without token since none was provided", + host + ); + } + + let client = HttpClient::builder().default_headers(headers).build()?; + Ok(GitLab { client, host }) + } +} + +impl RepositoryForge for GitLab { + fn host(&self) -> &'static str { + self.host + } + + fn icon(&self) -> &'static str { + "gitlab" + } + + fn chunk_size(&self) -> usize { + 100 + } + + fn fetch_repository(&self, name: &RepositoryName) -> Result> { + let project_path = format!("{}/{}", name.owner, name.repo); + // Fetch the latest information from the Gitlab API. 
+ let response: (GraphResponse, Option) = self.graphql( + GRAPHQL_SINGLE, + serde_json::json!({ + "fullPath": &project_path, + }), + )?; + let (response, rate_limit) = response; + if let Some(repo) = response.data.and_then(|d| d.project) { + Ok(Some(Repository { + id: repo.id, + name_with_owner: repo.full_path, + description: repo.description, + last_activity_at: repo.last_activity_at, + stars: repo.star_count, + forks: repo.forks_count, + issues: repo.open_issues_count.unwrap_or(0), + })) + } else if rate_limit.map(|x| x < 1).unwrap_or(false) { + Err(RateLimitReached.into()) + } else { + Ok(None) + } + } + + fn fetch_repositories(&self, ids: &[String]) -> Result { + let response: ( + GraphResponse>>, + Option, + ) = self.graphql( + GRAPHQL_UPDATE, + serde_json::json!({ + "ids": ids, + }), + )?; + let (response, rate_limit) = response; + let mut ret = FetchRepositoriesResult::default(); + // When gitlab doesn't find an ID, it simply doesn't list it. So we need to actually check + // which nodes remain at the end to delete their DB entry. + let mut node_ids: HashSet<&String> = ids.iter().collect(); + + if let Some(data) = response.data { + if !response.errors.is_empty() { + failure::bail!("error updating repositories: {:?}", response.errors); + } + for node in data.projects.nodes.into_iter().flatten() { + let repo = Repository { + id: node.id, + name_with_owner: node.full_path, + description: node.description, + last_activity_at: node.last_activity_at, + stars: node.star_count, + forks: node.forks_count, + issues: node.open_issues_count.unwrap_or(0), + }; + let id = repo.id.clone(); + node_ids.remove(&id); + ret.present.insert(id, repo); + } + + if ret.present.is_empty() && rate_limit.map(|x| x < 1).unwrap_or(false) { + return Err(RateLimitReached.into()); + } + + // Those nodes were not returned by gitlab, meaning they don't exist (anymore?). 
+ ret.missing = node_ids.into_iter().map(|s| s.to_owned()).collect(); + + Ok(ret) + } else if rate_limit.map(|x| x < 1).unwrap_or(false) { + Err(RateLimitReached.into()) + } else { + failure::bail!("no data") + } + } +} + +impl GitLab { + fn graphql( + &self, + query: &str, + variables: impl serde::Serialize, + ) -> Result<(GraphResponse, Option)> { + #[cfg(not(test))] + let host = format!("https://{}/api/graphql", self.host); + #[cfg(test)] + let host = format!("{}/api/graphql", mockito::server_url()); + + let res = self + .client + .post(&host) + .json(&serde_json::json!({ + "query": query, + "variables": variables, + })) + .send()? + .error_for_status()?; + // There are a few other header values that might interesting so keeping them here: + // * RateLimit-Observed: '1' + // * RateLimit-Remaining: '1999' + // * RateLimit-ResetTime: 'Wed, 10 Feb 2021 21:31:42 GMT' + // * RateLimit-Limit: '2000' + let rate_limit = res + .headers() + .get("RateLimit-Remaining") + .and_then(|x| usize::from_str(x.to_str().ok()?).ok()); + Ok((res.json()?, rate_limit)) + } +} + +#[derive(Debug, Deserialize)] +struct GraphProjects { + projects: GraphNodes, +} + +#[derive(Debug, Deserialize)] +struct GraphResponse { + data: Option, + #[serde(default)] + errors: Vec, +} + +#[derive(Debug, Deserialize)] +struct GraphError { + message: String, + locations: Vec, +} + +#[derive(Debug, Deserialize)] +struct GraphErrorLocation { + line: u32, + column: u32, +} + +#[derive(Debug, Deserialize)] +struct GraphRateLimit { + remaining: u32, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct GraphNodes { + nodes: Vec, +} + +#[derive(Debug, Deserialize)] +struct GraphProjectNode { + project: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct GraphProject { + id: String, + full_path: String, + last_activity_at: Option>, + description: Option, + star_count: i64, + forks_count: i64, + open_issues_count: Option, +} + +#[cfg(test)] +mod tests 
{ + use super::GitLab; + use crate::repositories::updater::{repository_name, RepositoryForge}; + use mockito::mock; + + #[test] + fn test_rate_limit() { + let updater = GitLab::new("gitlab.com", &None).expect("GitLab::new failed"); + + let _m1 = mock("POST", "/api/graphql") + .with_header("content-type", "application/json") + .with_header("RateLimit-Remaining", "0") + .with_body("{}") + .create(); + + match updater.fetch_repository( + &repository_name("https://gitlab.com/foo/bar").expect("repository_name failed"), + ) { + Err(e) if format!("{:?}", e).contains("RateLimitReached") => {} + x => panic!("Expected Err(RateLimitReached), found: {:?}", x), + } + match updater.fetch_repositories(&[String::new()]) { + Err(e) if format!("{:?}", e).contains("RateLimitReached") => {} + x => panic!("Expected Err(RateLimitReached), found: {:?}", x), + } + } + + #[test] + fn not_found() { + let updater = GitLab::new("gitlab.com", &None).expect("GitLab::new failed"); + + let _m1 = mock("POST", "/api/graphql") + .with_header("content-type", "application/json") + .with_body(r#"{"data": {"projects": {"nodes": []}}}"#) + .create(); + + match updater.fetch_repositories(&[String::new()]) { + Ok(res) => { + assert_eq!(res.missing, vec![String::new()]); + assert_eq!(res.present.len(), 0); + } + x => panic!("Failed: {:?}", x), + } + } + + #[test] + fn get_repository_info() { + let updater = GitLab::new("gitlab.com", &None).expect("GitLab::new failed"); + + let _m1 = mock("POST", "/api/graphql") + .with_header("content-type", "application/json") + .with_body( + r#"{"data": {"project": {"id": "hello", "fullPath": "foo/bar", + "description": "this is", "starCount": 10, "forksCount": 11, + "openIssuesCount": 12}}}"#, + ) + .create(); + + let repo = updater + .fetch_repository( + &repository_name("https://gitlab.com/foo/bar").expect("repository_name failed"), + ) + .expect("fetch_repository failed") + .unwrap(); + + assert_eq!(repo.id, "hello"); + assert_eq!(repo.name_with_owner, "foo/bar"); + 
assert_eq!(repo.description, Some("this is".to_owned())); + assert_eq!(repo.stars, 10); + assert_eq!(repo.forks, 11); + assert_eq!(repo.issues, 12); + } +} diff --git a/src/repositories/mod.rs b/src/repositories/mod.rs new file mode 100644 index 000000000..9a298eb9b --- /dev/null +++ b/src/repositories/mod.rs @@ -0,0 +1,20 @@ +pub use self::github::GitHub; +pub use self::gitlab::GitLab; +pub(crate) use self::updater::RepositoryName; +pub use self::updater::{ + FetchRepositoriesResult, Repository, RepositoryForge, RepositoryStatsUpdater, +}; + +pub const APP_USER_AGENT: &str = concat!( + env!("CARGO_PKG_NAME"), + " ", + include_str!(concat!(env!("OUT_DIR"), "/git_version")) +); + +#[derive(Debug, failure::Fail)] +#[fail(display = "rate limit reached")] +struct RateLimitReached; + +mod github; +mod gitlab; +mod updater; diff --git a/src/repositories/updater.rs b/src/repositories/updater.rs new file mode 100644 index 000000000..e538aafbc --- /dev/null +++ b/src/repositories/updater.rs @@ -0,0 +1,402 @@ +use crate::error::Result; +use crate::repositories::{GitHub, GitLab}; +use crate::utils::MetadataPackage; +use crate::{db::Pool, Config}; +use chrono::{DateTime, Utc}; +use log::{debug, info, trace, warn}; +use once_cell::sync::Lazy; +use postgres::Client; +use regex::Regex; +use std::collections::{HashMap, HashSet}; +use std::fmt; + +pub trait RepositoryForge { + /// Result used both as the `host` column in the DB and to match repository URLs during + /// backfill. + fn host(&self) -> &'static str; + + /// FontAwesome icon used in the front-end. + fn icon(&self) -> &'static str; + + /// How many items we can query in one graphql request. + fn chunk_size(&self) -> usize; + + /// Used by both backfill_repositories and load_repository. When the repository is missing + /// `None` is returned. + fn fetch_repository(&self, name: &RepositoryName) -> Result>; + + /// Used by update_all_crates. 
+ /// + /// The returned struct will contain all the information needed for `RepositoriesUpdater` to + /// update repositories that are still present and delete the missing ones. + fn fetch_repositories(&self, ids: &[String]) -> Result; +} + +#[derive(Debug)] +pub struct Repository { + pub id: String, + pub name_with_owner: String, + pub description: Option, + pub last_activity_at: Option>, + pub stars: i64, + pub forks: i64, + pub issues: i64, +} + +#[derive(Default, Debug)] +pub struct FetchRepositoriesResult { + pub present: HashMap, + pub missing: Vec, +} + +pub struct RepositoryStatsUpdater { + updaters: Vec>, + pool: Pool, +} + +impl fmt::Debug for RepositoryStatsUpdater { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "RepositoryStatsUpdater {{ updaters: ")?; + f.debug_list() + .entries(self.updaters.iter().map(|u| u.host())) + .finish()?; + write!(f, " }}") + } +} + +impl RepositoryStatsUpdater { + pub fn new(config: &Config, pool: Pool) -> Self { + let mut updaters: Vec> = Vec::new(); + if let Ok(Some(updater)) = GitHub::new(&config) { + updaters.push(Box::new(updater)); + } + if let Ok(updater) = GitLab::new("gitlab.com", &config.gitlab_accesstoken) { + updaters.push(Box::new(updater)); + } + if let Ok(updater) = GitLab::new("gitlab.freedesktop.org", &None) { + updaters.push(Box::new(updater)); + } + Self { updaters, pool } + } + + pub(crate) fn load_repository(&self, metadata: &MetadataPackage) -> Result> { + let url = match &metadata.repository { + Some(url) => url, + None => { + debug!("did not collect stats as no repository URL was present"); + return Ok(None); + } + }; + let mut conn = self.pool.get()?; + self.load_repository_inner(&mut conn, url) + } + + fn load_repository_inner(&self, conn: &mut Client, url: &str) -> Result> { + let name = match repository_name(url) { + Some(name) => name, + None => return Ok(None), + }; + + // Avoid querying the APIs for repositories we already loaded. 
+ if let Some(row) = conn.query_opt( + "SELECT id FROM repositories WHERE name = $1 AND host = $2 LIMIT 1;", + &[&format!("{}/{}", name.owner, name.repo), &name.host], + )? { + return Ok(Some(row.get("id"))); + } + if let Some(updater) = self.updaters.iter().find(|u| u.host() == name.host) { + let res = match updater.fetch_repository(&name) { + Ok(Some(repo)) => self.store_repository(conn, updater.host(), repo), + Ok(None) => { + warn!( + "failed to fetch repository `{}` on host `{}`", + url, + updater.host() + ); + return Ok(None); + } + Err(err) => Err(err), + }; + return match res { + Ok(repo_id) => Ok(Some(repo_id)), + Err(err) => failure::bail!("failed to collect `{}` stats: {}", updater.host(), err), + }; + } + // It means that none of our updaters have a matching host. + Ok(None) + } + + pub fn update_all_crates(&self) -> Result<()> { + let mut conn = self.pool.get()?; + for updater in &self.updaters { + info!("started updating `{}` repositories stats", updater.host()); + + let needs_update = conn + .query( + "SELECT host_id + FROM repositories + WHERE host = $1 AND updated_at < NOW() - INTERVAL '1 day';", + &[&updater.host()], + )? + .into_iter() + .map(|row| row.get(0)) + .collect::>(); + + if needs_update.is_empty() { + info!( + "no `{}` repositories stats needed to be updated", + updater.host() + ); + continue; + } + // FIXME: The collect can be avoided if we use Itertools::chunks: + // https://docs.rs/itertools/0.10.0/itertools/trait.Itertools.html#method.chunks. 
+ for chunk in needs_update.chunks(updater.chunk_size()) { + let res = updater.fetch_repositories(chunk)?; + for node in res.missing { + self.delete_repository(&mut conn, &node, updater.host())?; + } + for (_, repo) in res.present { + self.store_repository(&mut conn, updater.host(), repo)?; + } + } + info!("finished updating `{}` repositories stats", updater.host()); + } + Ok(()) + } + + pub fn backfill_repositories(&self) -> Result<()> { + let mut conn = self.pool.get()?; + for updater in &self.updaters { + info!( + "started backfilling `{}` repositories stats", + updater.host() + ); + + let needs_backfilling = conn.query( + "SELECT releases.id, crates.name, releases.version, releases.repository_url + FROM releases + INNER JOIN crates ON (crates.id = releases.crate_id) + WHERE repository_id IS NULL AND repository_url LIKE $1;", + &[&format!("%{}%", updater.host())], + )?; + + let mut missing_urls = HashSet::new(); + for row in &needs_backfilling { + let id: i32 = row.get("id"); + let name: String = row.get("name"); + let version: String = row.get("version"); + let url: String = row.get("repository_url"); + + if missing_urls.contains(&url) { + debug!("{} {} points to a known missing repo", name, version); + } else if let Some(node_id) = self.load_repository_inner(&mut conn, &url)? { + conn.execute( + "UPDATE releases SET repository_id = $1 WHERE id = $2;", + &[&node_id, &id], + )?; + info!( + "backfilled `{}` repositories for {} {}", + updater.host(), + name, + version, + ); + } else { + debug!( + "{} {} does not point to a {} repository", + name, + version, + updater.host(), + ); + missing_urls.insert(url); + } + } + } + + Ok(()) + } + + pub fn get_icon_name(&self, host: &str) -> &'static str { + for updater in &self.updaters { + if updater.host() == host { + return updater.icon(); + } + } + // The default icon in case it doesn't match any of the "known" ones. 
+ "code-branch" + } + + fn store_repository(&self, conn: &mut Client, host: &str, repo: Repository) -> Result { + trace!( + "storing {} repository stats for {}", + host, + repo.name_with_owner, + ); + let data = conn.query_one( + "INSERT INTO repositories ( + host, host_id, name, description, last_commit, stars, forks, issues, updated_at + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, NOW()) + ON CONFLICT (host, host_id) DO + UPDATE SET + name = $3, + description = $4, + last_commit = $5, + stars = $6, + forks = $7, + issues = $8, + updated_at = NOW() + RETURNING id;", + &[ + &host, + &repo.id, + &repo.name_with_owner, + &repo.description, + &repo.last_activity_at, + &(repo.stars as i32), + &(repo.forks as i32), + &(repo.issues as i32), + ], + )?; + Ok(data.get(0)) + } + + fn delete_repository(&self, conn: &mut Client, host_id: &str, host: &str) -> Result<()> { + trace!( + "removing repository stats for host ID `{}` and host `{}`", + host_id, + host + ); + conn.execute( + "DELETE FROM repositories WHERE host_id = $1 AND host = $2;", + &[&host_id, &host], + )?; + Ok(()) + } +} + +pub(crate) fn repository_name(url: &str) -> Option { + static RE: Lazy = Lazy::new(|| { + Regex::new(r"https?://(?P[^/]+)/(?P[\w\._/-]+)/(?P[\w\._-]+)").unwrap() + }); + + let cap = RE.captures(url)?; + let host = cap.name("host").expect("missing group 'host'").as_str(); + let owner = cap.name("owner").expect("missing group 'owner'").as_str(); + let repo = cap.name("repo").expect("missing group 'repo'").as_str(); + Some(RepositoryName { + owner, + repo: repo.strip_suffix(".git").unwrap_or(repo), + host, + }) +} + +#[derive(Debug, Eq, PartialEq)] +pub struct RepositoryName<'a> { + pub owner: &'a str, + pub repo: &'a str, + pub host: &'a str, +} + +#[cfg(test)] +mod test { + use super::*; + use crate::context::Context; + + #[test] + fn test_repository_name() { + fn assert_name<'a, T: Into>>(url: &str, data: T) { + let data = data.into(); + assert_eq!( + repository_name(url), + data.map(|(owner, 
repo, host)| RepositoryName { owner, repo, host }), + ); + } + + // gitlab checks + assert_name( + "https://gitlab.com/onur/cratesfyi", + ("onur", "cratesfyi", "gitlab.com"), + ); + assert_name( + "http://gitlab.com/onur/cratesfyi", + ("onur", "cratesfyi", "gitlab.com"), + ); + assert_name( + "https://gitlab.com/onur/cratesfyi.git", + ("onur", "cratesfyi", "gitlab.com"), + ); + assert_name( + "https://gitlab.com/docopt/docopt.rs", + ("docopt", "docopt.rs", "gitlab.com"), + ); + assert_name( + "https://gitlab.com/onur23cmD_M_R_L_/crates_fy-i", + ("onur23cmD_M_R_L_", "crates_fy-i", "gitlab.com"), + ); + assert_name( + "https://gitlab.freedesktop.org/test1/test2", + ("test1", "test2", "gitlab.freedesktop.org"), + ); + assert_name( + "https://gitlab.com/artgam3s/public-libraries/rust/rpa", + ("artgam3s/public-libraries/rust", "rpa", "gitlab.com"), + ); + + assert_name("https://gitlab.com/moi/", None); + assert_name("https://gitlab.com/moi", None); + assert_name("https://gitlab.com", None); + assert_name("https://gitlab.com/", None); + + // github checks + assert_name( + "https://github.com/onur/cratesfyi", + ("onur", "cratesfyi", "github.com"), + ); + assert_name( + "http://github.com/onur/cratesfyi", + ("onur", "cratesfyi", "github.com"), + ); + assert_name( + "https://github.com/onur/cratesfyi.git", + ("onur", "cratesfyi", "github.com"), + ); + assert_name( + "https://github.com/docopt/docopt.rs", + ("docopt", "docopt.rs", "github.com"), + ); + assert_name( + "https://github.com/onur23cmD_M_R_L_/crates_fy-i", + ("onur23cmD_M_R_L_", "crates_fy-i", "github.com"), + ); + + assert_name("https://github.com/moi/", None); + assert_name("https://github.com/moi", None); + assert_name("https://github.com", None); + assert_name("https://github.com/", None); + + // Unknown host + assert_name("https://git.sr.ht/~ireas/merge-rs", None); + } + + #[test] + fn test_icon_name() { + crate::test::wrapper(|env| { + let mut config = env.base_config(); + config.github_accesstoken = 
Some("qsjdnfqdq".to_owned()); + let updater = RepositoryStatsUpdater::new(&config, env.pool()?); + + assert_eq!(updater.get_icon_name(""), "code-branch"); + assert_eq!(updater.get_icon_name("random"), "code-branch"); + assert_eq!(updater.get_icon_name("github"), "code-branch"); + assert_eq!(updater.get_icon_name("github.com"), "github"); + assert_eq!(updater.get_icon_name("gitlab"), "code-branch"); + assert_eq!(updater.get_icon_name("gitlab.com"), "gitlab"); + assert_eq!(updater.get_icon_name("gitlab.freedesktop.org"), "gitlab"); + assert_eq!( + updater.get_icon_name("a.gitlab.freedesktop.org"), + "code-branch" + ); + Ok(()) + }); + } +} diff --git a/src/test/fakes.rs b/src/test/fakes.rs index c6a7bbbdd..aec6590b8 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -310,7 +310,7 @@ impl<'a> FakeRelease<'a> { } } - let github_repo = match self.github_stats { + let repository = match self.github_stats { Some(stats) => Some(stats.create(&mut self.db.conn())?), None => None, }; @@ -336,7 +336,7 @@ impl<'a> FakeRelease<'a> { self.has_docs, self.has_examples, algs, - github_repo, + repository, )?; crate::db::update_crate_data_in_database( &mut db.conn(), @@ -362,19 +362,20 @@ struct FakeGithubStats { } impl FakeGithubStats { - fn create(&self, conn: &mut Client) -> Result { + fn create(&self, conn: &mut Client) -> Result { let existing_count: i64 = conn - .query_one("SELECT COUNT(*) FROM github_repos;", &[])? + .query_one("SELECT COUNT(*) FROM repositories;", &[])? 
.get(0); - let id = base64::encode(format!("FAKE ID {}", existing_count)); + let host_id = base64::encode(format!("FAKE ID {}", existing_count)); - conn.execute( - "INSERT INTO github_repos (id, name, description, last_commit, stars, forks, issues, updated_at) - VALUES ($1, $2, 'Fake description!', NOW(), $3, $4, $5, NOW());", - &[&id, &self.repo, &self.stars, &self.forks, &self.issues], + let data = conn.query_one( + "INSERT INTO repositories (host, host_id, name, description, last_commit, stars, forks, issues, updated_at) + VALUES ('github.com', $1, $2, 'Fake description!', NOW(), $3, $4, $5, NOW()) + RETURNING id;", + &[&host_id, &self.repo, &self.stars, &self.forks, &self.issues], )?; - Ok(id) + Ok(data.get(0)) } } diff --git a/src/test/mod.rs b/src/test/mod.rs index 6bc3a6e82..6f46a43a9 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -2,6 +2,7 @@ mod fakes; pub(crate) use self::fakes::FakeBuild; use crate::db::{Pool, PoolClient}; +use crate::repositories::RepositoryStatsUpdater; use crate::storage::{Storage, StorageKind}; use crate::web::Server; use crate::{BuildQueue, Config, Context, Index, Metrics}; @@ -101,6 +102,7 @@ pub(crate) struct TestEnvironment { index: OnceCell>, metrics: OnceCell>, frontend: OnceCell, + repository_stats_updater: OnceCell>, } pub(crate) fn init_logger() { @@ -123,6 +125,7 @@ impl TestEnvironment { index: OnceCell::new(), metrics: OnceCell::new(), frontend: OnceCell::new(), + repository_stats_updater: OnceCell::new(), } } @@ -137,7 +140,7 @@ impl TestEnvironment { } } - fn base_config(&self) -> Config { + pub(crate) fn base_config(&self) -> Config { let mut config = Config::from_env().expect("failed to get base config"); // create index directory @@ -212,6 +215,17 @@ impl TestEnvironment { .clone() } + pub(crate) fn repository_stats_updater(&self) -> Arc { + self.repository_stats_updater + .get_or_init(|| { + Arc::new(RepositoryStatsUpdater::new( + &self.config(), + self.pool().expect("failed to get the pool"), + )) + }) + 
.clone() + } + pub(crate) fn db(&self) -> &TestDatabase { self.db.get_or_init(|| { TestDatabase::new(&self.config(), self.metrics()).expect("failed to initialize the db") @@ -251,11 +265,16 @@ impl Context for TestEnvironment { fn index(&self) -> Result, Error> { Ok(self.index()) } + + fn repository_stats_updater(&self) -> Result, Error> { + Ok(self.repository_stats_updater()) + } } pub(crate) struct TestDatabase { pool: Pool, schema: String, + repository_stats_updater: RepositoryStatsUpdater, } impl TestDatabase { @@ -264,6 +283,9 @@ impl TestDatabase { // test to create a fresh instance of the database to run within. let schema = format!("docs_rs_test_schema_{}", rand::random::()); + let pool = Pool::new_with_schema(&config, metrics, &schema)?; + let repository_stats_updater = RepositoryStatsUpdater::new(config, pool.clone()); + let mut conn = Connection::connect(&config.database_url, postgres::NoTls)?; conn.batch_execute(&format!( " @@ -298,8 +320,9 @@ impl TestDatabase { conn.batch_execute(&query)?; Ok(TestDatabase { - pool: Pool::new_with_schema(config, metrics, &schema)?, + pool, schema, + repository_stats_updater, }) } @@ -312,6 +335,10 @@ impl TestDatabase { .get() .expect("failed to get a connection out of the pool") } + + pub(crate) fn repository_stats_updater(&self) -> &RepositoryStatsUpdater { + &self.repository_stats_updater + } } impl Drop for TestDatabase { diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index d3bebd958..f27770f1a 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -2,10 +2,7 @@ //! //! 
This daemon will start web server, track new packages and build them -use crate::{ - utils::{queue_builder, GithubUpdater}, - Context, DocBuilder, RustwideBuilder, -}; +use crate::{utils::queue_builder, Context, DocBuilder, RustwideBuilder}; use failure::Error; use log::{debug, error, info}; use std::thread; @@ -66,29 +63,28 @@ pub fn start_daemon(context: &dyn Context, enable_registry_watcher: bool) -> Res // build new crates every minute let pool = context.pool()?; let build_queue = context.build_queue()?; - let cloned_config = config.clone(); let rustwide_builder = RustwideBuilder::init(context)?; thread::Builder::new() .name("build queue reader".to_string()) .spawn(move || { - let doc_builder = - DocBuilder::new(cloned_config.clone(), pool.clone(), build_queue.clone()); + let doc_builder = DocBuilder::new(config.clone(), pool.clone(), build_queue.clone()); queue_builder(doc_builder, rustwide_builder, build_queue).unwrap(); }) .unwrap(); - if let Some(github_updater) = GithubUpdater::new(config, context.pool()?)? { - cron( - "github stats updater", - Duration::from_secs(60 * 60), - move || { - github_updater.update_all_crates()?; - Ok(()) - }, - )?; - } else { - log::warn!("GitHub stats updater not started as no token was provided"); - } + // This call will still skip github repositories updates and continue if no token is provided + // (gitlab doesn't require to have a token). The only time this can return an error is when + // creating a pool or if config fails, which shouldn't happen here because this is run right at + // startup. 
+ let updater = context.repository_stats_updater()?; + cron( + "repositories stats updater", + Duration::from_secs(60 * 60), + move || { + updater.update_all_crates()?; + Ok(()) + }, + )?; // Never returns; `server` blocks indefinitely when dropped // NOTE: if a failure occurred earlier in `start_daemon`, the server will _not_ be joined - @@ -98,7 +94,7 @@ pub fn start_daemon(context: &dyn Context, enable_registry_watcher: bool) -> Res .map_err(|_| failure::err_msg("web server panicked")) } -fn cron(name: &'static str, interval: Duration, exec: F) -> Result<(), Error> +pub(crate) fn cron(name: &'static str, interval: Duration, exec: F) -> Result<(), Error> where F: Fn() -> Result<(), Error> + Send + 'static, { diff --git a/src/utils/github_updater.rs b/src/utils/github_updater.rs deleted file mode 100644 index 0eb9191f9..000000000 --- a/src/utils/github_updater.rs +++ /dev/null @@ -1,400 +0,0 @@ -use crate::error::Result; -use crate::{db::Pool, Config}; -use chrono::{DateTime, Utc}; -use log::{debug, info, trace, warn}; -use once_cell::sync::Lazy; -use postgres::Client; -use regex::Regex; -use reqwest::{ - blocking::Client as HttpClient, - header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT}, -}; -use serde::Deserialize; -use std::collections::HashSet; -use std::sync::Arc; - -const APP_USER_AGENT: &str = concat!( - env!("CARGO_PKG_NAME"), - " ", - include_str!(concat!(env!("OUT_DIR"), "/git_version")) -); - -const GRAPHQL_UPDATE: &str = "query($ids: [ID!]!) { - nodes(ids: $ids) { - ... on Repository { - id - nameWithOwner - pushedAt - description - stargazerCount - forkCount - issues(states: [OPEN]) { totalCount } - } - } - rateLimit { - remaining - } -}"; - -const GRAPHQL_SINGLE: &str = "query($owner: String!, $repo: String!) { - repository(owner: $owner, name: $repo) { - id - nameWithOwner - pushedAt - description - stargazerCount - forkCount - issues(states: [OPEN]) { totalCount } - } -}"; - -/// How many repositories to update in a single chunk. 
Values over 100 are probably going to be -/// rejected by the GraphQL API. -const UPDATE_CHUNK_SIZE: usize = 100; - -pub struct GithubUpdater { - client: HttpClient, - pool: Pool, - config: Arc, -} - -impl GithubUpdater { - /// Returns `Err` if the access token has invalid syntax (but *not* if it isn't authorized). - /// Returns `Ok(None)` if there is no access token. - pub fn new(config: Arc, pool: Pool) -> Result> { - let mut headers = HeaderMap::new(); - headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)); - headers.insert(ACCEPT, HeaderValue::from_static("application/json")); - - if let Some(token) = &config.github_accesstoken { - headers.insert( - AUTHORIZATION, - HeaderValue::from_str(&format!("token {}", token))?, - ); - } else { - return Ok(None); - } - - let client = HttpClient::builder().default_headers(headers).build()?; - - Ok(Some(GithubUpdater { - client, - pool, - config, - })) - } - - pub fn backfill_repositories(&self) -> Result<()> { - info!("started backfilling GitHub repository stats"); - - let mut conn = self.pool.get()?; - let needs_backfilling = conn.query( - "SELECT releases.id, crates.name, releases.version, releases.repository_url - FROM releases - INNER JOIN crates ON (crates.id = releases.crate_id) - WHERE github_repo IS NULL AND repository_url LIKE '%github.com%';", - &[], - )?; - - let mut missing_urls = HashSet::new(); - for row in &needs_backfilling { - let id: i32 = row.get("id"); - let name: String = row.get("name"); - let version: String = row.get("version"); - let url: String = row.get("repository_url"); - - if missing_urls.contains(&url) { - debug!("{} {} points to a known missing repo", name, version); - } else if let Some(node_id) = self.load_repository(&mut conn, &url)? 
{ - conn.execute( - "UPDATE releases SET github_repo = $1 WHERE id = $2;", - &[&node_id, &id], - )?; - info!("backfilled GitHub repository for {} {}", name, version); - } else { - debug!("{} {} does not point to a GitHub repository", name, version); - missing_urls.insert(url); - } - } - - Ok(()) - } - - pub(crate) fn load_repository(&self, conn: &mut Client, url: &str) -> Result> { - let name = match RepositoryName::from_url(url) { - Some(name) => name, - None => return Ok(None), - }; - - // Avoid querying the GitHub API for repositories we already loaded. - if let Some(row) = conn.query_opt( - "SELECT id FROM github_repos WHERE name = $1 LIMIT 1;", - &[&format!("{}/{}", name.owner, name.repo)], - )? { - return Ok(Some(row.get("id"))); - } - - // Fetch the latest information from the GitHub API. - let response: GraphResponse = self.graphql( - GRAPHQL_SINGLE, - serde_json::json!({ - "owner": name.owner, - "repo": name.repo, - }), - )?; - if let Some(repo) = response.data.repository { - self.store_repository(conn, &repo)?; - Ok(Some(repo.id)) - } else if let Some(error) = response.errors.get(0) { - use GraphErrorPath::*; - match (error.error_type.as_str(), error.path.as_slice()) { - ("NOT_FOUND", [Segment(repository)]) if repository == "repository" => Ok(None), - _ => failure::bail!("error loading repository: {}", error.message), - } - } else { - panic!("missing repository but there were no errors!"); - } - } - - /// Updates github fields in crates table - pub fn update_all_crates(&self) -> Result<()> { - info!("started updating GitHub repository stats"); - - let mut conn = self.pool.get()?; - let needs_update = conn - .query( - "SELECT id FROM github_repos WHERE updated_at < NOW() - INTERVAL '1 day';", - &[], - )? 
- .into_iter() - .map(|row| row.get(0)) - .collect::>(); - - if needs_update.is_empty() { - info!("no GitHub repository stats needed to be updated"); - return Ok(()); - } - - for chunk in needs_update.chunks(UPDATE_CHUNK_SIZE) { - if let Err(err) = self.update_repositories(&mut conn, &chunk) { - if err.downcast_ref::().is_some() { - warn!("rate limit reached, blocked the GitHub repository stats updater"); - return Ok(()); - } - return Err(err); - } - } - - info!("finished updating GitHub repository stats"); - Ok(()) - } - - fn update_repositories(&self, conn: &mut Client, node_ids: &[String]) -> Result<()> { - let response: GraphResponse>> = self.graphql( - GRAPHQL_UPDATE, - serde_json::json!({ - "ids": node_ids, - }), - )?; - - // The error is returned *before* we reach the rate limit, to ensure we always have an - // amount of API calls we can make at any time. - trace!( - "GitHub GraphQL rate limit remaining: {}", - response.data.rate_limit.remaining - ); - if response.data.rate_limit.remaining < self.config.github_updater_min_rate_limit { - return Err(RateLimitReached.into()); - } - - // When a node is missing (for example if the repository was deleted or made private) the - // GraphQL API will return *both* a `null` instead of the data in the nodes list and a - // `NOT_FOUND` error in the errors list. 
- for node in response.data.nodes.iter().flatten() { - self.store_repository(conn, &node)?; - } - for error in &response.errors { - use GraphErrorPath::*; - match (error.error_type.as_str(), error.path.as_slice()) { - ("NOT_FOUND", [Segment(nodes), Index(idx)]) if nodes == "nodes" => { - self.delete_repository(conn, &node_ids[*idx as usize])?; - } - _ => failure::bail!("error updating repositories: {}", error.message), - } - } - - Ok(()) - } - - fn graphql( - &self, - query: &str, - variables: impl serde::Serialize, - ) -> Result> { - Ok(self - .client - .post("https://api.github.com/graphql") - .json(&serde_json::json!({ - "query": query, - "variables": variables, - })) - .send()? - .error_for_status()? - .json()?) - } - - fn store_repository(&self, conn: &mut Client, repo: &GraphRepository) -> Result<()> { - trace!( - "storing GitHub repository stats for {}", - repo.name_with_owner - ); - conn.execute( - "INSERT INTO github_repos ( - id, name, description, last_commit, stars, forks, issues, updated_at - ) VALUES ($1, $2, $3, $4, $5, $6, $7, NOW()) - ON CONFLICT (id) DO - UPDATE SET - name = $2, - description = $3, - last_commit = $4, - stars = $5, - forks = $6, - issues = $7, - updated_at = NOW();", - &[ - &repo.id, - &repo.name_with_owner, - &repo.description, - &repo.pushed_at, - &(repo.stargazer_count as i32), - &(repo.fork_count as i32), - &(repo.issues.total_count as i32), - ], - )?; - Ok(()) - } - - fn delete_repository(&self, conn: &mut Client, id: &str) -> Result<()> { - trace!("removing GitHub repository stats for ID {}", id); - conn.execute("DELETE FROM github_repos WHERE id = $1;", &[&id])?; - Ok(()) - } -} - -#[derive(Debug, Eq, PartialEq)] -struct RepositoryName<'a> { - owner: &'a str, - repo: &'a str, -} - -impl<'a> RepositoryName<'a> { - fn from_url(url: &'a str) -> Option { - static RE: Lazy = Lazy::new(|| { - Regex::new(r"https?://(www.)?github\.com/(?P[\w\._-]+)/(?P[\w\._-]+)") - .unwrap() - }); - - match RE.captures(url) { - Some(cap) => { - 
let owner = cap.name("owner").expect("missing group 'owner'").as_str(); - let repo = cap.name("repo").expect("missing group 'repo'").as_str(); - Some(Self { - owner, - repo: repo.strip_suffix(".git").unwrap_or(repo), - }) - } - None => None, - } - } -} - -#[derive(Debug, failure::Fail)] -#[fail(display = "rate limit reached")] -struct RateLimitReached; - -#[derive(Debug, Deserialize)] -struct GraphResponse { - data: T, - #[serde(default)] - errors: Vec, -} - -#[derive(Debug, Deserialize)] -struct GraphError { - #[serde(rename = "type")] - error_type: String, - path: Vec, - message: String, -} - -#[derive(Debug, Deserialize)] -#[serde(untagged)] -enum GraphErrorPath { - Segment(String), - Index(i64), -} - -#[derive(Debug, Deserialize)] -struct GraphRateLimit { - remaining: u32, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -struct GraphNodes { - nodes: Vec, - rate_limit: GraphRateLimit, -} - -#[derive(Debug, Deserialize)] -struct GraphRepositoryNode { - repository: Option, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -struct GraphRepository { - id: String, - name_with_owner: String, - pushed_at: Option>, - description: Option, - stargazer_count: i64, - fork_count: i64, - issues: GraphIssues, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -struct GraphIssues { - total_count: i64, -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_repository_name() { - macro_rules! 
assert_name { - ($url:expr => ($owner:expr, $repo: expr)) => { - assert_eq!( - RepositoryName::from_url($url), - Some(RepositoryName { - owner: $owner, - repo: $repo - }) - ); - }; - } - - assert_name!("https://github.com/onur/cratesfyi" => ("onur", "cratesfyi")); - assert_name!("http://github.com/onur/cratesfyi" => ("onur", "cratesfyi")); - assert_name!("https://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi")); - assert_name!("http://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi")); - assert_name!("https://github.com/onur/cratesfyi.git" => ("onur", "cratesfyi")); - assert_name!("https://github.com/docopt/docopt.rs" => ("docopt", "docopt.rs")); - assert_name!("https://github.com/onur23cmD_M_R_L_/crates_fy-i" => ( - "onur23cmD_M_R_L_", "crates_fy-i" - )); - } -} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index aa3dd8f16..0431ce268 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -3,7 +3,6 @@ pub(crate) use self::cargo_metadata::{CargoMetadata, Package as MetadataPackage}; pub(crate) use self::copy::copy_dir_all; pub use self::daemon::start_daemon; -pub use self::github_updater::GithubUpdater; pub(crate) use self::html::rewrite_lol; pub use self::queue::{get_crate_priority, remove_crate_priority, set_crate_priority}; pub use self::queue_builder::queue_builder; @@ -16,8 +15,7 @@ mod cargo_metadata; #[cfg(feature = "consistency_check")] pub mod consistency; mod copy; -mod daemon; -mod github_updater; +pub(crate) mod daemon; mod html; mod pubsubhubbub; mod queue; diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index fbb941236..5a9b93685 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -1,5 +1,5 @@ use super::{match_version, redirect_base, render_markdown, MatchSemver, MetaData}; -use crate::{db::Pool, impl_webpage, web::page::WebPage}; +use crate::{db::Pool, impl_webpage, repositories::RepositoryStatsUpdater, web::page::WebPage}; use chrono::{DateTime, Utc}; use iron::prelude::*; use iron::Url; @@ 
-31,7 +31,7 @@ pub struct CrateDetails { have_examples: bool, // need to check this manually pub target_name: String, releases: Vec, - github_metadata: Option, + repository_metadata: Option, pub(crate) metadata: MetaData, is_library: bool, license: Option, @@ -47,11 +47,12 @@ pub struct CrateDetails { } #[derive(Debug, Clone, PartialEq, Serialize)] -struct GitHubMetadata { +struct RepositoryMetadata { stars: i32, forks: i32, issues: i32, name: Option, + icon: &'static str, } fn optional_markdown(markdown: &Option, serializer: S) -> Result @@ -73,7 +74,12 @@ pub struct Release { } impl CrateDetails { - pub fn new(conn: &mut Client, name: &str, version: &str) -> Option { + pub fn new( + conn: &mut Client, + name: &str, + version: &str, + up: &RepositoryStatsUpdater, + ) -> Option { // get all stuff, I love you rustfmt let query = " SELECT @@ -93,11 +99,11 @@ impl CrateDetails { releases.keywords, releases.have_examples, releases.target_name, - releases.github_repo, - github_repos.stars AS github_stars, - github_repos.forks AS github_forks, - github_repos.issues AS github_issues, - github_repos.name AS github_name, + repositories.host as repo_host, + repositories.stars as repo_stars, + repositories.forks as repo_forks, + repositories.issues as repo_issues, + repositories.name as repo_name, releases.is_library, releases.yanked, releases.doc_targets, @@ -111,7 +117,7 @@ impl CrateDetails { FROM releases INNER JOIN crates ON releases.crate_id = crates.id LEFT JOIN doc_coverage ON doc_coverage.release_id = releases.id - LEFT JOIN github_repos ON releases.github_repo = github_repos.id + LEFT JOIN repositories ON releases.repository_id = repositories.id WHERE crates.name = $1 AND releases.version = $2;"; let rows = conn.query(query, &[&name, &version]).unwrap(); @@ -128,16 +134,16 @@ impl CrateDetails { // get releases, sorted by semver let releases = releases_for_crate(conn, crate_id); - let github_metadata = if krate.get::<_, Option>("github_repo").is_some() { - 
Some(GitHubMetadata { - issues: krate.get("github_issues"), - stars: krate.get("github_stars"), - forks: krate.get("github_forks"), - name: krate.get("github_name"), - }) - } else { - None - }; + let repository_metadata = + krate + .get::<_, Option>("repo_host") + .map(|host| RepositoryMetadata { + issues: krate.get("repo_issues"), + stars: krate.get("repo_stars"), + forks: krate.get("repo_forks"), + name: krate.get("repo_name"), + icon: up.get_icon_name(&host), + }); let metadata = MetaData { name: krate.get("name"), @@ -173,7 +179,7 @@ impl CrateDetails { have_examples: krate.get("have_examples"), target_name: krate.get("target_name"), releases, - github_metadata, + repository_metadata, metadata, is_library: krate.get("is_library"), license: krate.get("license"), @@ -276,7 +282,8 @@ pub fn crate_details_handler(req: &mut Request) -> IronResult { match match_version(&mut conn, &name, req_version).and_then(|m| m.assume_exact())? { MatchSemver::Exact((version, _)) => { - let details = cexpect!(req, CrateDetails::new(&mut conn, &name, &version)); + let updater = extension!(req, RepositoryStatsUpdater); + let details = cexpect!(req, CrateDetails::new(&mut conn, &name, &version, &updater)); CrateDetailsPage { details }.into_response(req) } @@ -312,8 +319,13 @@ mod tests { version: &str, expected_last_successful_build: Option<&str>, ) -> Result<(), Error> { - let details = CrateDetails::new(&mut db.conn(), package, version) - .ok_or_else(|| failure::err_msg("could not fetch crate details"))?; + let details = CrateDetails::new( + &mut db.conn(), + package, + version, + &db.repository_stats_updater(), + ) + .ok_or_else(|| failure::err_msg("could not fetch crate details"))?; assert_eq!( details.last_successful_build, @@ -440,7 +452,13 @@ mod tests { .binary(true) .create()?; - let details = CrateDetails::new(&mut db.conn(), "foo", "0.2.0").unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + "0.2.0", + &db.repository_stats_updater(), + ) + .unwrap(); 
assert_eq!( details.releases, vec![ @@ -509,7 +527,13 @@ mod tests { env.fake_release().name("foo").version("0.0.2").create()?; for version in &["0.0.1", "0.0.2", "0.0.3"] { - let details = CrateDetails::new(&mut db.conn(), "foo", version).unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + version, + &db.repository_stats_updater(), + ) + .unwrap(); assert_eq!( details.latest_release().version, semver::Version::parse("0.0.3")? @@ -533,7 +557,13 @@ mod tests { env.fake_release().name("foo").version("0.0.2").create()?; for version in &["0.0.1", "0.0.2", "0.0.3-pre.1"] { - let details = CrateDetails::new(&mut db.conn(), "foo", version).unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + version, + &db.repository_stats_updater(), + ) + .unwrap(); assert_eq!( details.latest_release().version, semver::Version::parse("0.0.2")? @@ -558,7 +588,13 @@ mod tests { env.fake_release().name("foo").version("0.0.2").create()?; for version in &["0.0.1", "0.0.2", "0.0.3"] { - let details = CrateDetails::new(&mut db.conn(), "foo", version).unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + version, + &db.repository_stats_updater(), + ) + .unwrap(); assert_eq!( details.latest_release().version, semver::Version::parse("0.0.2")? @@ -591,7 +627,13 @@ mod tests { .create()?; for version in &["0.0.1", "0.0.2", "0.0.3"] { - let details = CrateDetails::new(&mut db.conn(), "foo", version).unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + version, + &db.repository_stats_updater(), + ) + .unwrap(); assert_eq!( details.latest_release().version, semver::Version::parse("0.0.3")? 
@@ -647,7 +689,13 @@ mod tests { }) .create()?; - let details = CrateDetails::new(&mut db.conn(), "foo", "0.0.1").unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + "0.0.1", + &db.repository_stats_updater(), + ) + .unwrap(); assert_eq!( details.owners, vec![("foobar".into(), "https://example.org/foobar".into())] @@ -671,7 +719,13 @@ mod tests { }) .create()?; - let details = CrateDetails::new(&mut db.conn(), "foo", "0.0.1").unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + "0.0.1", + &db.repository_stats_updater(), + ) + .unwrap(); let mut owners = details.owners; owners.sort(); assert_eq!( @@ -694,7 +748,13 @@ mod tests { }) .create()?; - let details = CrateDetails::new(&mut db.conn(), "foo", "0.0.1").unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + "0.0.1", + &db.repository_stats_updater(), + ) + .unwrap(); assert_eq!( details.owners, vec![("barfoo".into(), "https://example.org/barfoo".into())] @@ -712,7 +772,13 @@ mod tests { }) .create()?; - let details = CrateDetails::new(&mut db.conn(), "foo", "0.0.1").unwrap(); + let details = CrateDetails::new( + &mut db.conn(), + "foo", + "0.0.1", + &db.repository_stats_updater(), + ) + .unwrap(); assert_eq!( details.owners, vec![("barfoo".into(), "https://example.org/barfoov2".into())] diff --git a/src/web/extensions.rs b/src/web/extensions.rs index 468e67b66..c8ac60120 100644 --- a/src/web/extensions.rs +++ b/src/web/extensions.rs @@ -1,5 +1,7 @@ use crate::web::page::TemplateData; -use crate::{db::Pool, BuildQueue, Config, Context, Metrics, Storage}; +use crate::{ + db::Pool, repositories::RepositoryStatsUpdater, BuildQueue, Config, Context, Metrics, Storage, +}; use failure::Error; use iron::{BeforeMiddleware, IronResult, Request}; use std::sync::Arc; @@ -12,6 +14,7 @@ pub(super) struct InjectExtensions { storage: Arc, metrics: Arc, template_data: Arc, + repository_stats_updater: Arc, } impl InjectExtensions { @@ -25,6 +28,7 @@ impl 
InjectExtensions { config: context.config()?, storage: context.storage()?, metrics: context.metrics()?, + repository_stats_updater: context.repository_stats_updater()?, template_data, }) } @@ -40,6 +44,8 @@ impl BeforeMiddleware for InjectExtensions { req.extensions.insert::(self.metrics.clone()); req.extensions .insert::(self.template_data.clone()); + req.extensions + .insert::(self.repository_stats_updater.clone()); Ok(()) } @@ -59,3 +65,4 @@ key!(Config => Arc); key!(Storage => Arc); key!(Metrics => Arc); key!(TemplateData => Arc); +key!(RepositoryStatsUpdater => Arc); diff --git a/src/web/releases.rs b/src/web/releases.rs index b2474880a..f6da8c189 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -70,9 +70,9 @@ pub(crate) fn get_releases(conn: &mut Client, page: i64, limit: i64, order: Orde // WARNING: it is _crucial_ that this always be hard-coded and NEVER be user input let (ordering, filter_failed): (&'static str, _) = match order { Order::ReleaseTime => ("releases.release_time", false), - Order::GithubStars => ("github_repos.stars", false), + Order::GithubStars => ("repositories.stars", false), Order::RecentFailures => ("releases.release_time", true), - Order::FailuresByGithubStars => ("github_repos.stars", true), + Order::FailuresByGithubStars => ("repositories.stars", true), }; let query = format!( "SELECT crates.name, @@ -81,10 +81,10 @@ pub(crate) fn get_releases(conn: &mut Client, page: i64, limit: i64, order: Orde releases.target_name, releases.release_time, releases.rustdoc_status, - github_repos.stars + repositories.stars FROM crates INNER JOIN releases ON crates.latest_version_id = releases.id - LEFT JOIN github_repos ON releases.github_repo = github_repos.id + LEFT JOIN repositories ON releases.repository_id = repositories.id WHERE ((NOT $3) OR (releases.build_status = FALSE AND releases.is_library = TRUE)) AND {0} IS NOT NULL @@ -123,16 +123,16 @@ fn get_releases_by_owner( releases.target_name, releases.release_time, 
releases.rustdoc_status, - github_repos.stars, + repositories.stars, owners.name, owners.login FROM crates INNER JOIN releases ON releases.id = crates.latest_version_id INNER JOIN owner_rels ON owner_rels.cid = crates.id INNER JOIN owners ON owners.id = owner_rels.oid - LEFT JOIN github_repos ON releases.github_repo = github_repos.id + LEFT JOIN repositories ON releases.repository_id = repositories.id WHERE owners.login = $1 - ORDER BY github_repos.stars DESC NULLS LAST + ORDER BY repositories.stars DESC NULLS LAST LIMIT $2 OFFSET $3"; let query = conn.query(query, &[&owner, &limit, &offset]).unwrap(); @@ -195,7 +195,7 @@ fn get_search_results( releases.target_name AS target_name, releases.release_time AS release_time, releases.rustdoc_status AS rustdoc_status, - github_repos.stars AS github_stars, + repositories.stars AS stars, COUNT(*) OVER() as total FROM crates INNER JOIN ( @@ -211,11 +211,11 @@ fn get_search_results( WHERE releases.rank = 1 ) AS latest_release ON latest_release.crate_id = crates.id INNER JOIN releases ON latest_release.id = releases.id - LEFT JOIN github_repos ON releases.github_repo = github_repos.id + LEFT JOIN repositories ON releases.repository_id = repositories.id WHERE ((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65 OR crates.name ILIKE CONCAT('%', $1, '%') - GROUP BY crates.id, releases.id, github_repos.stars + GROUP BY crates.id, releases.id, repositories.stars ORDER BY levenshtein(crates.name, $1) ASC, crates.name ILIKE CONCAT('%', $1, '%'), @@ -238,7 +238,7 @@ fn get_search_results( target_name: row.get("target_name"), release_time: row.get("release_time"), rustdoc_status: row.get("rustdoc_status"), - stars: row.get::<_, Option>("github_stars").unwrap_or(0), + stars: row.get::<_, Option>("stars").unwrap_or(0), }) .collect(); @@ -463,10 +463,10 @@ fn redirect_to_random_crate(req: &Request, conn: &mut PoolClient) -> IronResult< ) AS r INNER JOIN crates ON r.id = crates.id INNER JOIN 
releases ON crates.latest_version_id = releases.id - INNER JOIN github_repos ON releases.github_repo = github_repos.id + INNER JOIN repositories ON releases.repository_id = repositories.id WHERE releases.rustdoc_status = TRUE AND - github_repos.stars >= 100 + repositories.stars >= 100 LIMIT 1", &[&(config.random_crate_search_view_size as i32)] ) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 25e7c923c..0d21fd9dd 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -2,6 +2,7 @@ use crate::{ db::Pool, + repositories::RepositoryStatsUpdater, utils, web::{ crate_details::CrateDetails, csp::Csp, error::Nope, file::File, match_version, @@ -321,11 +322,13 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { } }; + let updater = extension!(req, RepositoryStatsUpdater); + rendering_time.step("crate details"); // Get the crate's details from the database // NOTE: we know this crate must exist because we just checked it above (or else `match_version` is buggy) - let krate = cexpect!(req, CrateDetails::new(&mut conn, &name, &version)); + let krate = cexpect!(req, CrateDetails::new(&mut conn, &name, &version, &updater)); // if visiting the full path to the default target, remove the target from the path // expects a req_path that looks like `[/:target]/.*` @@ -520,8 +523,9 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { let storage = extension!(req, Storage); let config = extension!(req, Config); let base = redirect_base(req); + let updater = extension!(req, RepositoryStatsUpdater); - let crate_details = match CrateDetails::new(&mut conn, &name, &version) { + let crate_details = match CrateDetails::new(&mut conn, &name, &version, &updater) { Some(krate) => krate, None => return Err(Nope::VersionNotFound.into()), }; diff --git a/templates/crate/details.html b/templates/crate/details.html index 9dd28c06c..764b372bd 100644 --- a/templates/crate/details.html +++ b/templates/crate/details.html @@ -62,19 +62,19 @@ {%- if 
details.repository_url -%}
  • - {# If the repo link is for github, show some github stats #} - {# TODO: add support for hosts besides github (#35) #} - {%- if details.github_metadata -%} - {{ "github" | fab(fw=true) }} - {% if details.github_metadata.name %} - {{details.github_metadata.name}} + {# If the repo link is for github or gitlab, show some stats #} + {# TODO: add support for hosts besides github and gitlab (#35) #} + {%- if details.repository_metadata -%} + {{ details.repository_metadata.icon | fab(fw=true) }} + {% if details.repository_metadata.name %} + {{details.repository_metadata.name}} {% else %} Repository {% endif %}
    - {{ "star" | fas(fw=true, extra="left-margin") }} {{ details.github_metadata.stars }} - {{ "code-branch" | fas(fw=true) }} {{ details.github_metadata.forks }} - {{ "exclamation-circle" | fas(fw=true) }} {{ details.github_metadata.issues }} + {{ "star" | fas(fw=true, extra="left-margin") }} {{ details.repository_metadata.stars }} + {{ "code-branch" | fas(fw=true) }} {{ details.repository_metadata.forks }} + {{ "exclamation-circle" | fas(fw=true) }} {{ details.repository_metadata.issues }} {# If the repo link is unknown, just show a normal link #} {%- else -%} diff --git a/templates/rustdoc/topbar.html b/templates/rustdoc/topbar.html index 38c8e123a..12cae107e 100644 --- a/templates/rustdoc/topbar.html +++ b/templates/rustdoc/topbar.html @@ -58,16 +58,8 @@
  • {%- endif -%} - {# If the crate is hosted on GitHub, show some stats #} - {%- if krate.github_metadata -%} -
  • - - {{ "code-branch" | fas(fw=true) }} Repository - -
  • - - {# If all the crate has is a repo url, show it #} - {%- elif krate.repository_url -%} + {# If the crate has a repo url, show it #} + {%- if krate.repository_url -%}
  • {{ "code-branch" | fas(fw=true) }} Repository