From 6ae09f69d91f4dea9fcfbed0046d8c744ac45bf9 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 10:02:17 -0400 Subject: [PATCH 01/10] Rendering readmes doesn't need an encodable version Going to need a Version instead in the next few commits. --- src/bin/render-readmes.rs | 64 ++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index ab332e64a07..f311c3de801 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -33,7 +33,6 @@ use tar::Archive; use url::Url; use cargo_registry::{Config, Version}; -use cargo_registry::version::EncodableVersion; use cargo_registry::schema::*; use cargo_registry::render::markdown_to_html; @@ -57,32 +56,25 @@ fn main() { versions_count / page_size + 1 }; for current_page in 0..pages { - let versions: Vec = versions::table + let versions: Vec<(Version, String)> = versions::table .inner_join(crates::table) .select((versions::all_columns, crates::name)) .limit(page_size) .offset(current_page * page_size) .load::<(Version, String)>(&conn) - .expect("error loading versions") - .into_iter() - .map(|(version, crate_name)| version.encodable(&crate_name)) - .collect(); + .expect("error loading versions"); let mut tasks = Vec::with_capacity(page_size as usize); - for version in versions { + for (version, krate_name) in versions { let config = config.clone(); let handle = thread::spawn(move || { - println!("[{}-{}] Rendering README...", version.krate, version.num); - let readme = get_readme(&config, &version); + println!("[{}-{}] Rendering README...", krate_name, version.num); + let readme = get_readme(&config, &version, &krate_name); if readme.is_none() { return; } let readme = readme.unwrap(); - let readme_path = format!( - "readmes/{}/{}-{}.html", - version.krate, - version.krate, - version.num - ); + let readme_path = + format!("readmes/{}/{}-{}.html", krate_name, krate_name, 
version.num); let readme_len = readme.len(); let mut body = Cursor::new(readme.into_bytes()); config @@ -96,7 +88,7 @@ fn main() { ) .expect(&format!( "[{}-{}] Couldn't upload file to S3", - version.krate, + krate_name, version.num )); }); @@ -104,23 +96,26 @@ fn main() { } for handle in tasks { if let Err(err) = handle.join() { - println!("Thead panicked: {:?}", err); + println!("Thread panicked: {:?}", err); } } } } /// Renders the readme of an uploaded crate version. -fn get_readme(config: &Config, version: &EncodableVersion) -> Option { +fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option { let mut handle = Easy::new(); - let location = match config.uploader.crate_location(&version.krate, &version.num) { + let location = match config.uploader.crate_location( + &krate_name, + &version.num.to_string(), + ) { Some(l) => l, None => return None, }; let date = time::now().rfc822z().to_string(); let url = Url::parse(&location).expect(&format!( "[{}-{}] Couldn't parse crate URL", - version.krate, + krate_name, version.num )); @@ -144,7 +139,7 @@ fn get_readme(config: &Config, version: &EncodableVersion) -> Option { if let Err(err) = req.perform() { println!( "[{}-{}] Unable to fetch crate: {}", - version.krate, + krate_name, version.num, err ); @@ -155,7 +150,7 @@ fn get_readme(config: &Config, version: &EncodableVersion) -> Option { let response = String::from_utf8_lossy(&response); println!( "[{}-{}] Failed to get a 200 response: {}", - version.krate, + krate_name, version.num, response ); @@ -164,21 +159,21 @@ fn get_readme(config: &Config, version: &EncodableVersion) -> Option { let reader = Cursor::new(response); let reader = GzDecoder::new(reader).expect(&format!( "[{}-{}] Invalid gzip header", - version.krate, + krate_name, version.num )); let mut archive = Archive::new(reader); let mut entries = archive.entries().expect(&format!( "[{}-{}] Invalid tar archive entries", - version.krate, + krate_name, version.num )); let manifest: 
Manifest = { - let path = format!("{}-{}/Cargo.toml", version.krate, version.num); - let contents = find_file_by_path(&mut entries, Path::new(&path), &version); + let path = format!("{}-{}/Cargo.toml", krate_name, version.num); + let contents = find_file_by_path(&mut entries, Path::new(&path), &version, &krate_name); toml::from_str(&contents).expect(&format!( "[{}-{}] Syntax error in manifest file", - version.krate, + krate_name, version.num )) }; @@ -188,14 +183,14 @@ fn get_readme(config: &Config, version: &EncodableVersion) -> Option { let rendered = { let path = format!( "{}-{}/{}", - version.krate, + krate_name, version.num, manifest.package.readme.unwrap() ); - let contents = find_file_by_path(&mut entries, Path::new(&path), &version); + let contents = find_file_by_path(&mut entries, Path::new(&path), &version, &krate_name); markdown_to_html(&contents).expect(&format!( "[{}-{}] Couldn't render README", - version.krate, + krate_name, version.num )) }; @@ -214,7 +209,8 @@ fn get_readme(config: &Config, version: &EncodableVersion) -> Option { fn find_file_by_path( entries: &mut tar::Entries, path: &Path, - version: &EncodableVersion, + version: &Version, + krate_name: &str, ) -> String { let mut file = entries .find(|entry| match *entry { @@ -229,20 +225,20 @@ fn find_file_by_path( }) .expect(&format!( "[{}-{}] couldn't open file: {}", - version.krate, + krate_name, version.num, path.display() )) .expect(&format!( "[{}-{}] file is not present: {}", - version.krate, + krate_name, version.num, path.display() )); let mut contents = String::new(); file.read_to_string(&mut contents).expect(&format!( "[{}-{}] Couldn't read file contents", - version.krate, + krate_name, version.num )); contents From 8336cefbefebfa304794acdbd403e9ac80d0676f Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 10:05:03 -0400 Subject: [PATCH 02/10] Keep track of the last time a version's README was rendered This will enable restarting of readme rendering 
without needing to rerender everything. --- .../down.sql | 1 + .../up.sql | 10 +++++++ src/bin/render-readmes.rs | 5 ++++ src/krate.rs | 1 + src/schema.rs | 21 ++++++++++++++ src/version.rs | 28 +++++++++++++++++++ 6 files changed, 66 insertions(+) create mode 100644 migrations/20170820180453_readme_rendering_tracking/down.sql create mode 100644 migrations/20170820180453_readme_rendering_tracking/up.sql diff --git a/migrations/20170820180453_readme_rendering_tracking/down.sql b/migrations/20170820180453_readme_rendering_tracking/down.sql new file mode 100644 index 00000000000..fa41d8aeb90 --- /dev/null +++ b/migrations/20170820180453_readme_rendering_tracking/down.sql @@ -0,0 +1 @@ +DROP TABLE readme_rendering; \ No newline at end of file diff --git a/migrations/20170820180453_readme_rendering_tracking/up.sql b/migrations/20170820180453_readme_rendering_tracking/up.sql new file mode 100644 index 00000000000..df9de6a16e3 --- /dev/null +++ b/migrations/20170820180453_readme_rendering_tracking/up.sql @@ -0,0 +1,10 @@ +CREATE TABLE readme_rendering ( + version_id INTEGER NOT NULL PRIMARY KEY, + rendered_at TIMESTAMP WITHOUT TIME ZONE +); + +ALTER TABLE readme_rendering +ADD CONSTRAINT "fk_readme_rendering_version_id" +FOREIGN KEY (version_id) +REFERENCES versions(id) +ON DELETE CASCADE; diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index f311c3de801..dd2bcbd345f 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -66,6 +66,11 @@ fn main() { let mut tasks = Vec::with_capacity(page_size as usize); for (version, krate_name) in versions { let config = config.clone(); + version.record_readme_rendering(&conn).expect(&format!( + "[{}-{}] Couldn't record rendering time", + krate_name, + version.num + )); let handle = thread::spawn(move || { println!("[{}-{}] Rendering README...", krate_name, version.num); let readme = get_readme(&config, &version, &krate_name); diff --git a/src/krate.rs b/src/krate.rs index 15c9121dd93..ec7074558c5 
100644 --- a/src/krate.rs +++ b/src/krate.rs @@ -998,6 +998,7 @@ pub fn new(req: &mut Request) -> CargoResult<Response> { max, vers, )?; + version.record_readme_rendering(&conn)?; // Register this crate in our local git repo. let git_crate = git::Crate { diff --git a/src/schema.rs b/src/schema.rs index 0cf5e191f87..9d55898cb5b 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -442,6 +442,26 @@ table! { } } +table! { + /// Representation of the `readme_rendering` table. + /// + /// (Automatically generated by Diesel.) + readme_rendering (version_id) { + /// The `version_id` column of the `readme_rendering` table. + /// + /// Its SQL type is `Int4`. + /// + /// (Automatically generated by Diesel.) + version_id -> Int4, + /// The `rendered_at` column of the `readme_rendering` table. + /// + /// Its SQL type is `Nullable<Timestamp>`. + /// + /// (Automatically generated by Diesel.) + rendered_at -> Nullable<Timestamp>, + } +} + table! { /// Representation of the `reserved_crate_names` table. /// @@ -699,3 +719,4 @@ joinable!(version_authors -> versions (version_id)); joinable!(version_downloads -> versions (version_id)); joinable!(crate_owners -> teams (owner_id)); joinable!(crate_owners -> users (owner_id)); +joinable!(readme_rendering -> versions (version_id)); diff --git a/src/version.rs b/src/version.rs index 86fd05ac97d..5793fce1a80 100644 --- a/src/version.rs +++ b/src/version.rs @@ -4,6 +4,7 @@ use conduit::{Request, Response}; use conduit_router::RequestParams; use diesel; use diesel::pg::{Pg, PgConnection}; +use diesel::pg::upsert::*; use diesel::prelude::*; use semver; use serde_json; @@ -70,6 +71,13 @@ pub struct VersionLinks { pub authors: String, } +#[derive(Insertable, Debug, Clone, Copy)] +#[table_name = "readme_rendering"] +struct ReadmeRendering { + version_id: i32, + rendered_at: Timespec, +} + impl Version { pub fn encodable(self, crate_name: &str) -> EncodableVersion { let Version { @@ -127,6 +135,26 @@ impl Version { } }) } + + pub fn record_readme_rendering(&self, conn: 
&PgConnection) -> CargoResult<()> { + let rendered = ReadmeRendering { + version_id: self.id, + rendered_at: ::now(), + }; + + diesel::insert(&rendered.on_conflict( + readme_rendering::version_id, + do_update().set( + (readme_rendering::rendered_at.eq( + excluded( + readme_rendering::rendered_at, + ), + )), + ), + )).into(readme_rendering::table) + .execute(&*conn)?; + Ok(()) + } } impl NewVersion { From 26489e204c871ae4bbb09787c89a641ee1480c46 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 10:40:51 -0400 Subject: [PATCH 03/10] Use docopt for render-readmes because I'm about to add more options --- Cargo.lock | 20 ++++++++++++++++++++ Cargo.toml | 1 + src/bin/render-readmes.rs | 30 +++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69437568b67..8ca9f814a1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,6 +112,7 @@ dependencies = [ "diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", "diesel_codegen 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", "diesel_full_text_search 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "docopt 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "dotenv 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", "flate2 0.2.19 (registry+https://github.com/rust-lang/crates.io-index)", @@ -430,6 +431,18 @@ dependencies = [ "diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "docopt" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.10 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.10 
(registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "dotenv" version = "0.10.1" @@ -1077,6 +1090,11 @@ name = "string_cache_shared" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "strsim" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "syn" version = "0.11.11" @@ -1305,6 +1323,7 @@ dependencies = [ "checksum diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "304226fa7a3982b0405f6bb95dd9c10c3e2000709f194038a60ec2c277150951" "checksum diesel_codegen 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18a42ca5c9b660add51d58bc5a50a87123380e1e458069c5504528a851ed7384" "checksum diesel_full_text_search 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ab84b47676bc5344481e066f7a915ce292b4b87f734e397d651b7d085707c4b6" +"checksum docopt 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3b5b93718f8b3e5544fcc914c43de828ca6c6ace23e0332c6080a2977b49787a" "checksum dotenv 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d6f0e2bb24d163428d8031d3ebd2d2bd903ad933205a97d0f18c7c1aade380f3" "checksum dtoa 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "80c8b71fd71146990a9742fc06dcbbde19161a267e0ad4e572c35162f4578c90" "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" @@ -1387,6 +1406,7 @@ dependencies = [ "checksum string_cache 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2fa69b90c5398217fb0414706d1becea9325ad21ed5d87bd6dda82127911f324" "checksum string_cache_codegen 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "479cde50c3539481f33906a387f2bd17c8e87cb848c35b6021d41fb81ff9b4d7" "checksum string_cache_shared 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" +"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" "checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" "checksum tar 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "281285b717926caa919ad905ef89c63d75805c7d89437fb873100925a53f2b1b" diff --git a/Cargo.toml b/Cargo.toml index b0fa0734c57..4cd0fccc673 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,7 @@ clippy = { version = "=0.0.142", optional = true } chrono = "0.4.0" pulldown-cmark = { version = "0.0.15", default-features = false } ammonia = "0.5.0" +docopt = "0.8.1" conduit = "0.8" conduit-conditional-get = "0.8" diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index dd2bcbd345f..59d71ca9768 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -2,10 +2,6 @@ // readme using the Markdown renderer from the cargo_registry crate. // // Warning: this can take a lot of time. -// -// Usage: -// cargo run --bin render-readmes [page-size: optional = 25] -// The page-size argument dictate how much versions should be queried and processed at once. 
#![deny(warnings)] @@ -15,6 +11,7 @@ extern crate serde_derive; extern crate cargo_registry; extern crate curl; extern crate diesel; +extern crate docopt; extern crate flate2; extern crate s3; extern crate tar; @@ -24,8 +21,8 @@ extern crate url; use curl::easy::{Easy, List}; use diesel::prelude::*; +use docopt::Docopt; use flate2::read::GzDecoder; -use std::env; use std::io::{Cursor, Read}; use std::path::Path; use std::thread; @@ -37,8 +34,24 @@ use cargo_registry::schema::*; use cargo_registry::render::markdown_to_html; const DEFAULT_PAGE_SIZE: i64 = 25; +const USAGE: &'static str = " +Usage: render-readmes [options] + render-readmes --help + +Options: + -h, --help Show this message. + --page-size NUM How many versions should be queried and processed at a time. +"; + +#[derive(Deserialize)] +struct Args { + flag_page_size: Option, +} fn main() { + let args: Args = Docopt::new(USAGE) + .and_then(|d| d.deserialize()) + .unwrap_or_else(|e| e.exit()); let config: Config = Default::default(); let conn = cargo_registry::db::connect_now().unwrap(); let versions_count = versions::table @@ -46,10 +59,9 @@ fn main() { .count() .get_result::(&conn) .expect("error counting versions"); - let page_size = match env::args().nth(1) { - None => DEFAULT_PAGE_SIZE, - Some(s) => s.parse::().unwrap_or(DEFAULT_PAGE_SIZE), - }; + + let page_size = args.flag_page_size.unwrap_or(DEFAULT_PAGE_SIZE); + let pages = if versions_count % page_size == 0 { versions_count / page_size } else { From 89eb251635390ca59e00cd34b8846e4409417a5f Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 10:44:33 -0400 Subject: [PATCH 04/10] Don't need to select any columns to get a count --- src/bin/render-readmes.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index 59d71ca9768..c8a6b2d7764 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -54,11 +54,9 @@ fn main() { 
.unwrap_or_else(|e| e.exit()); let config: Config = Default::default(); let conn = cargo_registry::db::connect_now().unwrap(); - let versions_count = versions::table - .select(versions::all_columns) - .count() - .get_result::(&conn) - .expect("error counting versions"); + let versions_count = versions::table.count().get_result::(&conn).expect( + "error counting versions", + ); let page_size = args.flag_page_size.unwrap_or(DEFAULT_PAGE_SIZE); From d4e7c324973dab1b8f4d40ffc1cbbb9d30616f6d Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 14:19:43 -0400 Subject: [PATCH 05/10] Render readmes earlier than a particular time to enable restarting --- Cargo.lock | 1 + Cargo.toml | 1 + src/bin/render-readmes.rs | 58 ++++++++++++++++++++++++++++++--------- src/version.rs | 5 +++- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ca9f814a1b..a6ba9ed0e29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,6 +118,7 @@ dependencies = [ "flate2 0.2.19 (registry+https://github.com/rust-lang/crates.io-index)", "git2 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", "hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", "license-exprs 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "oauth2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 4cd0fccc673..9c9bb5da6f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ chrono = "0.4.0" pulldown-cmark = { version = "0.0.15", default-features = false } ammonia = "0.5.0" docopt = "0.8.1" +itertools = "0.6.0" conduit = "0.8" conduit-conditional-get = "0.8" diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index c8a6b2d7764..6c6c2db5594 100644 --- a/src/bin/render-readmes.rs +++ 
b/src/bin/render-readmes.rs @@ -9,10 +9,12 @@ extern crate serde_derive; extern crate cargo_registry; +extern crate chrono; extern crate curl; extern crate diesel; extern crate docopt; extern crate flate2; +extern crate itertools; extern crate s3; extern crate tar; extern crate time; @@ -20,9 +22,12 @@ extern crate toml; extern crate url; use curl::easy::{Easy, List}; +use chrono::{Utc, TimeZone}; use diesel::prelude::*; +use diesel::expression::any; use docopt::Docopt; use flate2::read::GzDecoder; +use itertools::Itertools; use std::io::{Cursor, Read}; use std::path::Path; use std::thread; @@ -33,7 +38,7 @@ use cargo_registry::{Config, Version}; use cargo_registry::schema::*; use cargo_registry::render::markdown_to_html; -const DEFAULT_PAGE_SIZE: i64 = 25; +const DEFAULT_PAGE_SIZE: usize = 25; const USAGE: &'static str = " Usage: render-readmes [options] render-readmes --help @@ -41,11 +46,13 @@ Usage: render-readmes [options] Options: -h, --help Show this message. --page-size NUM How many versions should be queried and processed at a time. + --older-than DATE Only rerender readmes that are older than this date. 
"; #[derive(Deserialize)] struct Args { - flag_page_size: Option, + flag_page_size: Option, + flag_older_than: Option, } fn main() { @@ -54,25 +61,50 @@ fn main() { .unwrap_or_else(|e| e.exit()); let config: Config = Default::default(); let conn = cargo_registry::db::connect_now().unwrap(); - let versions_count = versions::table.count().get_result::(&conn).expect( - "error counting versions", - ); - let page_size = args.flag_page_size.unwrap_or(DEFAULT_PAGE_SIZE); + let start_time = Utc::now(); - let pages = if versions_count % page_size == 0 { - versions_count / page_size + let older_than = if let Some(ref time) = args.flag_older_than { + Utc.datetime_from_str(&time, "%Y-%m-%d %H:%M:%S") + .expect("Could not parse --older-than argument as a time") } else { - versions_count / page_size + 1 + start_time }; - for current_page in 0..pages { - let versions: Vec<(Version, String)> = versions::table + let older_than = older_than.naive_utc(); + + println!("Start time: {}", start_time); + println!("Rendering readmes older than: {}", older_than); + + let version_ids = versions::table + .inner_join(readme_rendering::table) + .filter(readme_rendering::rendered_at.lt(older_than)) + .select(versions::id) + .load::<(i32)>(&conn) + .expect("error loading version ids"); + + let total_versions = version_ids.len(); + println!("Rendering {} versions", total_versions); + + let page_size = args.flag_page_size.unwrap_or(DEFAULT_PAGE_SIZE); + + let total_pages = total_versions / page_size; + let total_pages = if total_versions % page_size == 0 { total_pages } else { total_pages + 1 }; + + let mut page_num = 0; + + for version_ids_chunk in &version_ids.into_iter().chunks(page_size) { + page_num += 1; + println!("= Page {} of {} ==================================", page_num, total_pages); + + let ids: Vec<_> = version_ids_chunk.collect(); + + let versions = versions::table .inner_join(crates::table) + .filter(versions::id.eq(any(ids))) .select((versions::all_columns, crates::name)) - 
.limit(page_size) - .offset(current_page * page_size) .load::<(Version, String)>(&conn) .expect("error loading versions"); + let mut tasks = Vec::with_capacity(page_size as usize); for (version, krate_name) in versions { let config = config.clone(); diff --git a/src/version.rs b/src/version.rs index 5793fce1a80..d54e3caad9f 100644 --- a/src/version.rs +++ b/src/version.rs @@ -24,6 +24,7 @@ use util::errors::CargoError; use util::{RequestUtils, CargoResult, human}; use license_exprs; +// Queryable has a custom implementation below #[derive(Clone, Identifiable, Associations, Debug)] #[belongs_to(Crate)] pub struct Version { @@ -71,8 +72,10 @@ pub struct VersionLinks { pub authors: String, } -#[derive(Insertable, Debug, Clone, Copy)] +#[derive(Insertable, Identifiable, Queryable, Associations, Debug, Clone, Copy)] +#[belongs_to(Version)] #[table_name = "readme_rendering"] +#[primary_key(version_id)] struct ReadmeRendering { version_id: i32, rendered_at: Timespec, From 43f6c00f2a0aa156c820ce6840c41220bc91fc24 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 14:32:36 -0400 Subject: [PATCH 06/10] Enable rendering readmes of a particular crate only This'll make it easier to debug and try fixing particular crates. --- src/bin/render-readmes.rs | 34 +++++++++++++++++++++++++++------- src/version.rs | 4 ++++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index 6c6c2db5594..5bca943e633 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -47,12 +47,14 @@ Options: -h, --help Show this message. --page-size NUM How many versions should be queried and processed at a time. --older-than DATE Only rerender readmes that are older than this date. + --crate NAME Only rerender readmes for the specified crate. 
"; #[derive(Deserialize)] struct Args { flag_page_size: Option, flag_older_than: Option, + flag_crate: Option, } fn main() { @@ -65,8 +67,9 @@ fn main() { let start_time = Utc::now(); let older_than = if let Some(ref time) = args.flag_older_than { - Utc.datetime_from_str(&time, "%Y-%m-%d %H:%M:%S") - .expect("Could not parse --older-than argument as a time") + Utc.datetime_from_str(&time, "%Y-%m-%d %H:%M:%S").expect( + "Could not parse --older-than argument as a time", + ) } else { start_time }; @@ -75,12 +78,21 @@ fn main() { println!("Start time: {}", start_time); println!("Rendering readmes older than: {}", older_than); - let version_ids = versions::table + let mut query = versions::table .inner_join(readme_rendering::table) + .inner_join(crates::table) .filter(readme_rendering::rendered_at.lt(older_than)) .select(versions::id) - .load::<(i32)>(&conn) - .expect("error loading version ids"); + .into_boxed(); + + if let Some(crate_name) = args.flag_crate { + println!("Rendering readmes for {}", crate_name); + query = query.filter(crates::name.eq(crate_name)); + } + + let version_ids = query.load::<(i32)>(&conn).expect( + "error loading version ids", + ); let total_versions = version_ids.len(); println!("Rendering {} versions", total_versions); @@ -88,13 +100,21 @@ fn main() { let page_size = args.flag_page_size.unwrap_or(DEFAULT_PAGE_SIZE); let total_pages = total_versions / page_size; - let total_pages = if total_versions % page_size == 0 { total_pages } else { total_pages + 1 }; + let total_pages = if total_versions % page_size == 0 { + total_pages + } else { + total_pages + 1 + }; let mut page_num = 0; for version_ids_chunk in &version_ids.into_iter().chunks(page_size) { page_num += 1; - println!("= Page {} of {} ==================================", page_num, total_pages); + println!( + "= Page {} of {} ==================================", + page_num, + total_pages + ); let ids: Vec<_> = version_ids_chunk.collect(); diff --git a/src/version.rs b/src/version.rs 
index d54e3caad9f..dbb1043fc78 100644 --- a/src/version.rs +++ b/src/version.rs @@ -24,6 +24,10 @@ use util::errors::CargoError; use util::{RequestUtils, CargoResult, human}; use license_exprs; +// This is necessary to allow joining version to both crates and readme_rendering +// in the render-readmes script. +enable_multi_table_joins!(crates, readme_rendering); + // Queryable has a custom implementation below #[derive(Clone, Identifiable, Associations, Debug)] #[belongs_to(Crate)] From e04ab26f37caef13cd6c6cd0904fb3f4acc917d3 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 15:23:00 -0400 Subject: [PATCH 07/10] Remove unnecessary parens --- src/version.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.rs b/src/version.rs index dbb1043fc78..08ff883ff29 100644 --- a/src/version.rs +++ b/src/version.rs @@ -152,11 +152,11 @@ impl Version { diesel::insert(&rendered.on_conflict( readme_rendering::version_id, do_update().set( - (readme_rendering::rendered_at.eq( + readme_rendering::rendered_at.eq( excluded( readme_rendering::rendered_at, ), - )), + ), ), )).into(readme_rendering::table) .execute(&*conn)?; From 4f9c1fa899b7a3767558ee02c9fcd124e3cdac40 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Aug 2017 16:00:58 -0400 Subject: [PATCH 08/10] cargo fmt --- src/version.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/version.rs b/src/version.rs index 08ff883ff29..b835d9674d3 100644 --- a/src/version.rs +++ b/src/version.rs @@ -151,13 +151,11 @@ impl Version { diesel::insert(&rendered.on_conflict( readme_rendering::version_id, - do_update().set( - readme_rendering::rendered_at.eq( - excluded( - readme_rendering::rendered_at, - ), + do_update().set(readme_rendering::rendered_at.eq( + excluded( + readme_rendering::rendered_at, ), - ), + )), )).into(readme_rendering::table) .execute(&*conn)?; Ok(()) From bd5f5f25222eb170a8f547586c4710a9cd51842a 
Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 26 Aug 2017 11:55:40 -0400 Subject: [PATCH 09/10] Use enumerate instead of a counter --- src/bin/render-readmes.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index 5bca943e633..be954c0c9f2 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -106,13 +106,16 @@ fn main() { total_pages + 1 }; - let mut page_num = 0; - - for version_ids_chunk in &version_ids.into_iter().chunks(page_size) { - page_num += 1; + for (page_num, version_ids_chunk) in + version_ids + .into_iter() + .chunks(page_size) + .into_iter() + .enumerate() + { println!( "= Page {} of {} ==================================", - page_num, + page_num + 1, total_pages ); From eee6c09f61cd5ed1027bc32e7d253c70e0fbb737 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 26 Aug 2017 13:34:34 -0400 Subject: [PATCH 10/10] Use indexed format args to reduce duplication --- src/bin/render-readmes.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index be954c0c9f2..c10ab0555a5 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -143,8 +143,7 @@ fn main() { return; } let readme = readme.unwrap(); - let readme_path = - format!("readmes/{}/{}-{}.html", krate_name, krate_name, version.num); + let readme_path = format!("readmes/{0}/{0}-{1}.html", krate_name, version.num); let readme_len = readme.len(); let mut body = Cursor::new(readme.into_bytes()); config