Skip to content

Commit 22f8632

Browse files
committed
Auto merge of #1693 - sgrif:sg-remove-crate-downloads, r=jtgeibel
Remove `crate_downloads` ⚠️ This PR contains destructive migrations. The commits in this PR must be deployed separately ⚠️ I've been looking through `pg:diagnose` on our new database, and we have two indexes on `crate_downloads` which are either unused or low scan, high write. I'd looked into these indexes previously and determined that we needed them (presumably this was before we introduced `recent_crate_downloads`), but seeing one flagged as unused was surprising so I gave this another look. It turns out the only place we're still using `crate_downloads` directly is a place that should have been using `recent_crate_downloads` anyway. With that code changed to use `recent_crate_downloads`, the only place that `crate_downloads` is used is to populate `recent_crate_downloads`. We can just as easily populate that view from `version_downloads` instead. It turns out that `crate_downloads` and its indexes accounts for 23% (!!!!) of our total database size, so there's a huge incentive to get rid of it. The only downside to doing this is that refreshing `recent_crate_downloads` takes longer if we do it on `version_downloads`. This is both due to the larger table size, and the lack of an index we can use for this (we could add an index to make it faster, but that's not worth it). When based on `version_downloads`, it takes 6 seconds instead of 600ms to populate. Ultimately this doesn't matter, since we do this off the web server anyway. Now that we're on PG 11, I also intend to partition the `version_downloads` table which will get our performance faster than 600ms when done.
2 parents 44060f3 + f6702c6 commit 22f8632

File tree

11 files changed

+69
-108
lines changed

11 files changed

+69
-108
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
DROP MATERIALIZED VIEW recent_crate_downloads;
2+
CREATE MATERIALIZED VIEW recent_crate_downloads (crate_id, downloads) AS
3+
SELECT crate_id, SUM(downloads) FROM crate_downloads
4+
WHERE date > date(CURRENT_TIMESTAMP - INTERVAL '90 days')
5+
GROUP BY crate_id;
6+
CREATE UNIQUE INDEX recent_crate_downloads_crate_id ON recent_crate_downloads (crate_id);
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
DROP MATERIALIZED VIEW recent_crate_downloads;
2+
CREATE MATERIALIZED VIEW recent_crate_downloads (crate_id, downloads) AS
3+
SELECT crate_id, SUM(version_downloads.downloads) FROM version_downloads
4+
INNER JOIN versions
5+
ON version_downloads.version_id = versions.id
6+
WHERE version_downloads.date > date(CURRENT_TIMESTAMP - INTERVAL '90 days')
7+
GROUP BY crate_id;
8+
CREATE UNIQUE INDEX recent_crate_downloads_crate_id ON recent_crate_downloads (crate_id);
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
CREATE TABLE crate_downloads (
2+
crate_id INTEGER NOT NULL REFERENCES crates (id),
3+
downloads INTEGER NOT NULL,
4+
date DATE NOT NULL,
5+
PRIMARY KEY (crate_id, date)
6+
);
7+
CREATE INDEX "index_crate_downloads_crate_id" ON crate_downloads (crate_id);
8+
CREATE INDEX "index_crate_downloads_date" ON crate_downloads (date);
9+
10+
INSERT INTO crate_downloads (crate_id, downloads, date)
11+
SELECT crate_id, sum(version_downloads.downloads), date
12+
FROM version_downloads
13+
INNER JOIN versions
14+
ON version_downloads.version_id = versions.id
15+
GROUP BY (crate_id, date);
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- ALERT! DESTRUCTIVE MIGRATION
2+
-- DO NOT DEPLOY UNTIL PREVIOUS COMMIT IS DEPLOYED
3+
DROP TABLE crate_downloads;

src/bin/update-downloads.rs

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ extern crate diesel;
66
use cargo_registry::{
77
db,
88
models::VersionDownload,
9-
schema::{crate_downloads, crates, metadata, version_downloads, versions},
9+
schema::{crates, metadata, version_downloads, versions},
1010
util::CargoResult,
1111
};
1212

@@ -53,7 +53,7 @@ fn update(conn: &PgConnection) -> QueryResult<()> {
5353
}
5454

5555
fn collect(conn: &PgConnection, rows: &[VersionDownload]) -> QueryResult<()> {
56-
use diesel::{insert_into, update};
56+
use diesel::update;
5757

5858
println!("updating {} versions", rows.len());
5959

@@ -77,18 +77,6 @@ fn collect(conn: &PgConnection, rows: &[VersionDownload]) -> QueryResult<()> {
7777
.set(crates::downloads.eq(crates::downloads + amt))
7878
.execute(conn)?;
7979

80-
// Update the total number of crate downloads for today
81-
insert_into(crate_downloads::table)
82-
.values((
83-
crate_downloads::crate_id.eq(crate_id),
84-
crate_downloads::downloads.eq(amt),
85-
crate_downloads::date.eq(download.date),
86-
))
87-
.on_conflict(crate_downloads::table.primary_key())
88-
.do_update()
89-
.set(crate_downloads::downloads.eq(crate_downloads::downloads + amt))
90-
.execute(conn)?;
91-
9280
// Now that everything else for this crate is done, update the global counter of total
9381
// downloads
9482
update(metadata::table)
@@ -144,16 +132,6 @@ mod test {
144132
(krate, version)
145133
}
146134

147-
macro_rules! crate_downloads {
148-
($conn:expr, $id:expr, $expected:expr) => {
149-
let dl = crate_downloads::table
150-
.filter(crate_downloads::crate_id.eq($id))
151-
.select(crate_downloads::downloads)
152-
.first($conn);
153-
assert_eq!(Ok($expected as i32), dl);
154-
};
155-
}
156-
157135
#[test]
158136
fn increment() {
159137
use diesel::dsl::*;
@@ -185,7 +163,6 @@ mod test {
185163
.select(crates::downloads)
186164
.first(&conn);
187165
assert_eq!(Ok(1), crate_downloads);
188-
crate_downloads!(&conn, krate.id, 1);
189166
crate::update(&conn).unwrap();
190167
let version_downloads = versions::table
191168
.find(version.id)
@@ -298,7 +275,6 @@ mod test {
298275
.unwrap();
299276
assert_eq!(krate2.downloads, 2);
300277
assert_eq!(krate2.updated_at, krate_before.updated_at);
301-
crate_downloads!(&conn, krate.id, 1);
302278
crate::update(&conn).unwrap();
303279
let version3 = versions::table
304280
.find(version.id)

src/controllers/krate/metadata.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
77
use crate::controllers::prelude::*;
88
use crate::models::{
9-
Category, Crate, CrateCategory, CrateDownload, CrateKeyword, CrateVersions, Keyword, User,
10-
Version,
9+
Category, Crate, CrateCategory, CrateKeyword, CrateVersions, Keyword, RecentCrateDownloads,
10+
User, Version,
1111
};
1212
use crate::schema::*;
1313
use crate::views::{
@@ -101,8 +101,6 @@ pub fn summary(req: &mut dyn Request) -> CargoResult<Response> {
101101

102102
/// Handles the `GET /crates/:crate_id` route.
103103
pub fn show(req: &mut dyn Request) -> CargoResult<Response> {
104-
use diesel::dsl::*;
105-
106104
let name = &req.params()["crate_id"];
107105
let conn = req.db_conn()?;
108106
let krate = Crate::by_name(name).first::<Crate>(&*conn)?;
@@ -123,10 +121,10 @@ pub fn show(req: &mut dyn Request) -> CargoResult<Response> {
123121
.inner_join(categories::table)
124122
.select(categories::all_columns)
125123
.load(&*conn)?;
126-
let recent_downloads = CrateDownload::belonging_to(&krate)
127-
.filter(crate_downloads::date.gt(date(now - 90.days())))
128-
.select(sum(crate_downloads::downloads))
129-
.get_result(&*conn)?;
124+
let recent_downloads = RecentCrateDownloads::belonging_to(&krate)
125+
.select(recent_crate_downloads::downloads)
126+
.get_result(&*conn)
127+
.optional()?;
130128

131129
let badges = badges::table
132130
.filter(badges::crate_id.eq(krate.id))

src/models.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ pub use self::download::VersionDownload;
66
pub use self::email::{Email, NewEmail};
77
pub use self::follow::Follow;
88
pub use self::keyword::{CrateKeyword, Keyword};
9-
pub use self::krate::{Crate, CrateDownload, CrateVersions, NewCrate};
9+
pub use self::krate::{Crate, CrateVersions, NewCrate, RecentCrateDownloads};
1010
pub use self::owner::{CrateOwner, Owner, OwnerKind};
1111
pub use self::rights::Rights;
1212
pub use self::team::{NewTeam, Team};

src/models/krate.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use chrono::{NaiveDate, NaiveDateTime};
1+
use chrono::NaiveDateTime;
22
use diesel::associations::Identifiable;
33
use diesel::pg::Pg;
44
use diesel::prelude::*;
@@ -20,14 +20,13 @@ use crate::schema::*;
2020
/// and are possibly of malicious intent e.g. ad tracking networks, etc.
2121
const DOCUMENTATION_BLOCKLIST: [&str; 1] = ["rust-ci.org"];
2222

23-
#[derive(Debug, Insertable, Queryable, Identifiable, Associations, AsChangeset, Clone, Copy)]
23+
#[derive(Debug, Queryable, Identifiable, Associations, Clone, Copy)]
2424
#[belongs_to(Crate)]
25-
#[primary_key(crate_id, date)]
26-
#[table_name = "crate_downloads"]
27-
pub struct CrateDownload {
25+
#[primary_key(crate_id)]
26+
#[table_name = "recent_crate_downloads"]
27+
pub struct RecentCrateDownloads {
2828
pub crate_id: i32,
2929
pub downloads: i32,
30-
pub date: NaiveDate,
3130
}
3231

3332
#[derive(Debug, Clone, Queryable, Identifiable, Associations, AsChangeset, QueryableByName)]

src/schema.patch

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,10 @@ index df884e4..18e08cd 100644
5555

5656
/// Representation of the `reserved_crate_names` table.
5757
///
58-
@@ -881,22 +901,24 @@ table! {
58+
@@ -881,21 +901,23 @@ table! {
5959
}
6060

6161
joinable!(api_tokens -> users (user_id));
62-
joinable!(crate_downloads -> crates (crate_id));
6362
joinable!(crate_owner_invitations -> crates (crate_id));
6463
joinable!(crate_owners -> crates (crate_id));
6564
-joinable!(crate_owners -> users (created_by));

src/schema.rs

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -176,35 +176,6 @@ table! {
176176
}
177177
}
178178

179-
table! {
180-
use diesel::sql_types::*;
181-
use diesel_full_text_search::{TsVector as Tsvector};
182-
183-
/// Representation of the `crate_downloads` table.
184-
///
185-
/// (Automatically generated by Diesel.)
186-
crate_downloads (crate_id, date) {
187-
/// The `crate_id` column of the `crate_downloads` table.
188-
///
189-
/// Its SQL type is `Int4`.
190-
///
191-
/// (Automatically generated by Diesel.)
192-
crate_id -> Int4,
193-
/// The `downloads` column of the `crate_downloads` table.
194-
///
195-
/// Its SQL type is `Int4`.
196-
///
197-
/// (Automatically generated by Diesel.)
198-
downloads -> Int4,
199-
/// The `date` column of the `crate_downloads` table.
200-
///
201-
/// Its SQL type is `Date`.
202-
///
203-
/// (Automatically generated by Diesel.)
204-
date -> Date,
205-
}
206-
}
207-
208179
table! {
209180
use diesel::sql_types::*;
210181
use diesel_full_text_search::{TsVector as Tsvector};
@@ -956,7 +927,6 @@ table! {
956927
}
957928

958929
joinable!(api_tokens -> users (user_id));
959-
joinable!(crate_downloads -> crates (crate_id));
960930
joinable!(crate_owner_invitations -> crates (crate_id));
961931
joinable!(crate_owners -> crates (crate_id));
962932
joinable!(crate_owners -> teams (owner_id));
@@ -984,7 +954,6 @@ allow_tables_to_appear_in_same_query!(
984954
background_jobs,
985955
badges,
986956
categories,
987-
crate_downloads,
988957
crate_owner_invitations,
989958
crate_owners,
990959
crates,

src/tests/builders.rs

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
//! Structs using the builder pattern that make it easier to create records in tests.
22
33
use cargo_registry::{
4-
models::{Crate, CrateDownload, Keyword, NewCrate, NewVersion, Version},
5-
schema::{crate_downloads, dependencies, versions},
4+
models::{Crate, Keyword, NewCrate, NewVersion, Version},
5+
schema::{crates, dependencies, version_downloads, versions},
66
util::CargoResult,
77
views::krate_publish as u,
88
};
99
use std::{collections::HashMap, io::Read};
1010

11-
use chrono::Utc;
1211
use diesel::prelude::*;
1312
use flate2::{write::GzEncoder, Compression};
1413

@@ -240,45 +239,34 @@ impl<'a> CrateBuilder<'a> {
240239
// Since we are using `NewCrate`, we can't set all the
241240
// crate properties in a single DB call.
242241

243-
let old_downloads = self.downloads.unwrap_or(0) - self.recent_downloads.unwrap_or(0);
244-
let now = Utc::now();
245-
let old_date = now.naive_utc().date() - chrono::Duration::days(91);
246-
247242
if let Some(downloads) = self.downloads {
248-
let crate_download = CrateDownload {
249-
crate_id: krate.id,
250-
downloads: old_downloads,
251-
date: old_date,
252-
};
253-
254-
insert_into(crate_downloads::table)
255-
.values(&crate_download)
256-
.execute(connection)?;
257-
krate.downloads = downloads;
258-
update(&krate).set(&krate).execute(connection)?;
259-
}
260-
261-
if self.recent_downloads.is_some() {
262-
let crate_download = CrateDownload {
263-
crate_id: krate.id,
264-
downloads: self.recent_downloads.unwrap(),
265-
date: now.naive_utc().date(),
266-
};
267-
268-
insert_into(crate_downloads::table)
269-
.values(&crate_download)
270-
.execute(connection)?;
271-
272-
no_arg_sql_function!(refresh_recent_crate_downloads, ());
273-
select(refresh_recent_crate_downloads).execute(connection)?;
243+
krate = update(&krate)
244+
.set(crates::downloads.eq(downloads))
245+
.returning(cargo_registry::models::krate::ALL_COLUMNS)
246+
.get_result(connection)?;
274247
}
275248

276249
if self.versions.is_empty() {
277250
self.versions.push(VersionBuilder::new("0.99.0"));
278251
}
279252

253+
let mut last_version_id = 0;
280254
for version_builder in self.versions {
281-
version_builder.build(krate.id, self.owner_id, connection)?;
255+
last_version_id = version_builder
256+
.build(krate.id, self.owner_id, connection)?
257+
.id;
258+
}
259+
260+
if let Some(downloads) = self.recent_downloads {
261+
insert_into(version_downloads::table)
262+
.values((
263+
version_downloads::version_id.eq(last_version_id),
264+
version_downloads::downloads.eq(downloads),
265+
))
266+
.execute(connection)?;
267+
268+
no_arg_sql_function!(refresh_recent_crate_downloads, ());
269+
select(refresh_recent_crate_downloads).execute(connection)?;
282270
}
283271

284272
if !self.keywords.is_empty() {

0 commit comments

Comments
 (0)