Skip to content

Commit 59a4ca9

Browse files
committed
Auto merge of #1694 - sgrif:sg-remove-pk-from-version-downloads, r=jtgeibel
Remove the `id` column from `version_downloads` :warning: This PR contains destructive migrations. Each commit must be deployed separately :warning: The `version_downloads` table and its indexes account for a whopping 69% of our total database size. The primary key alone is nearly 1GB, or 25% of our total database. This is never really used for anything besides fetching in batches in `update-downloads`. We could just do batching based on crate_id and date, but I've opted to remove the batching entirely. We usually process somewhere between 2k and 3k rows every time it runs -- we'd have to process over 50k rows to even consume 1MB of memory, so we can just load them all at once. With our code changed to never remove the column, we can just completely remove it. This brings the size of the table down to 1600MB total, or 1256MB after a full vacuum. Since Diesel always runs migrations in a transaction, and we can't add an index concurrently inside a transaction, the `down.sql` included is only for development/tests. If we need to roll this migration back in production, the column will need to be added/populated and the indexes created manually in a way that doesn't lock the table. If we were to run the blocking version included in down.sql against production, the table would be locked for multiple minutes, which would take some pages down and make the download endpoint painfully slow. I don't anticipate needing to roll this migration back.
2 parents 22f8632 + 86ba333 commit 59a4ca9

File tree

7 files changed

+42
-28
lines changed

7 files changed

+42
-28
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
-- Diesel always runs migrations in a transaction, and we can't create indexes
2+
-- concurrently in a transaction. We can't block crate downloads on this, so
3+
-- just raise if we try to run this in production
4+
DO $$
5+
DECLARE
6+
tmp_index_exists boolean;
7+
BEGIN
8+
tmp_index_exists := (SELECT EXISTS (
9+
SELECT * FROM pg_index
10+
INNER JOIN pg_class
11+
ON pg_class.oid = pg_index.indexrelid
12+
WHERE relname = 'version_downloads_tmp'
13+
));
14+
IF NOT tmp_index_exists THEN
15+
IF (SELECT COUNT(*) FROM version_downloads) > 1000 THEN
16+
RAISE EXCEPTION 'Indexes need to be created concurrently in production, manually create them and try again';
17+
ELSE
18+
ALTER TABLE version_downloads ADD COLUMN id SERIAL;
19+
CREATE UNIQUE INDEX version_downloads_tmp ON version_downloads (id);
20+
CREATE UNIQUE INDEX version_downloads_unique ON version_downloads (version_id, date);
21+
CREATE UNIQUE INDEX index_version_downloads_not_processed ON version_downloads (id) WHERE processed = false;
22+
END IF;
23+
END IF;
24+
END $$;
25+
26+
ALTER TABLE version_downloads
27+
DROP CONSTRAINT version_downloads_pkey,
28+
ADD CONSTRAINT version_downloads_pkey PRIMARY KEY USING INDEX version_downloads_tmp;
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- ALERT! DESTRUCTIVE MIGRATION
2+
-- DO NOT DEPLOY UNTIL PREVIOUS COMMIT IS DEPLOYED
3+
ALTER TABLE version_downloads
4+
DROP CONSTRAINT version_downloads_pkey,
5+
ADD CONSTRAINT version_downloads_pkey PRIMARY KEY USING INDEX version_downloads_unique,
6+
DROP COLUMN id;

mirage/fixtures/version-downloads.js

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,16 @@ export default [
33
{
44
date: '2017-02-10T00:00:00Z',
55
downloads: 2,
6-
id: 3023900,
76
version: 40905,
87
},
98
{
109
date: '2017-02-10T00:00:00Z',
1110
downloads: 1,
12-
id: 3024236,
1311
version: 18445,
1412
},
1513
{
1614
date: '2017-02-11T00:00:00Z',
1715
downloads: 1,
18-
id: 3027018,
1916
version: 40905,
2017
},
2118
];

src/bin/update-downloads.rs

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ use cargo_registry::{
1212

1313
use diesel::prelude::*;
1414

15-
static LIMIT: i64 = 1000;
16-
1715
fn main() -> CargoResult<()> {
1816
let conn = db::connect_now()?;
1917
update(&conn)?;
@@ -25,18 +23,11 @@ fn update(conn: &PgConnection) -> QueryResult<()> {
2523
use diesel::dsl::now;
2624
use diesel::select;
2725

28-
let mut max = Some(0);
29-
while let Some(m) = max {
30-
let rows = version_downloads
31-
.filter(processed.eq(false))
32-
.filter(id.gt(m))
33-
.filter(downloads.ne(counted))
34-
.order(id)
35-
.limit(LIMIT)
36-
.load(conn)?;
37-
collect(conn, &rows)?;
38-
max = rows.last().map(|d| d.id);
39-
}
26+
let rows = version_downloads
27+
.filter(processed.eq(false))
28+
.filter(downloads.ne(counted))
29+
.load(conn)?;
30+
collect(conn, &rows)?;
4031

4132
// Anything older than 24 hours ago will be frozen and will not be queried
4233
// against again.
@@ -62,7 +53,7 @@ fn collect(conn: &PgConnection, rows: &[VersionDownload]) -> QueryResult<()> {
6253

6354
conn.transaction::<_, diesel::result::Error, _>(|| {
6455
// increment the number of counted downloads
65-
update(version_downloads::table.find(download.id))
56+
update(version_downloads::table.find(download.id()))
6657
.set(version_downloads::counted.eq(version_downloads::counted + amt))
6758
.execute(conn)?;
6859

src/models/download.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ use crate::views::EncodableVersionDownload;
77

88
#[derive(Queryable, Identifiable, Associations, Debug, Clone, Copy)]
99
#[belongs_to(Version)]
10+
#[primary_key(version_id, date)]
1011
pub struct VersionDownload {
11-
pub id: i32,
1212
pub version_id: i32,
1313
pub downloads: i32,
1414
pub counted: i32,
@@ -34,7 +34,6 @@ impl VersionDownload {
3434

3535
pub fn encodable(self) -> EncodableVersionDownload {
3636
EncodableVersionDownload {
37-
id: self.id,
3837
version: self.version_id,
3938
downloads: self.downloads,
4039
date: self.date.to_string(),

src/schema.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -786,13 +786,7 @@ table! {
786786
/// Representation of the `version_downloads` table.
787787
///
788788
/// (Automatically generated by Diesel.)
789-
version_downloads (id) {
790-
/// The `id` column of the `version_downloads` table.
791-
///
792-
/// Its SQL type is `Int4`.
793-
///
794-
/// (Automatically generated by Diesel.)
795-
id -> Int4,
789+
version_downloads (version_id, date) {
796790
/// The `version_id` column of the `version_downloads` table.
797791
///
798792
/// Its SQL type is `Int4`.

src/views.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ pub struct EncodableDependency {
6666

6767
#[derive(Serialize, Deserialize, Debug)]
6868
pub struct EncodableVersionDownload {
69-
pub id: i32,
7069
pub version: i32,
7170
pub downloads: i32,
7271
pub date: String,

0 commit comments

Comments
 (0)