Skip to content

Fix keywords that weren't lowercased before inserting #919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions migrations/20170728002039_fix_keywords/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Not reversible
68 changes: 68 additions & 0 deletions migrations/20170728002039_fix_keywords/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
-- For all keywords that have a corresponding lowercased keyword,
-- (these keywords should not have been created but there was a bug
-- that created them; that has been fixed and no more are being created)
WITH messed_up_keywords AS (
select keywords.id as upper_id, k.id as lower_id
from keywords
inner join keywords as k on LOWER(keywords.keyword) = k.keyword
where LOWER(keywords.keyword) != keywords.keyword
)
-- Find all the crates that use the uppercased keyword BUT NOT the lowercased keyword
-- (many crates are associated with both lower and upper cased because of a bug)
, messed_up_crates AS (
select crate_id, upper_id, lower_id
from crates_keywords
inner join messed_up_keywords on crates_keywords.keyword_id = messed_up_keywords.upper_id
where messed_up_keywords.lower_id not in (
select keyword_id
from crates_keywords as ck
where ck.crate_id = crates_keywords.crate_id
)
)
-- Associate these crates with the lowercased keyword AS WELL AS the uppercased keyword
INSERT INTO crates_keywords (crate_id, keyword_id)
SELECT crate_id, lower_id as keyword_id
FROM messed_up_crates
;

-- For all keywords that have a corresponding lowercased keyword,
-- (this is repeated exactly from above)
WITH messed_up_keywords AS (
select keywords.id as upper_id, k.id as lower_id
from keywords
inner join keywords as k on LOWER(keywords.keyword) = k.keyword
where LOWER(keywords.keyword) != keywords.keyword
)
-- Delete records associating crates to the uppercased keyword
DELETE
FROM crates_keywords
WHERE crates_keywords.keyword_id IN (
SELECT upper_id FROM messed_up_keywords
)
;

-- For all keywords that have a corresponding lowercased keyword,
-- (this is repeated exactly from above)
WITH messed_up_keywords AS (
select keywords.id as upper_id, k.id as lower_id
from keywords
inner join keywords as k on LOWER(keywords.keyword) = k.keyword
where LOWER(keywords.keyword) != keywords.keyword
)
-- Delete the uppercased keyword
-- No more crates should be associated with these keywords because of
-- the previous delete.
DELETE
FROM keywords
WHERE keywords.id IN (
SELECT upper_id FROM messed_up_keywords
)
;

-- For all keywords that are not properly lowercased but do not
-- have a corresponding lowercased keyword, update them to be
-- lower cased, preserving any crate associations using them.
UPDATE keywords
SET keyword = lower(keyword)
WHERE keyword != lower(keyword)
;
44 changes: 43 additions & 1 deletion src/keyword.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ impl Keyword {
.execute(conn)?;
}
keywords::table
.filter(::lower(keywords::keyword).eq(any(&lowercase_names)))
.filter(keywords::keyword.eq(any(&lowercase_names)))
.load(conn)
}

Expand Down Expand Up @@ -189,3 +189,45 @@ pub fn show(req: &mut Request) -> CargoResult<Response> {
}
Ok(req.json(&R { keyword: kw.encodable() }))
}

#[cfg(test)]
mod tests {
use super::*;
use dotenv::dotenv;
use std::env;
use diesel;
use diesel::connection::SimpleConnection;

#[derive(Insertable)]
#[table_name = "keywords"]
struct NewKeyword<'a> {
keyword: &'a str,
}

fn pg_connection() -> PgConnection {
let _ = dotenv();
let database_url =
env::var("TEST_DATABASE_URL").expect("TEST_DATABASE_URL must be set to run tests");
let conn = PgConnection::establish(&database_url).unwrap();
// These tests deadlock if run concurrently
conn.batch_execute("BEGIN;").unwrap();
conn
}

#[test]
fn dont_associate_with_non_lowercased_keywords() {
let conn = pg_connection();
// The code should be preventing lowercased keywords from existing,
// but if one happens to sneak in there, don't associate crates with it.
let bad_keyword = NewKeyword { keyword: "NO" };

diesel::insert(&bad_keyword)
.into(keywords::table)
.execute(&conn)
.unwrap();

let associated = Keyword::find_or_create_all(&conn, &["no"]).unwrap();
assert_eq!(associated.len(), 1);
assert_eq!(associated.first().unwrap().keyword, "no");
}
}