Skip to content

Commit 12c4632

Browse files
Unify more code to remove even more duplication
1 parent 82ac3a4 commit 12c4632

File tree

6 files changed

+294
-300
lines changed

6 files changed

+294
-300
lines changed

src/db/migrate.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -644,7 +644,7 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> CratesfyiResult<(
644644
REFERENCES repositories(id) ON DELETE SET NULL;
645645
646646
INSERT INTO repositories(host, host_id, name, description, last_commit, stars, forks, issues, updated_at)
647-
SELECT 'github', id, name, description, last_commit, stars, forks, issues, updated_at
647+
SELECT 'github.com', id, name, description, last_commit, stars, forks, issues, updated_at
648648
FROM github_repos;
649649
650650
UPDATE releases
@@ -681,12 +681,12 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> CratesfyiResult<(
681681
682682
INSERT INTO github_repos(id, name, description, last_commit, stars, forks, issues, updated_at)
683683
SELECT host_id, name, description, last_commit, stars, forks, issues, updated_at
684-
FROM repositories WHERE repositories.host = 'github';
684+
FROM repositories WHERE repositories.host = 'github.com';
685685
686686
UPDATE releases
687687
SET github_repo = repositories.host_id
688688
FROM repositories
689-
WHERE repositories.host_id = releases.github_repo AND releases.repository IS NOT NULL AND repositories.host = 'github';
689+
WHERE repositories.host_id = releases.github_repo AND releases.repository IS NOT NULL AND repositories.host = 'github.com';
690690
691691
DROP INDEX releases_github_repo_idx;
692692
DROP INDEX github_repos_stars_idx;

src/test/fakes.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -372,7 +372,7 @@ impl FakeGithubStats {
372372

373373
let data = conn.query_one(
374374
"INSERT INTO repositories (host, host_id, name, description, last_commit, stars, forks, issues, updated_at)
375-
VALUES ('github', $1, $2, 'Fake description!', NOW(), $3, $4, $5, NOW())
375+
VALUES ('github.com', $1, $2, 'Fake description!', NOW(), $3, $4, $5, NOW())
376376
RETURNING id;",
377377
&[&host_id, &self.repo, &self.stars, &self.forks, &self.issues],
378378
)?;

src/utils/github_updater.rs

Lines changed: 75 additions & 125 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,16 @@
11
use crate::error::Result;
22
use crate::{db::Pool, Config};
33
use chrono::{DateTime, Utc};
4-
use log::{debug, info, trace, warn};
5-
use once_cell::sync::Lazy;
4+
use log::{info, trace, warn};
65
use postgres::Client;
7-
use regex::Regex;
86
use reqwest::{
97
blocking::Client as HttpClient,
108
header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT},
119
};
1210
use serde::Deserialize;
13-
use std::collections::HashSet;
1411
use std::sync::Arc;
1512

16-
use crate::utils::{RepositoryName, Updater, APP_USER_AGENT};
13+
use crate::utils::{Updater, APP_USER_AGENT};
1714

1815
const GRAPHQL_UPDATE: &str = "query($ids: [ID!]!) {
1916
nodes(ids: $ids) {
@@ -81,42 +78,6 @@ impl Updater for GithubUpdater {
8178
}))
8279
}
8380

84-
fn backfill_repositories(&self) -> Result<()> {
85-
info!("started backfilling GitHub repository stats");
86-
87-
let mut conn = self.pool.get()?;
88-
let needs_backfilling = conn.query(
89-
"SELECT releases.id, crates.name, releases.version, releases.repository_url
90-
FROM releases
91-
INNER JOIN crates ON (crates.id = releases.crate_id)
92-
WHERE repository IS NULL AND repository_url LIKE '%github.com%';",
93-
&[],
94-
)?;
95-
96-
let mut missing_urls = HashSet::new();
97-
for row in &needs_backfilling {
98-
let id: i32 = row.get("id");
99-
let name: String = row.get("name");
100-
let version: String = row.get("version");
101-
let url: String = row.get("repository_url");
102-
103-
if missing_urls.contains(&url) {
104-
debug!("{} {} points to a known missing repo", name, version);
105-
} else if let Some(node_id) = self.load_repository(&mut conn, &url)? {
106-
conn.execute(
107-
"UPDATE releases SET repository = $1 WHERE id = $2;",
108-
&[&node_id, &id],
109-
)?;
110-
info!("backfilled GitHub repository for {} {}", name, version);
111-
} else {
112-
debug!("{} {} does not point to a GitHub repository", name, version);
113-
missing_urls.insert(url);
114-
}
115-
}
116-
117-
Ok(())
118-
}
119-
12081
fn load_repository(&self, conn: &mut Client, url: &str) -> Result<Option<i32>> {
12182
let name = match Self::repository_name(url) {
12283
Some(name) => name,
@@ -125,8 +86,8 @@ impl Updater for GithubUpdater {
12586

12687
// Avoid querying the GitHub API for repositories we already loaded.
12788
if let Some(row) = conn.query_opt(
128-
"SELECT id FROM repositories WHERE name = $1 AND host = 'github' LIMIT 1;",
129-
&[&format!("{}/{}", name.owner, name.repo)],
89+
"SELECT id FROM repositories WHERE name = $1 AND host = $2 LIMIT 1;",
90+
&[&format!("{}/{}", name.owner, name.repo), &name.host],
13091
)? {
13192
return Ok(Some(row.get("id")));
13293
}
@@ -140,7 +101,17 @@ impl Updater for GithubUpdater {
140101
}),
141102
)?;
142103
if let Some(repo) = response.data.repository {
143-
Ok(Some(self.store_repository(conn, &repo)?))
104+
Ok(Some(self.store_repository(
105+
conn,
106+
Self::hosts()[0],
107+
&repo.id,
108+
&repo.name_with_owner,
109+
&repo.description,
110+
&repo.pushed_at,
111+
repo.stargazer_count,
112+
repo.fork_count,
113+
repo.issues.total_count,
114+
)?))
144115
} else if let Some(error) = response.errors.get(0) {
145116
use GraphErrorPath::*;
146117
match (error.error_type.as_str(), error.path.as_slice()) {
@@ -156,55 +127,52 @@ impl Updater for GithubUpdater {
156127
fn update_all_crates(&self) -> Result<()> {
157128
info!("started updating GitHub repository stats");
158129

130+
let mut updated = 0;
159131
let mut conn = self.pool.get()?;
160-
let needs_update = conn
161-
.query(
162-
"SELECT host_id
163-
FROM repositories
164-
WHERE host = 'github' AND updated_at < NOW() - INTERVAL '1 day';",
165-
&[],
166-
)?
167-
.into_iter()
168-
.map(|row| row.get(0))
169-
.collect::<Vec<String>>();
170-
171-
if needs_update.is_empty() {
172-
info!("no GitHub repository stats needed to be updated");
173-
return Ok(());
174-
}
175-
176-
for chunk in needs_update.chunks(UPDATE_CHUNK_SIZE) {
177-
if let Err(err) = self.update_repositories(&mut conn, &chunk) {
178-
if err.downcast_ref::<RateLimitReached>().is_some() {
179-
warn!("rate limit reached, blocked the GitHub repository stats updater");
180-
return Ok(());
132+
for host in Self::hosts() {
133+
let needs_update = conn
134+
.query(
135+
"SELECT host_id
136+
FROM repositories
137+
WHERE host = $1 AND updated_at < NOW() - INTERVAL '1 day';",
138+
&[&host],
139+
)?
140+
.into_iter()
141+
.map(|row| row.get(0))
142+
.collect::<Vec<String>>();
143+
144+
for chunk in needs_update.chunks(UPDATE_CHUNK_SIZE) {
145+
if let Err(err) = self.update_repositories(&mut conn, &chunk) {
146+
if err.downcast_ref::<RateLimitReached>().is_some() {
147+
warn!("rate limit reached, blocked the GitHub repository stats updater");
148+
return Ok(());
149+
}
150+
return Err(err);
181151
}
182-
return Err(err);
183152
}
153+
154+
updated += needs_update.len();
184155
}
185156

186-
info!("finished updating GitHub repository stats");
157+
if updated == 0 {
158+
info!("no GitHub repository stats needed to be updated");
159+
} else {
160+
info!("finished updating GitHub repository stats");
161+
}
187162
Ok(())
188163
}
189164

190-
fn repository_name(url: &str) -> Option<RepositoryName> {
191-
static RE: Lazy<Regex> = Lazy::new(|| {
192-
Regex::new(r"https?://(www.)?github\.com/(?P<owner>[\w\._-]+)/(?P<repo>[\w\._-]+)")
193-
.unwrap()
194-
});
195-
196-
let cap = RE.captures(url)?;
197-
let owner = cap.name("owner").expect("missing group 'owner'").as_str();
198-
let repo = cap.name("repo").expect("missing group 'repo'").as_str();
199-
Some(RepositoryName {
200-
owner,
201-
repo: repo.strip_suffix(".git").unwrap_or(repo),
202-
})
203-
}
204-
205165
fn name() -> &'static str {
206166
"Github"
207167
}
168+
169+
fn hosts() -> &'static [&'static str] {
170+
&["github.com"]
171+
}
172+
173+
fn pool(&self) -> &Pool {
174+
&self.pool
175+
}
208176
}
209177

210178
impl GithubUpdater {
@@ -226,19 +194,31 @@ impl GithubUpdater {
226194
return Err(RateLimitReached.into());
227195
}
228196

197+
let host = Self::hosts()[0];
198+
229199
// When a node is missing (for example if the repository was deleted or made private) the
230200
// GraphQL API will return *both* a `null` instead of the data in the nodes list and a
231201
// `NOT_FOUND` error in the errors list.
232202
for node in &response.data.nodes {
233203
if let Some(node) = node {
234-
self.store_repository(conn, &node)?;
204+
self.store_repository(
205+
conn,
206+
host,
207+
&node.id,
208+
&node.name_with_owner,
209+
&node.description,
210+
&node.pushed_at,
211+
node.stargazer_count,
212+
node.fork_count,
213+
node.issues.total_count,
214+
)?;
235215
}
236216
}
237217
for error in &response.errors {
238218
use GraphErrorPath::*;
239219
match (error.error_type.as_str(), error.path.as_slice()) {
240220
("NOT_FOUND", [Segment(nodes), Index(idx)]) if nodes == "nodes" => {
241-
self.delete_repository(conn, &node_ids[*idx as usize], "github")?;
221+
self.delete_repository(conn, &node_ids[*idx as usize], host)?;
242222
}
243223
_ => failure::bail!("error updating repositories: {}", error.message),
244224
}
@@ -263,38 +243,6 @@ impl GithubUpdater {
263243
.error_for_status()?
264244
.json()?)
265245
}
266-
267-
fn store_repository(&self, conn: &mut Client, repo: &GraphRepository) -> Result<i32> {
268-
trace!(
269-
"storing GitHub repository stats for {}",
270-
repo.name_with_owner
271-
);
272-
let data = conn.query_one(
273-
"INSERT INTO repositories (
274-
host, host_id, name, description, last_commit, stars, forks, issues, updated_at
275-
) VALUES ('github', $1, $2, $3, $4, $5, $6, $7, NOW())
276-
ON CONFLICT (host, host_id) DO
277-
UPDATE SET
278-
name = $2,
279-
description = $3,
280-
last_commit = $4,
281-
stars = $5,
282-
forks = $6,
283-
issues = $7,
284-
updated_at = NOW()
285-
RETURNING id;",
286-
&[
287-
&repo.id,
288-
&repo.name_with_owner,
289-
&repo.description,
290-
&repo.pushed_at,
291-
&(repo.stargazer_count as i32),
292-
&(repo.fork_count as i32),
293-
&(repo.issues.total_count as i32),
294-
],
295-
)?;
296-
Ok(data.get(0))
297-
}
298246
}
299247

300248
#[derive(Debug, failure::Fail)]
@@ -361,29 +309,31 @@ struct GraphIssues {
361309
#[cfg(test)]
362310
mod test {
363311
use super::*;
312+
use crate::utils::RepositoryName;
364313

365314
#[test]
366315
fn test_repository_name() {
367316
macro_rules! assert_name {
368-
($url:expr => ($owner:expr, $repo: expr)) => {
317+
($url:expr => ($owner:expr, $repo:expr, $host:expr)) => {
369318
assert_eq!(
370319
GithubUpdater::repository_name($url),
371320
Some(RepositoryName {
372321
owner: $owner,
373-
repo: $repo
322+
repo: $repo,
323+
host: $host,
374324
})
375325
);
376326
};
377327
}
378328

379-
assert_name!("https://github.com/onur/cratesfyi" => ("onur", "cratesfyi"));
380-
assert_name!("http://github.com/onur/cratesfyi" => ("onur", "cratesfyi"));
381-
assert_name!("https://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi"));
382-
assert_name!("http://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi"));
383-
assert_name!("https://github.com/onur/cratesfyi.git" => ("onur", "cratesfyi"));
384-
assert_name!("https://github.com/docopt/docopt.rs" => ("docopt", "docopt.rs"));
329+
assert_name!("https://github.com/onur/cratesfyi" => ("onur", "cratesfyi", "github.com"));
330+
assert_name!("http://github.com/onur/cratesfyi" => ("onur", "cratesfyi", "github.com"));
331+
assert_name!("https://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi", "www.github.com"));
332+
assert_name!("http://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi", "www.github.com"));
333+
assert_name!("https://github.com/onur/cratesfyi.git" => ("onur", "cratesfyi", "github.com"));
334+
assert_name!("https://github.com/docopt/docopt.rs" => ("docopt", "docopt.rs", "github.com"));
385335
assert_name!("https://github.com/onur23cmD_M_R_L_/crates_fy-i" => (
386-
"onur23cmD_M_R_L_", "crates_fy-i"
336+
"onur23cmD_M_R_L_", "crates_fy-i", "github.com"
387337
));
388338
}
389339
}

0 commit comments

Comments
 (0)