Skip to content

Commit 13f4d8c

Browse files
Big cleanup:
* remove DB interactions from updaters and move them into RepositoryStatsUpdater
* rework Updater trait into a simpler RepositoryForge
1 parent ff3a08e commit 13f4d8c

File tree

7 files changed

+416
-541
lines changed

7 files changed

+416
-541
lines changed

src/bin/cratesfyi.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ impl DatabaseSubcommand {
423423
}
424424

425425
Self::UpdateRepositoryFields => {
426-
RepositoryStatsUpdater::update_all_crates(&ctx)?;
426+
RepositoryStatsUpdater::update_all_crates(&ctx.config()?, &ctx.pool()?)?;
427427
}
428428

429429
Self::BackfillRepositoryStats => {

src/docbuilder/rustwide_builder.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -653,12 +653,7 @@ impl RustwideBuilder {
653653
}
654654

655655
fn get_repo(&self, conn: &mut Client, metadata: &MetadataPackage) -> Result<Option<i32>> {
656-
RepositoryStatsUpdater::load_repository(
657-
conn,
658-
metadata,
659-
self.config.clone(),
660-
self.db.clone(),
661-
)
656+
RepositoryStatsUpdater::load_repository(conn, metadata, self.config.clone())
662657
}
663658
}
664659

src/repositories/github.rs

Lines changed: 60 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
use crate::error::Result;
2-
use crate::{db::Pool, Config};
2+
use crate::Config;
33
use chrono::{DateTime, Utc};
4-
use log::{info, trace, warn};
5-
use postgres::Client;
4+
use log::{trace, warn};
65
use reqwest::{
76
blocking::Client as HttpClient,
87
header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT},
98
};
109
use serde::Deserialize;
1110
use std::sync::Arc;
1211

13-
use crate::repositories::{Updater, APP_USER_AGENT};
12+
use crate::repositories::{
13+
FetchRepositoriesResult, Repository, RepositoryForge, RepositoryName, APP_USER_AGENT,
14+
};
1415

1516
const GRAPHQL_UPDATE: &str = "query($ids: [ID!]!) {
1617
nodes(ids: $ids) {
@@ -41,57 +42,54 @@ const GRAPHQL_SINGLE: &str = "query($owner: String!, $repo: String!) {
4142
}
4243
}";
4344

44-
/// How many repositories to update in a single chunk. Values over 100 are probably going to be
45-
/// rejected by the GraphQL API.
46-
const UPDATE_CHUNK_SIZE: usize = 100;
47-
4845
pub struct GitHub {
4946
client: HttpClient,
50-
pool: Pool,
51-
config: Arc<Config>,
47+
github_updater_min_rate_limit: u32,
5248
}
5349

54-
impl Updater for GitHub {
50+
impl GitHub {
5551
/// Returns `Err` if the access token has invalid syntax (but *not* if it isn't authorized).
5652
/// Returns `Ok(None)` if there is no access token.
57-
fn new(config: Arc<Config>, pool: Pool) -> Result<Option<Self>> {
53+
pub fn new(config: &Arc<Config>) -> Result<Option<Self>> {
5854
let mut headers = HeaderMap::new();
5955
headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT));
6056
headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
6157

62-
if let Some(token) = &config.github_accesstoken {
58+
if let Some(ref token) = config.github_accesstoken {
6359
headers.insert(
6460
AUTHORIZATION,
6561
HeaderValue::from_str(&format!("token {}", token))?,
6662
);
6763
} else {
68-
warn!("did not collect GitHub stats as no token was provided");
64+
warn!("did not collect `github.com` stats as no token was provided");
6965
return Ok(None);
7066
}
7167

7268
let client = HttpClient::builder().default_headers(headers).build()?;
7369

7470
Ok(Some(GitHub {
7571
client,
76-
pool,
77-
config,
72+
github_updater_min_rate_limit: config.github_updater_min_rate_limit,
7873
}))
7974
}
75+
}
8076

81-
fn load_repository(&self, conn: &mut Client, url: &str) -> Result<Option<i32>> {
82-
let name = match Self::repository_name(url) {
83-
Some(name) => name,
84-
None => return Ok(None),
85-
};
77+
impl RepositoryForge for GitHub {
78+
fn host(&self) -> &str {
79+
"github.com"
80+
}
8681

87-
// Avoid querying the GitHub API for repositories we already loaded.
88-
if let Some(row) = conn.query_opt(
89-
"SELECT id FROM repositories WHERE name = $1 AND host = $2 LIMIT 1;",
90-
&[&format!("{}/{}", name.owner, name.repo), &name.host],
91-
)? {
92-
return Ok(Some(row.get("id")));
93-
}
82+
fn icon(&self) -> &str {
83+
"github"
84+
}
9485

86+
/// How many repositories to update in a single chunk. Values over 100 are probably going to be
87+
/// rejected by the GraphQL API.
88+
fn chunk_size(&self) -> usize {
89+
100
90+
}
91+
92+
fn fetch_repository(&self, name: &RepositoryName) -> Result<Option<Repository>> {
9593
// Fetch the latest information from the GitHub API.
9694
let response: GraphResponse<GraphRepositoryNode> = self.graphql(
9795
GRAPHQL_SINGLE,
@@ -101,82 +99,21 @@ impl Updater for GitHub {
10199
}),
102100
)?;
103101
if let Some(repo) = response.data.repository {
104-
Ok(Some(self.store_repository(
105-
conn,
106-
Self::hosts()[0],
107-
&repo.id,
108-
&repo.name_with_owner,
109-
&repo.description,
110-
&repo.pushed_at,
111-
repo.stargazer_count,
112-
repo.fork_count,
113-
repo.issues.total_count,
114-
)?))
115-
} else if let Some(error) = response.errors.get(0) {
116-
use GraphErrorPath::*;
117-
match (error.error_type.as_str(), error.path.as_slice()) {
118-
("NOT_FOUND", [Segment(repository)]) if repository == "repository" => Ok(None),
119-
_ => failure::bail!("error loading repository: {}", error.message),
120-
}
121-
} else {
122-
panic!("missing repository but there were no errors!");
123-
}
124-
}
125-
126-
/// Updates github fields in crates table
127-
fn update_all_crates(&self) -> Result<()> {
128-
info!("started updating GitHub repository stats");
129-
130-
let mut updated = 0;
131-
let mut conn = self.pool.get()?;
132-
for host in Self::hosts() {
133-
let needs_update = conn
134-
.query(
135-
"SELECT host_id
136-
FROM repositories
137-
WHERE host = $1 AND updated_at < NOW() - INTERVAL '1 day';",
138-
&[&host],
139-
)?
140-
.into_iter()
141-
.map(|row| row.get(0))
142-
.collect::<Vec<String>>();
143-
144-
for chunk in needs_update.chunks(UPDATE_CHUNK_SIZE) {
145-
if let Err(err) = self.update_repositories(&mut conn, &chunk) {
146-
if err.downcast_ref::<RateLimitReached>().is_some() {
147-
warn!("rate limit reached, blocked the GitHub repository stats updater");
148-
return Ok(());
149-
}
150-
return Err(err);
151-
}
152-
}
153-
154-
updated += needs_update.len();
155-
}
156-
157-
if updated == 0 {
158-
info!("no GitHub repository stats needed to be updated");
102+
Ok(Some(Repository {
103+
id: repo.id,
104+
name_with_owner: repo.name_with_owner,
105+
description: repo.description,
106+
last_activity_at: repo.pushed_at,
107+
stars: repo.stargazer_count,
108+
forks: repo.fork_count,
109+
issues: repo.issues.total_count,
110+
}))
159111
} else {
160-
info!("finished updating GitHub repository stats");
112+
Ok(None)
161113
}
162-
Ok(())
163-
}
164-
165-
fn name() -> &'static str {
166-
"Github"
167114
}
168115

169-
fn hosts() -> &'static [&'static str] {
170-
&["github.com"]
171-
}
172-
173-
fn pool(&self) -> &Pool {
174-
&self.pool
175-
}
176-
}
177-
178-
impl GitHub {
179-
fn update_repositories(&self, conn: &mut Client, node_ids: &[String]) -> Result<()> {
116+
fn fetch_repositories(&self, node_ids: &[String]) -> Result<FetchRepositoriesResult> {
180117
let response: GraphResponse<GraphNodes<Option<GraphRepository>>> = self.graphql(
181118
GRAPHQL_UPDATE,
182119
serde_json::json!({
@@ -190,43 +127,44 @@ impl GitHub {
190127
"GitHub GraphQL rate limit remaining: {}",
191128
response.data.rate_limit.remaining
192129
);
193-
if response.data.rate_limit.remaining < self.config.github_updater_min_rate_limit {
130+
if response.data.rate_limit.remaining < self.github_updater_min_rate_limit {
194131
return Err(RateLimitReached.into());
195132
}
196133

197-
let host = Self::hosts()[0];
134+
let mut ret = FetchRepositoriesResult::default();
198135

199-
// When a node is missing (for example if the repository was deleted or made private) the
200-
// GraphQL API will return *both* a `null` instead of the data in the nodes list and a
201-
// `NOT_FOUND` error in the errors list.
202-
for node in &response.data.nodes {
203-
if let Some(node) = node {
204-
self.store_repository(
205-
conn,
206-
host,
207-
&node.id,
208-
&node.name_with_owner,
209-
&node.description,
210-
&node.pushed_at,
211-
node.stargazer_count,
212-
node.fork_count,
213-
node.issues.total_count,
214-
)?;
215-
}
216-
}
217136
for error in &response.errors {
218137
use GraphErrorPath::*;
219138
match (error.error_type.as_str(), error.path.as_slice()) {
220139
("NOT_FOUND", [Segment(nodes), Index(idx)]) if nodes == "nodes" => {
221-
self.delete_repository(conn, &node_ids[*idx as usize], host)?;
140+
ret.missing.push(node_ids[*idx as usize].clone());
222141
}
223142
_ => failure::bail!("error updating repositories: {}", error.message),
224143
}
225144
}
145+
// When a node is missing (for example if the repository was deleted or made private) the
146+
// GraphQL API will return *both* a `null` instead of the data in the nodes list and a
147+
// `NOT_FOUND` error in the errors list.
148+
for node in response.data.nodes.into_iter() {
149+
if let Some(node) = node {
150+
let repo = Repository {
151+
id: node.id,
152+
name_with_owner: node.name_with_owner,
153+
description: node.description,
154+
last_activity_at: node.pushed_at,
155+
stars: node.stargazer_count,
156+
forks: node.fork_count,
157+
issues: node.issues.total_count,
158+
};
159+
ret.present.insert(repo.id.clone(), repo);
160+
}
161+
}
226162

227-
Ok(())
163+
Ok(ret)
228164
}
165+
}
229166

167+
impl GitHub {
230168
fn graphql<T: serde::de::DeserializeOwned>(
231169
&self,
232170
query: &str,
@@ -305,39 +243,3 @@ struct GraphRepository {
305243
struct GraphIssues {
306244
total_count: i64,
307245
}
308-
309-
#[cfg(test)]
310-
mod test {
311-
use super::*;
312-
use crate::repositories::RepositoryName;
313-
314-
#[test]
315-
fn test_repository_name() {
316-
macro_rules! assert_name {
317-
($url:expr => ($owner:expr, $repo:expr, $host:expr)) => {
318-
assert_eq!(
319-
GitHub::repository_name($url),
320-
Some(RepositoryName {
321-
owner: $owner,
322-
repo: $repo,
323-
host: $host,
324-
})
325-
);
326-
};
327-
($url:expr => None) => {
328-
assert_eq!(GitHub::repository_name($url), None);
329-
};
330-
}
331-
332-
assert_name!("https://github.com/onur/cratesfyi" => ("onur", "cratesfyi", "github.com"));
333-
assert_name!("http://github.com/onur/cratesfyi" => ("onur", "cratesfyi", "github.com"));
334-
assert_name!("https://github.com/onur/cratesfyi.git" => ("onur", "cratesfyi", "github.com"));
335-
assert_name!("https://github.com/docopt/docopt.rs" => ("docopt", "docopt.rs", "github.com"));
336-
assert_name!("https://github.com/onur23cmD_M_R_L_/crates_fy-i" => (
337-
"onur23cmD_M_R_L_", "crates_fy-i", "github.com"
338-
));
339-
assert_name!("https://www.github.com/onur/cratesfyi" => None);
340-
assert_name!("http://www.github.com/onur/cratesfyi" => None);
341-
assert_name!("http://www.gitlab.com/onur/cratesfyi" => None);
342-
}
343-
}

0 commit comments

Comments
 (0)