1
1
use crate :: error:: Result ;
2
- use crate :: { db :: Pool , Config } ;
2
+ use crate :: Config ;
3
3
use chrono:: { DateTime , Utc } ;
4
- use log:: { info, trace, warn} ;
5
- use postgres:: Client ;
4
+ use log:: { trace, warn} ;
6
5
use reqwest:: {
7
6
blocking:: Client as HttpClient ,
8
7
header:: { HeaderMap , HeaderValue , ACCEPT , AUTHORIZATION , USER_AGENT } ,
9
8
} ;
10
9
use serde:: Deserialize ;
11
10
use std:: sync:: Arc ;
12
11
13
- use crate :: repositories:: { Updater , APP_USER_AGENT } ;
12
+ use crate :: repositories:: {
13
+ FetchRepositoriesResult , Repository , RepositoryForge , RepositoryName , APP_USER_AGENT ,
14
+ } ;
14
15
15
16
const GRAPHQL_UPDATE : & str = "query($ids: [ID!]!) {
16
17
nodes(ids: $ids) {
@@ -41,57 +42,54 @@ const GRAPHQL_SINGLE: &str = "query($owner: String!, $repo: String!) {
41
42
}
42
43
}" ;
43
44
44
- /// How many repositories to update in a single chunk. Values over 100 are probably going to be
45
- /// rejected by the GraphQL API.
46
- const UPDATE_CHUNK_SIZE : usize = 100 ;
47
-
48
45
pub struct GitHub {
49
46
client : HttpClient ,
50
- pool : Pool ,
51
- config : Arc < Config > ,
47
+ github_updater_min_rate_limit : u32 ,
52
48
}
53
49
54
- impl Updater for GitHub {
50
+ impl GitHub {
55
51
/// Returns `Err` if the access token has invalid syntax (but *not* if it isn't authorized).
56
52
/// Returns `Ok(None)` if there is no access token.
57
- fn new ( config : Arc < Config > , pool : Pool ) -> Result < Option < Self > > {
53
+ pub fn new ( config : & Arc < Config > ) -> Result < Option < Self > > {
58
54
let mut headers = HeaderMap :: new ( ) ;
59
55
headers. insert ( USER_AGENT , HeaderValue :: from_static ( APP_USER_AGENT ) ) ;
60
56
headers. insert ( ACCEPT , HeaderValue :: from_static ( "application/json" ) ) ;
61
57
62
- if let Some ( token) = & config. github_accesstoken {
58
+ if let Some ( ref token) = config. github_accesstoken {
63
59
headers. insert (
64
60
AUTHORIZATION ,
65
61
HeaderValue :: from_str ( & format ! ( "token {}" , token) ) ?,
66
62
) ;
67
63
} else {
68
- warn ! ( "did not collect GitHub stats as no token was provided" ) ;
64
+ warn ! ( "did not collect `github.com` stats as no token was provided" ) ;
69
65
return Ok ( None ) ;
70
66
}
71
67
72
68
let client = HttpClient :: builder ( ) . default_headers ( headers) . build ( ) ?;
73
69
74
70
Ok ( Some ( GitHub {
75
71
client,
76
- pool,
77
- config,
72
+ github_updater_min_rate_limit : config. github_updater_min_rate_limit ,
78
73
} ) )
79
74
}
75
+ }
80
76
81
- fn load_repository ( & self , conn : & mut Client , url : & str ) -> Result < Option < i32 > > {
82
- let name = match Self :: repository_name ( url) {
83
- Some ( name) => name,
84
- None => return Ok ( None ) ,
85
- } ;
77
+ impl RepositoryForge for GitHub {
78
+ fn host ( & self ) -> & str {
79
+ "github.com"
80
+ }
86
81
87
- // Avoid querying the GitHub API for repositories we already loaded.
88
- if let Some ( row) = conn. query_opt (
89
- "SELECT id FROM repositories WHERE name = $1 AND host = $2 LIMIT 1;" ,
90
- & [ & format ! ( "{}/{}" , name. owner, name. repo) , & name. host ] ,
91
- ) ? {
92
- return Ok ( Some ( row. get ( "id" ) ) ) ;
93
- }
82
+ fn icon ( & self ) -> & str {
83
+ "github"
84
+ }
94
85
86
+ /// How many repositories to update in a single chunk. Values over 100 are probably going to be
87
+ /// rejected by the GraphQL API.
88
+ fn chunk_size ( & self ) -> usize {
89
+ 100
90
+ }
91
+
92
+ fn fetch_repository ( & self , name : & RepositoryName ) -> Result < Option < Repository > > {
95
93
// Fetch the latest information from the GitHub API.
96
94
let response: GraphResponse < GraphRepositoryNode > = self . graphql (
97
95
GRAPHQL_SINGLE ,
@@ -101,82 +99,21 @@ impl Updater for GitHub {
101
99
} ) ,
102
100
) ?;
103
101
if let Some ( repo) = response. data . repository {
104
- Ok ( Some ( self . store_repository (
105
- conn,
106
- Self :: hosts ( ) [ 0 ] ,
107
- & repo. id ,
108
- & repo. name_with_owner ,
109
- & repo. description ,
110
- & repo. pushed_at ,
111
- repo. stargazer_count ,
112
- repo. fork_count ,
113
- repo. issues . total_count ,
114
- ) ?) )
115
- } else if let Some ( error) = response. errors . get ( 0 ) {
116
- use GraphErrorPath :: * ;
117
- match ( error. error_type . as_str ( ) , error. path . as_slice ( ) ) {
118
- ( "NOT_FOUND" , [ Segment ( repository) ] ) if repository == "repository" => Ok ( None ) ,
119
- _ => failure:: bail!( "error loading repository: {}" , error. message) ,
120
- }
121
- } else {
122
- panic ! ( "missing repository but there were no errors!" ) ;
123
- }
124
- }
125
-
126
- /// Updates github fields in crates table
127
- fn update_all_crates ( & self ) -> Result < ( ) > {
128
- info ! ( "started updating GitHub repository stats" ) ;
129
-
130
- let mut updated = 0 ;
131
- let mut conn = self . pool . get ( ) ?;
132
- for host in Self :: hosts ( ) {
133
- let needs_update = conn
134
- . query (
135
- "SELECT host_id
136
- FROM repositories
137
- WHERE host = $1 AND updated_at < NOW() - INTERVAL '1 day';" ,
138
- & [ & host] ,
139
- ) ?
140
- . into_iter ( )
141
- . map ( |row| row. get ( 0 ) )
142
- . collect :: < Vec < String > > ( ) ;
143
-
144
- for chunk in needs_update. chunks ( UPDATE_CHUNK_SIZE ) {
145
- if let Err ( err) = self . update_repositories ( & mut conn, & chunk) {
146
- if err. downcast_ref :: < RateLimitReached > ( ) . is_some ( ) {
147
- warn ! ( "rate limit reached, blocked the GitHub repository stats updater" ) ;
148
- return Ok ( ( ) ) ;
149
- }
150
- return Err ( err) ;
151
- }
152
- }
153
-
154
- updated += needs_update. len ( ) ;
155
- }
156
-
157
- if updated == 0 {
158
- info ! ( "no GitHub repository stats needed to be updated" ) ;
102
+ Ok ( Some ( Repository {
103
+ id : repo. id ,
104
+ name_with_owner : repo. name_with_owner ,
105
+ description : repo. description ,
106
+ last_activity_at : repo. pushed_at ,
107
+ stars : repo. stargazer_count ,
108
+ forks : repo. fork_count ,
109
+ issues : repo. issues . total_count ,
110
+ } ) )
159
111
} else {
160
- info ! ( "finished updating GitHub repository stats" ) ;
112
+ Ok ( None )
161
113
}
162
- Ok ( ( ) )
163
- }
164
-
165
- fn name ( ) -> & ' static str {
166
- "Github"
167
114
}
168
115
169
- fn hosts ( ) -> & ' static [ & ' static str ] {
170
- & [ "github.com" ]
171
- }
172
-
173
- fn pool ( & self ) -> & Pool {
174
- & self . pool
175
- }
176
- }
177
-
178
- impl GitHub {
179
- fn update_repositories ( & self , conn : & mut Client , node_ids : & [ String ] ) -> Result < ( ) > {
116
+ fn fetch_repositories ( & self , node_ids : & [ String ] ) -> Result < FetchRepositoriesResult > {
180
117
let response: GraphResponse < GraphNodes < Option < GraphRepository > > > = self . graphql (
181
118
GRAPHQL_UPDATE ,
182
119
serde_json:: json!( {
@@ -190,43 +127,44 @@ impl GitHub {
190
127
"GitHub GraphQL rate limit remaining: {}" ,
191
128
response. data. rate_limit. remaining
192
129
) ;
193
- if response. data . rate_limit . remaining < self . config . github_updater_min_rate_limit {
130
+ if response. data . rate_limit . remaining < self . github_updater_min_rate_limit {
194
131
return Err ( RateLimitReached . into ( ) ) ;
195
132
}
196
133
197
- let host = Self :: hosts ( ) [ 0 ] ;
134
+ let mut ret = FetchRepositoriesResult :: default ( ) ;
198
135
199
- // When a node is missing (for example if the repository was deleted or made private) the
200
- // GraphQL API will return *both* a `null` instead of the data in the nodes list and a
201
- // `NOT_FOUND` error in the errors list.
202
- for node in & response. data . nodes {
203
- if let Some ( node) = node {
204
- self . store_repository (
205
- conn,
206
- host,
207
- & node. id ,
208
- & node. name_with_owner ,
209
- & node. description ,
210
- & node. pushed_at ,
211
- node. stargazer_count ,
212
- node. fork_count ,
213
- node. issues . total_count ,
214
- ) ?;
215
- }
216
- }
217
136
for error in & response. errors {
218
137
use GraphErrorPath :: * ;
219
138
match ( error. error_type . as_str ( ) , error. path . as_slice ( ) ) {
220
139
( "NOT_FOUND" , [ Segment ( nodes) , Index ( idx) ] ) if nodes == "nodes" => {
221
- self . delete_repository ( conn , & node_ids[ * idx as usize ] , host ) ? ;
140
+ ret . missing . push ( node_ids[ * idx as usize ] . clone ( ) ) ;
222
141
}
223
142
_ => failure:: bail!( "error updating repositories: {}" , error. message) ,
224
143
}
225
144
}
145
+ // When a node is missing (for example if the repository was deleted or made private) the
146
+ // GraphQL API will return *both* a `null` instead of the data in the nodes list and a
147
+ // `NOT_FOUND` error in the errors list.
148
+ for node in response. data . nodes . into_iter ( ) {
149
+ if let Some ( node) = node {
150
+ let repo = Repository {
151
+ id : node. id ,
152
+ name_with_owner : node. name_with_owner ,
153
+ description : node. description ,
154
+ last_activity_at : node. pushed_at ,
155
+ stars : node. stargazer_count ,
156
+ forks : node. fork_count ,
157
+ issues : node. issues . total_count ,
158
+ } ;
159
+ ret. present . insert ( repo. id . clone ( ) , repo) ;
160
+ }
161
+ }
226
162
227
- Ok ( ( ) )
163
+ Ok ( ret )
228
164
}
165
+ }
229
166
167
+ impl GitHub {
230
168
fn graphql < T : serde:: de:: DeserializeOwned > (
231
169
& self ,
232
170
query : & str ,
@@ -305,39 +243,3 @@ struct GraphRepository {
305
243
struct GraphIssues {
306
244
total_count : i64 ,
307
245
}
308
-
309
- #[ cfg( test) ]
310
- mod test {
311
- use super :: * ;
312
- use crate :: repositories:: RepositoryName ;
313
-
314
- #[ test]
315
- fn test_repository_name ( ) {
316
- macro_rules! assert_name {
317
- ( $url: expr => ( $owner: expr, $repo: expr, $host: expr) ) => {
318
- assert_eq!(
319
- GitHub :: repository_name( $url) ,
320
- Some ( RepositoryName {
321
- owner: $owner,
322
- repo: $repo,
323
- host: $host,
324
- } )
325
- ) ;
326
- } ;
327
- ( $url: expr => None ) => {
328
- assert_eq!( GitHub :: repository_name( $url) , None ) ;
329
- } ;
330
- }
331
-
332
- assert_name ! ( "https://github.com/onur/cratesfyi" => ( "onur" , "cratesfyi" , "github.com" ) ) ;
333
- assert_name ! ( "http://github.com/onur/cratesfyi" => ( "onur" , "cratesfyi" , "github.com" ) ) ;
334
- assert_name ! ( "https://github.com/onur/cratesfyi.git" => ( "onur" , "cratesfyi" , "github.com" ) ) ;
335
- assert_name ! ( "https://github.com/docopt/docopt.rs" => ( "docopt" , "docopt.rs" , "github.com" ) ) ;
336
- assert_name ! ( "https://github.com/onur23cmD_M_R_L_/crates_fy-i" => (
337
- "onur23cmD_M_R_L_" , "crates_fy-i" , "github.com"
338
- ) ) ;
339
- assert_name ! ( "https://www.github.com/onur/cratesfyi" => None ) ;
340
- assert_name ! ( "http://www.github.com/onur/cratesfyi" => None ) ;
341
- assert_name ! ( "http://www.gitlab.com/onur/cratesfyi" => None ) ;
342
- }
343
- }
0 commit comments