1
1
use crate :: error:: Result ;
2
2
use crate :: { db:: Pool , Config } ;
3
3
use chrono:: { DateTime , Utc } ;
4
- use log:: { debug, info, trace, warn} ;
5
- use once_cell:: sync:: Lazy ;
4
+ use log:: { info, trace, warn} ;
6
5
use postgres:: Client ;
7
- use regex:: Regex ;
8
6
use reqwest:: {
9
7
blocking:: Client as HttpClient ,
10
8
header:: { HeaderMap , HeaderValue , ACCEPT , AUTHORIZATION , USER_AGENT } ,
11
9
} ;
12
10
use serde:: Deserialize ;
13
- use std:: collections:: HashSet ;
14
11
use std:: sync:: Arc ;
15
12
16
- use crate :: utils:: { RepositoryName , Updater , APP_USER_AGENT } ;
13
+ use crate :: utils:: { Updater , APP_USER_AGENT } ;
17
14
18
15
const GRAPHQL_UPDATE : & str = "query($ids: [ID!]!) {
19
16
nodes(ids: $ids) {
@@ -81,42 +78,6 @@ impl Updater for GithubUpdater {
81
78
} ) )
82
79
}
83
80
84
- fn backfill_repositories ( & self ) -> Result < ( ) > {
85
- info ! ( "started backfilling GitHub repository stats" ) ;
86
-
87
- let mut conn = self . pool . get ( ) ?;
88
- let needs_backfilling = conn. query (
89
- "SELECT releases.id, crates.name, releases.version, releases.repository_url
90
- FROM releases
91
- INNER JOIN crates ON (crates.id = releases.crate_id)
92
- WHERE repository IS NULL AND repository_url LIKE '%github.com%';" ,
93
- & [ ] ,
94
- ) ?;
95
-
96
- let mut missing_urls = HashSet :: new ( ) ;
97
- for row in & needs_backfilling {
98
- let id: i32 = row. get ( "id" ) ;
99
- let name: String = row. get ( "name" ) ;
100
- let version: String = row. get ( "version" ) ;
101
- let url: String = row. get ( "repository_url" ) ;
102
-
103
- if missing_urls. contains ( & url) {
104
- debug ! ( "{} {} points to a known missing repo" , name, version) ;
105
- } else if let Some ( node_id) = self . load_repository ( & mut conn, & url) ? {
106
- conn. execute (
107
- "UPDATE releases SET repository = $1 WHERE id = $2;" ,
108
- & [ & node_id, & id] ,
109
- ) ?;
110
- info ! ( "backfilled GitHub repository for {} {}" , name, version) ;
111
- } else {
112
- debug ! ( "{} {} does not point to a GitHub repository" , name, version) ;
113
- missing_urls. insert ( url) ;
114
- }
115
- }
116
-
117
- Ok ( ( ) )
118
- }
119
-
120
81
fn load_repository ( & self , conn : & mut Client , url : & str ) -> Result < Option < i32 > > {
121
82
let name = match Self :: repository_name ( url) {
122
83
Some ( name) => name,
@@ -125,8 +86,8 @@ impl Updater for GithubUpdater {
125
86
126
87
// Avoid querying the GitHub API for repositories we already loaded.
127
88
if let Some ( row) = conn. query_opt (
128
- "SELECT id FROM repositories WHERE name = $1 AND host = 'github' LIMIT 1;" ,
129
- & [ & format ! ( "{}/{}" , name. owner, name. repo) ] ,
89
+ "SELECT id FROM repositories WHERE name = $1 AND host = $2 LIMIT 1;" ,
90
+ & [ & format ! ( "{}/{}" , name. owner, name. repo) , & name . host ] ,
130
91
) ? {
131
92
return Ok ( Some ( row. get ( "id" ) ) ) ;
132
93
}
@@ -140,7 +101,17 @@ impl Updater for GithubUpdater {
140
101
} ) ,
141
102
) ?;
142
103
if let Some ( repo) = response. data . repository {
143
- Ok ( Some ( self . store_repository ( conn, & repo) ?) )
104
+ Ok ( Some ( self . store_repository (
105
+ conn,
106
+ Self :: hosts ( ) [ 0 ] ,
107
+ & repo. id ,
108
+ & repo. name_with_owner ,
109
+ & repo. description ,
110
+ & repo. pushed_at ,
111
+ repo. stargazer_count ,
112
+ repo. fork_count ,
113
+ repo. issues . total_count ,
114
+ ) ?) )
144
115
} else if let Some ( error) = response. errors . get ( 0 ) {
145
116
use GraphErrorPath :: * ;
146
117
match ( error. error_type . as_str ( ) , error. path . as_slice ( ) ) {
@@ -156,55 +127,52 @@ impl Updater for GithubUpdater {
156
127
fn update_all_crates ( & self ) -> Result < ( ) > {
157
128
info ! ( "started updating GitHub repository stats" ) ;
158
129
130
+ let mut updated = 0 ;
159
131
let mut conn = self . pool . get ( ) ?;
160
- let needs_update = conn
161
- . query (
162
- "SELECT host_id
163
- FROM repositories
164
- WHERE host = 'github' AND updated_at < NOW() - INTERVAL '1 day';" ,
165
- & [ ] ,
166
- ) ?
167
- . into_iter ( )
168
- . map ( |row| row. get ( 0 ) )
169
- . collect :: < Vec < String > > ( ) ;
170
-
171
- if needs_update. is_empty ( ) {
172
- info ! ( "no GitHub repository stats needed to be updated" ) ;
173
- return Ok ( ( ) ) ;
174
- }
175
-
176
- for chunk in needs_update. chunks ( UPDATE_CHUNK_SIZE ) {
177
- if let Err ( err) = self . update_repositories ( & mut conn, & chunk) {
178
- if err. downcast_ref :: < RateLimitReached > ( ) . is_some ( ) {
179
- warn ! ( "rate limit reached, blocked the GitHub repository stats updater" ) ;
180
- return Ok ( ( ) ) ;
132
+ for host in Self :: hosts ( ) {
133
+ let needs_update = conn
134
+ . query (
135
+ "SELECT host_id
136
+ FROM repositories
137
+ WHERE host = $1 AND updated_at < NOW() - INTERVAL '1 day';" ,
138
+ & [ & host] ,
139
+ ) ?
140
+ . into_iter ( )
141
+ . map ( |row| row. get ( 0 ) )
142
+ . collect :: < Vec < String > > ( ) ;
143
+
144
+ for chunk in needs_update. chunks ( UPDATE_CHUNK_SIZE ) {
145
+ if let Err ( err) = self . update_repositories ( & mut conn, & chunk) {
146
+ if err. downcast_ref :: < RateLimitReached > ( ) . is_some ( ) {
147
+ warn ! ( "rate limit reached, blocked the GitHub repository stats updater" ) ;
148
+ return Ok ( ( ) ) ;
149
+ }
150
+ return Err ( err) ;
181
151
}
182
- return Err ( err) ;
183
152
}
153
+
154
+ updated += needs_update. len ( ) ;
184
155
}
185
156
186
- info ! ( "finished updating GitHub repository stats" ) ;
157
+ if updated == 0 {
158
+ info ! ( "no GitHub repository stats needed to be updated" ) ;
159
+ } else {
160
+ info ! ( "finished updating GitHub repository stats" ) ;
161
+ }
187
162
Ok ( ( ) )
188
163
}
189
164
190
- fn repository_name ( url : & str ) -> Option < RepositoryName > {
191
- static RE : Lazy < Regex > = Lazy :: new ( || {
192
- Regex :: new ( r"https?://(www.)?github\.com/(?P<owner>[\w\._-]+)/(?P<repo>[\w\._-]+)" )
193
- . unwrap ( )
194
- } ) ;
195
-
196
- let cap = RE . captures ( url) ?;
197
- let owner = cap. name ( "owner" ) . expect ( "missing group 'owner'" ) . as_str ( ) ;
198
- let repo = cap. name ( "repo" ) . expect ( "missing group 'repo'" ) . as_str ( ) ;
199
- Some ( RepositoryName {
200
- owner,
201
- repo : repo. strip_suffix ( ".git" ) . unwrap_or ( repo) ,
202
- } )
203
- }
204
-
205
165
/// Returns the display name of this updater.
fn name() -> &'static str {
    "Github"
}
168
+
169
/// Returns the host names handled by this updater.
///
/// The first entry is the one used as the `host` value when repositories are stored,
/// updated or deleted by this updater.
fn hosts() -> &'static [&'static str] {
    &["github.com"]
}
172
+
173
+ fn pool ( & self ) -> & Pool {
174
+ & self . pool
175
+ }
208
176
}
209
177
210
178
impl GithubUpdater {
@@ -226,19 +194,31 @@ impl GithubUpdater {
226
194
return Err ( RateLimitReached . into ( ) ) ;
227
195
}
228
196
197
+ let host = Self :: hosts ( ) [ 0 ] ;
198
+
229
199
// When a node is missing (for example if the repository was deleted or made private) the
230
200
// GraphQL API will return *both* a `null` instead of the data in the nodes list and a
231
201
// `NOT_FOUND` error in the errors list.
232
202
for node in & response. data . nodes {
233
203
if let Some ( node) = node {
234
- self . store_repository ( conn, & node) ?;
204
+ self . store_repository (
205
+ conn,
206
+ host,
207
+ & node. id ,
208
+ & node. name_with_owner ,
209
+ & node. description ,
210
+ & node. pushed_at ,
211
+ node. stargazer_count ,
212
+ node. fork_count ,
213
+ node. issues . total_count ,
214
+ ) ?;
235
215
}
236
216
}
237
217
for error in & response. errors {
238
218
use GraphErrorPath :: * ;
239
219
match ( error. error_type . as_str ( ) , error. path . as_slice ( ) ) {
240
220
( "NOT_FOUND" , [ Segment ( nodes) , Index ( idx) ] ) if nodes == "nodes" => {
241
- self . delete_repository ( conn, & node_ids[ * idx as usize ] , "github" ) ?;
221
+ self . delete_repository ( conn, & node_ids[ * idx as usize ] , host ) ?;
242
222
}
243
223
_ => failure:: bail!( "error updating repositories: {}" , error. message) ,
244
224
}
@@ -263,38 +243,6 @@ impl GithubUpdater {
263
243
. error_for_status ( ) ?
264
244
. json ( ) ?)
265
245
}
266
-
267
- fn store_repository ( & self , conn : & mut Client , repo : & GraphRepository ) -> Result < i32 > {
268
- trace ! (
269
- "storing GitHub repository stats for {}" ,
270
- repo. name_with_owner
271
- ) ;
272
- let data = conn. query_one (
273
- "INSERT INTO repositories (
274
- host, host_id, name, description, last_commit, stars, forks, issues, updated_at
275
- ) VALUES ('github', $1, $2, $3, $4, $5, $6, $7, NOW())
276
- ON CONFLICT (host, host_id) DO
277
- UPDATE SET
278
- name = $2,
279
- description = $3,
280
- last_commit = $4,
281
- stars = $5,
282
- forks = $6,
283
- issues = $7,
284
- updated_at = NOW()
285
- RETURNING id;" ,
286
- & [
287
- & repo. id ,
288
- & repo. name_with_owner ,
289
- & repo. description ,
290
- & repo. pushed_at ,
291
- & ( repo. stargazer_count as i32 ) ,
292
- & ( repo. fork_count as i32 ) ,
293
- & ( repo. issues . total_count as i32 ) ,
294
- ] ,
295
- ) ?;
296
- Ok ( data. get ( 0 ) )
297
- }
298
246
}
299
247
300
248
#[ derive( Debug , failure:: Fail ) ]
@@ -361,29 +309,31 @@ struct GraphIssues {
361
309
#[cfg(test)]
mod test {
    use super::*;
    use crate::utils::RepositoryName;

    /// Checks that repository URLs are split into owner, repository and host parts.
    #[test]
    fn test_repository_name() {
        // A macro (rather than a helper function) keeps the reported failure location on
        // the individual case that failed.
        macro_rules! assert_name {
            ($input:expr => ($expected_owner:expr, $expected_repo:expr, $expected_host:expr)) => {
                assert_eq!(
                    GithubUpdater::repository_name($input),
                    Some(RepositoryName {
                        owner: $expected_owner,
                        repo: $expected_repo,
                        host: $expected_host,
                    })
                );
            };
        }

        // Scheme variations.
        assert_name!("https://github.com/onur/cratesfyi" => ("onur", "cratesfyi", "github.com"));
        assert_name!("http://github.com/onur/cratesfyi" => ("onur", "cratesfyi", "github.com"));

        // A `www.` prefix is kept as part of the host.
        assert_name!("https://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi", "www.github.com"));
        assert_name!("http://www.github.com/onur/cratesfyi" => ("onur", "cratesfyi", "www.github.com"));

        // A trailing `.git` is dropped, but other dots in the repository name are kept.
        assert_name!("https://github.com/onur/cratesfyi.git" => ("onur", "cratesfyi", "github.com"));
        assert_name!("https://github.com/docopt/docopt.rs" => ("docopt", "docopt.rs", "github.com"));

        // Digits, mixed case, underscores and dashes.
        assert_name!("https://github.com/onur23cmD_M_R_L_/crates_fy-i" => (
            "onur23cmD_M_R_L_", "crates_fy-i", "github.com"
        ));
    }
}
0 commit comments