1
1
use crate :: error:: Result ;
2
+ use crate :: { db:: Pool , Config } ;
2
3
use chrono:: { DateTime , Utc } ;
3
4
use failure:: err_msg;
4
- use log:: debug;
5
+ use log:: { debug, warn } ;
5
6
use postgres:: Connection ;
6
7
use regex:: Regex ;
7
- use std:: str:: FromStr ;
8
+ use reqwest:: header:: { HeaderValue , ACCEPT , AUTHORIZATION , USER_AGENT } ;
9
+ use serde:: Deserialize ;
10
+
11
/// Value sent as the `User-Agent` header on every GitHub request.
///
/// Assembled at compile time from the Cargo package name, a single space and
/// the contents of the `git_version` file located in `OUT_DIR`.
// NOTE(review): `git_version` is presumably written by the build script, and
// this string is handed to `HeaderValue::from_static`, so the file should hold
// only valid header characters (e.g. no trailing newline) - confirm.
+ const APP_USER_AGENT : & str = concat ! (
12
+ env!( "CARGO_PKG_NAME" ) ,
13
+ " " ,
14
+ include_str!( concat!( env!( "OUT_DIR" ) , "/git_version" ) )
15
+ ) ;
8
16
9
17
/// Fields we need to use in cratesfyi
10
18
#[ derive( Debug ) ]
@@ -16,115 +24,153 @@ struct GitHubFields {
16
24
last_commit : DateTime < Utc > ,
17
25
}
18
26
19
- /// Updates github fields in crates table
20
- pub fn github_updater ( conn : & Connection ) -> Result < ( ) > {
21
- // TODO: This query assumes repository field in Cargo.toml is
22
- // always the same across all versions of a crate
23
- for row in & conn. query (
24
- "SELECT DISTINCT ON (crates.name)
25
- crates.name,
26
- crates.id,
27
- releases.repository_url
28
- FROM crates
29
- INNER JOIN releases ON releases.crate_id = crates.id
30
- WHERE releases.repository_url ~ '^https?://github.com' AND
31
- (crates.github_last_update < NOW() - INTERVAL '1 day' OR
32
- crates.github_last_update IS NULL)
33
- ORDER BY crates.name, releases.release_time DESC" ,
34
- & [ ] ,
35
- ) ? {
36
- let crate_name: String = row. get ( 0 ) ;
37
- let crate_id: i32 = row. get ( 1 ) ;
38
- let repository_url: String = row. get ( 2 ) ;
39
-
40
- if let Err ( err) = get_github_path ( & repository_url[ ..] )
41
- . ok_or_else ( || err_msg ( "Failed to get github path" ) )
42
- . and_then ( |path| get_github_fields ( & path[ ..] ) )
43
- . and_then ( |fields| {
44
- conn. execute (
45
- "UPDATE crates
46
- SET github_description = $1,
47
- github_stars = $2, github_forks = $3,
48
- github_issues = $4, github_last_commit = $5,
49
- github_last_update = NOW()
50
- WHERE id = $6" ,
51
- & [
52
- & fields. description ,
53
- & ( fields. stars as i32 ) ,
54
- & ( fields. forks as i32 ) ,
55
- & ( fields. issues as i32 ) ,
56
- & fields. last_commit . naive_utc ( ) ,
57
- & crate_id,
58
- ] ,
59
- )
60
- . or_else ( |e| Err ( e. into ( ) ) )
61
- } )
62
- {
63
- debug ! ( "Failed to update github fields of: {} {}" , crate_name, err) ;
27
+ pub struct GithubUpdater {
28
+ client : reqwest:: blocking:: Client ,
29
+ pool : Pool ,
30
+ }
31
+
32
+ impl GithubUpdater {
33
+ pub fn new ( config : & Config , pool : Pool ) -> Result < Self > {
34
+ let mut headers = vec ! [
35
+ ( USER_AGENT , HeaderValue :: from_static( APP_USER_AGENT ) ) ,
36
+ ( ACCEPT , HeaderValue :: from_static( "application/json" ) ) ,
37
+ ] ;
38
+
39
+ if let Some ( ( username, accesstoken) ) = config. github_auth ( ) {
40
+ let basicauth = format ! (
41
+ "Basic {}" ,
42
+ base64:: encode( format!( "{}:{}" , username, accesstoken) )
43
+ ) ;
44
+ headers. push ( ( AUTHORIZATION , HeaderValue :: from_str ( & basicauth) . unwrap ( ) ) ) ;
45
+ } else {
46
+ warn ! ( "No GitHub authorization specified, will be working with very low rate limits" ) ;
64
47
}
65
48
66
- // sleep for rate limits
67
- use std:: thread;
68
- use std:: time:: Duration ;
69
- thread:: sleep ( Duration :: from_secs ( 2 ) ) ;
49
+ let client = reqwest:: blocking:: Client :: builder ( )
50
+ . default_headers ( headers. into_iter ( ) . collect ( ) )
51
+ . build ( ) ?;
52
+
53
+ Ok ( GithubUpdater { client, pool } )
70
54
}
71
55
72
- Ok ( ( ) )
73
- }
56
+ /// Updates github fields in crates table
57
+ pub fn update_all_crates ( & self ) -> Result < ( ) > {
58
+ debug ! ( "Starting update of all crates" ) ;
59
+
60
+ if self . is_rate_limited ( ) ? {
61
+ warn ! ( "Skipping update because of rate limit" ) ;
62
+ return Ok ( ( ) ) ;
63
+ }
64
+
65
+ let conn = self . pool . get ( ) ?;
66
+ // TODO: This query assumes repository field in Cargo.toml is
67
+ // always the same across all versions of a crate
68
+ let rows = conn. query (
69
+ "SELECT DISTINCT ON (crates.name)
70
+ crates.name,
71
+ crates.id,
72
+ releases.repository_url
73
+ FROM crates
74
+ INNER JOIN releases ON releases.crate_id = crates.id
75
+ WHERE releases.repository_url ~ '^https?://github.com' AND
76
+ (crates.github_last_update < NOW() - INTERVAL '1 day' OR
77
+ crates.github_last_update IS NULL)
78
+ ORDER BY crates.name, releases.release_time DESC" ,
79
+ & [ ] ,
80
+ ) ?;
81
+
82
+ for row in & rows {
83
+ let crate_name: String = row. get ( 0 ) ;
84
+ let crate_id: i32 = row. get ( 1 ) ;
85
+ let repository_url: String = row. get ( 2 ) ;
86
+
87
+ debug ! ( "Updating {}" , crate_name) ;
88
+ if let Err ( err) = self . update_crate ( & conn, crate_id, & repository_url) {
89
+ if self . is_rate_limited ( ) ? {
90
+ warn ! ( "Skipping remaining updates because of rate limit" ) ;
91
+ return Ok ( ( ) ) ;
92
+ }
93
+ warn ! ( "Failed to update {}: {}" , crate_name, err) ;
94
+ }
95
+ }
96
+
97
+ debug ! ( "Completed all updates" ) ;
98
+ Ok ( ( ) )
99
+ }
100
+
101
+ fn is_rate_limited ( & self ) -> Result < bool > {
102
+ #[ derive( Deserialize ) ]
103
+ struct Response {
104
+ resources : Resources ,
105
+ }
106
+
107
+ #[ derive( Deserialize ) ]
108
+ struct Resources {
109
+ core : Resource ,
110
+ }
111
+
112
+ #[ derive( Deserialize ) ]
113
+ struct Resource {
114
+ remaining : u64 ,
115
+ }
116
+
117
+ let url = "https://api.github.com/rate_limit" ;
118
+ let response: Response = self . client . get ( url) . send ( ) ?. error_for_status ( ) ?. json ( ) ?;
74
119
75
- fn get_github_fields ( path : & str ) -> Result < GitHubFields > {
76
- use serde_json:: Value ;
77
-
78
- let body = {
79
- use reqwest:: { blocking:: Client , header:: USER_AGENT , StatusCode } ;
80
- use std:: { env, io:: Read } ;
81
-
82
- let client = Client :: new ( ) ;
83
- let mut body = String :: new ( ) ;
84
-
85
- let mut resp = client
86
- . get ( & format ! ( "https://api.github.com/repos/{}" , path) [ ..] )
87
- . header (
88
- USER_AGENT ,
89
- format ! ( "cratesfyi/{}" , env!( "CARGO_PKG_VERSION" ) ) ,
90
- )
91
- . basic_auth (
92
- env:: var ( "CRATESFYI_GITHUB_USERNAME" )
93
- . ok ( )
94
- . unwrap_or_default ( ) ,
95
- env:: var ( "CRATESFYI_GITHUB_ACCESSTOKEN" ) . ok ( ) ,
96
- )
97
- . send ( ) ?;
98
-
99
- if resp. status ( ) != StatusCode :: OK {
100
- return Err ( err_msg ( "Failed to get github data" ) ) ;
120
+ Ok ( response. resources . core . remaining == 0 )
121
+ }
122
+
123
+ fn update_crate ( & self , conn : & Connection , crate_id : i32 , repository_url : & str ) -> Result < ( ) > {
124
+ let path =
125
+ get_github_path ( repository_url) . ok_or_else ( || err_msg ( "Failed to get github path" ) ) ?;
126
+ let fields = self . get_github_fields ( & path) ?;
127
+
128
+ conn. execute (
129
+ "UPDATE crates
130
+ SET github_description = $1,
131
+ github_stars = $2, github_forks = $3,
132
+ github_issues = $4, github_last_commit = $5,
133
+ github_last_update = NOW()
134
+ WHERE id = $6" ,
135
+ & [
136
+ & fields. description ,
137
+ & ( fields. stars as i32 ) ,
138
+ & ( fields. forks as i32 ) ,
139
+ & ( fields. issues as i32 ) ,
140
+ & fields. last_commit . naive_utc ( ) ,
141
+ & crate_id,
142
+ ] ,
143
+ ) ?;
144
+
145
+ Ok ( ( ) )
146
+ }
147
+
148
+ fn get_github_fields ( & self , path : & str ) -> Result < GitHubFields > {
149
+ #[ derive( Deserialize ) ]
150
+ struct Response {
151
+ #[ serde( default ) ]
152
+ description : Option < String > ,
153
+ #[ serde( default ) ]
154
+ stargazers_count : i64 ,
155
+ #[ serde( default ) ]
156
+ forks_count : i64 ,
157
+ #[ serde( default ) ]
158
+ open_issues : i64 ,
159
+ #[ serde( default = "Utc::now" ) ]
160
+ pushed_at : DateTime < Utc > ,
101
161
}
102
162
103
- resp. read_to_string ( & mut body) ?;
104
- body
105
- } ;
106
-
107
- let json = Value :: from_str ( & body[ ..] ) ?;
108
- let obj = json. as_object ( ) . unwrap ( ) ;
109
-
110
- Ok ( GitHubFields {
111
- description : obj
112
- . get ( "description" )
113
- . and_then ( |d| d. as_str ( ) )
114
- . unwrap_or ( "" )
115
- . to_string ( ) ,
116
- stars : obj
117
- . get ( "stargazers_count" )
118
- . and_then ( |d| d. as_i64 ( ) )
119
- . unwrap_or ( 0 ) ,
120
- forks : obj. get ( "forks_count" ) . and_then ( |d| d. as_i64 ( ) ) . unwrap_or ( 0 ) ,
121
- issues : obj. get ( "open_issues" ) . and_then ( |d| d. as_i64 ( ) ) . unwrap_or ( 0 ) ,
122
- last_commit : DateTime :: parse_from_rfc3339 (
123
- obj. get ( "pushed_at" ) . and_then ( |d| d. as_str ( ) ) . unwrap_or ( "" ) ,
124
- )
125
- . map ( |datetime| datetime. with_timezone ( & Utc ) )
126
- . unwrap_or_else ( |_| Utc :: now ( ) ) ,
127
- } )
163
+ let url = format ! ( "https://api.github.com/repos/{}" , path) ;
164
+ let response: Response = self . client . get ( & url) . send ( ) ?. error_for_status ( ) ?. json ( ) ?;
165
+
166
+ Ok ( GitHubFields {
167
+ description : response. description . unwrap_or_default ( ) ,
168
+ stars : response. stargazers_count ,
169
+ forks : response. forks_count ,
170
+ issues : response. open_issues ,
171
+ last_commit : response. pushed_at ,
172
+ } )
173
+ }
128
174
}
129
175
130
176
fn get_github_path ( url : & str ) -> Option < String > {
0 commit comments