@@ -40,6 +40,10 @@ use util::{RequestUtils, CargoResult, internal, ChainError, human};
40
40
use version:: { EncodableVersion , NewVersion } ;
41
41
use { Model , User , Keyword , Version , Category , Badge , Replica } ;
42
42
43
+ /// Hosts known not to serve crate documentation and that may be malicious
44
+ /// (e.g. ad-tracking networks); matched against the host of a documentation URL.
45
+ const DOCUMENTATION_BLACKLIST : [ & ' static str ; 1 ] = [ "rust-ci.org" ] ;
46
+
43
47
#[ derive( Debug , Insertable , Queryable , Identifiable , Associations , AsChangeset ) ]
44
48
#[ belongs_to( Crate ) ]
45
49
#[ primary_key( crate_id, date) ]
@@ -538,6 +542,8 @@ impl Crate {
538
542
let keyword_ids = keywords. map ( |kws| kws. iter ( ) . map ( |kw| kw. keyword . clone ( ) ) . collect ( ) ) ;
539
543
let category_ids = categories. map ( |cats| cats. iter ( ) . map ( |cat| cat. slug . clone ( ) ) . collect ( ) ) ;
540
544
let badges = badges. map ( |bs| bs. into_iter ( ) . map ( |b| b. encodable ( ) ) . collect ( ) ) ;
545
+ let documentation = Crate :: remove_blacklisted_documentation_urls ( documentation) ;
546
+
541
547
EncodableCrate {
542
548
id : name. clone ( ) ,
543
549
name : name. clone ( ) ,
@@ -566,6 +572,34 @@ impl Crate {
566
572
}
567
573
}
568
574
575
+ /// Return `None` if the documentation URL host matches a blacklisted host
576
+ fn remove_blacklisted_documentation_urls ( url : Option < String > ) -> Option < String > {
577
+ // Handles if documentation URL is None
578
+ let url = match url {
579
+ Some ( url) => url,
580
+ None => return None ,
581
+ } ;
582
+
583
+ // Handles unsuccessful parsing of documentation URL
584
+ let parsed_url = match Url :: parse ( & url) {
585
+ Ok ( parsed_url) => parsed_url,
586
+ Err ( _) => return None ,
587
+ } ;
588
+
589
+ // Extract host string from documentation URL
590
+ let url_host = match parsed_url. host_str ( ) {
591
+ Some ( url_host) => url_host,
592
+ None => return None ,
593
+ } ;
594
+
595
+ // Match documentation URL host against blacklisted host array elements
596
+ if DOCUMENTATION_BLACKLIST . contains ( & url_host) {
597
+ None
598
+ } else {
599
+ Some ( url)
600
+ }
601
+ }
602
+
569
603
pub fn max_version ( & self , conn : & PgConnection ) -> CargoResult < semver:: Version > {
570
604
use schema:: versions:: dsl:: * ;
571
605
@@ -1594,3 +1628,41 @@ pub fn reverse_dependencies(req: &mut Request) -> CargoResult<Response> {
1594
1628
use diesel:: types:: { Text , Date } ;
1595
1629
// Declares `canon_crate_name` as a SQL function taking one `Text` argument and yielding `Text`.
// NOTE(review): presumably `canon_crate_name_t` is the helper type name the macro generates — confirm.
sql_function ! ( canon_crate_name, canon_crate_name_t, ( x: Text ) -> Text ) ;
1596
1630
// Declares `to_char` as a SQL function taking a `Date` and a `Text` argument and yielding `Text`.
sql_function ! ( to_char, to_char_t, ( a: Date , b: Text ) -> Text ) ;
1631
+
1632
#[cfg(test)]
mod tests {
    use super::Crate;

    /// Runs the documentation blacklist filter over `url`.
    fn filtered(url: &str) -> Option<String> {
        Crate::remove_blacklisted_documentation_urls(Some(url.to_string()))
    }

    /// A crate without a documentation URL stays without one.
    #[test]
    fn documentation_blacklist_no_url_provided() {
        assert!(Crate::remove_blacklisted_documentation_urls(None).is_none());
    }

    /// Text that does not parse as a URL is dropped.
    #[test]
    fn documentation_blacklist_invalid_url() {
        assert_eq!(filtered("not a url"), None);
    }

    /// A host that merely starts with a blacklisted name is left alone.
    #[test]
    fn documentation_blacklist_url_contains_partial_match() {
        let url = "http://rust-ci.organists.com";
        assert_eq!(filtered(url), Some(url.to_string()));
    }

    /// A URL hosted on a blacklisted host is dropped.
    #[test]
    fn documentation_blacklist_blacklisted_url() {
        let url = "http://rust-ci.org/crate/crate-0.1/doc/crate-0.1";
        assert_eq!(filtered(url), None);
    }
}
0 commit comments