@@ -36,10 +36,11 @@ use crate::core::profiles::{Profiles, UnitFor};
 use crate::core::resolver::features::{self, FeaturesFor, RequestedFeatures};
 use crate::core::resolver::{HasDevUnits, Resolve, ResolveOpts};
 use crate::core::{FeatureValue, Package, PackageSet, Shell, Summary, Target};
-use crate::core::{PackageId, PackageIdSpec, TargetKind, Workspace};
+use crate::core::{PackageId, PackageIdSpec, SourceId, TargetKind, Workspace};
 use crate::ops;
 use crate::ops::resolve::WorkspaceResolve;
 use crate::util::config::Config;
+use crate::util::interning::InternedString;
 use crate::util::restricted_names::is_glob_pattern;
 use crate::util::{closest_msg, profile, CargoResult, StableHasher};
 
@@ -508,6 +509,12 @@ pub fn create_bcx<'a, 'cfg>(
         interner,
     )?;
 
+    // TODO: In theory, Cargo should also dedupe the roots, but I'm uncertain
+    // what heuristics to use in that case.
+    if build_config.mode == (CompileMode::Doc { deps: true }) {
+        remove_duplicate_doc(build_config, &mut unit_graph);
+    }
+
     if build_config
         .requested_kinds
         .iter()
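A quick illustration of why this call is gated on `CompileMode::Doc { deps: true }`: rustdoc keys its output directory on the crate name alone, so any two doc units that share a crate name write to the same place. A minimal sketch of the collision (the `doc_dir` helper is hypothetical, standing in for rustdoc's layout, not a Cargo API):

```rust
use std::path::{Path, PathBuf};

// Hypothetical helper mirroring rustdoc's output layout: the directory is
// derived from the crate name alone, with no version or source in the path.
fn doc_dir(doc_root: &Path, crate_name: &str) -> PathBuf {
    doc_root.join(crate_name)
}

fn main() {
    let root = Path::new("target/doc");
    // Two units documenting different versions of the same crate...
    let rand_old = doc_dir(root, "rand"); // e.g. rand 0.6.5
    let rand_new = doc_dir(root, "rand"); // e.g. rand 0.7.3
    // ...land in the same directory, so the invocations stomp on each other.
    assert_eq!(rand_old, rand_new);
}
```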
@@ -1490,3 +1497,113 @@ fn opt_patterns_and_names(
     }
     Ok((opt_patterns, opt_names))
 }
+
+/// Removes duplicate CompileMode::Doc units that would cause problems with
+/// filename collisions.
+///
+/// Rustdoc only separates units by crate name in the file directory
+/// structure. If any two units with the same crate name exist, this would
+/// cause a filename collision, causing different rustdoc invocations to stomp
+/// on one another's files.
+///
+/// Unfortunately this does not remove all duplicates, as some of them are
+/// either user error, or difficult to remove. Cases that I can think of:
+///
+/// - Same target name in different packages. See the `collision_doc` test.
+/// - Different sources. See the `collision_doc_sources` test.
+///
+/// Ideally this would not be necessary.
+fn remove_duplicate_doc(build_config: &BuildConfig, unit_graph: &mut UnitGraph) {
+    // NOTE: There is some risk that this can introduce problems because it
+    // may create orphans in the unit graph (parts of the tree get detached
+    // from the roots). I currently can't think of any way this will cause a
+    // problem because all other parts of Cargo traverse the graph starting
+    // from the roots. Perhaps this should scan for detached units and remove
+    // them too?
+    //
+    // First, create a mapping of crate_name -> Unit so we can see where the
+    // duplicates are.
+    let mut all_docs: HashMap<String, Vec<Unit>> = HashMap::new();
+    for unit in unit_graph.keys() {
+        if unit.mode.is_doc() {
+            all_docs
+                .entry(unit.target.crate_name())
+                .or_default()
+                .push(unit.clone());
+        }
+    }
+    // Keep track of units to remove so that they can be efficiently removed
+    // from the unit_deps.
+    let mut removed_units: HashSet<Unit> = HashSet::new();
+    let mut remove = |units: Vec<Unit>, reason: &str| {
+        for unit in units {
+            log::debug!(
+                "removing duplicate doc due to {} for package {} target `{}`",
+                reason,
+                unit.pkg,
+                unit.target.name()
+            );
+            unit_graph.remove(&unit);
+            removed_units.insert(unit);
+        }
+    };
+    // Iterate over the duplicates and try to remove them from unit_graph.
+    for (_crate_name, mut units) in all_docs {
+        if units.len() == 1 {
+            continue;
+        }
+        // Prefer target over host if --target was not specified.
+        if build_config
+            .requested_kinds
+            .iter()
+            .all(CompileKind::is_host)
+        {
+            let (to_remove, remaining_units): (Vec<Unit>, Vec<Unit>) =
+                units.into_iter().partition(|unit| unit.kind.is_host());
+            // Note these duplicates may not be real duplicates, since they
+            // might get merged in rebuild_unit_graph_shared. Either way, it
+            // shouldn't hurt to remove them early (although the report in the
+            // log might be confusing).
+            remove(to_remove, "host/target merger");
+            units = remaining_units;
+            if units.len() == 1 {
+                continue;
+            }
+        }
+        // Prefer newer versions over older.
+        let mut source_map: HashMap<(InternedString, SourceId, CompileKind), Vec<Unit>> =
+            HashMap::new();
+        for unit in units {
+            let pkg_id = unit.pkg.package_id();
+            // Note, this does not detect duplicates from different sources.
+            source_map
+                .entry((pkg_id.name(), pkg_id.source_id(), unit.kind))
+                .or_default()
+                .push(unit);
+        }
+        let mut remaining_units = Vec::new();
+        for (_key, mut units) in source_map {
+            if units.len() > 1 {
+                units.sort_by(|a, b| a.pkg.version().partial_cmp(b.pkg.version()).unwrap());
+                // Remove any entries with a version older than the newest.
+                let newest_version = units.last().unwrap().pkg.version().clone();
+                let (to_remove, keep_units): (Vec<Unit>, Vec<Unit>) = units
+                    .into_iter()
+                    .partition(|unit| unit.pkg.version() < &newest_version);
+                remove(to_remove, "older version");
+                remaining_units.extend(keep_units);
+            } else {
+                remaining_units.extend(units);
+            }
+        }
+        if remaining_units.len() == 1 {
+            continue;
+        }
+        // Are there other heuristics to remove duplicates that would make
+        // sense? Maybe prefer path sources over all others?
+    }
+    // Also remove units from the unit_deps so there aren't any dangling edges.
+    for unit_deps in unit_graph.values_mut() {
+        unit_deps.retain(|unit_dep| !removed_units.contains(&unit_dep.unit));
+    }
+}
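For intuition, here is a standalone sketch of the two heuristics on plain data. `DocUnit` is an illustrative stand-in for Cargo's `Unit` (its fields approximate crate name, package version, and compile kind), and it simplifies one point: it always prefers target units over host units, whereas the real code only does that when `--target` was not requested.

```rust
use std::collections::HashMap;

// Illustrative stand-in for Cargo's Unit; not the real type.
#[derive(Debug, PartialEq)]
struct DocUnit {
    crate_name: String,
    version: (u64, u64, u64), // stand-in for a semver::Version
    host: bool,               // true = host kind, false = target kind
}

// Group doc units by crate name, prefer target units over host units, then
// keep only the newest version in each group (mirroring the two heuristics).
fn dedup_docs(units: Vec<DocUnit>) -> Vec<DocUnit> {
    let mut by_name: HashMap<String, Vec<DocUnit>> = HashMap::new();
    for u in units {
        by_name.entry(u.crate_name.clone()).or_default().push(u);
    }
    let mut kept = Vec::new();
    for (_name, mut group) in by_name {
        if group.len() > 1 {
            // Heuristic 1: drop host units if a target unit is available.
            if group.iter().any(|u| !u.host) {
                group.retain(|u| !u.host);
            }
            // Heuristic 2: keep only the newest remaining version. Units that
            // still tie after this (e.g. different sources) are left alone,
            // as in the real code.
            let newest = group.iter().map(|u| u.version).max().unwrap();
            group.retain(|u| u.version == newest);
        }
        kept.extend(group);
    }
    kept
}

fn main() {
    let kept = dedup_docs(vec![
        DocUnit { crate_name: "rand".into(), version: (0, 6, 5), host: false },
        DocUnit { crate_name: "rand".into(), version: (0, 7, 3), host: false },
    ]);
    assert_eq!(kept.len(), 1);
    assert_eq!(kept[0].version, (0, 7, 3));
}
```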
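On the NOTE about orphans: if the pruning ever did need to clean up detached subtrees, one option is a reachability sweep from the roots. A sketch on a toy graph (the types here are illustrative, not Cargo's `UnitGraph`):

```rust
use std::collections::{HashMap, HashSet};

// Toy unit graph: node id -> ids of its dependencies.
type Graph = HashMap<u32, Vec<u32>>;

// Walk from the roots and drop any node that is no longer reachable,
// i.e. the "scan for detached units and remove them too" idea.
fn drop_orphans(graph: &mut Graph, roots: &[u32]) {
    let mut reachable: HashSet<u32> = HashSet::new();
    let mut stack: Vec<u32> = roots.to_vec();
    while let Some(n) = stack.pop() {
        if reachable.insert(n) {
            if let Some(deps) = graph.get(&n) {
                stack.extend(deps.iter().copied());
            }
        }
    }
    graph.retain(|n, _| reachable.contains(n));
}

fn main() {
    // 0 -> 1 -> 2, and 3 is detached (as if its parent was just removed).
    let mut g: Graph =
        [(0, vec![1]), (1, vec![2]), (2, vec![]), (3, vec![2])].into_iter().collect();
    drop_orphans(&mut g, &[0]);
    assert!(!g.contains_key(&3)); // the orphan is gone
    assert_eq!(g.len(), 3);
}
```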