1
1
from __future__ import annotations
2
2
3
- import warnings
4
3
from collections .abc import Hashable , Iterator , Mapping , Sequence
5
4
from contextlib import contextmanager
6
5
from typing import (
24
23
)
25
24
from xarray .core .merge import merge_coordinates_without_align , merge_coords
26
25
from xarray .core .types import Self , T_DataArray
27
- from xarray .core .utils import Frozen , ReprObject
26
+ from xarray .core .utils import Frozen , ReprObject , emit_user_level_warning
28
27
from xarray .core .variable import Variable , as_variable , calculate_dimensions
29
28
30
29
if TYPE_CHECKING :
@@ -83,7 +82,7 @@ def variables(self):
83
82
def _update_coords (self , coords , indexes ):
84
83
raise NotImplementedError ()
85
84
86
- def _maybe_drop_multiindex_coords (self , coords ):
85
+ def _drop_coords (self , coord_names ):
87
86
raise NotImplementedError ()
88
87
89
88
def __iter__ (self ) -> Iterator [Hashable ]:
@@ -379,9 +378,9 @@ def _update_coords(
379
378
# redirect to DatasetCoordinates._update_coords
380
379
self ._data .coords ._update_coords (coords , indexes )
381
380
382
- def _maybe_drop_multiindex_coords (self , coords : set [ Hashable ]) -> None :
383
- # redirect to DatasetCoordinates._maybe_drop_multiindex_coords
384
- self ._data .coords ._maybe_drop_multiindex_coords ( coords )
381
+ def _drop_coords (self , coord_names ) :
382
+ # redirect to DatasetCoordinates._drop_coords
383
+ self ._data .coords ._drop_coords ( coord_names )
385
384
386
385
def _merge_raw (self , other , reflexive ):
387
386
"""For use with binary arithmetic."""
@@ -454,22 +453,40 @@ def __setitem__(self, key: Hashable, value: Any) -> None:
454
453
455
454
def update (self , other : Mapping [Any , Any ]) -> None :
456
455
"""Update this Coordinates variables with other coordinate variables."""
457
- other_obj : Coordinates | Mapping [Hashable , Variable ]
456
+
457
+ if not len (other ):
458
+ return
459
+
460
+ other_coords : Coordinates
458
461
459
462
if isinstance (other , Coordinates ):
460
- # special case: default indexes won't be created
461
- other_obj = other
463
+ # Coordinates object: just pass it ( default indexes won't be created)
464
+ other_coords = other
462
465
else :
463
- other_obj = getattr (other , "variables" , other )
466
+ other_coords = create_coords_with_default_indexes (
467
+ getattr (other , "variables" , other )
468
+ )
464
469
465
- self ._maybe_drop_multiindex_coords (set (other_obj ))
470
+ # Discarding the original indexed coordinates prior to the merge allows us to:
471
+ # - fail early if the new coordinates don't preserve the integrity of existing
472
+ # multi-coordinate indexes
473
+ # - drop & replace coordinates without alignment (note: we must keep indexed
474
+ # coordinates extracted from the DataArray objects passed as values to
475
+ # `other` - if any - as those are still used for aligning the old/new coordinates)
476
+ coords_to_align = drop_indexed_coords (set (other_coords ) & set (other ), self )
466
477
467
478
coords , indexes = merge_coords (
468
- [self . variables , other_obj ],
479
+ [coords_to_align , other_coords ],
469
480
priority_arg = 1 ,
470
- indexes = self .xindexes ,
481
+ indexes = coords_to_align .xindexes ,
471
482
)
472
483
484
+ # special case for PandasMultiIndex: updating only its dimension coordinate
485
+ # is still allowed but deprecated.
486
+ # It is the only case where we need to actually drop coordinates here (multi-index levels)
487
+ # TODO: remove when removing PandasMultiIndex's dimension coordinate.
488
+ self ._drop_coords (self ._names - coords_to_align ._names )
489
+
473
490
self ._update_coords (coords , indexes )
474
491
475
492
def _overwrite_indexes (
@@ -610,15 +627,20 @@ def _update_coords(
610
627
original_indexes .update (indexes )
611
628
self ._data ._indexes = original_indexes
612
629
613
- def _maybe_drop_multiindex_coords (self , coords : set [Hashable ]) -> None :
614
- """Drops variables in coords, and any associated variables as well."""
630
+ def _drop_coords (self , coord_names ):
631
+ # should drop indexed coordinates only
632
+ for name in coord_names :
633
+ del self ._data ._variables [name ]
634
+ del self ._data ._indexes [name ]
635
+ self ._data ._coord_names .difference_update (coord_names )
636
+
637
+ def _drop_indexed_coords (self , coords_to_drop : set [Hashable ]) -> None :
615
638
assert self ._data .xindexes is not None
616
- variables , indexes = drop_coords (
617
- coords , self ._data ._variables , self ._data .xindexes
618
- )
619
- self ._data ._coord_names .intersection_update (variables )
620
- self ._data ._variables = variables
621
- self ._data ._indexes = indexes
639
+ new_coords = drop_indexed_coords (coords_to_drop , self )
640
+ for name in self ._data ._coord_names - new_coords ._names :
641
+ del self ._data ._variables [name ]
642
+ self ._data ._indexes = dict (new_coords .xindexes )
643
+ self ._data ._coord_names .intersection_update (new_coords ._names )
622
644
623
645
def __delitem__ (self , key : Hashable ) -> None :
624
646
if key in self :
@@ -691,13 +713,11 @@ def _update_coords(
691
713
original_indexes .update (indexes )
692
714
self ._data ._indexes = original_indexes
693
715
694
- def _maybe_drop_multiindex_coords (self , coords : set [Hashable ]) -> None :
695
- """Drops variables in coords, and any associated variables as well."""
696
- variables , indexes = drop_coords (
697
- coords , self ._data ._coords , self ._data .xindexes
698
- )
699
- self ._data ._coords = variables
700
- self ._data ._indexes = indexes
716
+ def _drop_coords (self , coord_names ):
717
+ # should drop indexed coordinates only
718
+ for name in coord_names :
719
+ del self ._data ._coords [name ]
720
+ del self ._data ._indexes [name ]
701
721
702
722
@property
703
723
def variables (self ):
@@ -724,35 +744,48 @@ def _ipython_key_completions_(self):
724
744
return self ._data ._ipython_key_completions_ ()
725
745
726
746
727
- def drop_coords (
728
- coords_to_drop : set [Hashable ], variables , indexes : Indexes
729
- ) -> tuple [dict , dict ]:
730
- """Drop index variables associated with variables in coords_to_drop."""
731
- # Only warn when we're dropping the dimension with the multi-indexed coordinate
732
- # If asked to drop a subset of the levels in a multi-index, we raise an error
733
- # later but skip the warning here.
734
- new_variables = dict (variables .copy ())
735
- new_indexes = dict (indexes .copy ())
736
- for key in coords_to_drop & set (indexes ):
737
- maybe_midx = indexes [key ]
738
- idx_coord_names = set (indexes .get_all_coords (key ))
739
- if (
740
- isinstance (maybe_midx , PandasMultiIndex )
741
- and key == maybe_midx .dim
742
- and (idx_coord_names - coords_to_drop )
743
- ):
744
- warnings .warn (
745
- f"Updating MultiIndexed coordinate { key !r} would corrupt indices for "
746
- f"other variables: { list (maybe_midx .index .names )!r} . "
747
- f"This will raise an error in the future. Use `.drop_vars({ idx_coord_names !r} )` before "
747
+ def drop_indexed_coords (
748
+ coords_to_drop : set [Hashable ], coords : Coordinates
749
+ ) -> Coordinates :
750
+ """Drop indexed coordinates associated with coordinates in coords_to_drop.
751
+
752
+ This will raise an error in case it corrupts any passed index and its
753
+ coordinate variables.
754
+
755
+ """
756
+ new_variables = dict (coords .variables )
757
+ new_indexes = dict (coords .xindexes )
758
+
759
+ for idx , idx_coords in coords .xindexes .group_by_index ():
760
+ idx_drop_coords = set (idx_coords ) & coords_to_drop
761
+
762
+ # special case for pandas multi-index: still allow but deprecate
763
+ # dropping only its dimension coordinate.
764
+ # TODO: remove when removing PandasMultiIndex's dimension coordinate.
765
+ if isinstance (idx , PandasMultiIndex ) and idx_drop_coords == {idx .dim }:
766
+ idx_drop_coords .update (idx .index .names )
767
+ emit_user_level_warning (
768
+ f"updating coordinate { idx .dim !r} with a PandasMultiIndex would leave "
769
+ f"the multi-index level coordinates { list (idx .index .names )!r} in an inconsistent state. "
770
+ f"This will raise an error in the future. Use `.drop_vars({ list (idx_coords )!r} )` before "
748
771
"assigning new coordinate values." ,
749
772
FutureWarning ,
750
- stacklevel = 4 ,
751
773
)
752
- for k in idx_coord_names :
753
- del new_variables [k ]
754
- del new_indexes [k ]
755
- return new_variables , new_indexes
774
+
775
+ elif idx_drop_coords and len (idx_drop_coords ) != len (idx_coords ):
776
+ idx_drop_coords_str = ", " .join (f"{ k !r} " for k in idx_drop_coords )
777
+ idx_coords_str = ", " .join (f"{ k !r} " for k in idx_coords )
778
+ raise ValueError (
779
+ f"cannot drop or update coordinate(s) { idx_drop_coords_str } , which would corrupt "
780
+ f"the following index built from coordinates { idx_coords_str } :\n "
781
+ f"{ idx } "
782
+ )
783
+
784
+ for k in idx_drop_coords :
785
+ del new_variables [k ]
786
+ del new_indexes [k ]
787
+
788
+ return Coordinates ._construct_direct (coords = new_variables , indexes = new_indexes )
756
789
757
790
758
791
def assert_coordinate_consistent (
@@ -773,11 +806,15 @@ def assert_coordinate_consistent(
773
806
774
807
775
808
def create_coords_with_default_indexes (
776
- coords : Mapping [Any , Any ], data_vars : Mapping [Any , Variable ] | None = None
809
+ coords : Mapping [Any , Any ], data_vars : Mapping [Any , Any ] | None = None
777
810
) -> Coordinates :
778
- """Maybe create default indexes from a mapping of coordinates."""
811
+ """Returns a Coordinates object from a mapping of coordinates (arbitrary objects).
812
+
813
+ Create default (pandas) indexes for each of the input dimension coordinates.
814
+ Extract coordinates from each input DataArray.
779
815
780
- # Note: data_vars are needed here only because a pd.MultiIndex object
816
+ """
817
+ # Note: data_vars is needed here only because a pd.MultiIndex object
781
818
# can be promoted as coordinates.
782
819
# TODO: It won't be relevant anymore when this behavior will be dropped
783
820
# in favor of the more explicit ``Coordinates.from_pandas_multiindex()``.
@@ -791,34 +828,34 @@ def create_coords_with_default_indexes(
791
828
indexes : dict [Hashable , Index ] = {}
792
829
variables : dict [Hashable , Variable ] = {}
793
830
794
- maybe_index_vars : dict [Hashable , Variable ] = {}
795
- mindex_data_vars : list [Hashable ] = []
831
+ # promote any pandas multi-index in data_vars as coordinates
832
+ coords_promoted : dict [Hashable , Any ] = {}
833
+ pd_mindex_keys : list [Hashable ] = []
796
834
797
835
for k , v in all_variables .items ():
798
- if k in coords :
799
- maybe_index_vars [k ] = v
800
- elif isinstance (v , pd .MultiIndex ):
801
- # TODO: eventually stop promoting multi-index passed via data variables
802
- mindex_data_vars .append (k )
803
- maybe_index_vars [k ] = v
804
-
805
- if mindex_data_vars :
806
- warnings .warn (
807
- f"passing one or more `pandas.MultiIndex` via data variable(s) { mindex_data_vars } "
808
- "will no longer create indexed coordinates in the future. "
809
- "If you want to keep this behavior, pass it as coordinates instead." ,
836
+ if isinstance (v , pd .MultiIndex ):
837
+ coords_promoted [k ] = v
838
+ pd_mindex_keys .append (k )
839
+ elif k in coords :
840
+ coords_promoted [k ] = v
841
+
842
+ if pd_mindex_keys :
843
+ pd_mindex_keys_fmt = "," .join ([f"'{ k } '" for k in pd_mindex_keys ])
844
+ emit_user_level_warning (
845
+ f"the `pandas.MultiIndex` object(s) passed as { pd_mindex_keys_fmt } coordinate(s) or "
846
+ "data variable(s) will no longer be implicitly promoted and wrapped into "
847
+ "multiple indexed coordinates in the future "
848
+ "(i.e., one coordinate for each multi-index level + one dimension coordinate). "
849
+ "If you want to keep this behavior, you need to first wrap it explicitly using "
850
+ "`mindex_coords = xarray.Coordinates.from_pandas_multiindex(mindex_obj, 'dim')` "
851
+ "and pass it as coordinates, e.g., `xarray.Dataset(coords=mindex_coords)`, "
852
+ "`dataset.assign_coords(mindex_coords)` or `dataarray.assign_coords(mindex_coords)`." ,
810
853
FutureWarning ,
811
854
)
812
855
813
- maybe_index_vars = {
814
- k : v
815
- for k , v in all_variables .items ()
816
- if k in coords or isinstance (v , pd .MultiIndex )
817
- }
818
-
819
856
dataarray_coords : list [DataArrayCoordinates ] = []
820
857
821
- for name , obj in maybe_index_vars .items ():
858
+ for name , obj in coords_promoted .items ():
822
859
if isinstance (obj , DataArray ):
823
860
dataarray_coords .append (obj .coords )
824
861
0 commit comments