@@ -1,4 +1,5 @@
 import itertools
+import warnings
 from collections import Counter
 
 import pandas as pd
@@ -8,6 +9,7 @@
 from .dataarray import DataArray
 from .dataset import Dataset
 from .merge import merge
+from .utils import iterate_nested
 
 
 def _infer_concat_order_from_positions(datasets):
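The newly imported `iterate_nested` helper is not defined in this diff; the guard added to `combine_nested` below relies on it to walk arbitrarily nested lists of objects. A minimal sketch of the behavior assumed here (not necessarily the exact `.utils` implementation):

```python
def iterate_nested(nested_list):
    # Depth-first walk: yield every leaf object from an
    # arbitrarily nested structure of Python lists.
    for item in nested_list:
        if isinstance(item, list):
            yield from iterate_nested(item)
        else:
            yield item
```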
@@ -544,6 +546,15 @@ def combine_nested(
     concat
     merge
     """
+    mixed_datasets_and_arrays = any(
+        isinstance(obj, Dataset) for obj in iterate_nested(datasets)
+    ) and any(
+        isinstance(obj, DataArray) and obj.name is None
+        for obj in iterate_nested(datasets)
+    )
+    if mixed_datasets_and_arrays:
+        raise ValueError("Can't combine datasets with unnamed arrays.")
+
     if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
         concat_dim = [concat_dim]
 
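This guard makes `combine_nested` fail fast when Datasets are mixed with unnamed DataArrays anywhere in the nested input, since an unnamed array has no variable name to merge under. A hedged illustration (the object names are invented for the example):

```python
import xarray as xr

ds = xr.Dataset({"a": ("x", [0, 1, 2])})
unnamed = xr.DataArray([0, 1, 2], dims="x")  # .name is None

try:
    # Mixing a Dataset with an unnamed DataArray is now rejected up front.
    xr.combine_nested([ds, unnamed], concat_dim="x")
except ValueError as err:
    print(err)  # Can't combine datasets with unnamed arrays.
```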
@@ -565,18 +576,79 @@ def vars_as_keys(ds):
     return tuple(sorted(ds))
 
 
-def combine_by_coords(
+def _combine_single_variable_hypercube(
     datasets,
+    fill_value=dtypes.NA,
+    data_vars="all",
+    coords="different",
+    compat="no_conflicts",
+    join="outer",
+    combine_attrs="no_conflicts",
+):
+    """
+    Attempt to combine a list of Datasets into a hypercube using their
+    coordinates.
+
+    All provided Datasets must belong to a single variable, i.e. must be
+    assigned the same variable name. This precondition is not checked by this
+    function, so the caller is assumed to know what it's doing.
+
+    This function is NOT part of the public API.
+    """
+    if len(datasets) == 0:
+        raise ValueError(
+            "At least one Dataset is required to resolve variable names "
+            "for combined hypercube."
+        )
+
+    combined_ids, concat_dims = _infer_concat_order_from_coords(list(datasets))
+
+    if fill_value is None:
+        # check that datasets form complete hypercube
+        _check_shape_tile_ids(combined_ids)
+    else:
+        # check only that all datasets have same dimension depth for these
+        # vars
+        _check_dimension_depth_tile_ids(combined_ids)
+
+    # Concatenate along all of concat_dims one by one to create single ds
+    concatenated = _combine_nd(
+        combined_ids,
+        concat_dims=concat_dims,
+        data_vars=data_vars,
+        coords=coords,
+        compat=compat,
+        fill_value=fill_value,
+        join=join,
+        combine_attrs=combine_attrs,
+    )
+
+    # Check the overall coordinates are monotonically increasing
+    for dim in concat_dims:
+        indexes = concatenated.indexes.get(dim)
+        if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing):
+            raise ValueError(
+                "Resulting object does not have monotonic"
+                " global indexes along dimension {}".format(dim)
+            )
+
+    return concatenated
+
+
+# TODO remove empty list default param after version 0.19, see PR4696
+def combine_by_coords(
+    data_objects=[],
     compat="no_conflicts",
     data_vars="all",
     coords="different",
     fill_value=dtypes.NA,
     join="outer",
     combine_attrs="no_conflicts",
+    datasets=None,
 ):
     """
-    Attempt to auto-magically combine the given datasets into one by using
-    dimension coordinates.
+    Attempt to auto-magically combine the given datasets (or data arrays)
+    into one by using dimension coordinates.
 
     This method attempts to combine a group of datasets along any number of
     dimensions into a single entity by inspecting coords and metadata and using
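Because the first parameter is renamed, `datasets` survives only as a deprecated keyword, handled by the shim added further down in the function body. A small sketch of the transition, with an invented dataset:

```python
import xarray as xr

ds = xr.Dataset({"a": ("x", [0, 1])}, coords={"x": [0, 1]})

# Old keyword still accepted for now, but emits the rename warning.
combined = xr.combine_by_coords(datasets=[ds])

# Preferred spelling after this change:
combined = xr.combine_by_coords(data_objects=[ds])
```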
@@ -600,8 +672,9 @@ def combine_by_coords(
 
     Parameters
     ----------
-    datasets : sequence of xarray.Dataset
-        Dataset objects to combine.
+    data_objects : sequence of xarray.Dataset or sequence of xarray.DataArray
+        Data objects to combine.
+
     compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional
         String indicating how to compare variables of the same name for
         potential conflicts:
@@ -776,51 +849,62 @@ def combine_by_coords(
         precipitation  (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176
     """
 
-    # Group by data vars
-    sorted_datasets = sorted(datasets, key=vars_as_keys)
-    grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
-
-    # Perform the multidimensional combine on each group of data variables
-    # before merging back together
-    concatenated_grouped_by_data_vars = []
-    for vars, datasets_with_same_vars in grouped_by_vars:
-        combined_ids, concat_dims = _infer_concat_order_from_coords(
-            list(datasets_with_same_vars)
+    # TODO remove after version 0.19, see PR4696
+    if datasets is not None:
+        warnings.warn(
+            "The datasets argument has been renamed to `data_objects`."
+            " In future passing a value for datasets will raise an error."
        )
+        data_objects = datasets
 
-        if fill_value is None:
-            # check that datasets form complete hypercube
-            _check_shape_tile_ids(combined_ids)
-        else:
-            # check only that all datasets have same dimension depth for these
-            # vars
-            _check_dimension_depth_tile_ids(combined_ids)
+    if not data_objects:
+        return Dataset()
 
-        # Concatenate along all of concat_dims one by one to create single ds
-        concatenated = _combine_nd(
-            combined_ids,
-            concat_dims=concat_dims,
+    mixed_arrays_and_datasets = any(
+        isinstance(data_object, DataArray) and data_object.name is None
+        for data_object in data_objects
+    ) and any(isinstance(data_object, Dataset) for data_object in data_objects)
+    if mixed_arrays_and_datasets:
+        raise ValueError("Can't automatically combine datasets with unnamed arrays.")
+
+    all_unnamed_data_arrays = all(
+        isinstance(data_object, DataArray) and data_object.name is None
+        for data_object in data_objects
+    )
+    if all_unnamed_data_arrays:
+        unnamed_arrays = data_objects
+        temp_datasets = [data_array._to_temp_dataset() for data_array in unnamed_arrays]
+
+        combined_temp_dataset = _combine_single_variable_hypercube(
+            temp_datasets,
+            fill_value=fill_value,
             data_vars=data_vars,
             coords=coords,
             compat=compat,
-            fill_value=fill_value,
             join=join,
             combine_attrs=combine_attrs,
         )
+        return DataArray()._from_temp_dataset(combined_temp_dataset)
 
-        # Check the overall coordinates are monotonically increasing
-        # TODO (benbovy - flexible indexes): only with pandas.Index?
-        for dim in concat_dims:
-            indexes = concatenated.xindexes.get(dim)
-            if not (
-                indexes.array.is_monotonic_increasing
-                or indexes.array.is_monotonic_decreasing
-            ):
-                raise ValueError(
-                    "Resulting object does not have monotonic"
-                    " global indexes along dimension {}".format(dim)
-                )
-        concatenated_grouped_by_data_vars.append(concatenated)
+    else:
+        # Group by data vars
+        sorted_datasets = sorted(data_objects, key=vars_as_keys)
+        grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
+
+        # Perform the multidimensional combine on each group of data variables
+        # before merging back together
+        concatenated_grouped_by_data_vars = []
+        for vars, datasets_with_same_vars in grouped_by_vars:
+            concatenated = _combine_single_variable_hypercube(
+                list(datasets_with_same_vars),
+                fill_value=fill_value,
+                data_vars=data_vars,
+                coords=coords,
+                compat=compat,
+                join=join,
+                combine_attrs=combine_attrs,
+            )
+            concatenated_grouped_by_data_vars.append(concatenated)
 
     return merge(
         concatenated_grouped_by_data_vars,
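The net effect: when every input is an unnamed DataArray, `combine_by_coords` round-trips through temporary single-variable Datasets and returns a DataArray; otherwise it groups the inputs by variable names and merges the per-variable hypercubes as before. A usage sketch under those assumptions (the data is invented):

```python
import xarray as xr

# Two tiles of a 1-D grid as unnamed DataArrays; the "x" dimension
# coordinate tells combine_by_coords how to order them.
first = xr.DataArray([1.0, 2.0], dims="x", coords={"x": [0, 1]})
second = xr.DataArray([3.0, 4.0], dims="x", coords={"x": [2, 3]})

combined = xr.combine_by_coords([first, second])
# With this change the result is itself a DataArray covering x = 0..3,
# rather than an error as before.
print(type(combined).__name__)  # DataArray
```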