Skip to content

Commit 3a05c50

Browse files
author
darothen
committed
Tweak auxiliary groupby apply and reduce methods
1 parent 4f70131 commit 3a05c50

File tree

2 files changed

+115
-17
lines changed

2 files changed

+115
-17
lines changed

xarray/core/common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,6 @@ def resample(self, freq=None, dim=None, how='mean', skipna=None,
542542
.. [1] http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
543543
"""
544544
from .dataarray import DataArray
545-
RESAMPLE_DIM = '__resample_dim__'
546545

547546
if dim is not None:
548547
return self._resample_immediately(freq, dim, how, skipna, closed,
@@ -559,15 +558,16 @@ def resample(self, freq=None, dim=None, how='mean', skipna=None,
559558
if isinstance(dim, basestring):
560559
dim_name = dim
561560
dim = self[dim]
561+
resample_dim = "resampled_" + dim_name
562562
else:
563563
raise ValueError("Dimension name should be a string; "
564564
"was passed %r" % dim)
565-
group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM)
565+
group = DataArray(dim, [(dim.dims, dim)], name=resample_dim)
566566
time_grouper = pd.TimeGrouper(freq=freq, closed=closed,
567567
label=label, base=base)
568568
resampler = self.resample_cls(self, group=group, dim=dim_name,
569569
grouper=time_grouper,
570-
resample_dim=RESAMPLE_DIM)
570+
resample_dim=resample_dim)
571571

572572
return resampler
573573

xarray/core/groupby.py

Lines changed: 112 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,9 @@ def _combine(self, applied, shortcut=False):
532532
combined = self._concat_shortcut(applied, dim, positions)
533533
else:
534534
combined = concat(applied, dim)
535-
combined = _maybe_reorder(combined, dim, positions)
535+
print(combined[dim])
536+
#combined = _maybe_reorder(combined, dim, positions)
537+
print(combined[dim])
536538

537539
if isinstance(combined, type(self._obj)):
538540
# only restore dimension order for arrays
@@ -585,18 +587,79 @@ def reduce_array(ar):
585587

586588
RESAMPLE_DIM = '__resample_dim__'
587589
class DataArrayResample(DataArrayGroupBy):
588-
"""DataArrayGroupBy object specialized to resampling a specified dimension
590+
"""DataArrayGroupBy object specialized to time resampling operations over a
591+
specified dimension
589592
"""
590593

591-
def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
592-
self._dim = dim
593-
self._resample_dim = resample_dim
594-
if dim == resample_dim:
594+
def __init__(self, *args, **kwargs):
    """Set up a resampling groupby that works through a proxy dimension.

    Parameters
    ----------
    *args
        Positional arguments forwarded unchanged to the parent class
        initializer.
    **kwargs
        ``dim`` (name of the dimension being resampled) and
        ``resample_dim`` (name of the proxy dimension) are consumed here
        and both default to None. Every remaining keyword is forwarded to
        the parent class initializer.

    Raises
    ------
    ValueError
        If ``dim`` and ``resample_dim`` compare equal, which includes the
        case where neither is supplied (both are then None).
    """
    # Popped from **kwargs so that only the remaining keywords are
    # forwarded to the parent initializer below.
    self._dim = kwargs.pop('dim', None)
    self._resample_dim = kwargs.pop('resample_dim', None)

    if self._dim == self._resample_dim:
        # The explicit positional index is required for attribute lookup:
        # a bare '{_dim}' is a keyword field, so ``.format(self)`` would
        # raise KeyError instead of producing this message.
        raise ValueError("Proxy resampling dimension ('{0._resample_dim}') "
                         "cannot have the same name as actual dimension "
                         "('{0._dim}')! ".format(self))
    super(DataArrayResample, self).__init__(*args, **kwargs)
599604

605+
606+
def apply(self, func, shortcut=False, **kwargs):
    """Apply a function over each array in the group and concatenate them
    together into a new array.

    `func` is called like `func(ar, **kwargs)` for each array `ar`
    in this group.

    Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
    to stack together the array. The rule is:
    1. If the dimension along which the group coordinate is defined is
       still in the first grouped array after applying `func`, then stack
       over this dimension.
    2. Otherwise, stack over the new dimension given by name of this
       grouping (the argument to the `groupby` function).

    Parameters
    ----------
    func : function
        Callable to apply to each array.
    shortcut : bool, optional
        Whether or not to shortcut evaluation under the assumptions that:
        (1) The action of `func` does not depend on any of the array
            metadata (attributes or coordinates) but only on the data and
            dimensions.
        (2) The action of `func` creates arrays with homogeneous metadata,
            that is, with the same dimensions and attributes.
        If these conditions are satisfied `shortcut` provides significant
        speedup. This should be the case for many common groupby operations
        (e.g., applying numpy ufuncs).
    **kwargs
        Used to call `func(ar, **kwargs)` for each array `ar`.

    Returns
    -------
    applied : DataArray
        The result of splitting, applying and combining this array, with
        the proxy resampling dimension renamed back to the original
        dimension name when it is present in the result.
    """
    if shortcut:
        grouped = self._iter_grouped_shortcut()
    else:
        grouped = self._iter_grouped()
    applied = (maybe_wrap_array(arr, func(arr, **kwargs))
               for arr in grouped)
    combined = self._combine(applied, shortcut=shortcut)

    # If the aggregation function didn't drop the original resampling
    # dimension, then we need to do so before we can rename the proxy
    # dimension we used. Membership is tested on ``.coords`` explicitly
    # rather than on the array itself, so the check does not depend on
    # what ``in`` means for the combined object.
    if self._dim in combined.coords:
        combined = combined.drop(self._dim)

    # Only rename when the proxy dimension is actually present.
    if self._resample_dim in combined.dims:
        combined = combined.rename({self._resample_dim: self._dim})

    return combined
661+
662+
600663
def reduce(self, func, dim=None, axis=None, shortcut=True,
601664
keep_attrs=False, **kwargs):
602665
"""Reduce the items in this group by applying `func` along the
@@ -629,9 +692,9 @@ def reduce(self, func, dim=None, axis=None, shortcut=True,
629692
def reduce_array(ar):
630693
return ar.reduce(func, self._dim, axis=None, keep_attrs=keep_attrs,
631694
**kwargs)
632-
result = self.apply(reduce_array, shortcut=shortcut)
695+
return self.apply(reduce_array, shortcut=shortcut)
633696

634-
return result.rename({self._resample_dim: self._dim})
697+
# return result.rename({self._resample_dim: self._dim})
635698

636699
ops.inject_reduce_methods(DataArrayResample)
637700
ops.inject_binary_ops(DataArrayResample)
@@ -731,15 +794,50 @@ class DatasetResample(DatasetGroupBy):
731794
"""DatasetGroupBy object specialized to resampling a specified dimension
732795
"""
733796

734-
def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
735-
self._dim = dim
736-
self._resample_dim = resample_dim
737-
if dim == resample_dim:
797+
def __init__(self, *args, **kwargs):
    """Set up a resampling groupby that works through a proxy dimension.

    Parameters
    ----------
    *args
        Positional arguments forwarded unchanged to the parent class
        initializer.
    **kwargs
        ``dim`` (name of the dimension being resampled) and
        ``resample_dim`` (name of the proxy dimension) are consumed here
        and both default to None. Every remaining keyword is forwarded to
        the parent class initializer.

    Raises
    ------
    ValueError
        If ``dim`` and ``resample_dim`` compare equal, which includes the
        case where neither is supplied (both are then None).
    """
    # Popped from **kwargs so that only the remaining keywords are
    # forwarded to the parent initializer below.
    self._dim = kwargs.pop('dim', None)
    self._resample_dim = kwargs.pop('resample_dim', None)

    if self._dim == self._resample_dim:
        # The explicit positional index is required for attribute lookup:
        # a bare '{_dim}' is a keyword field, so ``.format(self)`` would
        # raise KeyError instead of producing this message.
        raise ValueError("Proxy resampling dimension ('{0._resample_dim}') "
                         "cannot have the same name as actual dimension "
                         "('{0._dim}')! ".format(self))
    super(DatasetResample, self).__init__(*args, **kwargs)
742807

808+
def apply(self, func, **kwargs):
    """Apply a function over each Dataset in the groups generated for
    resampling and concatenate them together into a new Dataset.

    `func` is called like `func(ds, **kwargs)` for each dataset `ds`
    in this group.

    Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
    to stack together the datasets. The rule is:
    1. If the dimension along which the group coordinate is defined is
       still in the first grouped item after applying `func`, then stack
       over this dimension.
    2. Otherwise, stack over the new dimension given by name of this
       grouping (the argument to the `groupby` function).

    Parameters
    ----------
    func : function
        Callable to apply to each sub-dataset.
    **kwargs
        Used to call `func(ds, **kwargs)` for each sub-dataset `ds`.
        A ``shortcut`` keyword, if given, is discarded and never reaches
        `func`.

    Returns
    -------
    applied : Dataset or DataArray
        The result of splitting, applying and combining this dataset, with
        the proxy resampling dimension renamed back to the original
        dimension name when it is present in the result.
    """
    kwargs.pop('shortcut', None)  # ignore shortcut if set (for now)
    applied = (func(ds, **kwargs) for ds in self._iter_grouped())
    combined = self._combine(applied)

    # Mirror DataArrayResample.apply: only rename when the proxy dimension
    # is actually present in the combined result.
    if self._resample_dim in combined.dims:
        combined = combined.rename({self._resample_dim: self._dim})

    return combined
840+
743841
def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
744842
"""Reduce the items in this group by applying `func` along the
745843
pre-defined resampling dimension.
@@ -769,9 +867,9 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
769867

770868
def reduce_dataset(ds):
771869
return ds.reduce(func, self._dim, keep_attrs=keep_attrs, **kwargs)
772-
result = self.apply(reduce_dataset)
870+
return self.apply(reduce_dataset)
773871

774-
return result.rename({self._resample_dim: self._dim})
872+
# return result.rename({self._resample_dim: self._dim})
775873

776874
ops.inject_reduce_methods(DatasetResample)
777875
ops.inject_binary_ops(DatasetResample)

0 commit comments

Comments
 (0)