Skip to content

Commit 2995ad7

Browse files
author
darothen
committed
Tweak auxiliary groupby apply and reduce methods
1 parent 70f332f commit 2995ad7

File tree

2 files changed

+115
-17
lines changed

2 files changed

+115
-17
lines changed

xarray/core/common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,6 @@ def resample(self, freq=None, dim=None, how='mean', skipna=None,
573573
.. [1] http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
574574
"""
575575
from .dataarray import DataArray
576-
RESAMPLE_DIM = '__resample_dim__'
577576

578577
if dim is not None:
579578
return self._resample_immediately(freq, dim, how, skipna, closed,
@@ -590,15 +589,16 @@ def resample(self, freq=None, dim=None, how='mean', skipna=None,
590589
if isinstance(dim, basestring):
591590
dim_name = dim
592591
dim = self[dim]
592+
resample_dim = "resampled_" + dim_name
593593
else:
594594
raise ValueError("Dimension name should be a string; "
595595
"was passed %r" % dim)
596-
group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM)
596+
group = DataArray(dim, [(dim.dims, dim)], name=resample_dim)
597597
time_grouper = pd.TimeGrouper(freq=freq, closed=closed,
598598
label=label, base=base)
599599
resampler = self.resample_cls(self, group=group, dim=dim_name,
600600
grouper=time_grouper,
601-
resample_dim=RESAMPLE_DIM)
601+
resample_dim=resample_dim)
602602

603603
return resampler
604604

xarray/core/groupby.py

Lines changed: 112 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,9 @@ def _combine(self, applied, shortcut=False):
531531
combined = self._concat_shortcut(applied, dim, positions)
532532
else:
533533
combined = concat(applied, dim)
534-
combined = _maybe_reorder(combined, dim, positions)
534+
print(combined[dim])
535+
#combined = _maybe_reorder(combined, dim, positions)
536+
print(combined[dim])
535537

536538
if isinstance(combined, type(self._obj)):
537539
# only restore dimension order for arrays
@@ -584,18 +586,79 @@ def reduce_array(ar):
584586

585587
RESAMPLE_DIM = '__resample_dim__'
586588
class DataArrayResample(DataArrayGroupBy):
587-
"""DataArrayGroupBy object specialized to resampling a specified dimension
589+
"""DataArrayGroupBy object specialized to time resampling operations over a
590+
specified dimension
588591
"""
589592

590-
def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
591-
self._dim = dim
592-
self._resample_dim = resample_dim
593-
if dim == resample_dim:
593+
def __init__(self, *args, **kwargs):
594+
595+
self._dim = kwargs.pop('dim', None)
596+
self._resample_dim = kwargs.pop('resample_dim', None)
597+
598+
if self._dim == self._resample_dim:
594599
raise ValueError("Proxy resampling dimension ('{_resample_dim}') "
595600
"cannot have the same name as actual dimension "
596601
"('{_dim}')! ".format(self))
597602
super(DataArrayResample, self).__init__(*args, **kwargs)
598603

604+
605+
def apply(self, func, shortcut=False, **kwargs):
606+
"""Apply a function over each array in the group and concatenate them
607+
together into a new array.
608+
609+
`func` is called like `func(ar, **kwargs)` for each array `ar`
610+
in this group.
611+
612+
Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
613+
to stack together the array. The rule is:
614+
1. If the dimension along which the group coordinate is defined is
615+
still in the first grouped array after applying `func`, then stack
616+
over this dimension.
617+
2. Otherwise, stack over the new dimension given by name of this
618+
grouping (the argument to the `groupby` function).
619+
620+
Parameters
621+
----------
622+
func : function
623+
Callable to apply to each array.
624+
shortcut : bool, optional
625+
Whether or not to shortcut evaluation under the assumptions that:
626+
(1) The action of `func` does not depend on any of the array
627+
metadata (attributes or coordinates) but only on the data and
628+
dimensions.
629+
(2) The action of `func` creates arrays with homogeneous metadata,
630+
that is, with the same dimensions and attributes.
631+
If these conditions are satisfied `shortcut` provides significant
632+
speedup. This should be the case for many common groupby operations
633+
(e.g., applying numpy ufuncs).
634+
**kwargs
635+
Used to call `func(ar, **kwargs)` for each array `ar`.
636+
637+
Returns
638+
-------
639+
applied : DataArray
640+
The result of splitting, applying and combining this array.
641+
"""
642+
if shortcut:
643+
grouped = self._iter_grouped_shortcut()
644+
else:
645+
grouped = self._iter_grouped()
646+
applied = (maybe_wrap_array(arr, func(arr, **kwargs))
647+
for arr in grouped)
648+
combined = self._combine(applied, shortcut=shortcut)
649+
650+
# If the aggregation function didn't drop the original resampling
651+
# dimension, then we need to do so before we can rename the proxy
652+
# dimension we used.
653+
if self._dim in combined:
654+
combined = combined.drop(self._dim)
655+
656+
if self._resample_dim in combined.dims:
657+
combined = combined.rename({self._resample_dim: self._dim})
658+
659+
return combined
660+
661+
599662
def reduce(self, func, dim=None, axis=None, shortcut=True,
600663
keep_attrs=False, **kwargs):
601664
"""Reduce the items in this group by applying `func` along the
@@ -628,9 +691,9 @@ def reduce(self, func, dim=None, axis=None, shortcut=True,
628691
def reduce_array(ar):
629692
return ar.reduce(func, self._dim, axis=None, keep_attrs=keep_attrs,
630693
**kwargs)
631-
result = self.apply(reduce_array, shortcut=shortcut)
694+
return self.apply(reduce_array, shortcut=shortcut)
632695

633-
return result.rename({self._resample_dim: self._dim})
696+
# return result.rename({self._resample_dim: self._dim})
634697

635698
ops.inject_reduce_methods(DataArrayResample)
636699
ops.inject_binary_ops(DataArrayResample)
@@ -730,15 +793,50 @@ class DatasetResample(DatasetGroupBy):
730793
"""DatasetGroupBy object specialized to resampling a specified dimension
731794
"""
732795

733-
def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
734-
self._dim = dim
735-
self._resample_dim = resample_dim
736-
if dim == resample_dim:
796+
def __init__(self, *args, **kwargs):
797+
798+
self._dim = kwargs.pop('dim', None)
799+
self._resample_dim = kwargs.pop('resample_dim', None)
800+
801+
if self._dim == self._resample_dim:
737802
raise ValueError("Proxy resampling dimension ('{_resample_dim}') "
738803
"cannot have the same name as actual dimension "
739804
"('{_dim}')! ".format(self))
740805
super(DatasetResample, self).__init__(*args, **kwargs)
741806

807+
def apply(self, func, **kwargs):
808+
"""Apply a function over each Dataset in the groups generated for
809+
resampling and concatenate them together into a new Dataset.
810+
811+
`func` is called like `func(ds, **kwargs)` for each dataset `ds`
812+
in this group.
813+
814+
Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
815+
to stack together the datasets. The rule is:
816+
1. If the dimension along which the group coordinate is defined is
817+
still in the first grouped item after applying `func`, then stack
818+
over this dimension.
819+
2. Otherwise, stack over the new dimension given by name of this
820+
grouping (the argument to the `groupby` function).
821+
822+
Parameters
823+
----------
824+
func : function
825+
Callable to apply to each sub-dataset.
826+
**kwargs
827+
Used to call `func(ds, **kwargs)` for each sub-dataset `ds`.
828+
829+
Returns
830+
-------
831+
applied : Dataset or DataArray
832+
The result of splitting, applying and combining this dataset.
833+
"""
834+
kwargs.pop('shortcut', None) # ignore shortcut if set (for now)
835+
applied = (func(ds, **kwargs) for ds in self._iter_grouped())
836+
combined = self._combine(applied)
837+
838+
return combined.rename({self._resample_dim: self._dim})
839+
742840
def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
743841
"""Reduce the items in this group by applying `func` along the
744842
pre-defined resampling dimension.
@@ -768,9 +866,9 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
768866

769867
def reduce_dataset(ds):
770868
return ds.reduce(func, self._dim, keep_attrs=keep_attrs, **kwargs)
771-
result = self.apply(reduce_dataset)
869+
return self.apply(reduce_dataset)
772870

773-
return result.rename({self._resample_dim: self._dim})
871+
# return result.rename({self._resample_dim: self._dim})
774872

775873
ops.inject_reduce_methods(DatasetResample)
776874
ops.inject_binary_ops(DatasetResample)

0 commit comments

Comments
 (0)