@@ -101,31 +101,22 @@ def __init__(self, obj, group, squeeze=False, grouper=None):
101
101
"""
102
102
from .dataset import as_dataset
103
103
104
+ if getattr (group , 'name' , None ) is None :
105
+ raise ValueError ('`group` must have a name' )
106
+ self ._stacked_dim = None
104
107
if group .ndim != 1 :
105
108
# try to stack the dims of the group into a single dim
106
109
# TODO: figure out how to exclude dimensions from the stacking
107
110
# (e.g. group over space dims but leave time dim intact)
108
111
orig_dims = group .dims
109
112
stacked_dim_name = 'stacked_' + '_' .join (orig_dims )
110
- # the copy is necessary here
113
+ # the copy is necessary here, otherwise read only array raises error
114
+ # in pandas: https://github.com/pydata/pandas/issues/12813
115
+ # Is there a performance penalty for calling copy?
111
116
group = group .stack (** {stacked_dim_name : orig_dims }).copy ()
112
- # without it, an error is raised deep in pandas
113
- ########################
114
- # xarray/core/groupby.py
115
- # ---> 31 inverse, values = pd.factorize(ar, sort=True)
116
- # pandas/core/algorithms.pyc in factorize(values, sort, order, na_sentinel, size_hint)
117
- # --> 196 labels = table.get_labels(vals, uniques, 0, na_sentinel, True)
118
- # pandas/hashtable.pyx in pandas.hashtable.Float64HashTable.get_labels (pandas/hashtable.c:10302)()
119
- # pandas/hashtable.so in View.MemoryView.memoryview_cwrapper (pandas/hashtable.c:29882)()
120
- # pandas/hashtable.so in View.MemoryView.memoryview.__cinit__ (pandas/hashtable.c:26251)()
121
- # ValueError: buffer source array is read-only
122
- #######################
123
- # seems related to
124
- # https://github.com/pydata/pandas/issues/10043
125
- # https://github.com/pydata/pandas/pull/10070
126
117
obj = obj .stack (** {stacked_dim_name : orig_dims })
127
- if getattr ( group , 'name' , None ) is None :
128
- raise ValueError ( '`group` must have a name' )
118
+ self . _stacked_dim = stacked_dim_name
119
+ self . _unstacked_dims = orig_dims
129
120
if not hasattr (group , 'dims' ):
130
121
raise ValueError ("`group` must have a 'dims' attribute" )
131
122
group_dim , = group .dims
@@ -249,6 +240,13 @@ def _maybe_restore_empty_groups(self, combined):
249
240
combined = combined .reindex (** indexers )
250
241
return combined
251
242
243
+ def _maybe_unstack_array (self , arr ):
244
+ """This gets called if we are applying on an array with a
245
+ multidimensional group."""
246
+ if self ._stacked_dim is not None and self ._stacked_dim in arr .dims :
247
+ arr = arr .unstack (self ._stacked_dim )
248
+ return arr
249
+
252
250
def fillna (self , value ):
253
251
"""Fill missing values in this object by group.
254
252
@@ -358,6 +356,11 @@ def lookup_order(dimension):
358
356
new_order = sorted (stacked .dims , key = lookup_order )
359
357
return stacked .transpose (* new_order )
360
358
359
+ def _restore_multiindex (self , combined ):
360
+ if self ._stacked_dim is not None and self ._stacked_dim in combined .dims :
361
+ combined [self ._stacked_dim ] = self .group [self ._stacked_dim ]
362
+ return combined
363
+
361
364
def apply (self , func , shortcut = False , ** kwargs ):
362
365
"""Apply a function over each array in the group and concatenate them
363
366
together into a new array.
@@ -399,23 +402,23 @@ def apply(self, func, shortcut=False, **kwargs):
399
402
grouped = self ._iter_grouped_shortcut ()
400
403
else :
401
404
grouped = self ._iter_grouped ()
402
- applied = (maybe_wrap_array (arr , func (arr , ** kwargs )) for arr in grouped )
405
+ applied = (maybe_wrap_array (arr ,func (arr , ** kwargs )) for arr in grouped )
403
406
combined = self ._concat (applied , shortcut = shortcut )
404
- result = self ._maybe_restore_empty_groups (combined )
407
+ result = self ._maybe_restore_empty_groups (
408
+ self ._maybe_unstack_array (combined ))
405
409
return result
406
410
407
411
def _concat(self, applied, shortcut=False):
    """Concatenate the per-group results into a single object.

    Parameters
    ----------
    applied : iterable
        Results of applying a function to each group.
    shortcut : bool, optional
        If true, combine with ``self._concat_shortcut``; otherwise use
        the module-level ``concat`` with explicit ``positions``.

    Returns
    -------
    The combined object. When it has the same type as ``self.obj`` its
    dimension order is restored and the stacked coordinate of a
    multidimensional group is re-attached.
    """
    # peek at applied to determine which coordinate to stack over
    applied_example, applied = peek_at(applied)
    concat_dim, positions = self._infer_concat_args(applied_example)
    if shortcut:
        combined = self._concat_shortcut(applied, concat_dim, positions)
    else:
        combined = concat(applied, concat_dim, positions=positions)
    if isinstance(combined, type(self.obj)):
        combined = self._restore_dim_order(combined)
        # NOTE(review): assumed to belong to the isinstance branch, i.e.
        # only results of the same type as self.obj are touched -- confirm
        # against the upstream change.
        combined = self._restore_multiindex(combined)
    return combined
420
423
421
424
def reduce (self , func , dim = None , axis = None , keep_attrs = False ,
0 commit comments