3
3
from __future__ import print_function
4
4
from datetime import timedelta
5
5
from collections import defaultdict , Hashable
6
+ import functools
6
7
import operator
7
8
import numpy as np
8
9
import pandas as pd
9
10
10
11
from . import nputils
11
12
from . import utils
13
+ from . import duck_array_ops
12
14
from .pycompat import (iteritems , range , integer_types , dask_array_type ,
13
15
suppress )
14
16
from .utils import is_dict_like
@@ -589,27 +591,23 @@ def as_indexable(array):
589
591
raise TypeError ('Invalid array type: {}' .format (type (array )))
590
592
591
593
592
- def _outer_to_numpy_indexer (key , shape ):
593
- """Convert an OuterIndexer into an indexer for NumPy .
594
+ def _outer_to_vectorized_indexer (key , shape ):
595
+ """Convert an OuterIndexer into an vectorized indexer .
594
596
595
597
Parameters
596
598
----------
597
599
key : tuple
598
- Outer indexing tuple to convert.
600
+ Tuple from an OuterIndexer to convert.
599
601
shape : tuple
600
602
Shape of the array subject to the indexing.
601
603
602
604
Returns
603
605
-------
604
606
tuple
605
- Base tuple suitable for use to index a NumPy array.
607
+ Tuple suitable for use to index a NumPy array with vectorized indexing.
608
+ Each element is an integer or array: broadcasting them together gives
609
+ the shape of the result.
606
610
"""
607
- if len ([k for k in key if not isinstance (k , slice )]) <= 1 :
608
- # If there is only one vector and all others are slice,
609
- # it can be safely used in mixed basic/advanced indexing.
610
- # Boolean index should already be converted to integer array.
611
- return tuple (key )
612
-
613
611
n_dim = len ([k for k in key if not isinstance (k , integer_types )])
614
612
i_dim = 0
615
613
new_key = []
@@ -627,6 +625,149 @@ def _outer_to_numpy_indexer(key, shape):
627
625
return tuple (new_key )
628
626
629
627
628
+ def _outer_to_numpy_indexer (key , shape ):
629
+ """Convert an OuterIndexer into an indexer for NumPy.
630
+
631
+ Parameters
632
+ ----------
633
+ key : tuple
634
+ Tuple from an OuterIndexer to convert.
635
+ shape : tuple
636
+ Shape of the array subject to the indexing.
637
+
638
+ Returns
639
+ -------
640
+ tuple
641
+ Tuple suitable for use to index a NumPy array.
642
+ """
643
+ if len ([k for k in key if not isinstance (k , slice )]) <= 1 :
644
+ # If there is only one vector and all others are slice,
645
+ # it can be safely used in mixed basic/advanced indexing.
646
+ # Boolean index should already be converted to integer array.
647
+ return tuple (key )
648
+ else :
649
+ return _outer_to_vectorized_indexer (key , shape )
650
+
651
+
652
def _dask_array_with_chunks_hint(array, chunks):
    """Wrap ``array`` in a dask array, honoring ``chunks`` where size > 1.

    Parameters
    ----------
    array : array-like
        Object exposing ``ndim`` and ``shape`` that is passed on to
        ``dask.array.from_array``.
    chunks : sequence
        Chunk hint per dimension; needs at least ``array.ndim`` entries.

    Returns
    -------
    dask.array.Array
        Result of ``dask.array.from_array`` with the adjusted chunks.

    Raises
    ------
    ValueError
        If ``chunks`` has fewer entries than ``array`` has dimensions.
    """
    import dask.array as da
    if array.ndim > len(chunks):
        raise ValueError('not enough chunks in hint')
    # Only dimensions longer than one element keep their hinted chunking;
    # every other dimension is given the chunk spec ``(1,)``.
    resolved_chunks = [hint if dim_size > 1 else (1,)
                       for hint, dim_size in zip(chunks, array.shape)]
    return da.from_array(array, resolved_chunks)
661
+
662
+
663
+ def _logical_any (args ):
664
+ return functools .reduce (operator .or_ , args )
665
+
666
+
667
def _masked_result_drop_slice(key, chunks_hint=None):
    """Build a mask marking where any non-slice entry of ``key`` equals -1.

    Parameters
    ----------
    key : iterable
        Indexing key; slice entries are ignored.
    chunks_hint : tuple, optional
        When given, every ``np.ndarray`` entry is first wrapped with
        ``_dask_array_with_chunks_hint`` using this hint.

    Returns
    -------
    object
        The ``k == -1`` comparisons of all non-slice entries, combined
        through ``_logical_any``.
    """
    wrap_in_dask = chunks_hint is not None
    comparisons = []
    for k in key:
        if isinstance(k, slice):
            continue
        if wrap_in_dask and isinstance(k, np.ndarray):
            k = _dask_array_with_chunks_hint(k, chunks_hint)
        comparisons.append(k == -1)
    return _logical_any(comparisons)
674
+
675
+
676
def create_mask(indexer, shape, chunks_hint=None):
    """Create a mask for indexing with a fill-value.

    Parameters
    ----------
    indexer : ExplicitIndexer
        Indexer with -1 in integer or ndarray value to indicate locations in
        the result that should be masked.
    shape : tuple
        Shape of the array being indexed.
    chunks_hint : tuple, optional
        Optional tuple indicating desired chunks for the result. If provided,
        used as a hint for chunks on the resulting dask array. Must have a
        hint for each dimension on the result array.

    Returns
    -------
    mask : bool, np.ndarray or dask.array.Array with dtype=bool
        Dask array if chunks_hint is provided, otherwise a NumPy array. Has the
        same shape as the indexing result.

    Raises
    ------
    TypeError
        If ``indexer`` is not an OuterIndexer, VectorizedIndexer or
        BasicIndexer.
    """
    if isinstance(indexer, OuterIndexer):
        key = _outer_to_vectorized_indexer(indexer.tuple, shape)
        # The vectorized conversion is expected to leave no slices behind.
        assert not any(isinstance(k, slice) for k in key)
        return _masked_result_drop_slice(key, chunks_hint)

    if isinstance(indexer, VectorizedIndexer):
        key = indexer.tuple
        base_mask = _masked_result_drop_slice(key, chunks_hint)
        # Number of elements each slice selects along its dimension. A range
        # object reports its length without allocating the actual indices.
        slice_shape = tuple(len(range(*k.indices(size)))
                            for k, size in zip(key, shape)
                            if isinstance(k, slice))
        # Append one new trailing axis per slice, then broadcast the mask of
        # the array entries across those sliced dimensions.
        expanded_mask = base_mask[
            (Ellipsis,) + (np.newaxis,) * len(slice_shape)]
        return duck_array_ops.broadcast_to(
            expanded_mask, base_mask.shape + slice_shape)

    if isinstance(indexer, BasicIndexer):
        # Basic keys hold only scalars and slices, so the mask is one bool.
        return any(k == -1 for k in indexer.tuple)

    raise TypeError('unexpected key type: {}'.format(type(indexer)))
720
+
721
+
722
+ def _posify_mask_subindexer (index ):
723
+ """Convert masked indices in a flat array to the nearest unmasked index.
724
+
725
+ Parameters
726
+ ----------
727
+ index : np.ndarray
728
+ One dimensional ndarray with dtype=int.
729
+
730
+ Returns
731
+ -------
732
+ np.ndarray
733
+ One dimensional ndarray with all values equal to -1 replaced by an
734
+ adjacent non-masked element.
735
+ """
736
+ masked = index == - 1
737
+ unmasked_locs = np .flatnonzero (~ masked )
738
+ if not unmasked_locs .size :
739
+ # indexing unmasked_locs is invalid
740
+ return np .zeros_like (index )
741
+ masked_locs = np .flatnonzero (masked )
742
+ prev_value = np .maximum (0 , np .searchsorted (unmasked_locs , masked_locs ) - 1 )
743
+ new_index = index .copy ()
744
+ new_index [masked_locs ] = index [unmasked_locs [prev_value ]]
745
+ return new_index
746
+
747
+
748
def posify_mask_indexer(indexer):
    """Replace masked entries (-1) of an indexer by nearby unmasked values.

    This routine is useful for dask, where it can be much faster to index
    adjacent points than arbitrary points from the end of an array.

    Parameters
    ----------
    indexer : ExplicitIndexer
        Input indexer.

    Returns
    -------
    ExplicitIndexer
        New indexer of the same type as the input. ndarray keys have their
        -1 values replaced by an adjacent non-masked element; all other
        keys are passed through unchanged.
    """
    new_key = []
    for k in indexer.tuple:
        if isinstance(k, np.ndarray):
            # The helper works on flat arrays: flatten, fix up, then
            # restore the original shape.
            flat = _posify_mask_subindexer(k.ravel())
            k = flat.reshape(k.shape)
        new_key.append(k)
    return type(indexer)(tuple(new_key))
769
+
770
+
630
771
class NumpyIndexingAdapter (ExplicitlyIndexedNDArrayMixin ):
631
772
"""Wrap a NumPy array to use explicit indexing."""
632
773
0 commit comments