7
7
algos as libalgos , join as libjoin ,
8
8
Timestamp , Timedelta , )
9
9
from pandas ._libs .lib import is_datetime_array
10
+ from pandas ._libs .tslibs import parsing
10
11
11
12
from pandas .compat import range , u
12
13
from pandas .compat .numpy import function as nv
27
28
is_integer ,
28
29
is_float ,
29
30
is_dtype_equal ,
31
+ is_dtype_union_equal ,
30
32
is_object_dtype ,
31
33
is_categorical_dtype ,
32
34
is_interval_dtype ,
40
42
needs_i8_conversion ,
41
43
is_iterator , is_list_like ,
42
44
is_scalar )
43
- from pandas .core .common import (is_bool_indexer ,
44
- _values_from_object ,
45
- _asarray_tuplesafe )
45
+ from pandas .core .common import (is_bool_indexer , _values_from_object ,
46
+ _asarray_tuplesafe , _not_none ,
47
+ _index_labels_to_array )
46
48
47
49
from pandas .core .base import PandasObject , IndexOpsMixin
48
50
import pandas .core .base as base
49
51
from pandas .util ._decorators import (
50
52
Appender , Substitution , cache_readonly , deprecate_kwarg )
51
53
from pandas .core .indexes .frozen import FrozenList
52
- import pandas .core .common as com
53
54
import pandas .core .dtypes .concat as _concat
54
55
import pandas .core .missing as missing
55
56
import pandas .core .algorithms as algos
56
57
import pandas .core .sorting as sorting
57
58
from pandas .io .formats .printing import pprint_thing
58
59
from pandas .core .ops import _comp_method_OBJECT_ARRAY
59
- from pandas .core import strings
60
+ from pandas .core import strings , accessor
60
61
from pandas .core .config import get_option
61
62
62
63
@@ -121,6 +122,23 @@ class Index(IndexOpsMixin, PandasObject):
121
122
Notes
122
123
-----
123
124
An Index instance can **only** contain hashable objects
125
+
126
+ Examples
127
+ --------
128
+ >>> pd.Index([1, 2, 3])
129
+ Int64Index([1, 2, 3], dtype='int64')
130
+
131
+ >>> pd.Index(list('abc'))
132
+ Index(['a', 'b', 'c'], dtype='object')
133
+
134
+ See Also
135
+ --------
136
+ RangeIndex : Index implementing a monotonic integer range
137
+ CategoricalIndex : Index of :class:`Categorical` s.
138
+ MultiIndex : A multi-level, or hierarchical, Index
139
+ IntervalIndex : an Index of :class:`Interval` s.
140
+ DatetimeIndex, TimedeltaIndex, PeriodIndex
141
+ Int64Index, UInt64Index, Float64Index
124
142
"""
125
143
# To hand over control to subclasses
126
144
_join_precedence = 1
@@ -158,7 +176,7 @@ class Index(IndexOpsMixin, PandasObject):
158
176
_accessors = frozenset (['str' ])
159
177
160
178
# String Methods
161
- str = base .AccessorProperty (strings .StringMethods )
179
+ str = accessor .AccessorProperty (strings .StringMethods )
162
180
163
181
def __new__ (cls , data = None , dtype = None , copy = False , name = None ,
164
182
fastpath = False , tupleize_cols = True , ** kwargs ):
@@ -847,7 +865,7 @@ def _formatter_func(self):
847
865
"""
848
866
return default_pprint
849
867
850
- def _format_data (self ):
868
+ def _format_data (self , name = None ):
851
869
"""
852
870
Return the formatted data as a unicode string
853
871
"""
@@ -856,9 +874,11 @@ def _format_data(self):
856
874
display_width , _ = get_console_size ()
857
875
if display_width is None :
858
876
display_width = get_option ('display.width' ) or 80
877
+ if name is None :
878
+ name = self .__class__ .__name__
859
879
860
- space1 = "\n %s" % (' ' * (len (self . __class__ . __name__ ) + 1 ))
861
- space2 = "\n %s" % (' ' * (len (self . __class__ . __name__ ) + 2 ))
880
+ space1 = "\n %s" % (' ' * (len (name ) + 1 ))
881
+ space2 = "\n %s" % (' ' * (len (name ) + 2 ))
862
882
863
883
n = len (self )
864
884
sep = ','
@@ -984,6 +1004,29 @@ def to_series(self, **kwargs):
984
1004
index = self ._shallow_copy (),
985
1005
name = self .name )
986
1006
1007
def to_frame(self, index=True):
    """
    Create a DataFrame with a column containing the Index.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    index : boolean, default True
        Set the index of the returned DataFrame as the original Index.

    Returns
    -------
    DataFrame : a DataFrame containing the original Index data.

    Notes
    -----
    The single column is labelled with ``self.name``. When the Index has
    no name (``None``) the column is labelled ``0``.
    """
    # Imported locally, as in the original block.
    from pandas import DataFrame

    # Only a missing name falls back to 0. Falsy-but-valid names such as
    # '' or False are kept as the column label instead of being replaced.
    column = self.name if self.name is not None else 0
    result = DataFrame(self._shallow_copy(), columns=[column])

    if index:
        result.index = self
    return result
1029
+
987
1030
def _to_embed (self , keep_tz = False ):
988
1031
"""
989
1032
*this is an internal non-public method*
@@ -1034,7 +1077,7 @@ def to_datetime(self, dayfirst=False):
1034
1077
if self .inferred_type == 'string' :
1035
1078
from dateutil .parser import parse
1036
1079
parser = lambda x : parse (x , dayfirst = dayfirst )
1037
- parsed = lib .try_parse_dates (self .values , parser = parser )
1080
+ parsed = parsing .try_parse_dates (self .values , parser = parser )
1038
1081
return DatetimeIndex (parsed )
1039
1082
else :
1040
1083
return DatetimeIndex (self .values )
@@ -2140,7 +2183,7 @@ def _get_consensus_name(self, other):
2140
2183
return self ._shallow_copy (name = name )
2141
2184
return self
2142
2185
2143
- def union (self , other ):
2186
+ def union (self , other , sort = True ):
2144
2187
"""
2145
2188
Form the union of two Index objects and sort if possible.
2146
2189
@@ -2170,7 +2213,11 @@ def union(self, other):
2170
2213
if len (self ) == 0 :
2171
2214
return other ._get_consensus_name (self )
2172
2215
2173
- if not is_dtype_equal (self .dtype , other .dtype ):
2216
+ # TODO: is_dtype_union_equal is a hack around
2217
+ # 1. buggy set ops with duplicates (GH #13432)
2218
+ # 2. CategoricalIndex lacking setops (GH #10186)
2219
+ # Once those are fixed, this workaround can be removed
2220
+ if not is_dtype_union_equal (self .dtype , other .dtype ):
2174
2221
this = self .astype ('O' )
2175
2222
other = other .astype ('O' )
2176
2223
return this .union (other )
@@ -2194,27 +2241,29 @@ def union(self, other):
2194
2241
allow_fill = False )
2195
2242
result = _concat ._concat_compat ((self ._values , other_diff ))
2196
2243
2197
- try :
2198
- self ._values [0 ] < other_diff [0 ]
2199
- except TypeError as e :
2200
- warnings .warn ("%s, sort order is undefined for "
2201
- "incomparable objects" % e , RuntimeWarning ,
2202
- stacklevel = 3 )
2203
- else :
2204
- types = frozenset ((self .inferred_type ,
2205
- other .inferred_type ))
2206
- if not types & _unsortable_types :
2207
- result .sort ()
2244
+ if sort :
2245
+ try :
2246
+ self ._values [0 ] < other_diff [0 ]
2247
+ except TypeError as e :
2248
+ warnings .warn ("%s, sort order is undefined for "
2249
+ "incomparable objects" % e , RuntimeWarning ,
2250
+ stacklevel = 3 )
2251
+ else :
2252
+ types = frozenset ((self .inferred_type ,
2253
+ other .inferred_type ))
2254
+ if not types & _unsortable_types :
2255
+ result .sort ()
2208
2256
2209
2257
else :
2210
2258
result = self ._values
2211
2259
2212
- try :
2213
- result = np .sort (result )
2214
- except TypeError as e :
2215
- warnings .warn ("%s, sort order is undefined for "
2216
- "incomparable objects" % e , RuntimeWarning ,
2217
- stacklevel = 3 )
2260
+ if sort :
2261
+ try :
2262
+ result = np .sort (result )
2263
+ except TypeError as e :
2264
+ warnings .warn ("%s, sort order is undefined for "
2265
+ "incomparable objects" % e , RuntimeWarning ,
2266
+ stacklevel = 3 )
2218
2267
2219
2268
# for subclasses
2220
2269
return self ._wrap_union_result (other , result )
@@ -2279,7 +2328,7 @@ def intersection(self, other):
2279
2328
taken .name = None
2280
2329
return taken
2281
2330
2282
- def difference (self , other ):
2331
+ def difference (self , other , sort = True ):
2283
2332
"""
2284
2333
Return a new Index with elements from the index that are not in
2285
2334
`other`.
@@ -2319,14 +2368,15 @@ def difference(self, other):
2319
2368
label_diff = np .setdiff1d (np .arange (this .size ), indexer ,
2320
2369
assume_unique = True )
2321
2370
the_diff = this .values .take (label_diff )
2322
- try :
2323
- the_diff = sorting .safe_sort (the_diff )
2324
- except TypeError :
2325
- pass
2371
+ if sort :
2372
+ try :
2373
+ the_diff = sorting .safe_sort (the_diff )
2374
+ except TypeError :
2375
+ pass
2326
2376
2327
2377
return this ._shallow_copy (the_diff , name = result_name , freq = None )
2328
2378
2329
- def symmetric_difference (self , other , result_name = None ):
2379
+ def symmetric_difference (self , other , result_name = None , sort = True ):
2330
2380
"""
2331
2381
Compute the symmetric difference of two Index objects.
2332
2382
It's sorted if sorting is possible.
@@ -2379,10 +2429,11 @@ def symmetric_difference(self, other, result_name=None):
2379
2429
right_diff = other .values .take (right_indexer )
2380
2430
2381
2431
the_diff = _concat ._concat_compat ([left_diff , right_diff ])
2382
- try :
2383
- the_diff = sorting .safe_sort (the_diff )
2384
- except TypeError :
2385
- pass
2432
+ if sort :
2433
+ try :
2434
+ the_diff = sorting .safe_sort (the_diff )
2435
+ except TypeError :
2436
+ pass
2386
2437
2387
2438
attribs = self ._get_attributes_dict ()
2388
2439
attribs ['name' ] = result_name
@@ -2602,6 +2653,12 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
2602
2653
if tolerance is not None :
2603
2654
tolerance = self ._convert_tolerance (tolerance )
2604
2655
2656
+ # Treat boolean labels passed to a numeric index as not found. Without
2657
+ # this fix False and True would be treated as 0 and 1 respectively.
2658
+ # (GH #16877)
2659
+ if target .is_boolean () and self .is_numeric ():
2660
+ return _ensure_platform_int (np .repeat (- 1 , target .size ))
2661
+
2605
2662
pself , ptarget = self ._maybe_promote (target )
2606
2663
if pself is not self or ptarget is not target :
2607
2664
return pself .get_indexer (ptarget , method = method , limit = limit ,
@@ -2630,7 +2687,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
2630
2687
'backfill or nearest reindexing' )
2631
2688
2632
2689
indexer = self ._engine .get_indexer (target ._values )
2633
-
2634
2690
return _ensure_platform_int (indexer )
2635
2691
2636
2692
def _convert_tolerance (self , tolerance ):
@@ -3115,8 +3171,8 @@ def _join_multi(self, other, how, return_indexers=True):
3115
3171
other_is_mi = isinstance (other , MultiIndex )
3116
3172
3117
3173
# figure out join names
3118
- self_names = [ n for n in self .names if n is not None ]
3119
- other_names = [ n for n in other .names if n is not None ]
3174
+ self_names = _not_none ( * self .names )
3175
+ other_names = _not_none ( * other .names )
3120
3176
overlap = list (set (self_names ) & set (other_names ))
3121
3177
3122
3178
# need at least 1 in common, but not more than 1
@@ -3559,6 +3615,19 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
3559
3615
-------
3560
3616
start, end : int
3561
3617
3618
+ Notes
3619
+ -----
3620
+ This method only works if the index is monotonic or unique.
3621
+
3622
+ Examples
3623
+ --------
3624
+ >>> idx = pd.Index(list('abcd'))
3625
+ >>> idx.slice_locs(start='b', end='c')
3626
+ (1, 3)
3627
+
3628
+ See Also
3629
+ --------
3630
+ Index.get_loc : Get location for a single label
3562
3631
"""
3563
3632
inc = (step is None or step >= 0 )
3564
3633
@@ -3648,7 +3717,7 @@ def drop(self, labels, errors='raise'):
3648
3717
-------
3649
3718
dropped : Index
3650
3719
"""
3651
- labels = com . _index_labels_to_array (labels )
3720
+ labels = _index_labels_to_array (labels )
3652
3721
indexer = self .get_indexer (labels )
3653
3722
mask = indexer == - 1
3654
3723
if mask .any ():
@@ -3729,7 +3798,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr):
3729
3798
def _evaluate_with_datetime_like (self , other , op , opstr ):
3730
3799
raise TypeError ("can only perform ops with datetime like values" )
3731
3800
3732
- def _evalute_compare (self , op ):
3801
def _evaluate_compare(self, op):
    """
    Comparison hook with no implementation at this level.

    Raises
    ------
    AbstractMethodError
        Always; ``op`` is not used here.
    """
    raise base.AbstractMethodError(self)
3734
3803
3735
3804
@classmethod
@@ -4155,3 +4224,4 @@ def _trim_front(strings):
4155
4224
def _validate_join_method (method ):
4156
4225
if method not in ['left' , 'right' , 'inner' , 'outer' ]:
4157
4226
raise ValueError ('do not recognize join method %s' % method )
4227
+
0 commit comments