Skip to content

Commit fac28ce

Browse files
authored
ENH: sort=bool keyword argument for index.difference pandas-dev#17839
union
1 parent 056ffe6 commit fac28ce

File tree

1 file changed

+114
-44
lines changed

1 file changed

+114
-44
lines changed

pandas/core/indexes/base.py

Lines changed: 114 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
algos as libalgos, join as libjoin,
88
Timestamp, Timedelta, )
99
from pandas._libs.lib import is_datetime_array
10+
from pandas._libs.tslibs import parsing
1011

1112
from pandas.compat import range, u
1213
from pandas.compat.numpy import function as nv
@@ -27,6 +28,7 @@
2728
is_integer,
2829
is_float,
2930
is_dtype_equal,
31+
is_dtype_union_equal,
3032
is_object_dtype,
3133
is_categorical_dtype,
3234
is_interval_dtype,
@@ -40,23 +42,22 @@
4042
needs_i8_conversion,
4143
is_iterator, is_list_like,
4244
is_scalar)
43-
from pandas.core.common import (is_bool_indexer,
44-
_values_from_object,
45-
_asarray_tuplesafe)
45+
from pandas.core.common import (is_bool_indexer, _values_from_object,
46+
_asarray_tuplesafe, _not_none,
47+
_index_labels_to_array)
4648

4749
from pandas.core.base import PandasObject, IndexOpsMixin
4850
import pandas.core.base as base
4951
from pandas.util._decorators import (
5052
Appender, Substitution, cache_readonly, deprecate_kwarg)
5153
from pandas.core.indexes.frozen import FrozenList
52-
import pandas.core.common as com
5354
import pandas.core.dtypes.concat as _concat
5455
import pandas.core.missing as missing
5556
import pandas.core.algorithms as algos
5657
import pandas.core.sorting as sorting
5758
from pandas.io.formats.printing import pprint_thing
5859
from pandas.core.ops import _comp_method_OBJECT_ARRAY
59-
from pandas.core import strings
60+
from pandas.core import strings, accessor
6061
from pandas.core.config import get_option
6162

6263

@@ -121,6 +122,23 @@ class Index(IndexOpsMixin, PandasObject):
121122
Notes
122123
-----
123124
An Index instance can **only** contain hashable objects
125+
126+
Examples
127+
--------
128+
>>> pd.Index([1, 2, 3])
129+
Int64Index([1, 2, 3], dtype='int64')
130+
131+
>>> pd.Index(list('abc'))
132+
Index(['a', 'b', 'c'], dtype='object')
133+
134+
See Also
135+
--------
136+
RangeIndex : Index implementing a monotonic integer range
137+
CategoricalIndex : Index of :class:`Categorical` s.
138+
MultiIndex : A multi-level, or hierarchical, Index
139+
IntervalIndex : An Index of :class:`Interval` s.
140+
DatetimeIndex, TimedeltaIndex, PeriodIndex
141+
Int64Index, UInt64Index, Float64Index
124142
"""
125143
# To hand over control to subclasses
126144
_join_precedence = 1
@@ -158,7 +176,7 @@ class Index(IndexOpsMixin, PandasObject):
158176
_accessors = frozenset(['str'])
159177

160178
# String Methods
161-
str = base.AccessorProperty(strings.StringMethods)
179+
str = accessor.AccessorProperty(strings.StringMethods)
162180

163181
def __new__(cls, data=None, dtype=None, copy=False, name=None,
164182
fastpath=False, tupleize_cols=True, **kwargs):
@@ -847,7 +865,7 @@ def _formatter_func(self):
847865
"""
848866
return default_pprint
849867

850-
def _format_data(self):
868+
def _format_data(self, name=None):
851869
"""
852870
Return the formatted data as a unicode string
853871
"""
@@ -856,9 +874,11 @@ def _format_data(self):
856874
display_width, _ = get_console_size()
857875
if display_width is None:
858876
display_width = get_option('display.width') or 80
877+
if name is None:
878+
name = self.__class__.__name__
859879

860-
space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
861-
space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
880+
space1 = "\n%s" % (' ' * (len(name) + 1))
881+
space2 = "\n%s" % (' ' * (len(name) + 2))
862882

863883
n = len(self)
864884
sep = ','
@@ -984,6 +1004,29 @@ def to_series(self, **kwargs):
9841004
index=self._shallow_copy(),
9851005
name=self.name)
9861006

1007+
def to_frame(self, index=True):
1008+
"""
1009+
Create a DataFrame with a column containing the Index.
1010+
1011+
.. versionadded:: 0.21.0
1012+
1013+
Parameters
1014+
----------
1015+
index : boolean, default True
1016+
Set the index of the returned DataFrame as the original Index.
1017+
1018+
Returns
1019+
-------
1020+
DataFrame : a DataFrame containing the original Index data.
1021+
"""
1022+
1023+
from pandas import DataFrame
1024+
result = DataFrame(self._shallow_copy(), columns=[self.name or 0])
1025+
1026+
if index:
1027+
result.index = self
1028+
return result
1029+
9871030
def _to_embed(self, keep_tz=False):
9881031
"""
9891032
*this is an internal non-public method*
@@ -1034,7 +1077,7 @@ def to_datetime(self, dayfirst=False):
10341077
if self.inferred_type == 'string':
10351078
from dateutil.parser import parse
10361079
parser = lambda x: parse(x, dayfirst=dayfirst)
1037-
parsed = lib.try_parse_dates(self.values, parser=parser)
1080+
parsed = parsing.try_parse_dates(self.values, parser=parser)
10381081
return DatetimeIndex(parsed)
10391082
else:
10401083
return DatetimeIndex(self.values)
@@ -2140,7 +2183,7 @@ def _get_consensus_name(self, other):
21402183
return self._shallow_copy(name=name)
21412184
return self
21422185

2143-
def union(self, other):
2186+
def union(self, other, sort=True):
21442187
"""
21452188
Form the union of two Index objects, sorting the result if possible
(sorting is skipped when `sort` is False).
21462189
@@ -2170,7 +2213,11 @@ def union(self, other):
21702213
if len(self) == 0:
21712214
return other._get_consensus_name(self)
21722215

2173-
if not is_dtype_equal(self.dtype, other.dtype):
2216+
# TODO: is_dtype_union_equal is a hack around
2217+
# 1. buggy set ops with duplicates (GH #13432)
2218+
# 2. CategoricalIndex lacking setops (GH #10186)
2219+
# Once those are fixed, this workaround can be removed
2220+
if not is_dtype_union_equal(self.dtype, other.dtype):
21742221
this = self.astype('O')
21752222
other = other.astype('O')
21762223
return this.union(other)
@@ -2194,27 +2241,29 @@ def union(self, other):
21942241
allow_fill=False)
21952242
result = _concat._concat_compat((self._values, other_diff))
21962243

2197-
try:
2198-
self._values[0] < other_diff[0]
2199-
except TypeError as e:
2200-
warnings.warn("%s, sort order is undefined for "
2201-
"incomparable objects" % e, RuntimeWarning,
2202-
stacklevel=3)
2203-
else:
2204-
types = frozenset((self.inferred_type,
2205-
other.inferred_type))
2206-
if not types & _unsortable_types:
2207-
result.sort()
2244+
if sort:
2245+
try:
2246+
self._values[0] < other_diff[0]
2247+
except TypeError as e:
2248+
warnings.warn("%s, sort order is undefined for "
2249+
"incomparable objects" % e, RuntimeWarning,
2250+
stacklevel=3)
2251+
else:
2252+
types = frozenset((self.inferred_type,
2253+
other.inferred_type))
2254+
if not types & _unsortable_types:
2255+
result.sort()
22082256

22092257
else:
22102258
result = self._values
22112259

2212-
try:
2213-
result = np.sort(result)
2214-
except TypeError as e:
2215-
warnings.warn("%s, sort order is undefined for "
2216-
"incomparable objects" % e, RuntimeWarning,
2217-
stacklevel=3)
2260+
if sort:
2261+
try:
2262+
result = np.sort(result)
2263+
except TypeError as e:
2264+
warnings.warn("%s, sort order is undefined for "
2265+
"incomparable objects" % e, RuntimeWarning,
2266+
stacklevel=3)
22182267

22192268
# for subclasses
22202269
return self._wrap_union_result(other, result)
@@ -2279,7 +2328,7 @@ def intersection(self, other):
22792328
taken.name = None
22802329
return taken
22812330

2282-
def difference(self, other):
2331+
def difference(self, other, sort=True):
22832332
"""
22842333
Return a new Index with elements from the index that are not in
22852334
`other`.
@@ -2319,14 +2368,15 @@ def difference(self, other):
23192368
label_diff = np.setdiff1d(np.arange(this.size), indexer,
23202369
assume_unique=True)
23212370
the_diff = this.values.take(label_diff)
2322-
try:
2323-
the_diff = sorting.safe_sort(the_diff)
2324-
except TypeError:
2325-
pass
2371+
if sort:
2372+
try:
2373+
the_diff = sorting.safe_sort(the_diff)
2374+
except TypeError:
2375+
pass
23262376

23272377
return this._shallow_copy(the_diff, name=result_name, freq=None)
23282378

2329-
def symmetric_difference(self, other, result_name=None):
2379+
def symmetric_difference(self, other, result_name=None, sort=True):
23302380
"""
23312381
Compute the symmetric difference of two Index objects.
23322382
The result is sorted if sorting is possible and `sort` is True.
@@ -2379,10 +2429,11 @@ def symmetric_difference(self, other, result_name=None):
23792429
right_diff = other.values.take(right_indexer)
23802430

23812431
the_diff = _concat._concat_compat([left_diff, right_diff])
2382-
try:
2383-
the_diff = sorting.safe_sort(the_diff)
2384-
except TypeError:
2385-
pass
2432+
if sort:
2433+
try:
2434+
the_diff = sorting.safe_sort(the_diff)
2435+
except TypeError:
2436+
pass
23862437

23872438
attribs = self._get_attributes_dict()
23882439
attribs['name'] = result_name
@@ -2602,6 +2653,12 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
26022653
if tolerance is not None:
26032654
tolerance = self._convert_tolerance(tolerance)
26042655

2656+
# Treat boolean labels passed to a numeric index as not found. Without
2657+
# this fix, False and True would be treated as 0 and 1 respectively.
2658+
# (GH #16877)
2659+
if target.is_boolean() and self.is_numeric():
2660+
return _ensure_platform_int(np.repeat(-1, target.size))
2661+
26052662
pself, ptarget = self._maybe_promote(target)
26062663
if pself is not self or ptarget is not target:
26072664
return pself.get_indexer(ptarget, method=method, limit=limit,
@@ -2630,7 +2687,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
26302687
'backfill or nearest reindexing')
26312688

26322689
indexer = self._engine.get_indexer(target._values)
2633-
26342690
return _ensure_platform_int(indexer)
26352691

26362692
def _convert_tolerance(self, tolerance):
@@ -3115,8 +3171,8 @@ def _join_multi(self, other, how, return_indexers=True):
31153171
other_is_mi = isinstance(other, MultiIndex)
31163172

31173173
# figure out join names
3118-
self_names = [n for n in self.names if n is not None]
3119-
other_names = [n for n in other.names if n is not None]
3174+
self_names = _not_none(*self.names)
3175+
other_names = _not_none(*other.names)
31203176
overlap = list(set(self_names) & set(other_names))
31213177

31223178
# need at least 1 in common, but not more than 1
@@ -3559,6 +3615,19 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
35593615
-------
35603616
start, end : int
35613617
3618+
Notes
3619+
-----
3620+
This method only works if the index is monotonic or unique.
3621+
3622+
Examples
3623+
--------
3624+
>>> idx = pd.Index(list('abcd'))
3625+
>>> idx.slice_locs(start='b', end='c')
3626+
(1, 3)
3627+
3628+
See Also
3629+
--------
3630+
Index.get_loc : Get location for a single label
35623631
"""
35633632
inc = (step is None or step >= 0)
35643633

@@ -3648,7 +3717,7 @@ def drop(self, labels, errors='raise'):
36483717
-------
36493718
dropped : Index
36503719
"""
3651-
labels = com._index_labels_to_array(labels)
3720+
labels = _index_labels_to_array(labels)
36523721
indexer = self.get_indexer(labels)
36533722
mask = indexer == -1
36543723
if mask.any():
@@ -3729,7 +3798,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr):
37293798
def _evaluate_with_datetime_like(self, other, op, opstr):
37303799
raise TypeError("can only perform ops with datetime like values")
37313800

3732-
def _evalute_compare(self, op):
3801+
def _evaluate_compare(self, op):
37333802
raise base.AbstractMethodError(self)
37343803

37353804
@classmethod
@@ -4155,3 +4224,4 @@ def _trim_front(strings):
41554224
def _validate_join_method(method):
41564225
if method not in ['left', 'right', 'inner', 'outer']:
41574226
raise ValueError('do not recognize join method %s' % method)
4227+

0 commit comments

Comments
 (0)