diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index edb8cd57b31bd..e425ee1a78de5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2656,12 +2656,59 @@ def union(self, other, sort=None): >>> idx2 = pd.Index([1, 2, 3, 4]) >>> idx1.union(idx2) Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') + + MultiIndex case + + >>> idx1 = pd.MultiIndex.from_arrays( + ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] + ... ) + >>> idx1 + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue')], + ) + >>> idx2 = pd.MultiIndex.from_arrays( + ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] + ... ) + >>> idx2 + MultiIndex([(3, 'Red'), + (3, 'Green'), + (2, 'Red'), + (2, 'Green')], + ) + >>> idx1.union(idx2) + MultiIndex([(1, 'Blue'), + (1, 'Red'), + (2, 'Blue'), + (2, 'Green'), + (2, 'Red'), + (3, 'Green'), + (3, 'Red')], + ) + >>> idx1.union(idx2, sort=False) + MultiIndex([(1, 'Red'), + (1, 'Blue'), + (2, 'Red'), + (2, 'Blue'), + (3, 'Red'), + (3, 'Green'), + (2, 'Green')], + ) """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_name = self._convert_can_do_setop(other) if not is_dtype_equal(self.dtype, other.dtype): + if isinstance(self, ABCMultiIndex) and not is_object_dtype( + unpack_nested_dtype(other) + ): + raise NotImplementedError( + "Can only union MultiIndex with MultiIndex or Index of tuples, " + "try mi.to_flat_index().union(other) instead." + ) + dtype = find_common_type([self.dtype, other.dtype]) if self._is_numeric_dtype and other._is_numeric_dtype: # Right now, we treat union(int, float) a bit special. @@ -2680,6 +2727,14 @@ def union(self, other, sort=None): right = other.astype(dtype, copy=False) return left.union(right, sort=sort) + elif not len(other) or self.equals(other): + # NB: whether this (and the `if not len(self)` check below) come before + # or after the is_dtype_equal check above affects the returned dtype + return self._get_reconciled_name_object(other) + + elif not len(self): + return other._get_reconciled_name_object(self) + result = self._union(other, sort=sort) return self._wrap_setop_result(other, result) @@ -2703,12 +2758,6 @@ def _union(self, other, sort): ------- Index """ - if not len(other) or self.equals(other): - return self - - if not len(self): - return other - # TODO(EA): setops-refactor, clean all this up lvals = self._values rvals = other._values diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 94c055e264e71..d673d1b43f729 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -711,6 +711,9 @@ def _can_fast_intersect(self: _T, other: _T) -> bool: # so intersection will preserve freq return True + elif not len(self) or not len(other): + return False + elif isinstance(self.freq, Tick): # We "line up" if and only if the difference between two of our points # is a multiple of our freq @@ -794,9 +797,6 @@ def _fast_union(self, other, sort=None): return left def _union(self, other, sort): - if not len(other) or self.equals(other) or not len(self): - return super()._union(other, sort=sort) - # We are called by `union`, which is responsible for this validation assert isinstance(other, type(self)) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3c7c0fdc45d70..06a04e5a9b9eb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3502,98 +3502,9 @@ def equal_levels(self, other) -> bool: # -------------------------------------------------------------------- # Set Methods - def union(self, other, sort=None): - """ - Form the union of two MultiIndex objects - - Parameters - ---------- - other : MultiIndex or array / Index of tuples - sort : False or None, default None - Whether to sort the resulting Index. - - * None : Sort the result, except when - - 1. `self` and `other` are equal. - 2. `self` has length 0. - 3. Some values in `self` or `other` cannot be compared. - A RuntimeWarning is issued in this case. - - * False : do not sort the result. - - .. versionadded:: 0.24.0 - - .. versionchanged:: 0.24.1 - - Changed the default value from ``True`` to ``None`` - (without change in behaviour). - - Returns - ------- - Index - - Examples - -------- - >>> idx1 = pd.MultiIndex.from_arrays( - ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] - ... ) - >>> idx1 - MultiIndex([(1, 'Red'), - (1, 'Blue'), - (2, 'Red'), - (2, 'Blue')], - ) - >>> idx2 = pd.MultiIndex.from_arrays( - ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] - ... ) - >>> idx2 - MultiIndex([(3, 'Red'), - (3, 'Green'), - (2, 'Red'), - (2, 'Green')], - ) - - >>> idx1.union(idx2) - MultiIndex([(1, 'Blue'), - (1, 'Red'), - (2, 'Blue'), - (2, 'Green'), - (2, 'Red'), - (3, 'Green'), - (3, 'Red')], - ) - - >>> idx1.union(idx2, sort=False) - MultiIndex([(1, 'Red'), - (1, 'Blue'), - (2, 'Red'), - (2, 'Blue'), - (3, 'Red'), - (3, 'Green'), - (2, 'Green')], - ) - """ - self._validate_sort_keyword(sort) - self._assert_can_do_setop(other) - other, _ = self._convert_can_do_setop(other) - - if not len(other) or self.equals(other): - return self._get_reconciled_name_object(other) - - if not len(self): - return other._get_reconciled_name_object(self) - - return self._union(other, sort=sort) - def _union(self, other, sort): other, result_names = self._convert_can_do_setop(other) - if not self._should_compare(other): - raise NotImplementedError( - "Can only union MultiIndex with MultiIndex or Index of tuples, " - "try mi.to_flat_index().union(other) instead." - ) - # We could get here with CategoricalIndex other rvals = other._values.astype(object, copy=False) uniq_tuples = lib.fast_unique_multiple([self._values, rvals], sort=sort) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 49acb7b5592cb..8df7e6912b1b2 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -646,17 +646,6 @@ def _difference(self, other, sort): return self._setop(other, sort, opname="difference") def _union(self, other, sort): - if not len(other) or self.equals(other) or not len(self): - return super()._union(other, sort=sort) - - # We are called by `union`, which is responsible for this validation - assert isinstance(other, type(self)) - - if not is_dtype_equal(self.dtype, other.dtype): - this = self.astype("O") - other = other.astype("O") - return this._union(other, sort=sort) - return self._setop(other, sort, opname="_union") # ------------------------------------------------------------------------ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e716605245da5..5e5280934dff4 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -576,9 +576,6 @@ def _union(self, other, sort): ------- union : Index """ - if not len(other) or self.equals(other) or not len(self): - return super()._union(other, sort=sort) - if isinstance(other, RangeIndex) and sort is None: start_s, step_s = self.start, self.step end_s = self.start + self.step * (len(self) - 1)