diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2c77b87f8efb6..2dec3b6bdebc7 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -609,6 +609,7 @@ Other Deprecations - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) - Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) +- Deprecated :meth:`DatetimeIndex.union_many`, use :meth:`DatetimeIndex.union` instead (:issue:`44091`) - Deprecated :meth:`.Groupby.pad` in favor of :meth:`.Groupby.ffill` (:issue:`33396`) - Deprecated :meth:`.Groupby.backfill` in favor of :meth:`.Groupby.bfill` (:issue:`33396`) - Deprecated :meth:`.Resample.pad` in favor of :meth:`.Resample.ffill` (:issue:`33396`) @@ -708,6 +709,7 @@ Datetimelike - Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`) - Bug in :class:`DateOffset`` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`) - Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`) +- Bug in :class:`DataFrame` construction from dict of :class:`Series` with mismatched index dtypes sometimes raising depending on the ordering of the passed dict (:issue:`44091`) - Timedelta diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index e497012f23b68..922c344510375 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -8,6 +8,8 @@ ) from pandas.errors import InvalidIndexError +from pandas.core.dtypes.common import is_dtype_equal + from pandas.core.indexes.base import ( Index, _new_Index, @@ -213,14 +215,41 @@ def conv(i): if kind == "special": result = indexes[0] + first = result 
+ + dtis = [x for x in indexes if isinstance(x, DatetimeIndex)] + dti_tzs = [x for x in dtis if x.tz is not None] + if len(dti_tzs) not in [0, len(dtis)]: + # TODO: this behavior is not tested (so may not be desired), + # but is kept in order to keep behavior the same when + # deprecating union_many + # test_frame_from_dict_with_mixed_tzaware_indexes + raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if len(dtis) == len(indexes): + sort = True + if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes): + # i.e. timezones mismatch + # TODO(2.0): once deprecation is enforced, this union will + # cast to UTC automatically. + indexes = [x.tz_convert("UTC") for x in indexes] + + result = indexes[0] + + elif len(dtis) > 1: + # If we have mixed timezones, our casting behavior may depend on + # the order of indexes, which we don't want. + sort = False + + # TODO: what about Categorical[dt64]? + # test_frame_from_dict_with_mixed_tzaware_indexes + indexes = [x.astype(object, copy=False) for x in indexes] + result = indexes[0] + + for other in indexes[1:]: + result = result.union(other, sort=None if sort else False) + return result - if hasattr(result, "union_many"): - # DatetimeIndex - return result.union_many(indexes[1:]) - else: - for other in indexes[1:]: - result = result.union(other, sort=None if sort else False) - return result elif kind == "array": index = indexes[0] if not all(index.equals(other) for other in indexes[1:]): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 187fa2f3aa1aa..5a3da9b65d03b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -387,6 +387,13 @@ def union_many(self, others): """ A bit of a hack to accelerate unioning a collection of indexes. """ + warnings.warn( + "DatetimeIndex.union_many is deprecated and will be removed in " + "a future version. 
Use obj.union instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + this = self for other in others: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b7d553707a91b..8b677bde2f7e2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2666,6 +2666,50 @@ def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): exp = pd.period_range("1/1/1980", "1/1/2012", freq="M") tm.assert_index_equal(df.index, exp) + def test_frame_from_dict_with_mixed_tzaware_indexes(self): + # GH#44091 + dti = date_range("2016-01-01", periods=3) + + ser1 = Series(range(3), index=dti) + ser2 = Series(range(3), index=dti.tz_localize("UTC")) + ser3 = Series(range(3), index=dti.tz_localize("US/Central")) + ser4 = Series(range(3)) + + # no tz-naive, but we do have mixed tzs and a non-DTI + df1 = DataFrame({"A": ser2, "B": ser3, "C": ser4}) + exp_index = Index( + list(ser2.index) + list(ser3.index) + list(ser4.index), dtype=object + ) + tm.assert_index_equal(df1.index, exp_index) + + df2 = DataFrame({"A": ser2, "C": ser4, "B": ser3}) + exp_index3 = Index( + list(ser2.index) + list(ser4.index) + list(ser3.index), dtype=object + ) + tm.assert_index_equal(df2.index, exp_index3) + + df3 = DataFrame({"B": ser3, "A": ser2, "C": ser4}) + exp_index3 = Index( + list(ser3.index) + list(ser2.index) + list(ser4.index), dtype=object + ) + tm.assert_index_equal(df3.index, exp_index3) + + df4 = DataFrame({"C": ser4, "B": ser3, "A": ser2}) + exp_index4 = Index( + list(ser4.index) + list(ser3.index) + list(ser2.index), dtype=object + ) + tm.assert_index_equal(df4.index, exp_index4) + + # TODO: not clear if raising here is desired (no extant tests), + # but this is de facto behavior 2021-12-22 + msg = "Cannot join tz-naive with tz-aware DatetimeIndex" + with pytest.raises(TypeError, match=msg): + DataFrame({"A": ser2, "B": ser3, "C": ser4, "D": ser1}) + with pytest.raises(TypeError, 
match=msg): + DataFrame({"A": ser2, "B": ser3, "D": ser1}) + with pytest.raises(TypeError, match=msg): + DataFrame({"D": ser1, "A": ser2, "B": ser3}) + class TestDataFrameConstructorWithDtypeCoercion: def test_floating_values_integer_dtype(self): @@ -2911,7 +2955,7 @@ def test_construction_from_ndarray_with_eadtype_mismatched_columns(self): DataFrame(arr2, columns=["foo", "bar"]) -def get1(obj): +def get1(obj): # TODO: make a helper in tm? if isinstance(obj, Series): return obj.iloc[0] else: diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index ae4ed04f8adac..7c2b3b7f4482d 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -26,6 +26,13 @@ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) +def test_union_many_deprecated(): + dti = date_range("2016-01-01", periods=3) + + with tm.assert_produces_warning(FutureWarning): + dti.union_many([dti, dti]) + + class TestDatetimeIndexSetOps: tz = [ None,