diff --git a/pandas/tests/indexes/datetimelike_/test_value_counts.py b/pandas/tests/indexes/datetimelike_/test_value_counts.py new file mode 100644 index 0000000000000..f0df6dd678ef5 --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_value_counts.py @@ -0,0 +1,103 @@ +import numpy as np + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestValueCounts: + # GH#7735 + + def test_value_counts_unique_datetimeindex(self, tz_naive_fixture): + tz = tz_naive_fixture + orig = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_timedeltaindex(self): + orig = timedelta_range("1 days 09:00:00", freq="H", periods=10) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_periodindex(self): + orig = period_range("2011-01-01 09:00", freq="H", periods=10) + self._check_value_counts_with_repeats(orig) + + def _check_value_counts_with_repeats(self, orig): + # create repeated values, 'n'th element is repeated by n+1 times + idx = type(orig)( + np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype + ) + + exp_idx = orig[::-1] + if not isinstance(exp_idx, PeriodIndex): + exp_idx = exp_idx._with_freq(None) + expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + tm.assert_index_equal(idx.unique(), orig) + + def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture): + tz = tz_naive_fixture + idx = DatetimeIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + tz=tz, + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_timedeltaindex2(self): + idx = TimedeltaIndex( + [ + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 08:00:00", + "1 days 08:00:00", + NaT, + ] + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_periodindex2(self): + idx = PeriodIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + freq="H", + ) + self._check_value_counts_dropna(idx) + + def _check_value_counts_dropna(self, idx): + exp_idx = idx[[2, 3]] + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = idx[[2, 3, -1]] + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 49288af89ee22..7df94b5820e5d 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,12 +1,10 @@ from datetime import datetime from dateutil.tz import tzlocal -import numpy as np import pytest from pandas.compat import IS64 -import pandas as pd from pandas import ( DateOffset, DatetimeIndex, @@ -69,51 +67,6 @@ def test_resolution(self, request, tz_naive_fixture, freq, expected): idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) assert idx.resolution == expected - def test_value_counts_unique(self, tz_naive_fixture): - tz = tz_naive_fixture - # GH 7735 - idx = date_range("2011-01-01 09:00", freq="H", periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) - - exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) - expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") - expected.index = expected.index._with_freq(None) - - for obj in [idx, Series(idx)]: - - tm.assert_series_equal(obj.value_counts(), expected) - - expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) - expected = expected._with_freq(None) - tm.assert_index_equal(idx.unique(), expected) - - idx = DatetimeIndex( - [ - "2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 08:00", - "2013-01-01 08:00", - pd.NaT, - ], - tz=tz, - ) - - exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - def test_infer_freq(self, freq_sample): # GH 11018 idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 4ca98f6bbcb75..9ebe44fb16c8d 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,12 +1,6 @@ -import numpy as np import pytest import pandas as pd -from pandas import ( - NaT, - PeriodIndex, - Series, -) import pandas._testing as tm @@ -29,61 +23,6 @@ def test_resolution(self, freq, expected): idx = pd.period_range(start="2013-04-01", periods=30, freq=freq) assert idx.resolution == expected - def test_value_counts_unique(self): - # GH 7735 - idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H") - - exp_idx = PeriodIndex( - [ - "2011-01-01 18:00", - "2011-01-01 17:00", - "2011-01-01 16:00", - "2011-01-01 15:00", - "2011-01-01 14:00", - "2011-01-01 13:00", - "2011-01-01 12:00", - "2011-01-01 11:00", - "2011-01-01 10:00", - "2011-01-01 09:00", - ], - freq="H", - ) - expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10) - tm.assert_index_equal(idx.unique(), expected) - - idx = PeriodIndex( - [ - "2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 08:00", - "2013-01-01 08:00", - NaT, - ], - freq="H", - ) - - exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H") - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H") - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - def test_freq_setter_deprecated(self): # GH 20678 idx = pd.period_range("2018Q1", periods=4, freq="Q") diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 8bb86057e7084..2a5051b2982bb 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -2,7 +2,6 @@ import pytest from pandas import ( - NaT, Series, TimedeltaIndex, timedelta_range, @@ -17,50 +16,6 @@ class TestTimedeltaIndexOps: - def test_value_counts_unique(self): - # GH 7735 - idx = timedelta_range("1 days 09:00:00", freq="H", periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) - - exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10) - exp_idx = exp_idx._with_freq(None) - expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") - - obj = idx - tm.assert_series_equal(obj.value_counts(), expected) - - obj = Series(idx) - tm.assert_series_equal(obj.value_counts(), expected) - - expected = timedelta_range("1 days 09:00:00", freq="H", periods=10) - tm.assert_index_equal(idx.unique(), expected) - - idx = TimedeltaIndex( - [ - "1 days 09:00:00", - "1 days 09:00:00", - "1 days 09:00:00", - "1 days 08:00:00", - "1 days 08:00:00", - NaT, - ] - ) - - exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"]) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", NaT]) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - def test_nonunique_contains(self): # GH 9512 for idx in map(