diff --git a/doc/source/api.rst b/doc/source/api.rst index 76e03ce70342f..731cf3d136a8a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -509,6 +509,7 @@ These can be accessed like ``Series.dt.``. Series.dt.tz_localize Series.dt.tz_convert Series.dt.normalize + Series.dt.strftime **Timedelta Properties** diff --git a/doc/source/basics.rst b/doc/source/basics.rst index aae931a4b8319..eb71a8845a6df 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1248,13 +1248,13 @@ For instance, ~~~~~~~~~~~~ ``Series`` has an accessor to succinctly return datetime like properties for the -*values* of the Series, if its a datetime/period like Series. +*values* of the Series, if it is a datetime/period like Series. This will return a Series, indexed like the existing Series. .. ipython:: python # datetime - s = pd.Series(pd.date_range('20130101 09:10:12',periods=4)) + s = pd.Series(pd.date_range('20130101 09:10:12', periods=4)) s s.dt.hour s.dt.second @@ -1280,12 +1280,29 @@ You can also chain these types of operations: s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') +You can also format datetime values as strings with :meth:`Series.dt.strftime` which +supports the same format as the standard :meth:`~datetime.datetime.strftime`. + +.. ipython:: python + + # DatetimeIndex + s = pd.Series(pd.date_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + +.. ipython:: python + + # PeriodIndex + s = pd.Series(pd.period_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + The ``.dt`` accessor works for period and timedelta dtypes. .. ipython:: python # period - s = pd.Series(pd.period_range('20130101', periods=4,freq='D')) + s = pd.Series(pd.period_range('20130101', periods=4, freq='D')) s s.dt.year s.dt.day @@ -1293,7 +1310,7 @@ The ``.dt`` accessor works for period and timedelta dtypes. .. 
ipython:: python # timedelta - s = pd.Series(pd.timedelta_range('1 day 00:00:05',periods=4,freq='s')) + s = pd.Series(pd.timedelta_range('1 day 00:00:05', periods=4, freq='s')) s s.dt.days s.dt.seconds diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index fe5e7371bddf6..c19aaaf16a5a5 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -18,6 +18,7 @@ Highlights include: previously this would return the original input, see :ref:`here ` - The default for ``dropna`` in ``HDFStore`` has changed to ``False``, to store by default all rows even if they are all ``NaN``, see :ref:`here ` + - Support .strftime for datetime-likes, see :ref:`here <whatsnew_0170.strftime>` - Development installed versions of pandas will now have ``PEP440`` compliant version strings (:issue:`9518`) Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -60,6 +61,29 @@ Releasing of the GIL could benefit an application that uses threads for user int .. _dask: https://dask.readthedocs.org/en/latest/ .. _QT: https://wiki.python.org/moin/PyQt +.. _whatsnew_0170.strftime: + +Support strftime for Datetimelikes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We are now supporting a ``.strftime`` method for datetime-likes (:issue:`10110`). Examples: + + .. ipython:: python + + # DatetimeIndex + s = pd.Series(pd.date_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + + .. ipython:: python + + # PeriodIndex + s = pd.Series(pd.period_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + +The format string follows the same conventions as the Python standard library; details can be found `here <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`_. + ..
_whatsnew_0170.enhancements.other: Other enhancements diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 4beba4ee3751c..59baf810fffc5 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -83,9 +83,10 @@ def test_dt_namespace_accessor(self): ok_for_base = ['year','month','day','hour','minute','second','weekofyear','week','dayofweek','weekday','dayofyear','quarter','freq','days_in_month','daysinmonth'] ok_for_period = ok_for_base + ['qyear'] + ok_for_period_methods = ['strftime'] ok_for_dt = ok_for_base + ['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 'tz'] - ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize'] + ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert', 'normalize', 'strftime'] ok_for_td = ['days','seconds','microseconds','nanoseconds'] ok_for_td_methods = ['components','to_pytimedelta'] @@ -111,13 +112,12 @@ def compare(s, name): Series(date_range('20130101',periods=5,freq='s')), Series(date_range('20130101 00:00:00',periods=5,freq='ms'))]: for prop in ok_for_dt: - # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: - getattr(s.dt,prop) + getattr(s.dt, prop) result = s.dt.to_pydatetime() self.assertIsInstance(result,np.ndarray) @@ -142,13 +142,12 @@ def compare(s, name): Series(timedelta_range('1 day 01:23:45',periods=5,freq='s')), Series(timedelta_range('2 days 01:23:45.012345',periods=5,freq='ms'))]: for prop in ok_for_td: - # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_td_methods: - getattr(s.dt,prop) + getattr(s.dt, prop) result = s.dt.components self.assertIsInstance(result,DataFrame) @@ -171,13 +170,14 @@ def compare(s, name): # periodindex for s in [Series(period_range('20130101',periods=5,freq='D'))]: - for prop in ok_for_period: - # we test freq below if prop != 'freq': 
compare(s, prop) + for prop in ok_for_period_methods: + getattr(s.dt, prop) + freq_result = s.dt.freq self.assertEqual(freq_result, PeriodIndex(s.values).freq) @@ -192,7 +192,7 @@ def get_dir(s): s = Series(period_range('20130101',periods=5,freq='D').asobject) results = get_dir(s) - tm.assert_almost_equal(results,list(sorted(set(ok_for_period)))) + tm.assert_almost_equal(results, list(sorted(set(ok_for_period + ok_for_period_methods)))) # no setting allowed s = Series(date_range('20130101',periods=5,freq='D')) @@ -205,6 +205,63 @@ def f(): s.dt.hour[0] = 5 self.assertRaises(com.SettingWithCopyError, f) + def test_strftime(self): + # GH 10086 + s = Series(date_range('20130101', periods=5)) + result = s.dt.strftime('%Y/%m/%d') + expected = Series(['2013/01/01', '2013/01/02', '2013/01/03', '2013/01/04', '2013/01/05']) + tm.assert_series_equal(result, expected) + + s = Series(date_range('2015-02-03 11:22:33.4567', periods=5)) + result = s.dt.strftime('%Y/%m/%d %H-%M-%S') + expected = Series(['2015/02/03 11-22-33', '2015/02/04 11-22-33', '2015/02/05 11-22-33', + '2015/02/06 11-22-33', '2015/02/07 11-22-33']) + tm.assert_series_equal(result, expected) + + s = Series(period_range('20130101', periods=5)) + result = s.dt.strftime('%Y/%m/%d') + expected = Series(['2013/01/01', '2013/01/02', '2013/01/03', '2013/01/04', '2013/01/05']) + tm.assert_series_equal(result, expected) + + s = Series(period_range('2015-02-03 11:22:33.4567', periods=5, freq='s')) + result = s.dt.strftime('%Y/%m/%d %H-%M-%S') + expected = Series(['2015/02/03 11-22-33', '2015/02/03 11-22-34', '2015/02/03 11-22-35', + '2015/02/03 11-22-36', '2015/02/03 11-22-37']) + tm.assert_series_equal(result, expected) + + s = Series(date_range('20130101', periods=5)) + s.iloc[0] = pd.NaT + result = s.dt.strftime('%Y/%m/%d') + expected = Series(['NaT', '2013/01/02', '2013/01/03', '2013/01/04', '2013/01/05']) + tm.assert_series_equal(result, expected) + + datetime_index = date_range('20150301', periods=5) + result =
datetime_index.strftime("%Y/%m/%d") + expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', '2015/03/05'], dtype=object) + self.assert_numpy_array_equal(result, expected) + + period_index = period_range('20150301', periods=5) + result = period_index.strftime("%Y/%m/%d") + expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', '2015/03/05'], dtype=object) + self.assert_numpy_array_equal(result, expected) + + s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) + result = s.dt.strftime('%Y-%m-%d %H:%M:%S') + expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"]) + tm.assert_series_equal(result, expected) + + s = Series(period_range('20130101', periods=4, freq='H')) + result = s.dt.strftime('%Y/%m/%d %H:%M:%S') + expected = Series(["2013/01/01 00:00:00", "2013/01/01 01:00:00", + "2013/01/01 02:00:00", "2013/01/01 03:00:00"]) + tm.assert_series_equal(result, expected) + + s = Series(period_range('20130101', periods=4, freq='L')) + result = s.dt.strftime('%Y/%m/%d %H:%M:%S.%l') + expected = Series(["2013/01/01 00:00:00.000", "2013/01/01 00:00:00.001", + "2013/01/01 00:00:00.002", "2013/01/01 00:00:00.003"]) + tm.assert_series_equal(result, expected) + def test_valid_dt_with_missing_values(self): from datetime import date, time diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index ae869ce9bd794..b3d10a80e0b50 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -17,6 +17,29 @@ import pandas.algos as _algos + +class DatelikeOps(object): + """ common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex """ + + def strftime(self, date_format): + """ + Return an array of formatted strings specified by date_format, which + supports the same string format as the python standard library. Details + of the string format can be found in the `python string format doc + <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`__ + + Parameters + ---------- + date_format : str + date format string (e.g.
"%Y-%m-%d") + + Returns + ------- + ndarray of formatted strings + """ + return np.asarray(self.format(date_format=date_format)) + + class DatetimeIndexOpsMixin(object): """ common ops mixin to support a unified inteface datetimelike Index """ diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index c273906ef3d05..a4d5939d386ae 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -125,7 +125,7 @@ def to_pydatetime(self): accessors=DatetimeIndex._datetimelike_ops, typ='property') DatetimeProperties._add_delegate_accessors(delegate=DatetimeIndex, - accessors=["to_period","tz_localize","tz_convert","normalize"], + accessors=["to_period","tz_localize","tz_convert","normalize","strftime"], typ='method') class TimedeltaProperties(Properties): @@ -181,6 +181,9 @@ class PeriodProperties(Properties): PeriodProperties._add_delegate_accessors(delegate=PeriodIndex, accessors=PeriodIndex._datetimelike_ops, typ='property') +PeriodProperties._add_delegate_accessors(delegate=PeriodIndex, + accessors=["strftime"], + typ='method') class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index ec60edb6a78d6..8ee6a1bc64e4e 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -13,7 +13,7 @@ from pandas.tseries.frequencies import ( to_offset, get_period_alias, Resolution) -from pandas.tseries.base import DatetimeIndexOpsMixin +from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date from pandas.util.decorators import cache_readonly, deprecate_kwarg @@ -117,7 +117,7 @@ def _new_DatetimeIndex(cls, d): result.tz = tz return result -class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index): +class DatetimeIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray of datetime64 data, represented 
internally as int64, and which can be boxed to Timestamp objects that are subclasses of datetime and diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 242d9a7757556..6413ce9cd5a03 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -4,7 +4,7 @@ import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries.index import DatetimeIndex, Int64Index, Index -from pandas.tseries.base import DatetimeIndexOpsMixin +from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin from pandas.tseries.tools import parse_time_string import pandas.tseries.offsets as offsets @@ -92,7 +92,7 @@ def wrapper(self, other): return wrapper -class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): +class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in time such as particular years, quarters, months, etc. A value of 1 is the @@ -737,14 +737,18 @@ def __getitem__(self, key): return PeriodIndex(result, name=self.name, freq=self.freq) - def _format_native_types(self, na_rep=u('NaT'), **kwargs): + def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): values = np.array(list(self), dtype=object) mask = isnull(self.values) values[mask] = na_rep - imask = ~mask - values[imask] = np.array([u('%s') % dt for dt in values[imask]]) + + if date_format: + formatter = lambda dt: dt.strftime(date_format) + else: + formatter = lambda dt: u('%s') % dt + values[imask] = np.array([formatter(dt) for dt in values[imask]]) return values def __array_finalize__(self, obj): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 26acbb2073ab8..2bf763b023bef 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -2365,7 +2365,7 @@ def test_map(self): f = lambda x: x.strftime('%Y%m%d') result = rng.map(f) exp = 
[f(x) for x in rng] - self.assert_numpy_array_equal(result, exp) + tm.assert_almost_equal(result, exp) def test_iteration_preserves_tz(self):